annotate bin/align.pm @ 61:9185ca0a7b43 draft

Updated package according to recommendations.
author pierre.pouchin
date Wed, 16 Jan 2019 08:18:13 -0500
parents 9645d995fb3c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
61
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
1 package align;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
2
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
3 use strict;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
4 use warnings;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
5 use File::Basename;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
6 use String::Random;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
7
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
8 use FindBin;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
9 use lib $FindBin::Bin;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
10 use Rcall qw ( histogram );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
11
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
12 use Exporter;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
13 our @ISA = qw( Exporter );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
14 our @EXPORT = qw( &rpms_rpkm_te &BWA_call &to_build &get_unique &sam_sorted_bam &get_hash_alignment &sam_to_bam_bg &sam_count &sam_count_mis &rpms_rpkm &get_fastq_seq &extract_sam );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
15
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
16 sub to_build
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
17 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
18 my ( $toBuildTabP, $log, $newdir ) = @_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
19
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
20 foreach my $pairs ( @{ $toBuildTabP } )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
21 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
22 if ( $pairs->[0] == 1 )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
23 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
24 my $sym = $newdir.basename(${$pairs->[1]}).'_symlink.fa';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
25 symlink( ${$pairs->[1]}, $sym );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
26 ${$pairs->[1]} = $sym;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
27 build_index ( ${$pairs->[1]}, $log );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
28 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
29 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
30 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
31
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
32 sub build_index
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
33 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
34 my $to_index = shift;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
35 my $log = shift;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
36 my $index_log = $to_index.'_index.err';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
37 `bwa index '$to_index' 2> '$index_log'`;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
38 print $log "Creating index for $to_index\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
39 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
40
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
41 sub get_unique
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
42 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
43 my ( $sam, $s_uni, $out_prefix, $col_prefix, $details, $report ) = @_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
44
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
45 my $fout = $col_prefix.'_all_mappers.fastq';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
46 my $funi = $col_prefix.'_unique_mappers.fastq';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
47 my $frej = $col_prefix.'_unmapped.fastq';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
48
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
49 my $repartition = $out_prefix.'distribution.txt';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
50 my $png_rep = $out_prefix.'distribution.png';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
51 my ( %duplicates, %genome_hits) ;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
52
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
53 #alignement to the first reference
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
54 my @return = sam_parse( $sam, $fout, $funi, $frej, $s_uni, \%duplicates, \%genome_hits, $report );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
55 my $ref_fai = $return[4];
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
56 my $mappers = $return[5];
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
57 my $mappers_uni = $return[6];
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
58 my $size_mappedHashR = $return[7];
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
59
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
60 if ( $details == 1 )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
61 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
62 #print number of duplicates and hits number
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
63 my ($pourcentage, $total) =(0,0);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
64
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
65 $total += $_ foreach values %{$size_mappedHashR};
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
66 open (my $rep, '>'.$repartition) || die "cannot create $repartition $!\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
67 print $rep "size\tnumber\tpercentage\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
68 foreach my $k (sort{$a cmp $b} keys (%{$size_mappedHashR}))
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
69 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
70 $pourcentage = 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
71 $pourcentage = $size_mappedHashR->{$k} / $total * 100 unless $total ==0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
72
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
73 print $rep "$k\t$size_mappedHashR->{$k}\t";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
74 printf $rep "%.2f\n",$pourcentage;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
75 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
76
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
77 histogram($size_mappedHashR, $png_rep, $total);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
78
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
79
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
80 my $dup = $out_prefix.'dup_mapnum.txt';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
81 my $dup_u = $out_prefix .'dup_unique.txt';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
82 my $dup_r = $out_prefix .'dup_nonmapp.txt';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
83 open(my $tab,">".$dup) || die "cannot open output txt file\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
84 open(my $tab_r,">".$dup_r) || die "cannot open output txt file\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
85 open(my $tab_u,">".$dup_u) || die "cannot open output txt file\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
86 print $tab "sequence\tcount\tmapnum\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
87 print $tab_u "sequence\tcount\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
88 print $tab_r "sequence\tcount\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
89 foreach my $k (sort {$duplicates{$b} <=> $duplicates{$a}}keys %duplicates)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
90 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
91 $duplicates{$k} = 0 unless exists($duplicates{$k});
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
92 $genome_hits{$k} = 0 unless exists($genome_hits{$k});
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
93 if ($genome_hits{$k} != 0) { print $tab $k."\t".$duplicates{$k}."\t".$genome_hits{$k}."\n"; }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
94 else {print $tab_r $k."\t".$duplicates{$k}."\n";}
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
95 if ($genome_hits{$k} == 1) { print $tab_u $k."\t".$duplicates{$k}."\n"; }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
96 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
97 close $dup; close $dup_r; close $dup_u;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
98 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
99 return ( $ref_fai, $mappers, $mappers_uni );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
100 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
101
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
102 sub sam_parse
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
103 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
104 my ( $sam, $fastq_accepted, $fastq_accepted_unique, $fastq_rejected, $sam_unique, $duplicate_hashR, $best_hit_number_hashR, $report ) = @_ ;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
105 my ($reads, $mappers, $mappersUnique, @garbage, %size_num, %size_num_spe, %number, %numberSens, %numberReverse, %unique_number, %numberNM, %numberM, %size);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
106 $mappers = $mappersUnique = $reads = 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
107
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
108 open my $fic, '<', $sam || die "cannot open $sam $!\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
109 open my $accepted, '>', $fastq_accepted || die "cannot create $fastq_accepted $! \n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
110 open my $unique, '>', $fastq_accepted_unique || die "cannot create $fastq_accepted_unique $! \n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
111 open my $rejected, '>', $fastq_rejected || die "cannot create $fastq_rejected $! \n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
112 open my $sam_uni, '>', $sam_unique || die "cannot create $sam_unique $! \n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
113 my $sequence = '';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
114 while(<$fic>)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
115 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
116 chomp $_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
117 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
118 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
119 if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
120 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
121 $size{$1} = $2;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
122 $unique_number{$1} = 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
123 $number{$1} = 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
124 $numberNM{$1} = 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
125 $numberM{$1} = 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
126 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
127 print $sam_uni $_."\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
128 next;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
129 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
130 $reads++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
131 my @line = split (/\t/,$_);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
132 $sequence = $line[9];
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
133 if ($line[1] & 16)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
134 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
135 $sequence =reverse($sequence);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
136 $sequence =~ tr/atgcuATGCU/tacgaTACGA/;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
137 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
138 if ($line[1] == 16 || $line[1] == 0)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
139 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
140 my $len = length($sequence);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
141 $size_num{$len} ++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
142 $size_num_spe{$line[2]}{$len}++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
143 $mappers ++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
144
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
145 ${$best_hit_number_hashR}{$sequence} = $1 if ($line[13] =~ /X0:i:(\d*)/ || $line[14] =~/X0:i:(\d*)/ );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
146 ${$duplicate_hashR}{$sequence}++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
147 $number{$line[2]}++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
148 $numberSens{$line[2]}++ if $line[1] == 0 ;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
149 $numberReverse{$line[2]}++ if $line[1] == 16 ;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
150 print $accepted "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
151
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
152 if ($line[11] eq "XT:A:U")
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
153 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
154 $unique_number{$line[2]}++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
155 $mappersUnique++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
156 print $unique "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
157 print $sam_uni $_."\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
158 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
159 if ($_ =~ /.*XM:i:(\d+).*/)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
160 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
161 if ($1 == 0){$numberNM{$line[2]}++;}else{$numberM{$line[2]}++;}
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
162 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
163 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
164 else
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
165 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
166 ${$best_hit_number_hashR}{$sequence} = 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
167 ${$duplicate_hashR}{$sequence}++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
168 print $rejected "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
169 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
170 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
171 close $fic; close $accepted; close $unique; close $rejected; close $sam_uni;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
172
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
173 print $report "Parsing $sam file\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
174 print $report "\treads: $reads\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
175 print $report "\tmappers: $mappers\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
176 print $report "\tunique mappers: $mappersUnique\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
177 print $report "-----------------------------\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
178 return (\%number, \%unique_number, \%numberSens, \%numberReverse, \%size, $mappers, $mappersUnique, \%size_num, \%size_num_spe, \%numberNM, \%numberM );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
179 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
180
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
181 sub get_hash_alignment
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
182 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
183 my ($index, $mismatches, $accept, $reject, $outA, $outR, $fastq, $number_of_cpus, $name, $sam, $report, $fai_f) = @_ ;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
184 my ($reads, $mappers, $unmapped) = (0,0,0);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
185 my $accep_unique;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
186 BWA_call ( $index, $fastq, $sam, $mismatches, $number_of_cpus, $report );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
187
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
188 open my $fic, '<', $sam || die "cannot open $sam $!\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
189 open my $accepted, '>', $outA || die "cannot open $outA\n" if $accept == 1;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
190 open my $rejected, '>', $outR || die "cannot open $outR\n" if $reject == 1;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
191 open my $fai, '>', $fai_f || die "cannot open $fai_f\n" if $fai_f;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
192 #if ($name eq "snRNAs") {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
193 # open ( $accep_unique, ">".$1."-unique.fastq") if $outR =~ /(.*)\.fastq/;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
194 #}
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
195 my $sequence = '';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
196 while(<$fic>)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
197 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
198 chomp $_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
199 if( $_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
200 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
201 if ($fai_f && $_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
202 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
203 print $fai $1."\t".$2."\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
204 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
205 next;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
206 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
207 $reads++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
208 my @line = split (/\t/,$_);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
209 $sequence = $line[9];
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
210 if ($line[1] & 16)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
211 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
212 $sequence =reverse($sequence);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
213 $sequence =~ tr/atgcuATGCU/tacgaTACGA/;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
214 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
215 if ($line[1] & 16 || $line[1] == 0)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
216 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
217 $mappers ++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
218 if ($accept == 1 )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
219 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
220 print $accepted "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
221 # print $accep_unique "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n" if ($name eq "snRNAs" && $line[11] eq "XT:A:U");
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
222 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
223 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
224 else
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
225 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
226 print $rejected "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n" if $reject == 1;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
227 $unmapped++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
228 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
229 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
230 # close $accep_unique if ($name eq "bonafide_reads");
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
231 close $fic;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
232 close $accepted if $accept == 1;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
233 close $rejected if $reject ==1;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
234 close $fai if $fai_f;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
235 print $report "\treads: $reads\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
236 print $report "\tmappers: $mappers\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
237 print $report "\tunmapped: $unmapped\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
238 print $report "-----------------------------\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
239 return ($mappers, $unmapped);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
240 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
241
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
242 sub sam_count
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
243 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
244 my $sam = shift;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
245 my ( %number, %size );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
246
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
247 open my $fic, '<', $sam || die "cannot open $sam file $!\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
248 while(<$fic>)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
249 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
250 chomp $_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
251 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
252 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
253 if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
254 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
255 $size{$1} = $2;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
256 $number{$1} = 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
257 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
258 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
259 else
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
260 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
261 my @line = split (/\t/,$_);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
262 if ( $line[1] & 16 || $line[1] == 0 )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
263 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
264 $number{$line[2]}++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
265 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
266 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
267 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
268 close $fic;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
269 return ( \%number, \%size );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
270 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
271
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
272 sub sam_count_mis
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
273 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
274
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
275 my $sam = shift;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
276 my ( %number, %numberNM, %numberM, %size);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
277
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
278 open my $fic, '<', $sam || die "cannot open $sam file $!\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
279 while(<$fic>)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
280 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
281 chomp $_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
282 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
283 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
284 if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
285 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
286 $size{$1} = $2;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
287 $number{$1} = [0,0,0,0,0,0,0];
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
288 $numberNM{$1} = [0,0,0,0,0,0,0];
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
289 $numberM{$1} = [0,0,0,0,0,0,0];
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
290 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
291 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
292 else
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
293 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
294 my @line = split (/\t/,$_);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
295 my @seq = split //, $line[9];
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
296 if ( $line[1] == 16 || $line[1] == 0 )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
297 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
298 $number{ $line[2] }->[0]++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
299 if ($line[1] == 0)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
300 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
301 $number{$line[2]}->[1]++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
302 $number{$line[2]}->[3]++ if $seq[0] eq 'T';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
303 $number{$line[2]}->[5]++ if $seq[9] eq 'A';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
304 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
305 else
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
306 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
307 $number{$line[2]}->[2]++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
308 $number{$line[2]}->[4]++ if $seq[9] eq 'A';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
309 $number{$line[2]}->[6]++ if $seq[0] eq 'T';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
310 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
311 if ($_ =~ /.*XM:i:(\d+).*/)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
312 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
313 if ( $1 == 0 )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
314 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
315 $numberNM{$line[2]}->[0]++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
316 if ($line[1] == 0)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
317 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
318 $numberNM{$line[2]}->[1]++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
319 $numberNM{$line[2]}->[3]++ if $seq[0] eq 'T';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
320 $numberNM{$line[2]}->[5]++ if $seq[9] eq 'A';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
321 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
322 else
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
323 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
324 $numberNM{$line[2]}->[2]++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
325 $numberNM{$line[2]}->[4]++ if $seq[9] eq 'A';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
326 $numberNM{$line[2]}->[6]++ if $seq[0] eq 'T';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
327 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
328 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
329 else
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
330 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
331 $numberM{$line[2]}->[0]++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
332 if ($line[1] == 0)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
333 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
334 $numberM{$line[2]}->[1]++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
335 $numberM{$line[2]}->[3]++ if $seq[0] eq 'T';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
336 $numberM{$line[2]}->[5]++ if $seq[9] eq 'A';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
337 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
338 else
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
339 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
340 $numberM{$line[2]}->[2]++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
341 $numberM{$line[2]}->[4]++ if $seq[9] eq 'A';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
342 $numberM{$line[2]}->[6]++ if $seq[0] eq 'T';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
343 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
344 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
345 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
346 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
347 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
348 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
349 return (\%number, \%size, \%numberNM, \%numberM );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
350 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
351
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
352 sub rpms_rpkm_te
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
353 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
354 my ( $counthashR, $sizehashR, $mapped, $out_file, $piRNA_number, $miRNA_number, $bonafide_number ) =@_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
355 open(my $out, ">".$out_file) || die "cannot open normalized file $! \n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
356 print $out "ID\treads counts\tRPKM";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
357 print $out "\tper million of piRNAs" if ($piRNA_number != 0);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
358 print $out "\tper million of miRNAs" if ($miRNA_number != 0);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
359 print $out "\tper million of bonafide reads" if ($bonafide_number != 0);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
360 print $out "\tsense reads counts\treverse reads counts";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
361 print $out "\t% of sense 1U\t% of sense 10A\t% of reverse 1U\t% of reverse 10A\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
362 foreach my $k ( sort keys %{$counthashR} )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
363 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
364 my ($rpkm, $pirna, $mirna, $bonafide) = (0,0,0,0);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
365
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
366 $rpkm = ( $counthashR->{$k}->[0] * 1000000000) / ( $sizehashR->{$k} * $mapped) if ( $sizehashR->{$k} * $mapped) != 0 ;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
367 print $out $k."\t".$counthashR->{$k}->[0]."\t"; printf $out "%.2f",$rpkm;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
368
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
369 if ($piRNA_number != 0 )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
370 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
371 $pirna = ( $counthashR->{$k}->[0] * 1000000) / $piRNA_number;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
372 printf $out "\t%.2f",$pirna;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
373 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
374 if ($miRNA_number != 0 )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
375 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
376 $mirna = ( $counthashR->{$k}->[0] * 1000000) / $miRNA_number;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
377 printf $out "\t%.2f",$mirna;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
378 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
379 if ($bonafide_number != 0 )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
380 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
381 $bonafide = ( $counthashR->{$k}->[0] * 1000000) / $bonafide_number;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
382 printf $out "\t%.2f",$bonafide;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
383 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
384
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
385 print $out "\t".$counthashR->{$k}->[1]."\t".$counthashR->{$k}->[2] ;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
386 my $S1U = 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
387 $S1U = $counthashR->{$k}->[3] / $counthashR->{$k}->[1] * 100 if $counthashR->{$k}->[1] != 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
388 my $R1U = 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
389 $R1U = $counthashR->{$k}->[6] / $counthashR->{$k}->[2] * 100 if $counthashR->{$k}->[2] != 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
390 my $S10A = 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
391 $S10A = $counthashR->{$k}->[5] / $counthashR->{$k}->[1] * 100 if $counthashR->{$k}->[1] != 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
392 my $R10A = 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
393 $R10A = $counthashR->{$k}->[4] / $counthashR->{$k}->[2] * 100 if $counthashR->{$k}->[2] != 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
394 print $out "\t".$S1U."\t".$S10A."\t".$R1U."\t".$R10A;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
395
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
396 print $out "\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
397 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
398 close $out;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
399 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
400
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
401
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
402 sub sam_to_bam_bg
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
403 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
404 my ( $sam, $scale, $number_of_cpus ) = @_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
405 my ( $bam_sorted, $bedgraphM, $bedgraphP, $view_err, $sort_err ) = ( '', '', '', '', '' );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
406 if ( $sam =~ /(.*?).sam$/ )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
407 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
408 $bam_sorted = $1.'_sorted.bam';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
409 $bedgraphP= $1.'_plus.bedgraph';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
410 $bedgraphM = $1.'_minus.bedgraph';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
411 $view_err = $1.'_view.err';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
412 $sort_err = $1.'_sort.err';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
413 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
414 `samtools view -Shb --threads $number_of_cpus '$sam' 2> '$view_err' | samtools sort -O BAM --threads $number_of_cpus /dev/stdin 2> '$sort_err' > '$bam_sorted'`;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
415 `bedtools genomecov -scale $scale -strand + -bga -ibam '$bam_sorted' > '$bedgraphP'`;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
416 `bedtools genomecov -scale $scale -strand - -bga -ibam '$bam_sorted' > '$bedgraphM'`;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
417 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
418
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
419 sub sam_sorted_bam
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
420 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
421 my ( $sam, $number_of_cpus ) = @_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
422 my ( $bam_sorted, $view_err, $sort_err ) = ( '', '', '' );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
423 if ( $sam =~ /(.*?).sam$/ )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
424 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
425 $bam_sorted = $1.'_sorted.bam';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
426 $view_err = $1.'_view.err';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
427 $sort_err = $1.'_sort.err';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
428
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
429 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
430 `samtools view -Shb --threads $number_of_cpus '$sam' 2> '$view_err' | samtools sort -O BAM --threads $number_of_cpus /dev/stdin 2> '$sort_err' > '$bam_sorted'`;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
431 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
432
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
433 sub BWA_call
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
434 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
435 my ( $index, $fastq, $sam, $mismatches, $number_of_cpus, $report ) = @_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
436 my ( $aln_err, $samse_err, $seq_num ) = ( $sam.'_aln.err', $sam.'_samse.err', 0 );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
437 print $report "-----------------------------\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
438 print $report "bwa aln -t $number_of_cpus -n $mismatches '$index' '$fastq' 2> '$aln_err' | bwa samse $index /dev/stdin '$fastq' 2> '$samse_err' > '$sam'\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
439 `bwa aln -t $number_of_cpus -n $mismatches '$index' '$fastq' 2> '$aln_err' | bwa samse $index /dev/stdin '$fastq' 2> '$samse_err' > '$sam' `;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
440 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
441
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
442 sub rpms_rpkm
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
443 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
444 my ( $counthashR, $sizehashR, $mapped, $out_file, $piRNA_number, $miRNA_number, $bonafide_number ) =@_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
445 open(my $out, ">".$out_file) || die "cannot open normalized file $! \n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
446 print $out "ID\treads counts\tRPKM";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
447 print $out "\tper million of piRNAs" if ($piRNA_number != 0);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
448 print $out "\tper million of miRNAs" if ($miRNA_number != 0);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
449 print $out "\tper million of bonafide reads" if ($bonafide_number != 0);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
450 print $out "\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
451 foreach my $k ( sort keys %{$counthashR} )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
452 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
453 my ($rpkm, $pirna, $mirna, $bonafide) = (0,0,0,0);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
454
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
455 $rpkm = ( $counthashR->{$k} * 1000000000) / ( $sizehashR->{$k} * $mapped) if ( $sizehashR->{$k} * $mapped) != 0 ;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
456 print $out $k."\t".$counthashR->{$k}."\t"; printf $out "%.2f",$rpkm;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
457
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
458 if ($piRNA_number != 0 )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
459 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
460 $pirna = ( $counthashR->{$k} * 1000000) / $piRNA_number;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
461 printf $out "\t%.2f",$pirna;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
462 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
463 if ($miRNA_number != 0 )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
464 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
465 $mirna = ( $counthashR->{$k} * 1000000) / $miRNA_number;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
466 printf $out "\t%.2f",$mirna;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
467 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
468 if ($bonafide_number != 0 )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
469 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
470 $bonafide = ( $counthashR->{$k} * 1000000) / $bonafide_number;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
471 printf $out "\t%.2f",$bonafide;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
472 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
473 print $out "\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
474 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
475 close $out;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
476 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
477
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
478 sub extract_sam
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
479 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
480 my ( $hashRef, $sam_in, $sam_out, $sam_uni_out, $fastq_out, $fastq_uni_out ) = @_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
481
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
482 open my $s_in, '<', $sam_in || die "cannot open $sam_in file $!\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
483
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
484 open my $f_out, '>', $fastq_out || die "cannot create $fastq_out $!\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
485 open my $f_uni_out, '>', $fastq_uni_out || die "cannot create $fastq_uni_out $!\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
486
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
487 open my $s_out, '>', $sam_out || die "cannot create $sam_out file $!\n" if defined ($hashRef);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
488 open my $s_uni_out, '>', $sam_uni_out || die "cannot create $sam_uni_out file $!\n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
489
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
490 my $sequence = '';
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
491 while(<$s_in>)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
492 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
493 if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
494 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
495 print $s_out $_ if defined ($hashRef);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
496 print $s_uni_out $_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
497 next;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
498 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
499 my @line = split (/\t/,$_);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
500 $sequence = $line[0];
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
501 if ( (! defined ($hashRef) )|| ( exists $hashRef->{$sequence} && $hashRef->{$sequence} == 1 ) )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
502 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
503 my $arn = $line[9];
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
504 if ($line[1] & 16)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
505 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
506 $arn =reverse($arn);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
507 $arn =~ tr/atgcuATGCU/tacgaTACGA/;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
508 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
509
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
510 if ( ( $line[1] == 16 || $line[1] == 0 ) )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
511 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
512 print $f_out "\@".$line[0]."\n".$arn."\n+\n".$line[10]."\n" ;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
513 print $s_out $_ if defined ($hashRef);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
514 if ( $line[11] eq "XT:A:U" )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
515 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
516 print $f_uni_out "\@".$line[0]."\n".$arn."\n+\n".$line[10]."\n" ;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
517 print $s_uni_out $_ ;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
518 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
519 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
520 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
521 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
522 close $s_in; close $s_out if defined ($hashRef);
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
523 close $s_uni_out; close $f_out; close $f_uni_out;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
524 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
525
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
526 sub get_fastq_seq
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
527 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
528 my $fastq = shift;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
529 my %hash; my $cmp = 0;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
530
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
531 open my $fic, '<', $fastq || die "cannot open input file $! \n";
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
532 while(<$fic>)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
533 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
534 chomp $_;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
535 $cmp++;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
536 if ($cmp % 4 == 1)
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
537 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
538 die "file do not contain a @ at line $cmp\n" unless ($_ =~ /^\@/ );
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
539 if ($_ =~ /^\@(.*)\s.*/) { $hash{$1} = 1;}
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
540 elsif ($_ =~ /^\@(.*)/) { $hash{$1} = 1;}
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
541 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
542 elsif ($cmp % 4 == 3 )
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
543 {
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
544 die "file do not contain a + at line $cmp\n" unless $_ =~ /^\+/;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
545 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
546 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
547 close $fic;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
548 return \%hash;
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
549 }
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
550
9185ca0a7b43 Updated package according to recommendations.
pierre.pouchin
parents: 60
diff changeset
551 1;