| 0 | 1 package align; | 
|  | 2 | 
|  | 3 use strict; | 
|  | 4 use warnings; | 
|  | 5 use File::Basename; | 
|  | 6 use String::Random; | 
|  | 7 | 
|  | 8 use FindBin; | 
|  | 9 use lib $FindBin::Bin; | 
|  | 10 use Rcall qw ( histogram ); | 
|  | 11 | 
|  | 12 use Exporter; | 
|  | 13 our @ISA     = qw( Exporter ); | 
|  | 14 our @EXPORT  = qw( &BWA_call &to_build &get_unique &sam_sorted_bam &get_hash_alignment &sam_to_bam_bg &sam_count &sam_count_mis &rpms_rpkm &get_fastq_seq &extract_sam ); | 
|  | 15 | 
|  | 16 sub to_build | 
|  | 17 { | 
| 24 | 18   my ( $toBuildTabP, $log, $newdir ) = @_; | 
| 0 | 19 | 
| 24 | 20   foreach my  $pairs ( @{ $toBuildTabP } ) | 
|  | 21   { | 
|  | 22     if (  $pairs->[0] == 1 ) | 
|  | 23     { | 
|  | 24       my $sym = $newdir.basename(${$pairs->[1]}).'_symlink.fa'; | 
|  | 25       symlink( ${$pairs->[1]}, $sym ); | 
|  | 26       ${$pairs->[1]} = $sym; | 
|  | 27       build_index ( ${$pairs->[1]}, $log ); | 
|  | 28     } | 
|  | 29   } | 
| 0 | 30 } | 
|  | 31 | 
|  | 32 sub build_index | 
|  | 33 { | 
|  | 34 	my $to_index = shift; | 
|  | 35 	my $log = shift; | 
|  | 36 	my $index_log = $to_index.'_index.err'; | 
|  | 37 	`bwa index $to_index 2> $index_log`; | 
|  | 38 	print $log "Creating index for $to_index\n"; | 
|  | 39 } | 
|  | 40 | 
|  | 41 sub get_unique | 
|  | 42 { | 
|  | 43 	my ( $sam, $s_uni, $prefix, $details, $report ) = @_; | 
|  | 44 | 
|  | 45 	my $fout = $prefix.'all.fastq'; | 
|  | 46 	my $funi = $prefix.'unique.fastq'; | 
|  | 47 	my $frej = $prefix.'rejected.fastq'; | 
|  | 48 | 
|  | 49   my $repartition = $prefix.'distribution.txt'; | 
|  | 50 	my $png_rep = $prefix.'distribution.png'; | 
|  | 51 	my ( %duplicates, %genome_hits) ; | 
|  | 52 | 
|  | 53 	#alignement to the first reference | 
|  | 54 	my @return = sam_parse( $sam, $fout, $funi, $frej, $s_uni, \%duplicates, \%genome_hits, $report ); | 
|  | 55 	my $ref_fai = $return[4]; | 
|  | 56 	my $mappers =  $return[5]; | 
|  | 57 	my $mappers_uni = $return[6]; | 
|  | 58 	my $size_mappedHashR = $return[7]; | 
|  | 59 | 
|  | 60 	if ( $details == 1 ) | 
|  | 61 	{ | 
|  | 62 		#print number of duplicates and hits number | 
|  | 63 		my ($pourcentage, $total) =(0,0); | 
|  | 64 | 
|  | 65 		$total += $_ foreach values %{$size_mappedHashR}; | 
|  | 66 		open (my $rep, '>'.$repartition) || die "cannot create $repartition $!\n"; | 
|  | 67 		print $rep "size\tnumber\tpercentage\n"; | 
|  | 68 		foreach my $k (sort{$a cmp $b} keys (%{$size_mappedHashR})) | 
|  | 69 		{ | 
|  | 70 			$pourcentage = 0; | 
|  | 71 			$pourcentage = $size_mappedHashR->{$k} / $total * 100 unless $total ==0; | 
|  | 72 | 
|  | 73 			print $rep "$k\t$size_mappedHashR->{$k}\t"; | 
|  | 74 			printf $rep "%.2f\n",$pourcentage; | 
|  | 75 		} | 
|  | 76 | 
|  | 77 		histogram($size_mappedHashR, $png_rep, $total); | 
|  | 78 | 
|  | 79 | 
|  | 80 		my $dup = $prefix.'dup_mapnum.txt'; | 
|  | 81 		my $dup_u = $prefix .'dup_unique.txt'; | 
|  | 82 		my $dup_r = $prefix .'dup_nonmapp.txt'; | 
|  | 83 		open(my $tab,">".$dup) || die "cannot open output txt file\n"; | 
|  | 84 		open(my $tab_r,">".$dup_r) || die "cannot open output txt file\n"; | 
|  | 85 		open(my $tab_u,">".$dup_u) || die "cannot open output txt file\n"; | 
|  | 86 		print $tab "sequence\tcount\tmapnum\n"; | 
|  | 87 		print $tab_u "sequence\tcount\n"; | 
|  | 88 		print $tab_r "sequence\tcount\n"; | 
|  | 89 		foreach my $k (sort {$duplicates{$b} <=> $duplicates{$a}}keys %duplicates) | 
|  | 90 		{ | 
|  | 91 			$duplicates{$k} = 0 unless exists($duplicates{$k}); | 
|  | 92 			$genome_hits{$k} = 0 unless exists($genome_hits{$k}); | 
|  | 93 			if ($genome_hits{$k} != 0) { print $tab $k."\t".$duplicates{$k}."\t".$genome_hits{$k}."\n"; } | 
|  | 94 			else {print $tab_r $k."\t".$duplicates{$k}."\n";} | 
|  | 95 			if ($genome_hits{$k} == 1) { print $tab_u $k."\t".$duplicates{$k}."\n"; } | 
|  | 96 		} | 
|  | 97 	close $dup; close $dup_r; close $dup_u; | 
|  | 98 	} | 
|  | 99 	return ( $ref_fai, $mappers, $mappers_uni ); | 
|  | 100 } | 
|  | 101 | 
|  | 102 sub sam_parse | 
|  | 103 { | 
|  | 104 	my ( $sam, $fastq_accepted, $fastq_accepted_unique, $fastq_rejected, $sam_unique, $duplicate_hashR, $best_hit_number_hashR, $report ) = @_ ; | 
|  | 105 	my ($reads, $mappers, $mappersUnique, @garbage, %size_num, %size_num_spe, %number, %numberSens, %numberReverse, %unique_number, %numberNM, %numberM, %size); | 
|  | 106 	$mappers = $mappersUnique = $reads = 0; | 
|  | 107 | 
|  | 108 	open my $fic, '<', $sam || die "cannot open $sam $!\n"; | 
|  | 109 	open my $accepted, '>', $fastq_accepted || die "cannot create $fastq_accepted $! \n"; | 
|  | 110 	open my $unique, '>', $fastq_accepted_unique || die "cannot create $fastq_accepted_unique $! \n"; | 
|  | 111 	open my $rejected, '>', $fastq_rejected || die "cannot create $fastq_rejected $! \n"; | 
|  | 112 	open my $sam_uni, '>', $sam_unique || die "cannot create $sam_unique $! \n"; | 
|  | 113 	my $sequence = ''; | 
|  | 114 	while(<$fic>) | 
|  | 115 	{ | 
|  | 116 		chomp $_; | 
|  | 117 		if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) | 
|  | 118 		{ | 
|  | 119 			if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/) | 
|  | 120 			{ | 
|  | 121 				$size{$1} = $2; | 
|  | 122 				$unique_number{$1} = 0; | 
|  | 123 				$number{$1} = 0; | 
|  | 124 				$numberNM{$1} = 0; | 
|  | 125 				$numberM{$1} = 0; | 
|  | 126 			} | 
|  | 127 			print $sam_uni $_."\n"; | 
|  | 128 			next; | 
|  | 129 		} | 
|  | 130 		$reads++; | 
|  | 131 		my @line = split (/\t/,$_); | 
|  | 132 		$sequence = $line[9]; | 
|  | 133 		if ($line[1] & 16) | 
|  | 134 		{ | 
|  | 135 			$sequence =reverse($sequence); | 
|  | 136 			$sequence =~ tr/atgcuATGCU/tacgaTACGA/; | 
|  | 137 		} | 
|  | 138 		if ($line[1] == 16 || $line[1] == 0) | 
|  | 139 		{ | 
|  | 140 			my $len = length($sequence); | 
|  | 141 			$size_num{$len} ++; | 
|  | 142 			$size_num_spe{$line[2]}{$len}++; | 
|  | 143 			$mappers ++; | 
|  | 144 | 
|  | 145 			${$best_hit_number_hashR}{$sequence} = $1  if  ($line[13] =~ /X0:i:(\d*)/ ||  $line[14] =~/X0:i:(\d*)/ ); | 
|  | 146 			${$duplicate_hashR}{$sequence}++; | 
|  | 147 			$number{$line[2]}++; | 
|  | 148 			$numberSens{$line[2]}++ if $line[1] == 0 ; | 
|  | 149 			$numberReverse{$line[2]}++ if $line[1] == 16 ; | 
|  | 150 			print $accepted "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n"; | 
|  | 151 | 
|  | 152 			if ($line[11] eq "XT:A:U") | 
|  | 153 			{ | 
|  | 154 				$unique_number{$line[2]}++; | 
|  | 155 				$mappersUnique++; | 
|  | 156 				print $unique "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n"; | 
|  | 157 				print $sam_uni $_."\n"; | 
|  | 158 			} | 
|  | 159 			if ($_ =~ /.*XM:i:(\d+).*/) | 
|  | 160 			{ | 
|  | 161 				if ($1 == 0){$numberNM{$line[2]}++;}else{$numberM{$line[2]}++;} | 
|  | 162 			} | 
|  | 163 		} | 
|  | 164 		else | 
|  | 165 		{ | 
|  | 166 			${$best_hit_number_hashR}{$sequence} = 0; | 
|  | 167 			${$duplicate_hashR}{$sequence}++; | 
|  | 168 			print $rejected "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n"; | 
|  | 169 		} | 
|  | 170 	} | 
|  | 171 	close $fic; close $accepted; close $unique; close $rejected; close $sam_uni; | 
|  | 172 | 
|  | 173 	print $report "Parsing $sam file\n"; | 
|  | 174   print $report "\treads: $reads\n"; | 
|  | 175 	print $report "\tmappers: $mappers\n"; | 
|  | 176 	print $report "\tunique mappers: $mappersUnique\n"; | 
|  | 177 	print $report "-----------------------------\n"; | 
|  | 178 	return (\%number, \%unique_number, \%numberSens, \%numberReverse, \%size, $mappers, $mappersUnique, \%size_num, \%size_num_spe, \%numberNM, \%numberM ); | 
|  | 179 } | 
|  | 180 | 
|  | 181 sub get_hash_alignment | 
|  | 182 { | 
|  | 183   my ($index, $mismatches, $accept, $reject, $outA, $outR, $fastq, $number_of_cpus, $name, $sam, $report, $fai_f) = @_ ; | 
|  | 184   my ($reads, $mappers, $unmapped) = (0,0,0); | 
|  | 185   my $accep_unique; | 
|  | 186   BWA_call ( $index, $fastq, $sam, $mismatches, $number_of_cpus, $report ); | 
|  | 187 | 
|  | 188   open my $fic, '<', $sam || die "cannot open $sam $!\n"; | 
|  | 189   open my $accepted, '>', $outA || die "cannot open $outA\n"  if $accept == 1; | 
|  | 190   open my $rejected, '>', $outR || die "cannot open $outR\n"  if $reject == 1; | 
|  | 191   open my $fai, '>', $fai_f || die "cannot open $fai_f\n" if $fai_f; | 
|  | 192   #if ($name eq "snRNAs") { | 
|  | 193   #  open ( $accep_unique, ">".$1."-unique.fastq") if $outR =~ /(.*)\.fastq/; | 
|  | 194   #} | 
|  | 195   my $sequence = ''; | 
|  | 196   while(<$fic>) | 
|  | 197   { | 
|  | 198     chomp $_; | 
|  | 199     if( $_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) | 
|  | 200     { | 
|  | 201     	if ($fai_f && $_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/) | 
|  | 202       { | 
|  | 203         print $fai $1."\t".$2."\n"; | 
|  | 204       } | 
|  | 205       next; | 
|  | 206     } | 
|  | 207     $reads++; | 
|  | 208     my @line = split (/\t/,$_); | 
|  | 209     $sequence = $line[9]; | 
|  | 210     if ($line[1] & 16) | 
|  | 211     { | 
|  | 212       $sequence =reverse($sequence); | 
|  | 213       $sequence =~ tr/atgcuATGCU/tacgaTACGA/; | 
|  | 214     } | 
|  | 215     if ($line[1] & 16 || $line[1] == 0) | 
|  | 216     { | 
|  | 217       $mappers ++; | 
|  | 218       if ($accept == 1 ) | 
|  | 219       { | 
|  | 220         print $accepted "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n"; | 
|  | 221    #     print $accep_unique "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n" if ($name eq "snRNAs" && $line[11] eq "XT:A:U"); | 
|  | 222       } | 
|  | 223     } | 
|  | 224     else | 
|  | 225     { | 
|  | 226       print $rejected "\@".$line[0]."\n".$sequence."\n+\n".$line[10]."\n" if $reject == 1; | 
|  | 227       $unmapped++; | 
|  | 228     } | 
|  | 229   } | 
|  | 230  # close $accep_unique if ($name eq "bonafide_reads"); | 
|  | 231   close $fic; | 
|  | 232   close $accepted  if $accept == 1; | 
|  | 233   close $rejected if $reject ==1; | 
|  | 234   close $fai if $fai_f; | 
|  | 235   print $report "\treads: $reads\n"; | 
|  | 236   print $report "\tmappers: $mappers\n"; | 
|  | 237   print $report "\tunmapped: $unmapped\n"; | 
|  | 238   print $report "-----------------------------\n"; | 
|  | 239   return ($mappers, $unmapped); | 
|  | 240 } | 
|  | 241 | 
|  | 242 sub sam_count | 
|  | 243 { | 
|  | 244   my $sam = shift; | 
|  | 245   my ( %number, %size ); | 
|  | 246 | 
|  | 247   open  my $fic, '<', $sam || die "cannot open $sam file $!\n"; | 
|  | 248   while(<$fic>) | 
|  | 249   { | 
|  | 250     chomp $_; | 
|  | 251     if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) | 
|  | 252     { | 
|  | 253       if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/) | 
|  | 254       { | 
|  | 255         $size{$1} = $2; | 
|  | 256         $number{$1} = 0; | 
|  | 257       } | 
|  | 258     } | 
|  | 259     else | 
|  | 260     { | 
|  | 261   		my @line = split (/\t/,$_); | 
|  | 262  	  	if ( $line[1]  & 16 || $line[1] == 0 ) | 
|  | 263  	  	{ | 
|  | 264  	  		$number{$line[2]}++; | 
|  | 265  	  	} | 
|  | 266  	 	} | 
|  | 267 	} | 
|  | 268   close $fic; | 
|  | 269   return ( \%number, \%size ); | 
|  | 270 } | 
|  | 271 | 
|  | 272 sub sam_count_mis | 
|  | 273 { | 
|  | 274 | 
|  | 275 	my $sam = shift; | 
|  | 276 	my ( %number, %numberNM, %numberM, %size); | 
|  | 277 | 
|  | 278 	open  my $fic, '<', $sam || die "cannot open $sam file $!\n"; | 
|  | 279   while(<$fic>) | 
|  | 280   { | 
|  | 281 		chomp $_; | 
|  | 282     if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) | 
|  | 283     { | 
|  | 284       if ($_ =~ /\@SQ\tSN:(.*)\tLN:(\d*)/) | 
|  | 285       { | 
|  | 286 				$size{$1} = $2; | 
|  | 287 				$number{$1} = 0; | 
|  | 288 				$numberNM{$1} = 0; | 
|  | 289 				$numberM{$1} = 0; | 
|  | 290 			} | 
|  | 291 		} | 
|  | 292 		else | 
|  | 293 		{ | 
|  | 294 			my @line = split (/\t/,$_); | 
|  | 295 			if ( $line[1]  & 16 || $line[1] == 0 ) | 
|  | 296 			{ | 
|  | 297 				$number{ $line[2] }++; | 
|  | 298     	 	if ($_ =~ /.*XM:i:(\d+).*/) | 
|  | 299     	 	{ | 
|  | 300         	if ( $1 == 0 ){ $numberNM{$line[2]}++; } else { $numberM{$line[2]}++; } | 
|  | 301       	} | 
|  | 302 			} | 
|  | 303 		} | 
|  | 304 	} | 
|  | 305 	return (\%number, \%size, \%numberNM, \%numberM ); | 
|  | 306 } | 
|  | 307 | 
|  | 308 sub sam_to_bam_bg | 
|  | 309 { | 
|  | 310 	my ( $sam, $scale, $number_of_cpus ) = @_; | 
| 19 | 311 	my ( $bam_sorted, $bedgraphM, $bedgraphP, $view_err, $sort_err ) = ( '', '', '', '', '' ); | 
| 0 | 312 	if ( $sam =~ /(.*?).sam$/ ) | 
|  | 313 	{ | 
|  | 314 		$bam_sorted = $1.'_sorted.bam'; | 
|  | 315 		$bedgraphP= $1.'_plus.bedgraph'; | 
|  | 316 		$bedgraphM = $1.'_minus.bedgraph'; | 
| 19 | 317 		$view_err = $1.'_view.err'; | 
|  | 318 		$sort_err = $1.'_sort.err'; | 
| 0 | 319 	} | 
| 19 | 320 	`samtools view -Shb   --threads $number_of_cpus $sam 2> $view_err | samtools sort  -O BAM --threads $number_of_cpus  /dev/stdin 2> $sort_err  > $bam_sorted`; | 
| 10 | 321 	`bedtools genomecov -scale $scale -strand + -bga -ibam $bam_sorted > $bedgraphP`; | 
|  | 322 	`bedtools genomecov -scale $scale -strand - -bga -ibam $bam_sorted > $bedgraphM`; | 
| 0 | 323 } | 
|  | 324 | 
|  | 325 sub sam_sorted_bam | 
|  | 326 { | 
|  | 327 	my ( $sam, $number_of_cpus ) = @_; | 
| 19 | 328 	my ( $bam_sorted, $view_err, $sort_err ) = ( '', '', '' ); | 
| 0 | 329 	if ( $sam =~ /(.*?).sam$/ ) | 
|  | 330 	{ | 
|  | 331 		$bam_sorted = $1.'_sorted.bam'; | 
| 19 | 332 		$view_err = $1.'_view.err'; | 
|  | 333 		$sort_err = $1.'_sort.err'; | 
|  | 334 | 
| 0 | 335 	} | 
| 19 | 336 	`samtools view -Shb   --threads $number_of_cpus $sam 2> $view_err | samtools sort  -O BAM --threads $number_of_cpus  /dev/stdin  2> $sort_err  > $bam_sorted`; | 
| 0 | 337 } | 
|  | 338 | 
|  | 339 sub BWA_call | 
|  | 340 { | 
|  | 341 	my ( $index, $fastq, $sam, $mismatches, $number_of_cpus, $report ) = @_; | 
|  | 342 	my ( $aln_err, $samse_err, $seq_num ) = ( $sam.'_aln.err', $sam.'_samse.err', 0 ); | 
|  | 343 	print $report "-----------------------------\n"; | 
|  | 344 	print $report "bwa aln -t $number_of_cpus -n $mismatches $index $fastq 2> $aln_err | bwa samse $index /dev/stdin $fastq 2> $samse_err > $sam\n"; | 
|  | 345 	`bwa aln -t $number_of_cpus -n $mismatches $index $fastq 2> $aln_err | bwa samse $index /dev/stdin $fastq 2> $samse_err > $sam `; | 
|  | 346 } | 
|  | 347 | 
|  | 348 sub rpms_rpkm | 
|  | 349 { | 
|  | 350   my ( $counthashR, $sizehashR, $mapped, $out_file, $piRNA_number, $miRNA_number, $bonafide_number ) =@_; | 
|  | 351   open(my $out, ">".$out_file) || die "cannot open normalized file $! \n"; | 
|  | 352   print $out "ID\treads counts\tRPKM"; | 
|  | 353   print $out "\tper million of piRNAs" if ($piRNA_number != 0); | 
|  | 354   print $out "\tper million of miRNAs" if ($miRNA_number != 0); | 
|  | 355   print $out "\tper million of bonafide reads" if ($bonafide_number != 0); | 
|  | 356   print $out "\n"; | 
|  | 357   foreach my $k  ( sort keys %{$counthashR} ) | 
|  | 358   { | 
|  | 359     my ($rpkm, $pirna, $mirna, $bonafide) = (0,0,0,0); | 
|  | 360 | 
|  | 361     $rpkm = ( $counthashR->{$k} * 1000000000) / ( $sizehashR->{$k} * $mapped) if ( $sizehashR->{$k} * $mapped) != 0 ; | 
|  | 362     print $out $k."\t".$counthashR->{$k}."\t"; printf $out "%.2f",$rpkm; | 
|  | 363 | 
|  | 364     if ($piRNA_number != 0 ) | 
|  | 365     { | 
|  | 366       $pirna = ( $counthashR->{$k}  * 1000000) / $piRNA_number; | 
|  | 367       printf $out "\t%.2f",$pirna; | 
|  | 368     } | 
|  | 369     if ($miRNA_number != 0 ) | 
|  | 370     { | 
|  | 371       $mirna = ( $counthashR->{$k}  * 1000000) / $miRNA_number; | 
|  | 372       printf $out "\t%.2f",$mirna; | 
|  | 373     } | 
|  | 374     if ($bonafide_number != 0 ) | 
|  | 375     { | 
|  | 376       $bonafide = ( $counthashR->{$k}  * 1000000) / $bonafide_number; | 
|  | 377       printf $out "\t%.2f",$bonafide; | 
|  | 378     } | 
|  | 379     print $out "\n"; | 
|  | 380  	} | 
|  | 381   close $out; | 
|  | 382 } | 
|  | 383 | 
|  | 384 sub extract_sam | 
|  | 385 { | 
|  | 386   my ( $hashRef, $sam_in, $sam_out, $sam_uni_out, $fastq_out, $fastq_uni_out ) = @_; | 
|  | 387 | 
|  | 388 	open my $s_in, '<', $sam_in || die "cannot open $sam_in file $!\n"; | 
|  | 389 | 
|  | 390   open my $f_out, '>', $fastq_out || die "cannot create $fastq_out $!\n"; | 
|  | 391   open my $f_uni_out, '>', $fastq_uni_out || die "cannot create $fastq_uni_out $!\n"; | 
|  | 392 | 
|  | 393   open my $s_out, '>', $sam_out || die "cannot create $sam_out file $!\n" if defined ($hashRef); | 
|  | 394   open my $s_uni_out, '>', $sam_uni_out || die "cannot create $sam_uni_out file $!\n"; | 
|  | 395 | 
|  | 396   my $sequence = ''; | 
|  | 397   while(<$s_in>) | 
|  | 398   { | 
|  | 399     if ($_ =~ /^\@[A-Za-z][A-Za-z](\t[A-Za-z][A-Za-z0-9]:[ -~]+)+$/ || $_ =~ /^\@CO\t.*/ ) | 
|  | 400     { | 
|  | 401       print $s_out $_ if defined ($hashRef); | 
|  | 402       print $s_uni_out $_; | 
|  | 403       next; | 
|  | 404     } | 
|  | 405     my @line = split (/\t/,$_); | 
|  | 406     $sequence = $line[0]; | 
|  | 407     if ( (! defined ($hashRef) )|| (  exists $hashRef->{$sequence}  &&  $hashRef->{$sequence} == 1 ) ) | 
|  | 408     { | 
|  | 409       my $arn  =  $line[9]; | 
|  | 410       if ($line[1] & 16) | 
|  | 411       { | 
|  | 412         $arn =reverse($arn); | 
|  | 413         $arn =~ tr/atgcuATGCU/tacgaTACGA/; | 
|  | 414       } | 
| 19 | 415 | 
| 0 | 416       if  ( ( $line[1] == 16 || $line[1] == 0 ) ) | 
|  | 417 			{ | 
|  | 418       	print $f_out "\@".$line[0]."\n".$arn."\n+\n".$line[10]."\n" ; | 
|  | 419       	print $s_out $_ if defined ($hashRef); | 
|  | 420       	if ( $line[11] eq "XT:A:U" ) | 
|  | 421       	{ | 
|  | 422       		print $f_uni_out "\@".$line[0]."\n".$arn."\n+\n".$line[10]."\n" ; | 
|  | 423       		print $s_uni_out $_ ; | 
|  | 424       	} | 
|  | 425       } | 
|  | 426     } | 
|  | 427   } | 
|  | 428   close $s_in; close $s_out if defined ($hashRef); | 
|  | 429   close $s_uni_out; close $f_out; close $f_uni_out; | 
|  | 430 } | 
|  | 431 | 
|  | 432 sub get_fastq_seq{ | 
|  | 433   my $fastq = shift; | 
|  | 434   my %hash; my $cmp = 0; | 
|  | 435 | 
|  | 436   open my $fic, '<', $fastq || die "cannot open input file $! \n"; | 
|  | 437   while(<$fic>) | 
|  | 438   { | 
|  | 439     chomp $_; | 
|  | 440     $cmp++; | 
|  | 441     if ($cmp % 4 == 1) | 
|  | 442     { | 
|  | 443       die "file do not contain a @ at line $cmp\n" unless ($_ =~ /^\@/ ); | 
|  | 444       if ($_ =~ /^\@(.*)\s.*/) { $hash{$1} = 1;} | 
|  | 445       elsif ($_ =~ /^\@(.*)/) { $hash{$1} = 1;} | 
|  | 446     } | 
|  | 447     elsif ($cmp % 4 == 3 ) | 
|  | 448     { | 
|  | 449       die "file do not contain a + at line $cmp\n" unless $_ =~ /^\+/; | 
|  | 450     } | 
|  | 451   } | 
|  | 452   close $fic; | 
|  | 453   return \%hash; | 
|  | 454 } | 
|  | 455 | 
|  | 456 1; |