Mercurial > repos > dereeper > pangenome_explorer
comparison PanExplorer_workflow/Perl/Naegleria/assignFastqByITS.pl @ 1:032f6b3806a3 draft
Uploaded
| author | dereeper |
|---|---|
| date | Thu, 30 May 2024 11:16:08 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:3cbb01081cde | 1:032f6b3806a3 |
|---|---|
| 1 #!/usr/bin/perl | |
| 2 | |
| 3 use strict; | |
| 4 | |
| 5 my $directory; | |
| 6 if ($ARGV[0]){ | |
| 7 $directory = $ARGV[0]; | |
| 8 } | |
| 9 else{ | |
| 10 print "You must provide as an argument a directory containing fastq.gz file for testing the presence of ITS sequences\nex: perl assignFastqByITS.pl my_fastq_dir\n"; | |
| 11 exit; | |
| 12 } | |
| 13 | |
| 14 my %sequences =( | |
| 15 "1"=>"ATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA", | |
| 16 "2"=> "ATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA", | |
| 17 "3"=> "AAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA", | |
| 18 "4"=> "ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA", | |
| 19 "5"=>"ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGCATCGA", | |
| 20 "6"=>"ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA", | |
| 21 "7"=>"ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA", | |
| 22 "8"=>"ATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTTATGGTAAAAAAGGTGTATGGTAAAAAAGGTGAAAACCTTTTTTCCATTTACAAAAAATAACTCTGTGCAATGGAGCACACGGCTCGTGTATCGA" | |
| 23 ); | |
| 24 | |
| 25 | |
| 26 open(LS,"ls $directory/*.fastq.gz |"); | |
| 27 while(<LS>){ | |
| 28 my $file = $_; | |
| 29 $file =~s/\n//g;$file =~s/\r//g; | |
| 30 foreach my $genotype(sort {$a<=>$b} keys(%sequences)){ | |
| 31 my $seq = $sequences{$genotype}; | |
| 32 my $cmd = `zgrep -c '$seq' $file`; | |
| 33 $cmd =~s/\n//g;$cmd =~s/\r//g; | |
| 34 print "$file $genotype $cmd\n"; | |
| 35 } | |
| 36 } | |
| 37 close(LS); |
