Mercurial > repos > dcouvin > hsp60ecctool
comparison hsp60ECCtool/hsp60ECCtool.pl @ 0:6b2386c4cd76 draft default tip
Uploaded
| author | dcouvin |
|---|---|
| date | Mon, 08 Nov 2021 20:35:22 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:6b2386c4cd76 |
|---|---|
| 1 #!/usr/bin/perl | |
| 2 | |
| 3 use strict; | |
| 4 use warnings; | |
| 5 #use Bio::SeqIO; | |
| 6 | |
| 7 #parameters | |
| 8 my $input = $ARGV[0]; #Sequences input file | |
| 9 my $database = $ARGV[1]; #database | |
| 10 my $ident = $ARGV[2]; # percentage identity | |
| 11 my @arrayID = (); | |
| 12 my $output = $ARGV[3]; #"result.tsv"; | |
| 13 my $blastFile = $ARGV[4]; #"blastOut"; #output of BLAST | |
| 14 | |
| 15 if (-e $input and -e $database){ | |
| 16 #create database with makeblastdb | |
| 17 `makeblastdb -in $database -dbtype nucl`; | |
| 18 #blastn | |
| 19 `blastn -query $input -db $database -perc_identity $ident -task 'megablast' -evalue '0.001' -out $blastFile -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles'`; | |
| 20 } | |
| 21 | |
| 22 #my $blastFile = "$input.blastOut"; #output of BLAST | |
| 23 | |
| 24 | |
| 25 #my $seqIO = Bio::SeqIO->new(-format=>'Fasta', -file=>$input); | |
| 26 | |
| 27 #while (my $seq = $seqIO->next_seq()) { | |
| 28 #my $seqID = $seq->id; | |
| 29 #my $seqNuc = $seq->seq; | |
| 30 #push @arrayID, $seqID; | |
| 31 #$hSeq{$seqID} = $seqNuc; | |
| 32 #my @seqArray = split //, $seqNuc; | |
| 33 #} | |
| 34 | |
| 35 open (GS, $input) or die $!; | |
| 36 | |
| 37 while (my $line = <GS>) { | |
| 38 if ($line =~ />([\w\.]*)\s/){ | |
| 39 my $current_id = $1; #we retrieve the name | |
| 40 push @arrayID, $current_id; | |
| 41 } | |
| 42 } | |
| 43 | |
| 44 | |
| 45 #Print final result | |
| 46 open (OUT, '>', $output) or die $!; | |
| 47 print OUT "SequenceID\tHsp60 Cluster\tSutton GG et al. associated species and clade\tWu W et al. associated species or taxon\t% identity\tReference assembly\n"; | |
| 48 #header, Hsp60 Cluster Sutton GG et al. associated species and clade Wu W et al. associated species or taxon % identity Reference assembly | |
| 49 | |
| 50 foreach my $elem (@arrayID) { | |
| 51 | |
| 52 my $line = `grep '$elem' '$blastFile' | head -n 1`; | |
| 53 #split line | |
| 54 if($line){ | |
| 55 my @tab1 = split(/\t/, $line); | |
| 56 my $bigString = $tab1[1]; #cluster elements | |
| 57 my @tab2 = split(/__/, $bigString); | |
| 58 my $assembly = $tab2[3]; #assembly | |
| 59 $assembly =~ tr/_/\./; | |
| 60 | |
| 61 print OUT "$elem\t$tab2[0]\t$tab2[1]\t$tab2[2]\t$tab1[2]\t$assembly\n"; | |
| 62 } | |
| 63 else{ | |
| 64 print OUT "$elem\tNA\tNA\tNA\tNA\tNA\n"; | |
| 65 } | |
| 66 | |
| 67 } | |
| 68 | |
| 69 #print "First line = $line\n\n"; | |
| 70 | |
| 71 #$line = `grep '$arrayID[1]' '$blastFile' | head -n 1`; | |
| 72 #print "Second line is = $line\n"; | |
| 73 | |
| 74 | |
| 75 #unlink glob "*fa*"; | |
| 76 #unlink glob "*blast*"; | |
| 77 | |
| 78 | |
| 79 close (OUT); | |
| 80 #close(E); | |
| 81 | |
| 82 |
