0
|
1 #!/usr/bin/perl
|
|
2
|
|
3 use strict;
|
|
4 use warnings;
|
|
5 #use Bio::SeqIO;
|
|
6
|
|
7 #parameters
|
|
8 my $input = $ARGV[0]; #Sequences input file
|
|
9 my $database = $ARGV[1]; #database
|
|
10 my $ident = $ARGV[2]; # percentage identity
|
|
11 my @arrayID = ();
|
|
12 my $output = $ARGV[3]; #"result.tsv";
|
|
13 my $blastFile = $ARGV[4]; #"blastOut"; #output of BLAST
|
|
14
|
|
15 if (-e $input and -e $database){
|
|
16 #create database with makeblastdb
|
|
17 `makeblastdb -in $database -dbtype nucl`;
|
|
18 #blastn
|
|
19 `blastn -query $input -db $database -perc_identity $ident -task 'megablast' -evalue '0.001' -out $blastFile -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles'`;
|
|
20 }
|
|
21
|
|
22 #my $blastFile = "$input.blastOut"; #output of BLAST
|
|
23
|
|
24
|
|
25 #my $seqIO = Bio::SeqIO->new(-format=>'Fasta', -file=>$input);
|
|
26
|
|
27 #while (my $seq = $seqIO->next_seq()) {
|
|
28 #my $seqID = $seq->id;
|
|
29 #my $seqNuc = $seq->seq;
|
|
30 #push @arrayID, $seqID;
|
|
31 #$hSeq{$seqID} = $seqNuc;
|
|
32 #my @seqArray = split //, $seqNuc;
|
|
33 #}
|
|
34
|
|
35 open (GS, $input) or die $!;
|
|
36
|
|
37 while (my $line = <GS>) {
|
|
38 if ($line =~ />([\w\.]*)\s/){
|
|
39 my $current_id = $1; #we retrieve the name
|
|
40 push @arrayID, $current_id;
|
|
41 }
|
|
42 }
|
|
43
|
|
44
|
|
45 #Print final result
|
|
46 open (OUT, '>', $output) or die $!;
|
|
47 print OUT "SequenceID\tHsp60 Cluster\tSutton GG et al. associated species and clade\tWu W et al. associated species or taxon\t% identity\tReference assembly\n";
|
|
48 #header, Hsp60 Cluster Sutton GG et al. associated species and clade Wu W et al. associated species or taxon % identity Reference assembly
|
|
49
|
|
50 foreach my $elem (@arrayID) {
|
|
51
|
|
52 my $line = `grep '$elem' '$blastFile' | head -n 1`;
|
|
53 #split line
|
|
54 if($line){
|
|
55 my @tab1 = split(/\t/, $line);
|
|
56 my $bigString = $tab1[1]; #cluster elements
|
|
57 my @tab2 = split(/__/, $bigString);
|
|
58 my $assembly = $tab2[3]; #assembly
|
|
59 $assembly =~ tr/_/\./;
|
|
60
|
|
61 print OUT "$elem\t$tab2[0]\t$tab2[1]\t$tab2[2]\t$tab1[2]\t$assembly\n";
|
|
62 }
|
|
63 else{
|
|
64 print OUT "$elem\tNA\tNA\tNA\tNA\tNA\n";
|
|
65 }
|
|
66
|
|
67 }
|
|
68
|
|
69 #print "First line = $line\n\n";
|
|
70
|
|
71 #$line = `grep '$arrayID[1]' '$blastFile' | head -n 1`;
|
|
72 #print "Second line is = $line\n";
|
|
73
|
|
74
|
|
75 #unlink glob "*fa*";
|
|
76 #unlink glob "*blast*";
|
|
77
|
|
78
|
|
79 close (OUT);
|
|
80 #close(E);
|
|
81
|
|
82
|