comparison hsp60ECCtool/hsp60ECCtool.pl @ 0:6b2386c4cd76 draft default tip

Uploaded
author dcouvin
date Mon, 08 Nov 2021 20:35:22 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:6b2386c4cd76
1 #!/usr/bin/perl
2
3 use strict;
4 use warnings;
5 #use Bio::SeqIO;
6
7 #parameters
8 my $input = $ARGV[0]; #Sequences input file
9 my $database = $ARGV[1]; #database
10 my $ident = $ARGV[2]; # percentage identity
11 my @arrayID = ();
12 my $output = $ARGV[3]; #"result.tsv";
13 my $blastFile = $ARGV[4]; #"blastOut"; #output of BLAST
14
15 if (-e $input and -e $database){
16 #create database with makeblastdb
17 `makeblastdb -in $database -dbtype nucl`;
18 #blastn
19 `blastn -query $input -db $database -perc_identity $ident -task 'megablast' -evalue '0.001' -out $blastFile -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles'`;
20 }
21
22 #my $blastFile = "$input.blastOut"; #output of BLAST
23
24
25 #my $seqIO = Bio::SeqIO->new(-format=>'Fasta', -file=>$input);
26
27 #while (my $seq = $seqIO->next_seq()) {
28 #my $seqID = $seq->id;
29 #my $seqNuc = $seq->seq;
30 #push @arrayID, $seqID;
31 #$hSeq{$seqID} = $seqNuc;
32 #my @seqArray = split //, $seqNuc;
33 #}
34
35 open (GS, $input) or die $!;
36
37 while (my $line = <GS>) {
38 if ($line =~ />([\w\.]*)\s/){
39 my $current_id = $1; #we retrieve the name
40 push @arrayID, $current_id;
41 }
42 }
43
44
45 #Print final result
46 open (OUT, '>', $output) or die $!;
47 print OUT "SequenceID\tHsp60 Cluster\tSutton GG et al. associated species and clade\tWu W et al. associated species or taxon\t% identity\tReference assembly\n";
48 #header, Hsp60 Cluster Sutton GG et al. associated species and clade Wu W et al. associated species or taxon % identity Reference assembly
49
50 foreach my $elem (@arrayID) {
51
52 my $line = `grep '$elem' '$blastFile' | head -n 1`;
53 #split line
54 if($line){
55 my @tab1 = split(/\t/, $line);
56 my $bigString = $tab1[1]; #cluster elements
57 my @tab2 = split(/__/, $bigString);
58 my $assembly = $tab2[3]; #assembly
59 $assembly =~ tr/_/\./;
60
61 print OUT "$elem\t$tab2[0]\t$tab2[1]\t$tab2[2]\t$tab1[2]\t$assembly\n";
62 }
63 else{
64 print OUT "$elem\tNA\tNA\tNA\tNA\tNA\n";
65 }
66
67 }
68
69 #print "First line = $line\n\n";
70
71 #$line = `grep '$arrayID[1]' '$blastFile' | head -n 1`;
72 #print "Second line is = $line\n";
73
74
75 #unlink glob "*fa*";
76 #unlink glob "*blast*";
77
78
79 close (OUT);
80 #close(E);
81
82