annotate hsp60ECCtool/hsp60ECCtool.pl @ 0:6b2386c4cd76 draft default tip

Uploaded
author dcouvin
date Mon, 08 Nov 2021 20:35:22 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
1 #!/usr/bin/perl
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
2
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
3 use strict;
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
4 use warnings;
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
5 #use Bio::SeqIO;
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
6
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
7 #parameters
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
8 my $input = $ARGV[0]; #Sequences input file
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
9 my $database = $ARGV[1]; #database
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
10 my $ident = $ARGV[2]; # percentage identity
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
11 my @arrayID = ();
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
12 my $output = $ARGV[3]; #"result.tsv";
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
13 my $blastFile = $ARGV[4]; #"blastOut"; #output of BLAST
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
14
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
15 if (-e $input and -e $database){
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
16 #create database with makeblastdb
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
17 `makeblastdb -in $database -dbtype nucl`;
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
18 #blastn
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
19 `blastn -query $input -db $database -perc_identity $ident -task 'megablast' -evalue '0.001' -out $blastFile -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles'`;
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
20 }
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
21
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
22 #my $blastFile = "$input.blastOut"; #output of BLAST
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
23
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
24
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
25 #my $seqIO = Bio::SeqIO->new(-format=>'Fasta', -file=>$input);
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
26
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
27 #while (my $seq = $seqIO->next_seq()) {
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
28 #my $seqID = $seq->id;
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
29 #my $seqNuc = $seq->seq;
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
30 #push @arrayID, $seqID;
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
31 #$hSeq{$seqID} = $seqNuc;
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
32 #my @seqArray = split //, $seqNuc;
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
33 #}
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
34
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
35 open (GS, $input) or die $!;
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
36
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
37 while (my $line = <GS>) {
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
38 if ($line =~ />([\w\.]*)\s/){
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
39 my $current_id = $1; #we retrieve the name
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
40 push @arrayID, $current_id;
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
41 }
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
42 }
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
43
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
44
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
45 #Print final result
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
46 open (OUT, '>', $output) or die $!;
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
47 print OUT "SequenceID\tHsp60 Cluster\tSutton GG et al. associated species and clade\tWu W et al. associated species or taxon\t% identity\tReference assembly\n";
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
48 #header, Hsp60 Cluster Sutton GG et al. associated species and clade Wu W et al. associated species or taxon % identity Reference assembly
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
49
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
50 foreach my $elem (@arrayID) {
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
51
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
52 my $line = `grep '$elem' '$blastFile' | head -n 1`;
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
53 #split line
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
54 if($line){
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
55 my @tab1 = split(/\t/, $line);
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
56 my $bigString = $tab1[1]; #cluster elements
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
57 my @tab2 = split(/__/, $bigString);
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
58 my $assembly = $tab2[3]; #assembly
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
59 $assembly =~ tr/_/\./;
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
60
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
61 print OUT "$elem\t$tab2[0]\t$tab2[1]\t$tab2[2]\t$tab1[2]\t$assembly\n";
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
62 }
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
63 else{
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
64 print OUT "$elem\tNA\tNA\tNA\tNA\tNA\n";
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
65 }
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
66
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
67 }
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
68
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
69 #print "First line = $line\n\n";
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
70
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
71 #$line = `grep '$arrayID[1]' '$blastFile' | head -n 1`;
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
72 #print "Second line is = $line\n";
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
73
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
74
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
75 #unlink glob "*fa*";
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
76 #unlink glob "*blast*";
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
77
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
78
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
79 close (OUT);
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
80 #close(E);
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
81
6b2386c4cd76 Uploaded
dcouvin
parents:
diff changeset
82