diff hsp60ECCtool/hsp60ECCtool.pl @ 0:6b2386c4cd76 draft default tip

Uploaded
author dcouvin
date Mon, 08 Nov 2021 20:35:22 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hsp60ECCtool/hsp60ECCtool.pl	Mon Nov 08 20:35:22 2021 +0000
@@ -0,0 +1,82 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+#use Bio::SeqIO;
+
+#parameters
+my $input = $ARGV[0]; #Sequences input file
+my $database = $ARGV[1]; #database
+my $ident = $ARGV[2]; # percentage identity
+my @arrayID = ();
+my $output = $ARGV[3]; #"result.tsv";
+my $blastFile = $ARGV[4]; #"blastOut"; #output of BLAST
+
+if (-e $input and -e $database){
+  #create database with makeblastdb
+  `makeblastdb -in $database -dbtype nucl`;
+  #blastn
+  `blastn -query $input -db $database -perc_identity $ident -task 'megablast' -evalue '0.001' -out $blastFile -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles'`;
+}
+		
+#my $blastFile = "$input.blastOut"; #output of BLAST
+                
+
+#my $seqIO = Bio::SeqIO->new(-format=>'Fasta', -file=>$input);
+			
+#while (my $seq = $seqIO->next_seq()) {
+  #my $seqID = $seq->id;
+  #my $seqNuc = $seq->seq;
+  #push @arrayID, $seqID;
+  #$hSeq{$seqID} = $seqNuc;
+  #my @seqArray = split //, $seqNuc;
+#}
+
+open (GS, $input) or die $!;
+
+while (my $line = <GS>) {
+  if ($line =~ />([\w\.]*)\s/){ 
+    my $current_id = $1; #we retrieve the name
+    push @arrayID, $current_id;
+  }
+}
+
+
+#Print final result
+open (OUT, '>', $output) or die $!;
+print OUT "SequenceID\tHsp60 Cluster\tSutton GG et al. associated species and clade\tWu W et al. associated species or taxon\t% identity\tReference assembly\n"; 
+#header, Hsp60 Cluster	Sutton GG et al. associated species and clade	Wu W et al. associated species or taxon	%      identity	Reference assembly
+
+foreach my $elem (@arrayID) {
+
+  my $line = `grep '$elem' '$blastFile' | head -n 1`;
+  #split line
+  if($line){
+    my @tab1 = split(/\t/, $line);
+    my $bigString = $tab1[1]; #cluster elements
+    my @tab2 = split(/__/, $bigString);
+    my $assembly = $tab2[3]; #assembly
+    $assembly =~ tr/_/\./;
+  
+    print OUT "$elem\t$tab2[0]\t$tab2[1]\t$tab2[2]\t$tab1[2]\t$assembly\n";
+  }
+  else{
+    print OUT "$elem\tNA\tNA\tNA\tNA\tNA\n";
+  }
+
+}
+
+#print "First line = $line\n\n";
+
+#$line = `grep '$arrayID[1]' '$blastFile' | head -n 1`;
+#print "Second line is  = $line\n";
+
+
+#unlink glob "*fa*";
+#unlink glob "*blast*";
+
+ 
+close (OUT);
+#close(E);
+
+