view hsp60ECCtool/hsp60ECCtool.pl @ 0:6b2386c4cd76 draft default tip

Uploaded
author dcouvin
date Mon, 08 Nov 2021 20:35:22 +0000
parents
children
line wrap: on
line source

#!/usr/bin/perl

use strict;
use warnings;
#use Bio::SeqIO;

#parameters
my $input = $ARGV[0]; #Sequences input file
my $database = $ARGV[1]; #database
my $ident = $ARGV[2]; # percentage identity
my @arrayID = ();
my $output = $ARGV[3]; #"result.tsv";
my $blastFile = $ARGV[4]; #"blastOut"; #output of BLAST

if (-e $input and -e $database){
  #create database with makeblastdb
  `makeblastdb -in $database -dbtype nucl`;
  #blastn
  `blastn -query $input -db $database -perc_identity $ident -task 'megablast' -evalue '0.001' -out $blastFile -outfmt '6 std sallseqid score nident positive gaps ppos qframe sframe qseq sseq qlen slen salltitles'`;
}
		
#my $blastFile = "$input.blastOut"; #output of BLAST
                

#my $seqIO = Bio::SeqIO->new(-format=>'Fasta', -file=>$input);
			
#while (my $seq = $seqIO->next_seq()) {
  #my $seqID = $seq->id;
  #my $seqNuc = $seq->seq;
  #push @arrayID, $seqID;
  #$hSeq{$seqID} = $seqNuc;
  #my @seqArray = split //, $seqNuc;
#}

open (GS, $input) or die $!;

while (my $line = <GS>) {
  if ($line =~ />([\w\.]*)\s/){ 
    my $current_id = $1; #we retrieve the name
    push @arrayID, $current_id;
  }
}


#Print final result
open (OUT, '>', $output) or die $!;
print OUT "SequenceID\tHsp60 Cluster\tSutton GG et al. associated species and clade\tWu W et al. associated species or taxon\t% identity\tReference assembly\n"; 
#header, Hsp60 Cluster	Sutton GG et al. associated species and clade	Wu W et al. associated species or taxon	%      identity	Reference assembly

foreach my $elem (@arrayID) {

  my $line = `grep '$elem' '$blastFile' | head -n 1`;
  #split line
  if($line){
    my @tab1 = split(/\t/, $line);
    my $bigString = $tab1[1]; #cluster elements
    my @tab2 = split(/__/, $bigString);
    my $assembly = $tab2[3]; #assembly
    $assembly =~ tr/_/\./;
  
    print OUT "$elem\t$tab2[0]\t$tab2[1]\t$tab2[2]\t$tab1[2]\t$assembly\n";
  }
  else{
    print OUT "$elem\tNA\tNA\tNA\tNA\tNA\n";
  }

}

#print "First line = $line\n\n";

#$line = `grep '$arrayID[1]' '$blastFile' | head -n 1`;
#print "Second line is  = $line\n";


#unlink glob "*fa*";
#unlink glob "*blast*";

 
close (OUT);
#close(E);