diff catchsequence/catchsequence.pl @ 0:37d48392bf22 draft default tip

Uploaded
author dcouvin
date Tue, 21 Sep 2021 16:44:26 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/catchsequence/catchsequence.pl	Tue Sep 21 16:44:26 2021 +0000
@@ -0,0 +1,144 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+#INPUTS_
+# my $Result_RES = $ARGV[0];
+my $sequences = $ARGV[0];
+
+#OUTPUT_
+#my $output = $ARGV[1];
+
+my @list_seq = split(/,/,$sequences);
+#my @list_seq = @ARGV;
+
+my $res = 90.00;
+my $plas = 90.00;
+my $vf = 80.00;
+
+my $percentage = "ident"; # other possibility is "cov"
+my $columPerc = 10; # other possibility is 9
+
+
+
+#Other parameters
+for (my $i = 0; $i <= $#ARGV; $i++) {
+    if ($ARGV[$i]=~/-percent/i or $ARGV[$i]=~/-perc/i) {
+		$percentage = $ARGV[$i+1];
+    }
+    elsif ($ARGV[$i]=~/-res/i) {
+		$res = $ARGV[$i+1];
+    }
+    elsif ($ARGV[$i]=~/-plas/i) {
+		$plas = $ARGV[$i+1];
+    }
+    elsif ($ARGV[$i]=~/-vf/i) {
+                $vf = $ARGV[$i+1];
+    }
+}
+
+##########################################################################################
+ 
+if ($percentage eq "ident"){
+    $columPerc = 10;
+}
+elsif ($percentage eq "cov"){
+    $columPerc = 9;
+}
+
+
+#open (OUT, ">$output");
+print "Sequence\tResistance genes\tPlasmids\tVirulence genes\tST (MLST)\tAlleles (MLST)\n";
+
+foreach my $sequence (@list_seq) { 
+	my $Result_RES = `abricate --db resfinder $sequence > $sequence.RES.txt`; #appel système de la commande abricate avec la BDD ResFinder
+	my $Result_PLA = `abricate --db plasmidfinder $sequence > $sequence.PLA.txt`; #appel système de la commande abricate avec la BDD PlasmidFinder
+	my $Result_VIR = `abricate --db vfdb $sequence > $sequence.VIR.txt`;
+	my $Result_MLST = `mlst $sequence > $sequence.MLST.txt`;
+
+	open (RES, "$sequence.RES.txt");
+	print "$sequence\t";
+
+	while (<RES>) {
+
+    	chomp();
+    		if ($_ !~ m/^#/) {
+    			my @infos = split(/\t/,$_);
+    			my $geneRes = $infos[5]; # resistance gene name (ancienne valeur $infos[4])
+    			my $identity = $infos[$columPerc]; # identity % (ancienne valeur $infos[9])
+  
+    			if ($identity > $res) {
+        			print "$geneRes;";
+    			}
+    		}
+
+	}
+
+	close (RES);
+	print "\t";
+
+
+	open (PLA, "$sequence.PLA.txt") or die "could not open $!";
+
+	while (<PLA>) {
+    	chomp();
+    		if ($_ !~ m/^#/) {
+        		my @infos = split(/\t/,$_);
+        		my $plasmid = $infos[5]; # plasmid name
+        		my $identity = $infos[$columPerc]; # identity %
+ 
+        		if ($identity > $plas) {
+                		print"$plasmid;";
+        		}
+    		}
+
+	}
+	close (PLA);
+	print "\t";
+
+	open (VIR, "$sequence.VIR.txt") or die "could not open $!";
+
+	while (<VIR>) {
+    	chomp();
+    	
+		if ($_ !~ m/^#/) {
+        		my @infos = split(/\t/,$_);
+        		my $geneVir = $infos[5]; # virulence gene name
+        		my $identity = $infos[$columPerc]; # identity %
+ 
+        		if ($identity > $vf) {
+                		print "$geneVir;";
+        		}
+    		}
+
+	}
+	close (VIR);
+	print "\t";
+
+
+	open (MLST, "$sequence.MLST.txt") or die "could not open $!";
+
+        while (<MLST>) {
+        chomp();
+
+        	my @infos = split(/\t/,$_);
+                my $numMLST = $infos[2];
+            	print "$numMLST\t";
+   		
+		for (my $i=3; $i <= $#infos; $i++){
+			print "$infos[$i];";
+		}
+
+        }
+        close (MLST);
+        print "\n";
+}
+
+#close (OUT);
+
+unlink glob ('*.VIR.txt');
+unlink glob ('*.PLA.txt');
+unlink glob ('*.RES.txt');
+unlink glob ('*.MLST.txt');
+