changeset 0:37d48392bf22 draft default tip

Uploaded
author dcouvin
date Tue, 21 Sep 2021 16:44:26 +0000
parents
children
files catchsequence/catchsequence.pl catchsequence/catchsequence.xml
diffstat 2 files changed, 210 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/catchsequence/catchsequence.pl	Tue Sep 21 16:44:26 2021 +0000
@@ -0,0 +1,144 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+# INPUTS
+# First argument: comma-separated list of sequence files. The list may contain
+# empty entries (e.g. a leading or doubled comma); those are discarded.
+my $sequences = $ARGV[0];
+die "Usage: $0 file1[,file2,...] [-perc ident|cov] [-res N] [-plas N] [-vf N]\n"
+    unless defined $sequences;
+
+my @list_seq = grep { length } split(/,/, $sequences);
+
+# Minimum percentage a hit must exceed to be reported, one per database.
+my $res = 90.00;   # ResFinder
+my $plas = 90.00;  # PlasmidFinder
+my $vf = 80.00;    # VFDB
+
+my $percentage = "ident"; # other possibility is "cov"
+my $columPerc = 10; # column used for "ident"; "cov" uses column 9
+
+
+
+# Optional parameters: each flag is followed by its value.
+# Scanning starts after the sequence list and the flag patterns are anchored,
+# so a file name such as "my-resistant.fasta" is never mistaken for "-res".
+# A trailing flag with no value is ignored and the default is kept.
+for (my $i = 1; $i < $#ARGV; $i++) {
+    my ($flag, $value) = @ARGV[$i, $i + 1];
+
+    if    ($flag =~ /^-+perc/i) { $percentage = $value; }
+    elsif ($flag =~ /^-+res/i)  { $res        = $value; }
+    elsif ($flag =~ /^-+plas/i) { $plas       = $value; }
+    elsif ($flag =~ /^-+vf/i)   { $vf         = $value; }
+    else                        { next; }
+
+    $i++;    # the value has been consumed; do not re-read it as a flag
+}
+
+##########################################################################################
+ 
+# Pick the column index that holds the requested percentage type:
+# "ident" selects column 10 and "cov" selects column 9.
+# Any other value leaves $columPerc as it is.
+# The two tests are mutually exclusive, so their order does not matter.
+$columPerc = 10 if $percentage eq "ident";
+$columPerc = 9  if $percentage eq "cov";
+
+
+##########################################################################################
+# Helpers
+##########################################################################################
+
+# Run an external command without going through a shell and return its
+# standard output as a list of chomped lines.
+#
+# The list form of the pipe open hands every element of @command to the
+# program as one argument, so a file name containing spaces or shell
+# metacharacters is passed through intact, and no intermediate report file
+# has to be written to disk.
+#
+# Dies if the program cannot be started. A non-zero exit status is only
+# reported on STDERR so that the remaining sequences are still processed.
+sub run_lines {
+    my @command = @_;
+
+    open(my $pipe, '-|', @command)
+        or die "could not run $command[0]: $!";
+    chomp(my @lines = <$pipe>);
+    close($pipe)
+        or warn "warning: '@command' exited with status ", $? >> 8, "\n";
+
+    return @lines;
+}
+
+# Screen one sequence file against one abricate database.
+#
+# Parameters:
+#   $database  - abricate database name (resfinder, plasmidfinder or vfdb)
+#   $sequence  - path of the sequence file to screen
+#   $column    - index of the percentage column to test (10 when the
+#                percentage type is "ident", 9 when it is "cov")
+#   $threshold - a hit is kept only if its percentage is strictly greater
+#
+# Returns the names (column 5) of the retained hits, each followed by ';',
+# concatenated into one string; the string is empty when nothing is retained.
+sub abricate_hits {
+    my ($database, $sequence, $column, $threshold) = @_;
+
+    my $hits = '';
+    for my $line (run_lines('abricate', '--db', $database, $sequence)) {
+        next if $line =~ m/^#/;    # header line
+
+        my @infos = split(/\t/, $line);
+        next unless defined $infos[$column];    # blank or truncated line
+
+        if ($infos[$column] > $threshold) {
+            $hits .= "$infos[5];";
+        }
+    }
+
+    return $hits;
+}
+
+##########################################################################################
+# Report: one header line, then one tab-separated row per input sequence
+##########################################################################################
+
+# Column order must stay in sync with the fields printed for each sequence
+# in the loop below.
+print "Sequence\tResistance genes\tPlasmids\tVirulence genes\tST (MLST)\tAlleles (MLST)\n";
+
+foreach my $sequence (@list_seq) {
+    # Resistance genes, plasmids and virulence genes, each filtered with the
+    # threshold that belongs to its database.
+    my $resistance = abricate_hits('resfinder',     $sequence, $columPerc, $res);
+    my $plasmids   = abricate_hits('plasmidfinder', $sequence, $columPerc, $plas);
+    my $virulence  = abricate_hits('vfdb',          $sequence, $columPerc, $vf);
+
+    print join("\t", $sequence, $resistance, $plasmids, $virulence), "\t";
+
+    # Each line of mlst output is split on tabs: field 2 is printed as the ST
+    # and every following field as an allele, each allele followed by ';'.
+    for my $line (run_lines('mlst', $sequence)) {
+        my @infos = split(/\t/, $line);
+        my $numMLST = defined $infos[2] ? $infos[2] : '';
+
+        print "$numMLST\t";
+        for my $i (3 .. $#infos) {
+            print "$infos[$i];";
+        }
+    }
+
+    print "\n";
+}
+
+#close (OUT);
+
+# Delete only intermediate report files named after this run's inputs (if any
+# exist), rather than every '*.RES.txt'-style file in the current directory.
+for my $sequence (@list_seq) {
+    unlink map { "$sequence.$_.txt" } qw(VIR PLA RES MLST);
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/catchsequence/catchsequence.xml	Tue Sep 21 16:44:26 2021 +0000
@@ -0,0 +1,66 @@
+<tool id="catchsequence" name="catchSequenceInfo" version="1.0.0">
+  <description>catchSequenceInfo retrieves resistance, virulence, plasmid and MLST information from DNA sequences</description>
+<requirements>
+  <requirement type="package" version="1.0.1">abricate</requirement>
+  <requirement type="package" version="2.19.0">mlst</requirement>
+  <!--<requirement type="package" version="6.6.0">emboss</requirement>
+  <requirement type="package" version="1.3.2">pandas</requirement>-->
+</requirements>
+
+ 
+<command detect_errors="aggressive"><![CDATA[
+
+#import re
+        ## Creates symlinks for each input file based on the Galaxy 'element_identifier'
+        ## Used so that a human-readable name appears in the output table (instead of 'dataset_xyz.dat')
+        ## The names are joined with ',' into one argument; $separator is empty for
+        ## the first name so that the list does not start with a comma
+        #set $named_input_files = ''
+        #set $separator = ''
+        #for $input_file in $input_files
+            ## Add single quotes around each input file identifier
+            #set $_input_file = "'{}'".format($input_file.element_identifier)
+            ln -s '${input_file}' ${_input_file} &&
+            #set $named_input_files = $named_input_files + $separator + $_input_file
+            #set $separator = ','
+        #end for
+
+        perl '$__tool_directory__/catchsequence.pl' $named_input_files -perc '$percent' -res '$res' -plas '$plas' -vf '$vf' > "$output"
+]]></command>
+ <!-- perl '$__tool_directory__/nucleScore.pl' $_input_file > "$output"  -->
+ <!-- ./nuclescore.sh ${named_input_files} > "$output" -->
+
+<inputs>
+  <param type="data" name="input_files" format="fasta,fasta.gz,fasta.bz2,gbk,gbk.gz,gbk.bz2" multiple="true" label="Genome fasta files"/>
+
+  <param name="percent" type="select" label="Percentage type:">
+    <option value="ident">Identity</option>
+    <option value="cov">Coverage</option>
+  </param>
+  <!-- Thresholds are percentages that catchsequence.pl compares numerically, so they are validated as floats in [0, 100] -->
+  <param name="res" type="float" min="0" max="100" value="90.00" label="ResFinder %: " />
+  <param name="plas" type="float" min="0" max="100" value="90.00" label="PlasmidFinder %: " />
+  <param name="vf" type="float" min="0" max="100" value="80.00" label="VFDB %: " />
+
+</inputs>
+
+ <outputs>
+    <data format="tabular" name="output" />
+ </outputs>
+
+<help>
+catchSequenceInfo retrieves resistance, virulence, plasmid and MLST information from DNA sequences.
+
+This tool runs the ABRicate (https://github.com/tseemann/abricate) and MLST (https://github.com/tseemann/mlst) programs to collect information from DNA sequences: resistance genes, plasmid genes, virulence genes, allele IDs and the MLST sequence type.
+
+
+</help>
+
+<citations>
+  <citation type="doi">10.1186/1471-2105-11-595</citation>
+  <citation type="doi">10.1093/jac/dks261</citation>
+  <citation type="doi">10.1128/AAC.02412-14</citation>
+  <citation type="doi">10.1093/nar/gkv1239</citation>
+</citations>
+
+</tool>