Mercurial > repos > dcouvin > catchsequenceinfo
changeset 0:37d48392bf22 draft default tip
Uploaded
author | dcouvin |
---|---|
date | Tue, 21 Sep 2021 16:44:26 +0000 |
parents | |
children | |
files | catchsequence/catchsequence.pl catchsequence/catchsequence.xml |
diffstat | 2 files changed, 210 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/perl
# catchsequence/catchsequence.pl
#
# Summarise resistance genes, plasmids, virulence genes and MLST typing for
# one or more sequence files by running ABRicate (resfinder, plasmidfinder
# and vfdb databases) and mlst on each file.
#
# Usage:
#   catchsequence.pl FILE[,FILE...] [-perc ident|cov] [-res N] [-plas N] [-vf N]
#
#   FILE[,FILE...]  comma-separated list of sequence files (first argument)
#   -perc|-percent  percentage column used for filtering: "ident" (default)
#                   or "cov"
#   -res N          threshold for ResFinder hits      (default 90.00)
#   -plas N         threshold for PlasmidFinder hits  (default 90.00)
#   -vf N           threshold for VFDB hits           (default 80.00)
#
# Output (STDOUT): a header line followed by one tab-separated row per file.
# Gene, plasmid and allele lists are written with a trailing ';' after every
# entry.

use strict;
use warnings;

use Scalar::Util qw(looks_like_number);

# 0-based column of the ABRicate report that holds the gene name.
use constant GENE_COLUMN => 5;

# 0-based column of the ABRicate report for each percentage type.
my %COLUMN_OF = (
    ident => 10,
    cov   => 9,
);

# Default thresholds, overridable from the command line.
my %threshold_for = (
    res  => 90.00,
    plas => 90.00,
    vf   => 80.00,
);
my $percentage = 'ident';

# INPUTS: the first argument is always the sequence list; options follow it.
# Options are only looked for after the first argument so that a file name
# containing e.g. "-res" is never mistaken for an option.
my ($sequences, @option_args) = @ARGV;
die usage() unless defined $sequences && length $sequences;

while (@option_args) {
    my $flag = shift @option_args;

    my $target
        = $flag =~ /\A--?perc/i ? \$percentage
        : $flag =~ /\A--?res/i  ? \$threshold_for{res}
        : $flag =~ /\A--?plas/i ? \$threshold_for{plas}
        : $flag =~ /\A--?vf/i   ? \$threshold_for{vf}
        :                         undef;

    # Unrecognised words are ignored.
    next if !defined $target;

    die "Option '$flag' requires a value\n" . usage() unless @option_args;
    ${$target} = shift @option_args;
}

for my $name (sort keys %threshold_for) {
    looks_like_number($threshold_for{$name})
        or die "Threshold -$name must be a number, got '$threshold_for{$name}'\n";
}

# Any value other than "ident"/"cov" falls back to the identity column.
my $percent_column = $COLUMN_OF{$percentage};
if (!defined $percent_column) {
    warn "Unknown percentage type '$percentage'; using 'ident'\n";
    $percent_column = $COLUMN_OF{ident};
}

# Drop empty entries so a leading, trailing or doubled comma in the list does
# not make the tools run on an empty file name.
my @list_seq = grep { length } split /,/, $sequences;

print "Sequence\tResistance genes\tPlasmids\tVirulence genes\tST (MLST)\tAlleles (MLST)\n";

for my $sequence (@list_seq) {
    my @row = (
        $sequence,
        semicolon_list(
            abricate_hits('resfinder', $sequence, $percent_column, $threshold_for{res})
        ),
        semicolon_list(
            abricate_hits('plasmidfinder', $sequence, $percent_column, $threshold_for{plas})
        ),
        semicolon_list(
            abricate_hits('vfdb', $sequence, $percent_column, $threshold_for{vf})
        ),
        mlst_fields($sequence),
    );
    print join("\t", @row), "\n";
}

# Return the usage message.
sub usage {
    return "Usage: $0 FILE[,FILE...] [-perc ident|cov] [-res N] [-plas N] [-vf N]\n";
}

# Join the given items into one string, each item followed by ';'.
sub semicolon_list {
    my @items = @_;
    return join q{}, map { "$_;" } @items;
}

# Run a command without going through the shell and return its STDOUT as a
# list of chomped lines.
#
# Dies when the program cannot be started.  A non-zero exit status is only
# reported on STDERR, so the remaining files still get a row.
sub run_tool {
    my @command = @_;

    # List-form pipe open: arguments reach the program verbatim, so file
    # names with spaces or shell metacharacters are safe.
    open my $pipe, '-|', @command
        or die "Cannot run '$command[0]': $!\n";
    my @lines = <$pipe>;
    chomp @lines;

    if (!close $pipe) {
        # $! is set for a genuine close failure; otherwise the child exited
        # with a non-zero status, which is available in $?.
        if ($!) {
            warn "Problem closing pipe from '$command[0]': $!\n";
        }
        else {
            warn "'@command' exited with status " . ($? >> 8) . "\n";
        }
    }
    return @lines;
}

# Run ABRicate on $sequence against $database and return the gene names whose
# percentage (taken from 0-based column $percent_column) is strictly greater
# than $minimum.  A hit exactly at the threshold is therefore not reported.
sub abricate_hits {
    my ($database, $sequence, $percent_column, $minimum) = @_;

    my @genes;
    for my $line (run_tool('abricate', '--db', $database, $sequence)) {
        # Skip '#'-prefixed lines and blank lines.
        next if $line =~ /\A#/ || $line !~ /\S/;

        my @fields = split /\t/, $line;
        my ($gene, $percent) = @fields[GENE_COLUMN, $percent_column];

        # Ignore short or malformed lines instead of comparing undef.
        next unless defined $gene && looks_like_number($percent);

        push @genes, $gene if $percent > $minimum;
    }
    return @genes;
}

# Run mlst on $sequence and return two strings: the third tab-separated field
# of its first non-blank output line (the ST) and the remaining fields (the
# alleles) as a ';'-terminated list.  Both are empty when mlst printed
# nothing, so every output row keeps the same number of columns.
sub mlst_fields {
    my ($sequence) = @_;

    for my $line (run_tool('mlst', $sequence)) {
        next if $line !~ /\S/;

        my @fields = split /\t/, $line;
        my $st = defined $fields[2] ? $fields[2] : q{};
        return ($st, semicolon_list(@fields[3 .. $#fields]));
    }
    return (q{}, q{});
}
<!-- catchsequence/catchsequence.xml: Galaxy tool wrapper for catchsequence.pl -->
<tool id="catchsequence" name="catchSequenceInfo" version="1.0.0">
    <description>catchSequenceInfo allows to get resistance/virulence/plasmids/mlst information from DNA sequences</description>
<requirements>
  <requirement type="package" version="1.0.1">abricate</requirement>
  <requirement type="package" version="2.19.0">mlst</requirement>
  <!--<requirement type="package" version="6.6.0">emboss</requirement>
  <requirement type="package" version="1.3.2">pandas</requirement>-->
</requirements>


<command detect_errors="aggressive"><![CDATA[

#import re
    ## Creates a symlink for each input file named after its Galaxy 'element_identifier',
    ## so that a human-readable name appears in the output table (instead of 'dataset_xyz.dat').
    ## Identifiers are sanitised first: the script receives the names as one comma-separated
    ## argument, so commas, quotes and whitespace in a name would corrupt the list.
    #set $named_input_files = []
    #for $input_file in $input_files
        #set $safe_name = re.sub('[^\w.-]', '_', str($input_file.element_identifier))
        ln -s '${input_file}' '${safe_name}' &&
        #silent $named_input_files.append($safe_name)
    #end for
    ## Join with ',' only BETWEEN names: a leading comma would hand the script an empty file name.
    #set $sequence_list = ','.join($named_input_files)

    perl '$__tool_directory__/catchsequence.pl' '$sequence_list' -perc '$percent' -res '$res' -plas '$plas' -vf '$vf' > '$output'

]]></command>
    <!-- perl '$__tool_directory__/nucleScore.pl' $_input_file > "$output" -->
    <!-- ./nuclescore.sh ${named_input_files} > "$output" -->

<inputs>
    <param type="data" name="input_files" format="fasta,fasta.gz,fasta.bz2,gbk,gbk.gz,gbk.bz2" multiple="true" label="Genome fasta files"/>

    <param name="percent" type="select" label="Percentage type:">
        <option value="ident">Identity</option>
        <option value="cov">Coverage</option>
    </param>

    <!-- Thresholds are percentages, so they are numeric and bounded to 0-100. -->
    <param name="res" type="float" min="0" max="100" value="90.00" label="ResFinder %: " />
    <param name="plas" type="float" min="0" max="100" value="90.00" label="PlasmidFinder %: " />
    <param name="vf" type="float" min="0" max="100" value="80.00" label="VFDB %: " />

</inputs>

 <outputs>
        <data format="tabular" name="output" />
 </outputs>

<help>
catchSequenceInfo allows to get resistance/virulence/plasmids/mlst information from DNA sequences.

This tool uses ABRicate (https://github.com/tseemann/abricate) and MLST (https://github.com/tseemann/mlst) programs to get dedicated information from DNA sequences (such as resistance genes, plasmid genes, virulence genes, allele IDs and MLST number).


</help>

<citations>
  <citation type="doi">10.1186/1471-2105-11-595</citation>
  <citation type="doi">10.1093/jac/dks261</citation>
  <citation type="doi">10.1128/AAC.02412-14</citation>
  <citation type="doi">10.1093/nar/gkv1239</citation>
</citations>

</tool>