diff getSequenceInfo.xml @ 0:19ae17458c14 draft default tip

Uploaded
author dcouvin
date Wed, 15 Sep 2021 21:35:09 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/getSequenceInfo.xml	Wed Sep 15 21:35:09 2021 +0000
@@ -0,0 +1,136 @@
+<tool id="getseqinfo" name="getSequenceInfo tool" version="1.0.1">
+  <description>extracts sequence data and specific information from GenBank</description>
+
+  <!-- Pinned package dependencies for the Perl script invoked in the command section. -->
+  <requirements>
+    <requirement type="package" version="1.7.2">perl-bioperl</requirement>
+    <requirement type="package" version="2.32">perl-archive-tar</requirement>
+    <requirement type="package" version="0.45">perl-file-copy-recursive</requirement>
+    <requirement type="package" version="2.16">perl-file-path</requirement>
+    <requirement type="package" version="2.79">perl-net-ftp</requirement>
+    <requirement type="package" version="2.064">perl-io-uncompress-gunzip</requirement>
+    <requirement type="package" version="6.15">perl-lwp-simple</requirement>
+    <requirement type="package" version="1.38_03">perl-posix</requirement>
+  </requirements>
+<!--<command interpreter="bash">
+    ./getSequenceInfo.sh genbank $availableKingdom $assemblylevel $species $quantity $output1
+   perl $directory/getSequenceInfo.pl -directory $dir -k $availableKingdoms -l $assemblylevel -s $species -n $quantity -o $output
+
+ mv results/folder/result/summary.html $summary_html &&
+       mv results/folder/result/summary.xls $summary_tsv &&
+
+       (mv results/folder/result/Assemby/*.fna ./assembly/ 2> /dev/null || true) &&
+
+       (mv results/folder/result/GenBank/*.gbk ./genbanks/ 2> /dev/null || true)
+
+ var1="genbank_";var2=$availableKingdoms_assembly_summary.txt"
+
+if [ -f "./results/result/${keyword}_folder/${keyword}.fasta" ]; then
+         mv ./results/result/${keyword}_folder/${keyword}.fasta ./assembly/${keyword}.fna;
+       fi 
+
+
+  </command>-->
+
+ <command detect_errors="aggressive"><![CDATA[
+       ## Run getSequenceInfo.pl, then sort its results into the three directories scanned by the outputs section.
+       perl "$__tool_directory__/getSequenceInfo.pl" -k "$availableKingdoms"
+       ## "search" selects how the free-text "species" value is passed: 1 = TaxID (-taxid), 0 = species name (-s).
+       #if $search == "1"
+          -taxid "$species"
+       #else if $search == "0"
+          -s "$species"
+       #end if
+       -level "$assemblyLevel"
+       ## -c receives the selected component, the keyword, or both joined by a comma; it is omitted when both are empty.
+       #if $component == "" and $keyword != ''
+       -c "$keyword"
+       #else if $component != "" and $keyword == ""
+       -c "$component"
+       #else if $component != "" and $keyword != ""
+       -c "${component},${keyword}"
+       #end if
+       -n "$quantity" -date "$date"
+       -path "${__tool_directory__}/genbank_${availableKingdoms}_assembly_summary.txt" -o ./results &&
+       mkdir ./assembly/ ./genbanks/ ./reports/ &&
+       mv ./results/result/summary.xls "$summary_tsv" &&
+       mv ./results/result/GenBank/*.gbff ./genbanks/ &&
+       mv ./results/result/Assembly/*.fna ./assembly/ &&
+       mv ./results/result/Report/*.txt ./reports/ &&
+       ## Both mv paths are quoted like the -f tests so a keyword containing spaces stays a single argument.
+       if [ -f "./results/result/${component}_folder/${component}.fasta" ]; then
+         mv "./results/result/${component}_folder/${component}.fasta" "./assembly/${component}.fna";
+       fi &&
+       if [ -f "./results/result/${keyword}_folder/${keyword}.fasta" ]; then
+         mv "./results/result/${keyword}_folder/${keyword}.fasta" "./assembly/${keyword}.fna";
+       fi && rm -r ./results/
+]]></command>
+
+
+<inputs>
+  <!-- Kingdom: passed as -k and also used to pick the bundled genbank_KINGDOM_assembly_summary.txt file. -->
+  <param name="availableKingdoms" type="select" label="Select the kingdom:">
+    <option value="bacteria">bacteria</option>
+    <option value="protozoa">protozoa</option>
+    <option value="viral">viral</option>
+  </param>
+
+  <!-- "search" decides how the command passes "species": value 0 uses the -s flag, value 1 uses the -taxid flag. -->
+  <param name="search" type="select" display="radio" label="Choose between Species (organism's name) or NCBI Taxonomy ID (TaxID)">
+    <option value="0">Species</option>
+    <option value="1">TaxID</option>
+  </param>
+  <param name="species" type="text" area="false" label="Species or NCBI TaxID (e.g. Escherichia coli):"/>
+
+  <param name="assemblyLevel" type="select" label="Assembly level:">
+    <option value="Complete Genome">Complete Genome</option>
+    <option value="Complete Genome,Chromosome,Scaffold,Contig">All</option>
+  </param>
+
+  <!-- Component and keyword may both be left empty; when both are set the command joins them with a comma. -->
+  <param name="component" type="select" label="Sequence component:">
+    <option value="">None</option>
+    <option value="plasmid">Plasmid</option>
+    <option value="chromosome">Chromosome</option>
+    <!--<option value="keyword"><param name="keyword" type="text" area="false" label="OR use a keyword:"/></option>-->
+  </param>
+  <param name="keyword" type="text" area="false" label="OR use a keyword (sequences having a description containing this keyword will be downloaded in a separate file):"/>
+
+  <param name="quantity" type="text" area="false" value="3" label="Number of assemblies:"/>
+  <param name="date" type="text" value="0000-00-00" area="false" label="From Release date (yyyy-mm-dd):"/>
+</inputs>
+
+<outputs>
+  <!-- Each list collection is filled from one directory created by the command, using the regular expression in "pattern". -->
+  <collection name="output" type="list" label="${tool.name}: GenBank files">
+    <discover_datasets directory="./genbanks" pattern="(?P&lt;name&gt;.+)\.gbff$" format="genbank"/>
+  </collection>
+  <collection name="outputreport" type="list" label="${tool.name}: Report files">
+    <discover_datasets directory="./reports" pattern="(?P&lt;name&gt;.+)\.txt$" format="txt"/>
+  </collection>
+  <collection name="outputfasta" type="list" label="${tool.name}: FASTA files">
+    <discover_datasets directory="./assembly" pattern="(?P&lt;name&gt;.+)\.fna$" format="fasta"/>
+  </collection>
+  <!--<collection name="assemblies" type="list" label="${tool.name} on ${input.element_identifier}: Assembly">
+      <discover_datasets pattern="__name_and_ext__" directory="assembly" />
+  </collection>
+  <collection name="genbanks" type="list" label="${tool.name} on ${input.element_identifier}: GenBank">
+      <discover_datasets pattern="__name_and_ext__" directory="./genbanks" />
+  </collection>-->
+  <!--<data format="html" name="summary_html" label="summary.html"/>-->
+  <data name="summary_tsv" format="tabular" label="${tool.name}: summary.tsv"/>
+</outputs>
+
+
+<help><![CDATA[
+  This tool downloads sequences in FASTA or GenBank format and retrieves specific information (such as country, PubMed ID, host, ...).
+  
+  Please note that the NCBI assembly_summary files used to download the sequences were uploaded on September 6th, 2021.
+
+  To get more recent assembly_summary files, please use the command line or the GUI version of the tool.
+
+  GitHub: https://github.com/karubiotools/getSequenceInfo  
+]]></help>
+
+</tool>
+