<!--
view getSequenceInfo.xml @ 0:19ae17458c14 draft default tip

Uploaded
author dcouvin
date Wed, 15 Sep 2021 21:35:09 +0000
parents
children
line wrap: on
line source
-->

<tool id="getseqinfo" name="getSequenceInfo tool" version="1.0.1">
  <description>extracts sequence data and specific information from GenBank</description>

<!--
  Software dependencies of the tool, each declared as a package pinned to an
  exact version. All of them are Perl distributions (names prefixed "perl-");
  presumably these are the modules loaded by getSequenceInfo.pl, which the
  command section runs with perl. TODO confirm against the script.
  NOTE(review): the declaration order is left untouched on purpose, since
  dependency resolvers may derive environment names from it.
-->
<requirements>
  <requirement type="package" version="1.7.2">perl-bioperl</requirement>
  <requirement type="package" version="2.32">perl-archive-tar</requirement>
  <requirement type="package" version="0.45">perl-file-copy-recursive</requirement>
  <requirement type="package" version="2.16">perl-file-path</requirement>
  <requirement type="package" version="2.79">perl-net-ftp</requirement>
  <requirement type="package" version="2.064">perl-io-uncompress-gunzip</requirement>
  <requirement type="package" version="6.15">perl-lwp-simple</requirement>
  <requirement type="package" version="1.38_03">perl-posix</requirement>
</requirements>

<!--<command interpreter="bash">
    ./getSequenceInfo.sh genbank $availableKingdom $assemblylevel $species $quantity $output1
   perl $directory/getSequenceInfo.pl -directory $dir -k $availableKingdoms -l $assemblylevel -s $species -n $quantity -o $output

 mv results/folder/result/summary.html $summary_html &&
       mv results/folder/result/summary.xls $summary_tsv &&

       (mv results/folder/result/Assemby/*.fna ./assembly/ 2> /dev/null || true) &&

       (mv results/folder/result/GenBank/*.gbk ./genbanks/ 2> /dev/null || true)

 var1="genbank_";var2=$availableKingdoms_assembly_summary.txt"

if [ -f "./results/result/${keyword}_folder/${keyword}.fasta" ]; then
         mv ./results/result/${keyword}_folder/${keyword}.fasta ./assembly/${keyword}.fna;
       fi 


  </command>-->

 <!--
   Command template (Cheetah, rendered to a shell command line).

   1. Runs getSequenceInfo.pl from the tool directory with the selected
      kingdom, species or TaxID, assembly level, optional component/keyword
      filter, assembly count and release date. The assembly summary file is
      read from the tool directory and results are written under ./results.
   2. Moves summary.xls to the summary_tsv dataset and sorts the produced
      *.gbff, *.fna and *.txt files into ./genbanks, ./assembly and ./reports,
      the directories scanned by the output collections.
   3. If the script wrote a per-component or per-keyword FASTA file, moves it
      into ./assembly with a .fna extension, then removes ./results.

   Every substituted value is single-quoted so that a value containing spaces
   or shell metacharacters (for example a two-word keyword) stays one shell
   word. Globs are deliberately left outside the quotes so they still expand.
 -->
 <command detect_errors="aggressive"><![CDATA[
       perl '$__tool_directory__/getSequenceInfo.pl' -k '$availableKingdoms'
       ## The same text field carries either a species name or an NCBI TaxID.
       #if $search == "1"
          -taxid '$species'
       #else if $search == "0"
          -s '$species'
       #end if
       -level '$assemblyLevel'
       ## Pass -c only when at least one of component/keyword is set.
       #if $component == "" and $keyword != ''
       -c '$keyword'
       #else if $component != "" and $keyword == ""
       -c '$component'
       #else if $component != "" and $keyword != ""
       -c '${component},${keyword}'
       #end if
       -n '$quantity' -date '$date'
       -path '${__tool_directory__}/genbank_${availableKingdoms}_assembly_summary.txt' -o ./results &&
       mkdir -p ./assembly/ ./genbanks/ ./reports/ &&
       mv ./results/result/summary.xls '$summary_tsv' &&
       mv ./results/result/GenBank/*.gbff ./genbanks/ &&
       mv ./results/result/Assembly/*.fna ./assembly/ &&
       mv ./results/result/Report/*.txt ./reports/ &&
       ## Optional per-component and per-keyword FASTA files; absent files are skipped.
       if [ -f './results/result/${component}_folder/${component}.fasta' ]; then
         mv './results/result/${component}_folder/${component}.fasta' './assembly/${component}.fna';
       fi &&
       if [ -f './results/result/${keyword}_folder/${keyword}.fasta' ]; then
         mv './results/result/${keyword}_folder/${keyword}.fasta' './assembly/${keyword}.fna';
       fi &&
       rm -r ./results/
]]></command>


<!--
  User-facing parameters. Each name is referenced from the command template,
  so names and option values must stay in sync with it.
-->
<inputs>

  <!-- Also selects which genbank_<kingdom>_assembly_summary.txt file is read. -->
  <param name="availableKingdoms" type="select" label="Select the kingdom:">
    <option value="bacteria">bacteria</option>
    <option value="protozoa">protozoa</option>
    <option value="viral">viral</option>
  </param>

  <!-- "species" is passed with -taxid when search is "1" and with -s when it is "0". -->
  <param name="search" type="select" display="radio" label="Choose between Species (organism's name) or NCBI Taxonomy ID (TaxID)">
    <option value="0">Species</option>
    <option value="1">TaxID</option>
  </param>
  <param name="species" type="text" area="false" label="Species or NCBI TaxID (eg. Escherichia coli):"/>

  <param name="assemblyLevel" type="select" label="Assembly level:">
    <option value="Complete Genome">Complete Genome</option>
    <option value="Complete Genome,Chromosome,Scaffold,Contig">All</option>
  </param>

  <!-- The empty value means "no component filter"; the command tests for "". -->
  <param name="component" type="select" label="Sequence component:">
    <option value="">None</option>
    <option value="plasmid">Plasmid</option>
    <option value="chromosome">Chromosome</option>
    <!--<option value="keyword"><param name="keyword" type="text" area="false" label="OR use a keyword:"/></option>-->
  </param>
  <param name="keyword" type="text" area="false" label="OR use a keyword (sequences having a description containing this keyword will be downloaded in a separate file):"/>

  <!-- Integer rather than free text: an empty or non-numeric value would corrupt the -n argument. -->
  <param name="quantity" type="integer" min="0" value="3" label="Number of assemblies:"/>

  <!-- The default 0000-00-00 satisfies the pattern, so existing behaviour is kept. -->
  <param name="date" type="text" value="0000-00-00" area="false" label="From Release date (yyyy-mm-dd):">
    <validator type="regex" message="Please enter the date as yyyy-mm-dd, for example 2021-09-06">^\d{4}-\d{2}-\d{2}$</validator>
  </param>

</inputs>

<!--
  Outputs: three list collections, each filled by scanning a directory in the
  job working directory for files matching a pattern, followed by one tabular
  dataset that receives the summary file.
-->
<outputs>
  <!-- One element per *.gbff file found in ./genbanks -->
  <collection name="output" type="list" label="${tool.name}: GenBank files">
    <discover_datasets directory="./genbanks" pattern="(?P&lt;name&gt;.+)\.gbff$" format="genbank"/>
  </collection>

  <!-- One element per *.txt file found in ./reports -->
  <collection name="outputreport" type="list" label="${tool.name}: Report files">
    <discover_datasets directory="./reports" pattern="(?P&lt;name&gt;.+)\.txt$" format="txt"/>
  </collection>

  <!-- One element per *.fna file found in ./assembly -->
  <collection name="outputfasta" type="list" label="${tool.name}: FASTA files">
    <discover_datasets directory="./assembly" pattern="(?P&lt;name&gt;.+)\.fna$" format="fasta"/>
  </collection>

  <!-- Disabled legacy declarations, kept for reference:
  <collection name="assemblies" type="list" label="${tool.name} on ${input.element_identifier}: Assembly">
      <discover_datasets pattern="__name_and_ext__" directory="assembly" />
  </collection>
  <collection name="genbanks" type="list" label="${tool.name} on ${input.element_identifier}: GenBank">
      <discover_datasets pattern="__name_and_ext__" directory="./genbanks" />
  </collection>
  <data format="html" name="summary_html" label="summary.html"/>
  -->

  <!-- Target of the "mv ... summary.xls" step in the command -->
  <data name="summary_tsv" format="tabular" label="${tool.name}: summary.tsv"/>
</outputs>


<help><![CDATA[
  This tool downloads sequences in FASTA or GenBank format and retrieves specific information (such as country, PubMed ID, host, ...).
  
  Please note that the NCBI assembly_summary files used to download the sequences were uploaded on September 6th, 2021.

  To get more recent assembly_summary files, please use the command line or the GUI version of the tool.

  GitHub: https://github.com/karubiotools/getSequenceInfo  
]]></help>

</tool>