view fetch_fasta_from_NCBI.xml @ 3:8be88084f89c draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/fetch_fasta_from_ncbi commit ab45487db8cc69750f92a40d763d51ffac940e25
author artbio
date Wed, 08 Nov 2017 13:00:26 -0500
parents 50f5ef3313bb
children c667d0ee39f5
line wrap: on
line source

<tool id="retrieve_fasta_from_NCBI" name="Retrieve FASTA from NCBI" version="2.2.1">
  <!-- Optional short description; left empty for this tool. -->
  <description></description>
  <!--
    Command line for fetch_fasta_from_NCBI.py (shipped in the tool directory).
    * queryString is wrapped in double quotes on purpose: the sanitizer of the
      queryString parameter backslash-escapes the characters that would
      otherwise end the double-quoted string.
    * Every other substituted value is single-quoted so that it always reaches
      the script as exactly one shell word.
    * dry_run expands to the truevalue / falsevalue declared on the boolean
      parameter, i.e. either a single flag or nothing.
  -->
  <command><![CDATA[
      python '$__tool_directory__/fetch_fasta_from_NCBI.py'
      -i "$queryString"
      -d '$dbname'
      -l '$logfile'
      $dry_run
      -o '$outfile'
  ]]></command>

  <!--
    Tool inputs.
    queryString: free-text Entrez query. The command template places this value
      inside double quotes, where the shell still expands the dollar sign and
      the backtick. The sanitizer therefore removes the double quote, the
      backslash, the dollar sign and the backtick from the set of characters
      passed through unchanged and maps each of them to its backslash-escaped
      form, so that the query reaches the script literally and cannot trigger
      variable expansion or command substitution.
    dbname: NCBI database to query (nucleotide or protein).
    dry_run: when checked, adds the count flag so that only the number of
      matching sequences is reported.
  -->
  <inputs>
    <param name="queryString" type="text" area="True" size="5x80"
           value="txid10239[orgn] NOT txid131567[orgn] AND complete[all] NOT partial[title] NOT phage[title]"
           label="Query to NCBI in entrez format"
           help="Example: 'Drosophila melanogaster[Organism] AND Gcn5[Title]'">
      <sanitizer>
        <valid initial="string.printable">
          <remove value="&quot;"/>
          <remove value="\"/>
          <remove value="$"/>
          <remove value="`"/>
        </valid>
        <mapping initial="none">
          <add source="&quot;" target="\&quot;"/>
          <add source="\" target="\\"/>
          <add source="$" target="\$"/>
          <add source="`" target="\`"/>
        </mapping>
      </sanitizer>
    </param>
    <param name="dbname" type="select" label="NCBI database">
      <option value="nuccore">Nucleotide</option>
      <option value="protein">Protein</option>
    </param>
    <param name="dry_run" type="boolean" label="Dry run to get the number of sequences?" truevalue="--count" falsevalue="" checked="false"/>
  </inputs>
  <!--
    Tool outputs.
    outfile: FASTA dataset with the retrieved sequences; its filter keeps it
      only when the dry_run parameter is False.
    logfile: plain-text log, declared without a filter.
  -->
  <outputs>
    <data name="outfile" format="fasta"
          label="${tool.name} (${dbname.value_label}) with queryString '${queryString.value}'">
      <filter> dry_run == False</filter>
    </data>
    <data name="logfile" format="txt" label="${tool.name}: log"/>
  </outputs>
  <!--
    Functional tests.
    Test 1: fetches a single nucleotide record and compares the FASTA output
      (declared output name: outfile) with test-data/output.fa.
    Test 2: dry run; only the log is checked, by approximate size (sim_size).
    Every param and output name used here must match a name declared in the
    inputs / outputs sections of this tool.
  -->
  <tests>
    <test>
      <param name="queryString" value="9629650[gi]" />
      <param name="dbname" value="nuccore" />
      <output name="outfile" ftype="fasta" file="output.fa" />
    </test>
    <test>
      <param name="queryString" value="CU929326[Accession]" />
      <param name="dbname" value="nuccore" />
      <param name="dry_run" value="True"/>
      <output name="logfile" ftype="txt" file="dry_run.log" compare="sim_size"/>
    </test>
  </tests>
  <help>
**What it does**

This tool retrieves nucleotide/peptide sequences from the corresponding NCBI database for a given entrez query.

The tool is preset with the query "txid10239[orgn] NOT txid131567[orgn] AND complete[all] NOT partial[title] NOT phage[title]" for use with metaVisitor.

See `Entrez help`_ for an explanation of the query formats.

Be sure to use the appropriate NCBI query syntax. Always use [] to specify the search fields.

Note that the tool may fail if the connection to the NCBI database is interrupted (see the log dataset).

**Acknowledgments**

This Galaxy tool has been adapted from the Galaxy tool `get_fasta_from_taxon`_.

It is Copyright © 2014-2015 `CNRS and University Pierre et Marie Curie`_ and is released under the `MIT license`_.

.. _Entrez help: https://www.ncbi.nlm.nih.gov/books/NBK3837/#EntrezHelp.Entrez_Searching_Options
.. _get_fasta_from_taxon: https://toolshed.g2.bx.psu.edu/view/crs4/get_fasta_from_taxon
.. _CNRS and University Pierre et Marie Curie: http://www.ibps.upmc.fr/en
.. _MIT license: http://opensource.org/licenses/MIT

  </help>
  <!--
    Publication cited for this tool, referenced by DOI.
    NOTE(review): confirm that this DOI is the intended reference.
  -->
  <citations>
    <citation type="doi">10.1186/1471-2105-14-73</citation>
  </citations>
</tool>