diff data_manager/fetch_refseq.xml @ 0:8b91891ae805 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_refseq commit a572f1f01161527ff6ed4af05bb2e073a8ca903b
author iuc
date Mon, 01 Oct 2018 15:36:01 -0400
parents
children 937f167631b1
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/fetch_refseq.xml	Mon Oct 01 15:36:01 2018 -0400
@@ -0,0 +1,88 @@
+<tool id="data_manager_fetch_refseq" name="RefSeq data manager" version="0.0.18" tool_type="manage_data">
+    <description>Fetch FASTA data from NCBI RefSeq and update all_fasta data table</description>
+    <requirements>
+        <requirement type="package">python</requirement>
+        <requirement type="package">requests</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+    python $__tool_directory__/fetch_refseq.py
+      #if str( $advanced.advanced_selector ) == 'advanced':
+        '${advanced.compress}'
+      #end if
+      --galaxy_datamanager_filename '${output_file}'
+      --division_names ${division_names}
+      --mol_types ${mol_types}
+      #if str( $pin_date ) != 'NO':
+        --pin_date '${pin_date}'
+      #end if
+    ]]></command>
+    <inputs>
+        <param argument="division_names" type="select" label="RefSeq division" multiple="true">
+            <option value="archea">Archea</option>
+            <option value="bacteria">Bacteria</option>
+            <option value="complete">Complete</option>
+            <option value="fungi">Fungi</option>
+            <option value="invertebrate">Invertebrate</option>
+            <option value="mitochondrion">Mitochondrion</option>
+            <option value="other">Other</option>
+            <option value="plant">Plant</option>
+            <option value="plasmid">Plasmid</option>
+            <option value="plastid">Plastid</option>
+            <option value="protozoa">Protozoa</option>
+            <option value="vertebrate_mammalian">Mammalian Vertebrate</option>
+            <option value="vertebrate_other">Other Vertebrate</option>
+            <option value="viral">Viral</option>
+        </param>
+        <param argument="mol_types" type="select" multiple="true" label="Molecule type" help="Select at least one of genomic, protein or rna sequence">
+            <option value="protein">Protein</option>
+            <option value="genomic">Genomic (DNA)</option>
+            <option value="rna">RNA</option>
+        </param>
+        <conditional name="advanced">
+            <param name="advanced_selector" type="select" label="Advanced Options">
+                <option value="basic" selected="True">Basic</option>
+                <option value="advanced">Advanced</option>
+            </param>
+            <when value="basic">
+            </when>
+            <when value="advanced">
+                <param type="boolean" argument="--compress" truevalue="--compress" falsevalue="" label="Compress FASTA files" 
+                    help="Compress downloaded FASTA files (with gzip). Limits compatibility with tools expecting uncompressed FASTA."/>
+            </when>
+        </conditional>
+        <param argument="--pin_date" type="hidden" value="NO" help="Used for testing"/>
+    </inputs>
+    <outputs>
+        <data name="output_file" format="data_manager_json"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="division_names" value="plastid"/>
+            <param name="mol_types" value="protein"/>
+            <param name="pin_date" value="2018-03-14"/>            
+            <param name="advanced_selector" value="basic"/>
+            <output name="output_file">
+                <assert_contents>
+                    <has_text text="2018-03-14"/>
+                    <has_text text="refseq_plastid"/>
+                    <has_text text="/refseq_plastid."/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+This data manager fetches FASTA format collections of proteins, nucleotides (genomic DNA) and RNA
+from NCBI's RefSeq_ data collection.
+
+RefSeq is released every two months and consists of a number of divisions. Some sequences are shared
+between multiple divisions. This data manager allows the Galaxy administrator to select which
+divisions and which molecule types within each division to download. Once downloaded the
+files are made accessible by adding an entry into the *all_fasta* data table.
+
+.. _RefSeq: https://www.ncbi.nlm.nih.gov/refseq/
+    ]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1093/nar/gkv1189</citation>
+    </citations>
+</tool>