diff retrieve_ensembl_bed.xml @ 0:da1b538b87e5 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
author galaxyp
date Mon, 22 Jan 2018 13:13:47 -0500
parents
children 9c4a48f5d4e7
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/retrieve_ensembl_bed.xml	Mon Jan 22 13:13:47 2018 -0500
@@ -0,0 +1,100 @@
+<tool id="retrieve_ensembl_bed" name="Retrieve Ensembl features in BED format" version="0.1.0">
+    <description>using Ensembl REST API</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <expand macro="ensembl_requirements" />
+        <expand macro="bedutil_requirements" />
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        python '${__tool_directory__}/retrieve_ensembl_bed.py'
+            --species '${species}'
+            #if $extended_bed:
+                --extended_bed
+            #end if
+            ${ucsc_chrom_names}
+            #if $biotypes:
+                --biotypes '${biotypes}'
+            #end if
+            #if $regions:
+                --regions '${regions}'
+            #end if
+            '${transcript_bed}'
+    ]]></command>
+    <inputs>
+        <param name="species" type="text" value="" label="Ensembl species" >
+            <!-- Free text; the species_options macro below presumably supplies suggested values (defined outside this file) -->
+            <help>Ensembl species name or alias, e.g. human</help>
+            <expand macro="species_options" />
+            <validator type="regex" message="Enter an Ensembl organism">^\w+.*$</validator>
+        </param>
+        <!-- truevalue doubles as the column-name suffix appended to the output's column_names metadata; the command only tests this flag for truthiness -->
+        <param name="extended_bed" type="boolean" truevalue=",second_name,cds_start_status,cds_end_status,exon_frames,type,gene_name,second_gene_name,gene_type" falsevalue="" checked="true" label="Keep extra columns from ensembl BED"/>
+        <!-- truevalue is the command-line flag itself and is substituted verbatim into the command; falsevalue contributes nothing -->
+        <param name="ucsc_chrom_names" type="boolean" truevalue="--ucsc_chrom_names" falsevalue="" checked="false" label="Use the UCSC names for Chromosomes"/>
+        <param name="biotypes" type="text" value="" optional="true" label="Restrict Feature retrieval to these biotypes" >
+            <expand macro="biotypes_help" />
+        </param>
+        <param name="regions" type="text" value="" optional="true" label="Restrict Feature retrieval to comma-separated list of regions" >
+            <help>Each region is specified as: chr or chr:pos or chr:from-to</help>
+            <validator type="regex" message="Enter a comma-separated list of regions, each given as chr, chr:pos or chr:from-to">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- column_names is the 12 standard BED field names followed by ${extended_bed}: that boolean's truevalue (the extra column names) or its empty falsevalue -->
+        <data format="bed" name="transcript_bed" label="Ensembl ${species} transcripts.bed">
+            <actions>
+                <action type="metadata" name="column_names" default="chrom,chromStart,chromEnd,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts${extended_bed}"/>
+            </actions>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Protein-coding features within one human chromosome 1 window. Passes when the output contains text matching:
+             chromosome 1 (optionally written chr1), two integer columns, then a name column beginning with ENST. -->
+        <test>
+            <param name="regions" value="1:51194990-51275150"/>
+            <param name="biotypes" value="protein_coding"/>
+            <param name="species" value="human"/>
+            <output name="transcript_bed">
+                <assert_contents><has_text_matching expression="(chr)?1\t\d+\t\d+\tENST"/></assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+Retrieve Ensembl transcript features in BED format
+
+usage: retrieve_ensembl_bed.py [-h] [-s SPECIES] [-R REGIONS] [-B BIOTYPES]
+                               [-X] [-U] [-t] [-v] [-d]
+                               output
+
+positional arguments:
+  output                Output BED filepath, or for stdout: "-"
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -s SPECIES, --species SPECIES
+                        Ensembl Species to retrieve
+  -R REGIONS, --regions REGIONS
+                        Restrict Ensembl retrieval to regions e.g.:
+                        X,2:20000-25000,3:100-500+
+  -B BIOTYPES, --biotypes BIOTYPES
+                        Restrict Ensembl biotypes to retrieve
+  -X, --extended_bed    Include the extended columns returned from Ensembl
+  -U, --ucsc_chrom_names
+                        Use the UCSC names for Chromosomes
+  -t, --toplevel        Print Ensembl toplevel for species
+  -v, --verbose         Verbose
+  -d, --debug           Debug
+
+
+Ensembl REST API returns an extended BED format with these additional columns::
+
+  second_name, cds_start_status, cds_end_status, exon_frames, type, gene_name, second_gene_name, gene_type
+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/btu613</citation>
+        <citation type="doi">10.1093/nar/gku1010</citation>
+    </citations>
+</tool>