Mercurial > repos > galaxyp > retrieve_ensembl_bed
comparison retrieve_ensembl_bed.xml @ 0:da1b538b87e5 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 88cf1e923a8c9e5bc6953ad412d15a7c70f054d1
author | galaxyp |
---|---|
date | Mon, 22 Jan 2018 13:13:47 -0500 |
parents | |
children | 9c4a48f5d4e7 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:da1b538b87e5 |
---|---|
1 <tool id="retrieve_ensembl_bed" name="Retrieve Ensembl features in BED format" version="0.1.0"> | |
2 <description>using Ensembl REST API</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <requirements> | |
7 <expand macro="ensembl_requirements" /> | |
8 <expand macro="bedutil_requirements" /> | |
9 </requirements> | |
10 <command detect_errors="exit_code"><![CDATA[ | |
11 python '$__tool_directory__/retrieve_ensembl_bed.py' | |
12 --species '$species' | |
13 #if $extended_bed: | |
14 --extended_bed | |
15 #end if | |
16 $ucsc_chrom_names | |
17 #if $biotypes: | |
18 --biotypes '$biotypes' | |
19 #end if | |
20 #if $regions: | |
21 --regions '$regions' | |
22 #end if | |
23 '$transcript_bed' | |
24 ]]></command> | |
25 <inputs> | |
26 <param name="species" type="text" value="" label="Ensembl species" > | |
27 <help>Enter an Ensembl species name or alias, e.g. homo_sapiens or human | |
28 </help> | |
29 <expand macro="species_options" /> | |
30 <validator type="regex" message="Enter an Ensembl organism">^\w+.*$</validator> | |
31 </param> | |
32 <param name="extended_bed" type="boolean" truevalue=",second_name,cds_start_status,cds_end_status,exon_frames,type,gene_name,second_gene_name,gene_type" falsevalue="" checked="true" | |
33 label="Keep extra columns from ensembl BED"/> | |
34 <param name="ucsc_chrom_names" type="boolean" truevalue="--ucsc_chrom_names" falsevalue="" checked="false" | |
35 label="Use the UCSC names for Chromosomes"/> | |
36 <param name="biotypes" type="text" value="" optional="true" label="Restrict Feature retrieval to these biotypes" > | |
37 <expand macro="biotypes_help" /> | |
38 </param> | |
39 <param name="regions" type="text" value="" optional="true" label="Restrict Feature retrieval to comma-separated list of regions" > | |
40 <help>Each region is specified as: chr or chr:pos or chr:from-to</help> | |
41 <validator type="regex" message="Enter a comma-separated list of regions, each given as chr, chr:pos or chr:from-to">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator> | |
42 </param> | |
43 </inputs> | |
44 <outputs> | |
45 <data name="transcript_bed" format="bed" label="Ensembl ${species} transcripts.bed"> | |
46 <actions> | |
47 <action name="column_names" type="metadata" | |
48 default="chrom,chromStart,chromEnd,name,score,strand,thickStart,thickEnd,itemRgb,blockCount,blockSizes,blockStarts${extended_bed}"/> | |
49 </actions> | |
50 </data> | |
51 </outputs> | |
52 <tests> | |
53 <test> | |
54 <param name="species" value="human"/> | |
55 <param name="biotypes" value="protein_coding"/> | |
56 <param name="regions" value="1:51194990-51275150"/> | |
57 <output name="transcript_bed"> | |
58 <assert_contents> | |
59 <has_text_matching expression="(chr)?1\t\d+\t\d+\tENST" /> | |
60 </assert_contents> | |
61 </output> | |
62 </test> | |
63 </tests> | |
64 <help><![CDATA[ | |
65 Retrieve Ensembl cDNAs in BED format | |
66 | |
67 usage: retrieve_ensembl_bed.py [-h] [-s SPECIES] [-R REGIONS] [-B BIOTYPES] | |
68 [-X] [-U] [-t] [-v] [-d] | |
69 output | |
70 | |
71 positional arguments: | |
72 output Output BED filepath, or for stdout: "-" | |
73 | |
74 optional arguments: | |
75 -h, --help show this help message and exit | |
76 -s SPECIES, --species SPECIES | |
77 Ensembl Species to retrieve | |
78 -R REGIONS, --regions REGIONS | |
79 Restrict Ensembl retrieval to regions e.g.: | |
80 X,2:20000-25000,3:100-500+ | |
81 -B BIOTYPES, --biotypes BIOTYPES | |
82 Restrict Ensembl biotypes to retrieve | |
83 -X, --extended_bed Include the extended columns returned from Ensembl | |
84 -U, --ucsc_chrom_names | |
85 Use the UCSC names for Chromosomes | |
86 -t, --toplevel Print Ensembl toplevel for species | |
87 -v, --verbose Verbose | |
88 -d, --debug Debug | |
89 | |
90 | |
91 Ensembl REST API returns an extended BED format with these additional columns:: | |
92 | |
93 second_name, cds_start_status, cds_end_status, exon_frames, type, gene_name, second_gene_name, gene_type | |
94 | |
95 ]]></help> | |
96 <citations> | |
97 <citation type="doi">10.1093/bioinformatics/btu613</citation> | |
98 <citation type="doi">10.1093/nar/gku1010</citation> | |
99 </citations> | |
100 </tool> |