Mercurial > repos > galaxyp > retrieve_ensembl_bed
diff retrieve_ensembl_bed.xml @ 1:9c4a48f5d4e7 draft default tip
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/proteogenomics/retrieve_ensembl_bed commit 6babd357845126292cb202aaea0f70ff68819525"
author | galaxyp |
---|---|
date | Mon, 07 Oct 2019 16:14:39 -0400 |
parents | da1b538b87e5 |
children |
line wrap: on
line diff
```diff
--- a/retrieve_ensembl_bed.xml Mon Jan 22 13:13:47 2018 -0500
+++ b/retrieve_ensembl_bed.xml Mon Oct 07 16:14:39 2019 -0400
@@ -20,6 +20,16 @@
  #if $regions:
  --regions '$regions'
  #end if
+ #if $interval_file:
+ #if $interval_file.ext.find('bed') > -1
+ --interval_format bed
+ #elif $interval_file.ext in ['gff','gtf','gff3']
+ --interval_format gff
+ #else
+ --interval_format interval
+ #end if
+ --interval_file '$interval_file'
+ #end if
  '$transcript_bed'
  ]]></command>
  <inputs>
@@ -40,6 +50,7 @@
  <help>Each region is specifed as: chr or chr:pos or chr:from-to</help>
  <validator type="regex" message="">^(\w+(:\d+(-\d+)?)?(,\w+(:\d+(-\d+)?)?)*)?$</validator>
  </param>
+ <param name="interval_file" type="data" format="bed,gff,interval" label="Retrieve the intervals from this file" optional="true"/>
  </inputs>
  <outputs>
  <data name="transcript_bed" format="bed" label="Ensembl ${species} transcripts.bed">
@@ -60,6 +71,16 @@
  </assert_contents>
  </output>
  </test>
+ <test>
+ <param name="species" value="mouse"/>
+ <param name="biotypes" value="protein_coding"/>
+ <param name="interval_file" ftype="bed" value="test.bed"/>
+ <output name="transcript_bed">
+ <assert_contents>
+ <has_text_matching expression="(chr)?1\t\d+\t\d+\tENSMUST" />
+ </assert_contents>
+ </output>
+ </test>
  </tests>
  <help><![CDATA[
  Retrieve Ensembl cDNAs in BED format
@@ -69,25 +90,34 @@
  output
  positional arguments:
- output Output BED filepath, or for stdout: "-"
+ output Output BED filepath, or for stdout: "-"
  optional arguments:
- -h, --help show this help message and exit
+ -h, --help show this help message and exit
  -s SPECIES, --species SPECIES
- Ensembl Species to retrieve
+ Ensembl Species to retrieve
  -R REGIONS, --regions REGIONS
- Restrict Ensembl retrieval to regions e.g.:
- X,2:20000-25000,3:100-500+
+ Restrict Ensembl retrieval to regions e.g.
+ X,2:20000-25000,3:100-500+
+ -i INTERVAL_FILE, --interval_file INTERVAL_FILE
+ Regions from a bed, gff, or interval file
+
+ -f {bed,gff,interval}, --interval_format {bed,gff,interval}
+ Interval format has TAB-separated
+ columns: Seq, Start, End, Strand
+
  -B BIOTYPES, --biotypes BIOTYPES
- Restrict Ensembl biotypes to retrieve
- -X, --extended_bed Include the extended columns returned from Ensembl
+ Restrict Ensembl biotypes to retrieve
+ -X, --extended_bed Include the extended columns returned from Ensembl
  -U, --ucsc_chrom_names
- Use the UCSC names for Chromosomes
- -t, --toplevel Print Ensembl toplevel for species
- -v, --verbose Verbose
- -d, --debug Debug
+ Use the UCSC names for Chromosomes
+ -t, --toplevel Print Ensembl toplevel for species
+ -v, --verbose Verbose
+ -d, --debug Debug
+**Output**
+
  Ensembl REST API returns an extended BED format with these additional columns::
  second_name, cds_start_status, cds_end_status, exon_frames, type, gene_name, second_gene_name, gene_type
```