Mercurial > repos > galaxyp > uniprotxml_downloader
view uniprotxml_downloader.xml @ 7:4ddc8da62671 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
author | galaxyp |
---|---|
date | Wed, 11 Dec 2024 13:34:54 +0000 |
parents | a371252a2cf6 |
children |
line wrap: on
line source
<!--
    Galaxy tool wrapper around uniprotxml_downloader.py (expected in the tool directory).
    The user picks how the search IDs are supplied (a common organism from a .loc file,
    a comma separated list typed in by hand, or a column of a history dataset) and the
    output format (UniProt XML, fasta or TSV). The result is written to the single
    output dataset "proteome".
-->
<tool id="uniprotxml_downloader" name="UniProt" version="2.5.0" profile="23.1">
    <description>download proteome as XML or fasta</description>
    <macros>
        <!-- Shared selector for the UniProt query field; expanded in the "enter_ids" and "history" branches. -->
        <xml name="query_field">
            <param name="field" type="select" label="Field">
                <option value="taxonomy_name">Taxonomy Name</option>
                <option value="taxonomy_id">Taxonomy ID</option>
                <option value="accession">Accession</option>
            </param>
        </xml>
    </macros>
    <requirements>
        <requirement type="package" version="2.25.1">requests</requirement>
    </requirements>
    <stdio>
        <exit_code range="1:" level="fatal" description="Error downloading proteome." />
    </stdio>
    <!--
        Command template (Cheetah). Every substituted value is single-quoted so it reaches
        the script as exactly one shell word. For the "history" branch, 1 is subtracted
        from the selected column number before it is passed on. For TSV output the
        selected column names are joined with commas into one argument.
    -->
    <command>
<![CDATA[
python '$__tool_directory__/uniprotxml_downloader.py'
#if $input_method.input_choice == 'common':
    --search-id '$input_method.organism'
    --field taxonomy_id
    #if $input_method.reviewed:
        --reviewed='$input_method.reviewed'
    #end if
#elif $input_method.input_choice == 'enter_ids':
    --field '$input_method.field'
    #for $id in $input_method.ids.split(','):
        --search-id '$id'
    #end for
#elif $input_method.input_choice == 'history':
    --field '$input_method.field'
    --input='${input_method.id_file}'
    --column=#echo int(str($input_method.column)) - 1#
#end if
--format '$format_cond.format'
#if $format_cond.format == "tsv":
    #set $output_columns = ','.join($format_cond.columns)
    --output_columns '$output_columns'
#end if
--output '${proteome}'
]]>
    </command>
    <inputs>
        <!-- How the search IDs are provided. -->
        <conditional name="input_method">
            <param name="input_choice" type="select" label="Select">
                <option value="common">A Common Organism</option>
                <option value="enter_ids">A manually entered list of accessions or taxonomy IDs/names</option>
                <option value="history">A history dataset with a column containing accessions or taxonomy IDs/names</option>
            </param>
            <when value="common">
                <!-- Organism list comes from uniprot_taxons.loc: column 0 is the display name, column 1 the value. -->
                <param name="organism" type="select" label="Common Organisms" help="select species for protein database">
                    <options from_file="uniprot_taxons.loc">
                        <column name="name" index="0" />
                        <column name="value" index="1" />
                    </options>
                </param>
                <param name="reviewed" type="select" label="filter by reviewed status" optional="true">
                    <help><![CDATA[
UniProtKB/TrEMBL (unreviewed) is a large, automatically annotated database that may contain redundant sequences, but there is a higher chance peptides will be identified. UniProtKB/Swiss-Prot (reviewed) is a smaller, manually annotated database with less of a chance peptides will be identified but less sequence redundancy
                    ]]></help>
                    <option value="yes">UniProtKB/Swiss-Prot (reviewed only)</option>
                    <option value="no">UniProtKB/TrEMBL (unreviewed only)</option>
                </param>
            </when>
            <when value="enter_ids">
                <!-- The validator only admits word characters, single spaces inside a name and commas between entries. -->
                <param name="ids" type="text" label="Search ID values" help="Enter one or more IDs (separated by commas) from http://www.uniprot.org/proteomes/">
                    <validator type="regex" message="OrganismID[,OrganismID]">^\w+( \w+)*(,\w+( \w+)*)*$</validator>
                </param>
                <expand macro="query_field"/>
            </when>
            <when value="history">
                <param name="id_file" type="data" format="tabular,txt" label="Dataset (tab separated) with ID column"/>
                <param name="column" type="data_column" data_ref="id_file" label="Column with ID"/>
                <expand macro="query_field"/>
            </when>
        </conditional>
        <!-- Output format; only the TSV branch has extra parameters. -->
        <conditional name="format_cond">
            <param name="format" type="select" label="uniprot output format">
                <option value="fasta">fasta</option>
                <option value="tsv">TSV</option>
                <option value="xml">xml</option>
            </param>
            <when value="fasta"/>
            <when value="xml"/>
            <when value="tsv">
                <!--
                    Column choices are fetched from the UniProt result-fields endpoint and
                    flattened to [label, value, selected] triples, labelled
                    "group name - field label". Seven fields are preselected. The expression
                    sticks to ES5.1 features (indexOf rather than Array.prototype.includes)
                    to match its declared type.
                -->
                <param name="columns" type="select" multiple="true" label="Output columns">
                    <options from_url="https://rest.uniprot.org/configure/uniprotkb/result-fields">
                        <postprocess_expression type="ecma5.1"><![CDATA[${
                            var defaults = ["accession", "id", "reviewed", "protein_name", "gene_names", "organism_name", "length"];
                            var options = [];
                            inputs.forEach(function(group) {
                                var groupName = group.groupName;
                                group.fields.forEach(function(field) {
                                    var selected = defaults.indexOf(field.name) !== -1;
                                    options.push([groupName + " - " + field.label, field.name, selected]);
                                });
                            });
                            return options;
                        }]]></postprocess_expression>
                    </options>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Datatype defaults to uniprotxml and is switched when fasta or tsv is selected. -->
        <data format="uniprotxml" name="proteome">
            <change_format>
                <when input="format_cond.format" value="fasta" format="fasta" />
                <when input="format_cond.format" value="tsv" format="tsv" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- xml output, default field with a numeric ID: expects zero entries -->
        <test>
            <conditional name="input_method">
                <param name="input_choice" value="enter_ids"/>
                <param name="ids" value="1566990"/>
            </conditional>
            <conditional name="format_cond">
                <param name="format" value="xml"/>
            </conditional>
            <output name="proteome" ftype="uniprotxml">
                <assert_contents>
                    <has_text text="&lt;/uniprot&gt;" />
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text_matching expression="UniProt-Release:\d{4}_\d{2}"/>
                <has_line line="Entries:0"/> <!-- searching by name using an ID -->
            </assert_stdout>
        </test>
        <!-- entered IDs, by taxonomy ID -->
        <test>
            <conditional name="input_method">
                <param name="input_choice" value="enter_ids"/>
                <param name="ids" value="765963,512562"/>
                <param name="field" value="taxonomy_id"/>
            </conditional>
            <conditional name="format_cond">
                <param name="format" value="fasta"/>
            </conditional>
            <output name="proteome" ftype="fasta">
                <assert_contents>
                    <has_text text="Shi470" />
                    <has_text text="PeCan4" />
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text_matching expression="UniProt-Release:\d{4}_\d{2}"/>
                <has_text_matching expression="Entries:\d+"/>
                <has_line line="Entries:0" negate="true"/>
            </assert_stdout>
        </test>
        <!-- entered IDs, by taxonomy name -->
        <test>
            <conditional name="input_method">
                <param name="input_choice" value="enter_ids"/>
                <param name="ids" value="Shi470,PeCan4"/>
                <param name="field" value="taxonomy_name"/>
            </conditional>
            <conditional name="format_cond">
                <param name="format" value="fasta"/>
            </conditional>
            <output name="proteome" ftype="fasta">
                <assert_contents>
                    <has_text text="Shi470" />
                    <has_text text="PeCan4" />
                </assert_contents>
            </output>
        </test>
        <!-- entered IDs, by accession -->
        <test>
            <conditional name="input_method">
                <param name="input_choice" value="enter_ids"/>
                <param name="ids" value="E1Q2I0,E1Q3C4"/>
                <param name="field" value="accession"/>
            </conditional>
            <conditional name="format_cond">
                <param name="format" value="fasta"/>
            </conditional>
            <output name="proteome" ftype="fasta">
                <assert_contents>
                    <has_text text="E1Q2I0" />
                    <has_text text="E1Q3C4" />
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text_matching expression="UniProt-Release:\d{4}_\d{2}"/>
                <has_text_matching expression="Entries:\d+"/>
                <has_line line="Entries:0" negate="true"/>
            </assert_stdout>
        </test>
        <!-- history dataset, by taxonomy name -->
        <test>
            <conditional name="input_method">
                <param name="input_choice" value="history"/>
                <param name="id_file" value="Helicobacter_strains.tsv" ftype="tabular"/>
                <param name="column" value="1"/>
                <param name="field" value="taxonomy_name"/>
            </conditional>
            <conditional name="format_cond">
                <param name="format" value="fasta"/>
            </conditional>
            <output name="proteome" ftype="fasta">
                <assert_contents>
                    <has_text text="Shi470" />
                    <has_text text="PeCan4" />
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text_matching expression="UniProt-Release:\d{4}_\d{2}"/>
                <has_text_matching expression="Entries:\d+"/>
                <has_line line="Entries:0" negate="true"/>
            </assert_stdout>
        </test>
        <!-- history dataset, by taxonomy ID (second column) -->
        <test>
            <conditional name="input_method">
                <param name="input_choice" value="history"/>
                <param name="id_file" value="Helicobacter_strains_ids.tsv" ftype="tabular"/>
                <param name="column" value="2"/>
                <param name="field" value="taxonomy_id"/>
            </conditional>
            <conditional name="format_cond">
                <param name="format" value="fasta"/>
            </conditional>
            <output name="proteome" ftype="fasta">
                <assert_contents>
                    <has_text text="Shi470" />
                    <has_text text="PeCan4" />
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text_matching expression="UniProt-Release:\d{4}_\d{2}"/>
                <has_text_matching expression="Entries:\d+"/>
                <has_line line="Entries:0" negate="true"/>
            </assert_stdout>
        </test>
        <!-- history dataset, by accession -->
        <test>
            <conditional name="input_method">
                <param name="input_choice" value="history"/>
                <param name="id_file" value="Helicobacter_protein_accessions.tsv" ftype="tabular"/>
                <param name="column" value="1"/>
                <param name="field" value="accession"/>
            </conditional>
            <conditional name="format_cond">
                <param name="format" value="fasta"/>
            </conditional>
            <output name="proteome" ftype="fasta">
                <assert_contents>
                    <has_text text="E1Q2I0" />
                    <has_text text="E1Q3C4" />
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text_matching expression="UniProt-Release:\d{4}_\d{2}"/>
                <has_text_matching expression="Entries:\d+"/>
                <has_line line="Entries:0" negate="true"/>
            </assert_stdout>
        </test>
        <!-- tsv output -->
        <test>
            <conditional name="input_method">
                <param name="input_choice" value="enter_ids"/>
                <param name="ids" value="765963,512562"/>
                <param name="field" value="taxonomy_id"/>
            </conditional>
            <conditional name="format_cond">
                <param name="format" value="tsv"/>
            </conditional>
            <output name="proteome" ftype="tsv">
                <assert_contents>
                    <has_n_columns n="7" />
                    <has_text text="Shi470" />
                    <has_text text="PeCan4" />
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text_matching expression="UniProt-Release:\d{4}_\d{2}"/>
                <has_text_matching expression="Entries:\d+"/>
                <has_line line="Entries:0" negate="true"/>
            </assert_stdout>
        </test>
        <!-- tsv output, non-default columns -->
        <test>
            <conditional name="input_method">
                <param name="input_choice" value="enter_ids"/>
                <param name="ids" value="765963,512562"/>
                <param name="field" value="taxonomy_id"/>
            </conditional>
            <conditional name="format_cond">
                <param name="format" value="tsv"/>
                <param name="columns" value="accession,sequence"/>
            </conditional>
            <output name="proteome" ftype="tsv">
                <assert_contents>
                    <has_n_columns n="2" />
                    <has_text text="Shi470" negate="true"/>
                    <has_text text="B2US14" />
                </assert_contents>
            </output>
            <assert_stdout>
                <has_text_matching expression="UniProt-Release:\d{4}_\d{2}"/>
                <has_text_matching expression="Entries:\d+"/>
                <has_line line="Entries:0" negate="true"/>
            </assert_stdout>
        </test>
    </tests>
    <help>
<![CDATA[
**UniProt Downloader**

Downloads either a UniProtXML file or a fasta file from UniProtKB

The Morpheus proteomics search algorithm can use the UniProtXML format as a search database.

Available proteomes: http://www.uniprot.org/proteomes/

Available taxon names: http://www.uniprot.org/taxonomy/

Example taxon: http://www.uniprot.org/taxonomy/512562

Example protein: https://www.uniprot.org/uniprotkb/E1Q2I0/entry

Description of query fields: https://www.uniprot.org/help/query-fields

IDs can be entered as text or read from a column in a tabular dataset from your history.

Example IDs and names related to the Bacteria Helicobacter pylori (strain Shi470) ::

    - 512562
    - Shi470
    - Helicobacter pylori
    - Helicobacter
    - Helicobacteraceae

Example protein accession numbers from Helicobacter pylori:

    - E1Q2I0
    - E1Q3C4

UniProtKB help: http://www.uniprot.org/help/uniprotkb
]]>
    </help>
    <citations>
        <citation type="doi">10.1093/nar/gku989</citation>
    </citations>
</tool>