Mercurial > repos > galaxyp > dbbuilder
view dbbuilder.xml @ 10:e9df53a75f3c draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/dbbuilder commit 1698b28b4cb85d88df910d3e53bbce29e8400359"
author | galaxyp |
---|---|
date | Wed, 25 Nov 2020 17:43:51 +0000 |
parents | c1b437242fee |
children | 8e637098a8ab |
line wrap: on
line source
<!--
  Protein Database Downloader (Galaxy tool wrapper).

  Fetches a protein FASTA database with wget from one of several sources
  (UniProtKB, cRAP, HMP, HOMD or a user-supplied URL), decompresses it when
  the selected source is an archive, and writes the result to the single
  FASTA output dataset "output_database".
-->
<tool id="dbbuilder" name="Protein Database Downloader" version="0.3.2">
    <description></description>
    <requirements>
        <requirement type="package" version="1.20.1">wget</requirement>
    </requirements>
    <!-- The job is failed on any non-zero exit code, or when "ERROR" appears on stderr. -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Error downloading database." />
        <regex match="ERROR" level="fatal" source="stderr" description="Error downloading database." />
    </stdio>
    <!-- TODO: escape quotes. -->
    <!-- Add NCBI and maxquant contaminants. -->
    <!-- http://maxquant.org/contaminants.zip -->
    <!-- ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/mRNA_Prot/human.protein.faa.gz -->
    <command>
<![CDATA[
## Step 1: derive the download URL ($url) and the payload kind ($type) from the selected source.
#if $source.from == "uniprot"
    ## The taxon, proteome-set, reviewed and isoform selections are pre-encoded query fragments.
    #set $url = 'http://www.uniprot.org/uniprot/?query=taxonomy:"' + str($source.taxon) + '"' + str($source.set) + str($source.reviewed) + '&force=yes&format=fasta' + str($source.include_isoform)
    #set $type = "direct"
#elif $source.from == "cRAP"
    ##set $url = "ftp://ftp.thegpm.org/fasta/cRAP/crap.fasta"
    #set $url = "https://raw.githubusercontent.com/pravs3683/cRAP/master/cRAP_protein_database.fasta"
    #set $type = "direct"
#elif $source.from == "HMP"
    #set $url = 'http://downloads.hmpdacc.org/data/reference_genomes/body_sites/' + str($source.site) + '.pep.fsa'
    #set $type = "direct"
#elif $source.from == "HOMD"
    #set $url = 'ftp://ftp.homd.org/human_oral_microbial_genomic_sequences/current/' + str($source.annotation)
    ## The archive kind is inferred from the file name of the selected annotation.
    #if str($source.annotation).endswith('.tar.gz'):
        #set $type = "tgz"
    #elif str($source.annotation).endswith('.zip'):
        #set $type = "zip"
    #end if
#elif $source.from == "url"
    #set $url = $source.url
    #set $type = $source.archive_type
#end if

## Step 2: download, then unpack according to $type, streaming the FASTA into the output dataset.
#if $type =="direct"
    wget -nv '$url' -O '${output_database}' --no-check-certificate
#elif $type =="zip"
    wget -nv '$url' -O tmp.zip --no-check-certificate
    && zcat -c tmp.zip > '${output_database}'
#elif $type =="gzip"
    ## Use gzcat when it is available, otherwise fall back to zcat.
    wget -nv '$url' -O tmp.gz --no-check-certificate
    && (if command -v gzcat > /dev/null; then gzcat tmp.gz; else zcat tmp.gz ; fi) > '${output_database}'
#elif $type =="bzip2"
    wget -nv '$url' -O tmp.bz2 --no-check-certificate
    && bzcat tmp.bz2 > '${output_database}'
#elif $type =="tgz"
    wget -nv '$url' -O tmp.tar.gz
    && tar zxfO tmp.tar.gz > '${output_database}'
#elif $type =="tbz"
    wget -nv '$url' -O tmp.tar.bz
    && tar jxfO tmp.tar.bz > '${output_database}'
#end if
]]>
    </command>
    <inputs>
        <!-- The selected source decides which additional parameters are shown. -->
        <conditional name="source">
            <param name="from" type="select" label="Download from" help="Select database source. cRAP acts as a database for common MS contaminants. UniProtKB is a cross species collection of functional protein databases">
                <option value="uniprot">UniProtKB</option>
                <option value="cRAP">cRAP (contaminants)</option>
                <option value="HMP">Human Microbiome Project body sites</option>
                <option value="HOMD">Human Oral Microbiome Database (HOMD)</option>
                <option value="url">Custom URL</option>
            </param>
            <when value="uniprot">
                <!-- Taxon choices are read from uniprot_taxons.loc: column 0 is the display name, column 1 the value. -->
                <param name="taxon" type="select" format="text" label="Taxonomy" help="select species for protein database">
                    <options from_file="uniprot_taxons.loc">
                        <column name="name" index="0" />
                        <column name="value" index="1" />
                    </options>
                </param>
                <!-- Option values are URL-encoded query fragments, so "%" is added to the sanitizer's valid characters. -->
                <param name="reviewed" type="select" help="UniProtKB/TrEMBL (unreviewed)is a large, automatically annotated database- may contain redundant sequences, but there is a higher chance peptides will be identified. UniProtKB/Swiss-Prot (reviewed) is a smaller, manually annotated database- less of a chance peptides will be identified but less sequence redundancy">
                    <option value="+">UniProtKB</option>
                    <option value="+reviewed%3Ayes">UniProtKB/Swiss-Prot (reviewed only)</option>
                    <option value="+reviewed%3Ano">UniProtKB/TrEMBL (unreviewed only)</option>
                    <sanitizer>
                        <valid>
                            <add value="%"/>
                        </valid>
                    </sanitizer>
                </param>
                <param name="set" type="select" label="Proteome Set">
                    <option value="+">Any</option>
                    <option value="+keyword%3a1185" selected="true">Reference Proteome Set</option>
                    <sanitizer>
                        <valid>
                            <add value="%"/>
                        </valid>
                    </sanitizer>
                </param>
                <!-- The ampersand is written as an entity because a bare "&" is not allowed in an attribute value; the parsed value is "&include=yes". -->
                <param name="include_isoform" type="boolean" truevalue="&amp;include=yes" falsevalue="" label="Include isoform data" help="several different forms of a given protein are incorporated into database" />
            </when>
            <when value="cRAP" />
            <when value="HMP">
                <!-- The option value is inserted into the download URL as the file name stem. -->
                <param name="site" type="select" label="Proteome for body site">
                    <option value="Airways">HMP airways</option>
                    <option value="Blood">HMP Blood</option>
                    <option value="Gastrointestinal_tract">HMP Gastro-intestinal tract</option>
                    <option value="Oral">HMP Oral</option>
                    <option value="Skin">HMP Skin</option>
                    <option value="Urogenital_tract">HMP urogenital Tract</option>
                </param>
            </when>
            <when value="HOMD">
                <!-- The option value is the remote file name; its extension selects the unpacking branch of the command. -->
                <param name="annotation" type="select" label="Human Oral Microbiome Proteome">
                    <option value="oral_microbiome_dynamic.aa.zip">HOMD with dynamic annotation</option>
                    <option value="oral_microbiome.aa.tar.gz">HOMD with static annotation</option>
                </param>
            </when>
            <when value="url">
                <param name="url" value="" type="text" label="URL (http, ftp) of Fasta sequences">
                    <sanitizer>
                        <valid>
                            <add value="%"/>
                            <add value="~"/>
                        </valid>
                    </sanitizer>
                </param>
                <param name="archive_type" type="select" label="Fasta source compression type">
                    <option value="direct" selected="true">fasta file (uncompressed)</option>
                    <option value="gzip">fasta.gz (gzip compressed)</option>
                    <option value="bzip2">fasta.bz2 (bzip2 compressed)</option>
                    <option value="zip">fasta.zip or fasta.Z (Zip compressed)</option>
                    <option value="tgz">fasta.tgz or fasta.tar.gz (tar archive gzip compressed)</option>
                    <option value="tbz">fasta.tbz or fasta.tar.bz (tar archive bzip2 compressed)</option>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data format="fasta" name="output_database" label="Protein Database ${source.from}" />
    </outputs>
    <tests>
        <!-- Built-in cRAP source. -->
        <test>
            <param name="from" value="cRAP" />
            <output name="output_database">
                <assert_contents>
                    <has_text text="KKA1_ECOLX" />
                </assert_contents>
            </output>
        </test>
        <!-- Custom URL source with an uncompressed FASTA. -->
        <test>
            <param name="from" value="url" />
            <param name="url" value="https://raw.githubusercontent.com/pravs3683/cRAP/master/cRAP_protein_database.fasta" />
            <param name="archive_type" value="direct" />
            <output name="output_database">
                <assert_contents>
                    <has_text text="KKA1_ECOLX" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
<![CDATA[
**Output**

Creates a FASTA file of specified protein sequences for comparison with experimental MS/MS data in search algorithm.

**External Links**

- Galaxy-P_101_ shows usage Protein Database Downloader tool in the creation of a workflow
- UniProtKB_ provides additional information about the UniProt Knowledgebase

.. _Galaxy-P_101: http://msi-galaxy-p.readthedocs.org/en/latest/sections/galaxyp_101.html
.. _UniProtKB: http://www.uniprot.org/help/uniprotkb

**Additional Protein Fasta URLs**

*HUMAN GUT METAPROTEOME:*

* 61MB gzip http://www.bork.embl.de/~arumugam/Qin_et_al_2010/frequent_microbe_proteins.fasta.gz

*MOUSE GUT MICROBIOTA:*

* See: http://gigadb.org/dataset/view/id/100114/token/mZlMYJIF04LshpgP
]]>
    </help>
    <citations>
        <citation type="doi">10.1093/nar/gkw1099</citation>
        <citation type="doi">10.1093/nar/gkv1195</citation>
        <citation type="doi">10.1093/database/baq013</citation>
        <citation type="doi">10.1038/nature11209</citation>
        <citation type="doi">10.1038/nature11234</citation>
    </citations>
</tool>