comparison meta_proteome_analyzer.xml @ 0:bea389f80d87 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/meta_proteome_analyzer commit 69cfb20d049ddeccef197865cc07eac5363ea8ea
author galaxyp
date Sat, 04 Mar 2017 07:00:01 -0500
parents
children 7fdfbf042ec6
comparison
equal deleted inserted replaced
-1:000000000000 0:bea389f80d87
1 <tool id="meta_proteome_analyzer" name="MetaProteomeAnalyzer" version="1.4.1">
2 <description>
3 functional and taxonomic characterization of proteins
4 </description>
5 <requirements>
6 <requirement type="package" version="1.4.1">mpa-portable</requirement>
7 </requirements>
8 <command detect_errors="exit_code">
9 <![CDATA[
10 #set $temp_stderr = "mpa_stderr"
11 ## NOTE(review): dataset display names are interpolated into the shell line unsanitized - confirm they cannot contain quotes
12 cwd=`pwd` &&
13 mkdir -p output_dir &&
14 ## copy mpa conf dir to working dir
15 jar_dir=`mpa-portable -get_jar_dir` &&
16 cp -R \$jar_dir/conf . &&
17
18 ## echo the search engines to run
19 #set $search_engines = str($search_engines_options.engines).split(',')
20 echo "$search_engines_options.engines" &&
21 echo "DB: ${input_database.display_name} sequences: ${input_database.metadata.sequences}" &&
22 ## link each peak list into the working dir under an .mgf name; -spectrum_files below points at this dir
23 #for $mgf in $peak_lists:
24 #set $input_name = $mgf.display_name.split('/')[-1].replace(".mgf", "") + ".mgf"
25 ln -s -f '${mgf}' '${input_name}' &&
26 #set $encoded_id = $__app__.security.encode_id($mgf.id)
27 echo "Spectrums:${mgf.display_name}(API:${encoded_id}) " &&
28 #end for
29 cp "${input_database}" input_database.fasta &&
30
31 ######################
32 ## MPA ##
33 ######################
34 (mpa-portable de.mpa.cli.CmdLineInterface -Djava.awt.headless=true -Xmx2048m
35 -spectrum_files \$cwd
36 -database input_database.fasta
37 -missed_cleav $missed_cleavages
38 -prec_tol ${precursor_options.prec_tol}${precursor_options.prec_tol_units}
39 -frag_tol ${precursor_options.frag_tol}Da
40 -xtandem #if 'X!Tandem' in $search_engines then 1 else 0#
41 -comet #if 'Comet' in $search_engines then 1 else 0#
42 -msgf #if 'MSGF' in $search_engines then 1 else 0#
43 -output_folder output_dir
44 -threads "\${GALAXY_SLOTS:-12}"
45 2> $temp_stderr);
46 ## record the search status right away (at the end of the && chain it was always 0) and replay the stderr log even on failure
47 exit_code_for_galaxy=\$?;
48 cat $temp_stderr 2>&1;
49 (exit \$exit_code_for_galaxy) &&
50 mv ./output_dir/*_metaproteins.csv metaproteins.csv &&
51 mv ./output_dir/*_metaprotein_taxa.csv metaprotein_taxa.csv &&
52 mv ./output_dir/*_peptides.csv peptides.csv &&
53 mv ./output_dir/*_proteins.csv proteins.csv &&
54 mv ./output_dir/*_psms.csv psms.csv &&
55 mv ./output_dir/*_spectrum_ids.csv spectrum_ids.csv
56 ]]>
57 </command>
58 <inputs>
59 <param format="fasta" name="input_database" type="data" label="Protein Database"
60 help="Select FASTA database from history"/>
61 <param name="peak_lists" format="mgf" type="data" multiple="true" label="Input Peak Lists (mgf)"
62 help="Select appropriate MGF dataset(s) from history" />
63 <param name="missed_cleavages" type="integer" value="2" label="Maximum Missed Cleavages"
64 help="Allow peptides to contain up to this many missed enzyme cleavage sites."/>
65 <section name="precursor_options" expanded="false" title="Precursor Options">
66 <param name="prec_tol_units" type="select" label="Precursor Ion Tolerance Units"
67 help="Select based on instrument used, as different machines provide different quality of spectra. ppm is a standard for most precursor ions">
68 <option value="ppm">Parts per million (ppm)</option>
69 <option value="Da">Daltons</option>
70 </param>
71 <param name="prec_tol" type="float" value="10" label="Precursor Ion Tolerance"
72 help="Provide error value for precursor ion, based on instrument used. 10 ppm recommended for Orbitrap instrument"/>
73 <param name="frag_tol" type="float" value="0.5" label="Fragment Tolerance (Daltons)"
74 help="Provide error value for fragment ions, based on instrument used"/>
75 </section>
76 <!-- Search Engine Selection; NOTE(review): the help text mentions Tide, which is not among the options below -->
77 <section name="search_engines_options" expanded="false" title="Search Engine Options">
78 <param name="engines" type="select" display="checkboxes" multiple="True" label="DB-Search Engines">
79 <help>Comet and Tide shouldn't both be selected since they use a similar algorithm.</help>
80 <option value="X!Tandem" selected="True">X!Tandem</option>
81 <option value="MSGF">MS-GF+</option>
82 <option value="Comet">Comet</option>
83 </param>
84 </section>
85
86 <!--
87 -generate_metaproteins Turn meta-protein generation (aka. protein grouping) on or off (1: on, 0: off, default is '1').
88 -peptide_rule The peptide rule chosen for meta-protein generation (-1: off, 0: share-one-peptide, 1: shared-peptide-subset, default is '0').
89 -cluster_rule The sequence cluster rule chosen for meta-protein generation (-1: off, 0: UniRef100, 1: UniRef90, 2: UniRef50, default is '-1').
90 -taxonomy_rule The taxonomy rule chosen for meta-protein generation (-1: off, 0: on superkingdom or lower, 1: on kingdom or lower, 2: on phylum or lower, 3: on class or lower, 4: on order or lower, 5: on family or lower, 6: on genus or lower, 7: on species or lower, 8: on subspecies, default is '-1').
91 -iterative_search Turn iterative (aka. two-step) searching on or off (1: on, 0: off, default is '0').
92 -fdr_threshold The applied FDR threshold for filtering the results (default is 0.05 == 5% FDR).
93 -->
94
95
96 </inputs>
97 <outputs> <!-- six tabular datasets, each picked up via from_work_dir from a CSV that the command section moves out of output_dir; every one sets comment_lines to 1 and supplies column_names as metadata (presumably because line 1 of each CSV is a header row - confirm) -->
98 <data format="tabular" name="output_proteins" from_work_dir="proteins.csv" label="${tool.name} on ${on_string}: proteins">
99 <actions>
100 <action name="comment_lines" type="metadata" default="1" />
101 <action name="column_names" type="metadata" default="Protein_No,Protein_Accession,Protein_Description,Protein_Taxonomy,Sequence_Coverage,Peptide_Count,NSAF,emPAI,Spectral_Count,Isoelectric_Point,Molecular_Weight,Protein_Sequence,Peptides" />
102 </actions>
103 </data>
104 <data format="tabular" name="output_peptides" from_work_dir="peptides.csv" label="${tool.name} on ${on_string}: peptides">
105 <actions>
106 <action name="comment_lines" type="metadata" default="1" />
107 <action name="column_names" type="metadata" default="Peptide_Num,Protein_Accessions,Peptide_Sequence,Protein_Count,Spectral_Count,Taxonomic_Group,Taxonomic_Rank,NCBI_Taxonomy_ID" />
108 </actions>
109 </data>
110 <data format="tabular" name="output_PSMs" from_work_dir="psms.csv" label="${tool.name} on ${on_string}: PSMs">
111 <actions>
112 <action name="comment_lines" type="metadata" default="1" />
113 <action name="column_names" type="metadata" default="PSM_Num,Protein_Accessions,Peptide_Sequence,Spectrum_Title,Charge,Search_Engine,q-value,Score" />
114 </actions>
115 </data>
116 <data format="tabular" name="output_spectrum_ids" from_work_dir="spectrum_ids.csv" label="${tool.name} on ${on_string}: spectrum_ids">
117 <actions>
118 <action name="comment_lines" type="metadata" default="1" />
119 <action name="column_names" type="metadata" default="Spectrum_Number,Spectrum_ID,Spectrum_Title,Peptides,Protein_Accessions" />
120 </actions>
121 </data>
122 <data format="tabular" name="output_metaproteins" from_work_dir="metaproteins.csv" label="${tool.name} on ${on_string}: metaproteins">
123 <actions>
124 <action name="comment_lines" type="metadata" default="1" />
125 <action name="column_names" type="metadata" default="Meta-Protein_Num,Meta-Protein_Accession,Meta-Protein_Description,Meta-Protein_Taxonomy,Meta-Protein_UniRef100,Meta-Protein_UniRef90,Meta-Protein_UniRef50,Meta-Protein_KO,Meta-Protein_EC,Peptide_Count,Spectral_Count,Proteins,Peptides" />
126 </actions>
127 </data>
128 <data format="tabular" name="output_metaprotein_taxa" from_work_dir="metaprotein_taxa.csv" label="${tool.name} on ${on_string}: metaprotein_taxa">
129 <actions>
130 <action name="comment_lines" type="metadata" default="1" />
131 <action name="column_names" type="metadata" default="Unclassified,Superkingdom,Kingdom,Phylum,Class,Order,Family,Genus,Species,Subspecies,Num_Peptides,Spectral_Count" />
132 </actions>
133 </data>
134 </outputs>
135 <tests>
136 <test> <!-- runs all three search engines on one MGF file and passes when the PSM table contains accession A2SPK1 -->
137 <param name="peak_lists" value="Test416Ebendorf.mgf" ftype="mgf"/>
138 <param name="input_database" value="searchdb.fa" ftype="fasta"/>
139 <param name="missed_cleavages" value="2"/>
140 <param name="prec_tol_units" value="ppm"/>
141 <param name="prec_tol" value="10"/>
142 <param name="frag_tol" value="0.5"/>
143 <param name="engines" value="X!Tandem,MSGF,Comet"/>
144 <output name="output_PSMs">
145 <assert_contents>
146 <has_text text="A2SPK1" />
147 </assert_contents>
148 </output>
149 </test>
150 </tests>
151 <help>
152 **What it does**
153
154 =======
155
156 MetaProteomeAnalyzer (MPA) performs identification of proteins and in-depth analysis of metaproteomics (and also proteomics) data. The MPA software currently supports the database search engines Comet, MS-GF+ and X!Tandem taking MGF spectrum files as input data. User-provided FASTA databases (preferably downloaded from UniProtKB) are formatted automatically.
157
158 https://github.com/compomics/meta-proteome-analyzer
159
160 ----
161
162 Outputs
163 =======
164
165 MPA generates 6 tabular outputs:
166
167 * psms
168 * peptides
169 * proteins
170 * spectrum_ids
171 * metaproteins
172 * metaprotein_taxa
173
174
175
176
177 </help>
178 <citations>
179 <citation type="doi">10.1021/pr501246w</citation>
180 </citations>
181 </tool>