changeset 2:0d21fdb9c999

Uploaded
author iracooke
date Mon, 04 Mar 2013 20:48:04 -0500
parents 528bcbb94199
children 1f66146203f4
files README msgfplus_search.xml repository_dependencies.xml tool-data/msgfplus_mods.loc.sample tool-data/pepxml_databases.loc.sample tool_dependencies.xml
diffstat 6 files changed, 233 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/README	Mon Mar 04 20:06:45 2013 -0500
+++ b/README	Mon Mar 04 20:48:04 2013 -0500
@@ -1,6 +1,16 @@
-This package is an installer for the MSGF+ search tool.
+This package is a galaxy wrapper for the MSGF+ search tool.
+
+The underlying MSGF+ tool itself should be installed automatically; however, you must
+satisfy the requirements below before attempting to install.
 
 System Requirements. Ensure that you have the following packages installed on your system
+
+(For protk_msgfplus)
 - unzip
 - java runtime 6 or higher
 
+(For galaxy_protk)
+- The ability to download files (an internet connection): curl wget
+- gcc g++ make autoconf automake libtool pkg-config patch git openssl 
+libreadline6 libreadline6-dev git-core zlib1g zlib1g-dev libssl-dev
+libc6-dev ncurses-dev bison subversion libxml2 libxml2-dev
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/msgfplus_search.xml	Mon Mar 04 20:48:04 2013 -0500
@@ -0,0 +1,140 @@
+<tool id="proteomics_search_msgfplus_1" name="MSGF+ MSMS Search" version="1.0.0">
+
+	<!-- Package versions listed here must match the package versions declared in tool_dependencies.xml -->
+	<requirements>
+		<requirement type="package" version="1.1.9">galaxy_protk</requirement>
+		<requirement type="package" version="20130227">msgfplus</requirement>
+		<requirement type="package" version="3_0_4388">proteowizard</requirement>
+	</requirements>
+
+	<description>Run an MSGF+ Search</description>
+
+	<command>
+		## Database argument: a built-in protk database key, or a FASTA dataset taken from the history.
+		#if $database.source_select=="built_in":
+		rvm 1.9.3@protk-1.1.9 do msgfplus_search.rb --galaxy -d $database.dbkey
+		#else
+		rvm 1.9.3@protk-1.1.9 do msgfplus_search.rb --galaxy -d $database.fasta_file
+		#end if
+		## Each modification list is the selected built-in entries followed by comma-prefixed custom entries.
+		--var-mods='
+		$variable_mods
+		#for $custom_variable_mod in $custom_variable_mods:
+		,${custom_variable_mod.custom_mod}
+		#end for
+		'
+
+		--fix-mods='
+		$fixed_mods
+		#for $custom_fix_mod in $custom_fix_mods:
+		,${custom_fix_mod.custom_mod}
+		#end for
+		'
+
+		$input_file -o $output -r --enzyme=$enzyme --precursor-ion-tol-units=$precursor_tolu -v $missed_cleavages -f $fragment_ion_tol -p $precursor_ion_tol --instrument=$instrument
+
+	</command>
+
+	<inputs>
+		<!-- Search database: a built-in entry from pepxml_databases.loc or a FASTA dataset from the history -->
+		<conditional name="database">
+			<param name="source_select" type="select" label="Database source">
+				<option value="built_in">Built-In</option>
+				<option value="input_ref">Your Upload File</option>
+			</param>
+			<when value="built_in">
+				<param name="dbkey" type="select" format="text" label="Database">
+					<options from_file="pepxml_databases.loc">
+						<column name="name" index="0" />
+						<column name="value" index="2" />
+					</options>
+				</param>
+			</when>
+			<when value="input_ref">
+				<param name="fasta_file" type="data" format="fasta" label="Uploaded FASTA file" />
+			</when>
+		</conditional>
+
+		<!-- Spectra to be searched -->
+		<param name="input_file" type="data" format="mzml" multiple="false"
+			label="MSMS File" help="An mzML file with MS/MS data"/>
+
+		<!-- Variable modifications: entries chosen from msgfplus_mods.loc plus free-text custom entries -->
+		<param name="variable_mods" type="select" format="text" multiple="true" label="Variable Modifications" help="Hold the appropriate key while
+			clicking to select multiple items">
+			<options from_file="msgfplus_mods.loc">
+				<column name="name" index="0" />
+				<column name="value" index="2" />
+			</options>
+		</param>
+
+		<repeat name="custom_variable_mods" title="Custom Variable Modifications"
+			help="See https://bix-lab.ucsd.edu/pages/viewpage.action?pageId=13533355 for details on how to create these">
+			<param name="custom_mod" type="text"/>
+		</repeat>
+
+		<!-- Fixed modifications: same list file and custom-entry mechanism as the variable ones -->
+		<param name="fixed_mods" type="select" format="text" multiple="true" label="Fixed Modifications" help="Hold the appropriate key while
+			clicking to select multiple items">
+			<options from_file="msgfplus_mods.loc">
+				<column name="name" index="0" />
+				<column name="value" index="2" />
+			</options>
+		</param>
+
+		<repeat name="custom_fix_mods" title="Custom Fixed Modifications"
+			help="See https://bix-lab.ucsd.edu/pages/viewpage.action?pageId=13533355 for details on how to create these">
+			<param name="custom_mod" type="text"/>
+		</repeat>
+
+		<!-- Digestion settings -->
+		<param name="missed_cleavages" type="select" format="text" label="Missed Cleavages Allowed" help="Allow peptides to contain up to this many missed enzyme cleavage sites">
+			<option value="0">0</option>
+			<option value="1">1</option>
+			<option value="2">2</option>
+		</param>
+
+		<param name="enzyme" type="select" format="text" label="Enzyme">
+			<option value="Trypsin">Trypsin</option>
+		</param>
+
+		<!-- The option value (not the displayed name) is what the command template passes as the instrument argument -->
+		<param name="instrument" type="select" format="text" label="Instrument Type">
+			<option value="2">TOF</option>
+			<option value="0">Low-res LCQ/LTQ</option>
+			<option value="1">High-res LTQ</option>
+		</param>
+
+		<!-- Mass tolerances; the precursor tolerance is interpreted in the units selected below it -->
+		<param name="fragment_ion_tol" type="float" value="0.65" min="0" max="10000"
+			label="Fragment ion tolerance" help="Fragment Ion Tolerance in Daltons"/>
+
+		<param name="precursor_ion_tol" type="float" value="100" min="0" max="10000" label="Precursor ion tolerance" help="Precursor Ion Tolerance (Da or ppm)"/>
+		<param name="precursor_tolu" type="select" format="text" label="Precursor Ion Tolerance Units">
+			<option value="ppm">ppm</option>
+			<option value="Da">Da</option>
+		</param>
+
+	</inputs>
+
+
+	<!-- Single pepXML result, labelled with the searched database and the name of the input file -->
+	<outputs>
+		<data format="raw_pepxml" name="output" metadata_source="input_file" label="MSGF+_vs_${database.dbkey if $database.has_key('dbkey') else $database.fasta_file.display_name}.${input_file.display_name}.pepXML"/>
+	</outputs>
+
+  <help>
+
+**What it does**
+
+Runs an MS/MS database search using the MSGFPlus search engine. Output is in the form of a pepXML file containing identified peptides along with their raw search scores.
+
+----
+
+**References**
+
+Please see http://proteomics.ucsd.edu/Software/MSGFPlus.html for details of the MSGFPlus search engine and references describing its algorithm
+
+  </help>
+
+</tool>
--- a/repository_dependencies.xml	Mon Mar 04 20:06:45 2013 -0500
+++ b/repository_dependencies.xml	Mon Mar 04 20:48:04 2013 -0500
@@ -1,10 +1,10 @@
 <?xml version="1.0"?>
-<!-- <repositories description="protk-msgfplus requires the protk rubygem to install MSGF+">
-     <repository toolshed="http://10.211.55.28:9009" name="galaxy_protk" owner="iracooke" changeset_revision="2562efef05cb"/>
-</repositories>
- -->
+<repositories description="Proteomics datatypes, MSGF+ and Protk">
+<!-- Tool Shed repositories this repository declares as dependencies, each pinned to a changeset revision -->
+     <repository toolshed="http://toolshed.g2.bx.psu.edu" name="proteomics_datatypes" owner="iracooke" changeset_revision="463328a6967f"/>
 
+	<repository toolshed="http://toolshed.g2.bx.psu.edu" name="galaxy_protk" owner="iracooke" changeset_revision="51f7c347c955"/>
 
- <repositories description="Requires the protk rubygem to install MSGF+">
-     <repository toolshed="http://toolshed.g2.bx.psu.edu" name="galaxy_protk" owner="iracooke" changeset_revision="51f7c347c955"/>
-</repositories>
\ No newline at end of file
+     <repository toolshed="http://toolshed.g2.bx.psu.edu" name="protk_msgfplus" owner="iracooke" changeset_revision="528bcbb94199"/>
+
+ </repositories>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/msgfplus_mods.loc.sample	Mon Mar 04 20:48:04 2013 -0500
@@ -0,0 +1,50 @@
+#This file lists the names of inbuilt chemical modifications accepted by msgfplus
+#Each entry consists of 4 tab separated fields like this
+#<Displayed Name>	<DBKey>	<Modification String>	<DBKey>
+#
+#Modification strings should conform to the standard MSGFPlus syntax with the following exception
+#The ModType field is overridden by the msgfplus_search.rb tool. In other words any of these mods
+#May be passed to the tool as a variable or fixed mod and the tool will substitute the appropriate ModType
+#value
+#
+#Standard MSGFPlus syntax is
+#
+# To input a modification, use the following command:
+# Mass or CompositionStr, Residues, ModType, Position, Name (all the five fields are required).
+# CompositionStr (C[Num]H[Num]N[Num]O[Num]S[Num]P[Num])
+# 	- C (Carbon), H (Hydrogen), N (Nitrogen), O (Oxygen), S (Sulfur) and P (Phosphorus) are allowed.
+# 	- Atom can be omitted. The sequence of atoms must be followed. 
+# 	- Negative numbers are allowed.
+# 	- E.g. C2H2O1 (valid), H2C1O1 (invalid) 
+# Mass can be used instead of CompositionStr. It is important to specify accurate masses (integer masses are insufficient).
+# 	- E.g. 15.994915 
+# Residues: affected amino acids (must be upper-case letters)
+# 	- Must be upper-case letters or *
+# 	- Use * if this modification is applicable to any residue. 
+# 	- * should not be "anywhere" modification (e.g. "15.994915, *, opt, any, Oxidation" is not allowed.) 
+# 	- E.g. NQ, *
+# ModType: "fix" for fixed modifications, "opt" for variable modifications (case insensitive)
+# Position: position in the peptide where the modification can be attached. 
+# 	- One of the following five values should be used:
+# 	- any (anywhere), N-term (peptide N-term), C-term (peptide C-term), Prot-N-term (protein N-term), Prot-C-term (protein C-term) 
+# 	- Case insensitive
+# 	- "-" can be omitted
+# 	- E.g. any, Any, Prot-n-Term, ProtNTerm => all valid
+# Name: name of the modification (Unimod PSI-MS name)
+# 	- For proper mzIdentML output, this name should be the same as the Unimod PSI-MS name
+# 	- E.g. Phospho, Acetyl
+#C2H3N1O1,C,fix,any,Carbamidomethyl 		# Fixed Carbamidomethyl C
+# Variable Modifications (default: none)
+#O1,M,opt,any,Oxidation				# Oxidation M
+#15.994915,M,opt,any,Oxidation			# Oxidation M (mass is used instead of CompositionStr)
+#H-1N-1O1,NQ,opt,any,Deamidated			# Negative numbers are allowed.
+#C2H3NO,*,opt,N-term,Carbamidomethyl		# Variable Carbamidomethyl N-term
+#H-2O-1,E,opt,N-term,Pyro_glu			# Pyro-glu from E
+#H-3N-1,Q,opt,N-term,Pyro-glu			# Pyro-glu from Q
+#C2H2O,*,opt,Prot-N-term,Acetyl			# Acetylation Protein N-term
+#C2H2O1,K,opt,any,Acetyl			# Acetylation K
+#CH2,K,opt,any,Methyl				# Methylation K
+#HO3P,STY,opt,any,Phospho			# Phosphorylation STY
+
+Carbamidomethyl C	carbamidomethyl_c_	C2H3N1O1,C,opt,any,Carbamidomethyl	carbamidomethyl_c_
+Oxidation M	oxidation_m_	O1,M,opt,any,Oxidation	oxidation_m_
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/pepxml_databases.loc.sample	Mon Mar 04 20:48:04 2013 -0500
@@ -0,0 +1,13 @@
+#This file lists the names of protein databases installed locally in protk. 
+# These are used by omssa and x!tandem as well as the "mascot to pepxml" tool
+# In order to combine search results with Interprophet, searches must be run against an identical database
+#
+# Entries should be structured as follows
+# Display_name omssa_tandem_dbname dbkey
+#
+#
+Swissprot	spall_	spall	spall_
+Combined PlasmboDB (falciparum) and Swissprot Human	plasmodb_pfalciparum_sphuman_	plasmodb_pfalciparum_sphuman	plasmodb_pfalciparum_sphuman_
+Swissprot Human	sphuman_	sphuman	sphuman_
+Combined Swissprot/TRembl Human	sptrhuman_	sptrhuman	sptrhuman_
+Swissprot Mouse	spmouse_	spmouse	spmouse_
--- a/tool_dependencies.xml	Mon Mar 04 20:06:45 2013 -0500
+++ b/tool_dependencies.xml	Mon Mar 04 20:48:04 2013 -0500
@@ -1,23 +1,17 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="msgfplus" version="20130227">
-        <install version="1.0">
-            <actions>
-                <action type="download_by_url">https://dl.dropbox.com/u/226794/galaxy_protk.tar.gz</action>
-                <action type="shell_command">sh install_msgfplus.sh $INSTALL_DIR 1.1.9</action>
-                <action type="move_file">
-                    <source>install_msgfplus.sh</source>
-                    <destination>$INSTALL_DIR</destination>
-                </action>
-                <action type="set_environment">
-                    <environment_variable name="PROTK_MSGFPLUS_ROOT" action="set_to">$INSTALL_DIR</environment_variable>
-                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR</environment_variable>
-                </action>
-            </actions>
-        </install>
-        <readme>
-            Installs MSGF+ via the protk installer
-        </readme>
+<!-- Each package below points at a Tool Shed repository and changeset instead of defining its own install actions -->
+    <package name="galaxy_protk" version="1.1.9">
+	     <repository toolshed="http://toolshed.g2.bx.psu.edu" name="galaxy_protk" owner="iracooke" changeset_revision="51f7c347c955"/>
     </package>
 
+	<package name="proteowizard" version="3_0_4388">
+	     <repository toolshed="http://toolshed.g2.bx.psu.edu" name="protk_proteowizard" owner="iracooke" changeset_revision="6ead3520e93a"/>
+	</package>
+
+    <package name="msgfplus" version="20130227">
+	     <repository toolshed="http://toolshed.g2.bx.psu.edu" name="protk_msgfplus" owner="iracooke" changeset_revision="528bcbb94199"/>
+    </package>
+
+
 </tool_dependency>
\ No newline at end of file