changeset 7:75184e45bf84 draft

Uploaded
author iracooke
date Fri, 13 Jun 2014 18:26:31 -0400
parents f36b84d1ac09
children f567d1818b84
files README mascot.xml mascot_to_pepxml.xml repository_dependencies.xml tool-data/mascot_databases.loc.sample tool-data/mascot_mods.loc.sample tool-data/pepxml_databases.loc.sample
diffstat 7 files changed, 403 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README	Fri Jun 13 18:26:31 2014 -0400
@@ -0,0 +1,9 @@
+This includes tools for running Mascot MS/MS searches
+
+Running this tool requires a working installation of Mascot (commercial software).
+Mascot is available from http://www.matrixscience.com/
+
+Requirements:
+This package uses protk which must be installed separately. 
+
+For instructions please see: https://github.com/iracooke/protk/#galaxy-integration
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mascot.xml	Fri Jun 13 18:26:31 2014 -0400
@@ -0,0 +1,205 @@
+<tool id="proteomics_search_mascot_1" name="Mascot MSMS Search" version="1.0.2">
+
+	<!-- Runtime dependency: protk 1.3 (installation notes are in the README). -->
+	<requirements>
+		<requirement type="package" version="1.3">protk</requirement>
+	</requirements>
+	<description>Mascot MS/MS Search</description>
+
+
+	<command>mascot_search.rb 
+		## Cheetah template that assembles the mascot_search.rb command line from the tool inputs.
+		#if $database.source_select=="built_in":
+		-d $database.dbkey
+		#else 
+		-d $database.custom_db
+		#end if
+		## Mass tolerances. NOTE(review): -p is taken to be the protk precursor tolerance option; confirm with mascot_search.rb --help
+		-f $fragment_ion_tol 
+		-p $precursor_ion_tol
+		-S $server $input_file 
+
+		-o $output 
+
+		-r
+		
+		## Variable Mods
+
+		--var-mods='
+		$variable_mods
+		'
+		## Fixed Mods
+		--fix-mods='
+		$fixed_mods
+		'		
+		## Quoted because several charge choices contain spaces (e.g. "2+ and 3+") and must arrive as one argument
+		--allowed-charges='$allowed_charges'
+		--enzyme=$enzyme 
+
+		--instrument=$instrument 
+
+		--precursor-ion-tol-units=$precursor_tolu 
+
+		--email=$email 
+
+		-v $missed_cleavages
+		## Credentials are only added when the user enables Mascot security
+		#if $security.security_use
+		--use-security
+		--username $security.username
+		--password $security.password
+		#end if
+		## The proxy option is only added when a proxy URL was entered
+		#if $proxy
+		--proxy $proxy
+		#end if
+
+	</command>
+
+	<inputs>
+	
+    <param name="input_file" type="data" format="mgf" multiple="false" label="MSMS File" help="A Mascot Generic Format file containing MSMS Spectra"/>
+
+	<!-- Search database: either picked from mascot_databases.loc or typed in by name.
+	     The command template hands the chosen value to mascot_search.rb with -d. -->
+	<conditional name="database">
+		<param name="source_select" type="select" label="Database Type">
+			<option value="built_in">Built-In</option>
+			<option value="custom_defined">Custom</option>
+		</param>
+		<when value="built_in">
+			<!-- Display text is read from column 0 of the .loc file, the submitted value from column 2. -->
+			<param name="dbkey" type="select" format="text" label="Database">
+				<options from_file="mascot_databases.loc">
+					<column name="name" index="0" />
+					<column name="value" index="2" />
+				</options>
+			</param>
+		</when>
+		<when value="custom_defined">
+			<param name="custom_db" type="text" size="80" value="SwissProt" label="Database Name" help="Exact name of a database defined on the Mascot server (case-sensitive)"/>
+		</when>
+	</conditional>
+
+	<!-- Variable and fixed modification pickers share mascot_mods.loc (display text: column 0, value: column 2). -->
+	<param name="variable_mods" type="select" format="text" multiple="true" label="Variable Modifications" help="Multiple values allowed">
+		<options from_file="mascot_mods.loc">
+			<column name="name" index="0" />
+			<column name="value" index="2" />
+		</options>
+	</param>
+
+	<param name="fixed_mods" type="select" format="text" multiple="true" label="Fixed Modifications" help="Multiple values allowed">
+		<options from_file="mascot_mods.loc">
+			<column name="name" index="0" />
+			<column name="value" index="2" />
+		</options>
+	</param>
+	
+	<!-- Forwarded to mascot_search.rb through the -v option of the command template. -->
+	<param name="missed_cleavages" type="select" format="text" label="Missed Cleavages Allowed">
+		<option value="0">0</option>
+		<option value="1">1</option>
+		<option value="2">2</option>
+	</param>
+	
+	<!-- Cleavage enzyme choices; the selected value is passed verbatim as the enzyme option. -->
+	<param name="enzyme" type="select" format="text" label="Enzyme">
+		<option value="Trypsin">Trypsin</option>
+		<option value="Trypsin/P">Trypsin/P</option>
+		<option value="Arg-C">Arg-C</option>
+		<option value="Asp-N">Asp-N</option>
+		<option value="Asp-N_ambic">Asp-N_ambic</option>
+		<option value="Chymotrypsin">Chymotrypsin</option>
+		<option value="CNBr">CNBr</option>
+		<option value="CNBr+Trypsin">CNBr+Trypsin</option>
+		<option value="Formic_acid">Formic_acid</option>
+		<option value="Lys-C">Lys-C</option>
+		<option value="Lys-C/P">Lys-C/P</option>
+		<option value="LysC+AspN">LysC+AspN</option>
+		<option value="Lys-N">Lys-N</option>
+		<option value="PepsinA">PepsinA</option>
+		<option value="semiTrypsin">semiTrypsin</option>
+		<option value="TrypChymo">TrypChymo</option>
+		<option value="TrypsinMSIPI">TrypsinMSIPI</option>
+		<option value="TrypsinMSIPI/P">TrypsinMSIPI/P</option>
+		<option value="V8-DE">V8-DE</option>
+		<option value="V8-E">V8-E</option>
+		<option value="none">none</option>
+	</param>
+	
+	<!-- Precursor charge states offered to the user. Several values contain spaces and commas.
+	     NOTE(review): the value "1+, 2+ and 3+" has a space after the comma while its display text
+	     and the sibling options do not; confirm which spelling the Mascot server expects. -->
+	<param name="allowed_charges" type="select" format="text" label="Peptide Charge">
+		<option value="8-">8-</option>
+		<option value="7-">7-</option>
+		<option value="6-">6-</option>
+		<option value="5-">5-</option>
+		<option value="4-">4-</option>
+		<option value="3-">3-</option>
+		<option value="2-,3- and 4-">2-,3- and 4-</option>
+		<option value="2- and 3-">2- and 3-</option>
+		<option value="2-">2-</option>
+		<option value="1-,2- and 3-">1-,2- and 3-</option>
+		<option value="1-">1-</option>
+		<option value="Mr">Mr</option>
+		<option value="1+">1+</option>
+		<option value="1+, 2+ and 3+">1+,2+ and 3+</option>
+		<option value="2+">2+</option>
+		<option value="2+ and 3+">2+ and 3+</option>
+		<option value="2+,3+ and 4+">2+,3+ and 4+</option>
+		<option value="3+">3+</option>
+		<option value="4+">4+</option>
+		<option value="5+">5+</option>
+		<option value="6+">6+</option>
+		<option value="7+">7+</option>
+		<option value="8+">8+</option>
+	</param>
+	
+	<!-- Instrument type choices; the selected value is passed verbatim as the instrument option. -->
+	<param name="instrument" type="select" format="text" label="Instrument">
+		<option value="ESI-QUAD-TOF">ESI-QUAD-TOF</option>
+		<option value="MALDI-TOF-PSD">MALDI-TOF-PSD</option>
+		<option value="ESI-TRAP">ESI-TRAP</option>
+		<option value="ESI-QUAD">ESI-QUAD</option>
+		<option value="ESI-FTICR">ESI-FTICR</option>
+		<option value="MALDI-TOF-TOF">MALDI-TOF-TOF</option>
+		<option value="ESI-4SECTOR">ESI-4SECTOR</option>
+		<option value="FTMS-ECD">FTMS-ECD</option>
+		<option value="ETD-TRAP">ETD-TRAP</option>
+		<option value="MALDI-QUAD-TOF">MALDI-QUAD-TOF</option>
+		<option value="MALDI-QIT-TOF">MALDI-QIT-TOF</option>
+		<option value="MALDI-ISD">MALDI-ISD</option>
+		<option value="CID+ETD">CID+ETD</option>
+	</param>
+	
+	<!-- Mass tolerances: fragment tolerance is always in Daltons; the precursor tolerance is
+	     interpreted in the units chosen in precursor_tolu. -->
+	<param name="fragment_ion_tol" type="float" value="0.65" min="0" max="10000" label="Fragment Ion Tolerance" help="Fragment ion tolerance in Daltons"/>
+
+	<param name="precursor_ion_tol" type="float" value="100" min="0" max="10000" label="Precursor Ion Tolerance (Da or ppm)" help="Enter a value in Daltons or ppm depending on the units chosen below"/>
+	<param name="precursor_tolu" type="select" format="text" label="Precursor Ion Tolerance Units">
+		<option value="ppm">ppm</option>
+		<option value="Da">Da</option>
+	</param>
+
+	<!-- Connection settings: Mascot cgi URL (passed with -S) and an optional proxy URL
+	     (only added to the command line when non-empty). -->
+	<param name="server" type="text" label="URL to the cgi directory on the Mascot Server " size="60" value="http://www.exampleserver.com/mascot/cgi/"/>
+	<param name="proxy" type="text" label="Proxy Server URL including proxy port" size="60" value="" help="eg http://proxy.latrobe.edu.au:8080"/>
+
+	<!-- Optional Mascot login. Username and password are only collected, and only sent to the
+	     script, when the checkbox is ticked. The password is an ordinary text field. -->
+	<conditional name="security">
+		<param name="security_use" type="boolean" label="Use Mascot Security?" help="Select this if you need to enter a username and password" truevalue="true" falsevalue="false" />
+		<when value="false" />
+		<when value="true">
+			<param name="username" type="text" label="Username" size="60" value="" help="Username on Mascot Server"/>
+			<param name="password" type="text" label="Password" size="60" value="" help="Mascot Password. Password is encrypted when over the internet but is stored in clear-text on the Galaxy server"/>
+		</when>
+	</conditional>
+	<!-- Email address forwarded to mascot_search.rb via the email option; empty by default. -->
+	<param name="email" type="text" label="Email " size="60" value="" help=""/>
+	
+	
+
+  </inputs>
+  <!-- Single Mascot result dataset. Its label combines the searched database (dbkey when the
+       built-in branch is active, otherwise custom_db) with the display name of the input file. -->
+  <outputs>
+    <data format="mascotdat" name="output" metadata_source="input_file" label="mascot_vs_${database.dbkey if $database.has_key('dbkey') else $database.custom_db}.${input_file.display_name}.mascotdat"/>
+  </outputs>
+
+  <help>
+	Run a Mascot MS/MS Ion Search
+  </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mascot_to_pepxml.xml	Fri Jun 13 18:26:31 2014 -0400
@@ -0,0 +1,82 @@
+<tool id="mascot_to_pepxml_1" name="Mascot to pepXML" version="1.0.2">
+	<!-- Runtime dependencies: protk 1.3 and trans_proteomic_pipeline 4.6.3. -->
+	<requirements>
+		<requirement type="package" version="1.3">protk</requirement>
+		<requirement type="package" version="4.6.3">trans_proteomic_pipeline</requirement>
+	</requirements>
+
+	<description>Converts a mascot results file to pepXML</description>
+
+<command>mascot_to_pepxml.rb
+	## Positional argument is the Mascot results file; -o names the converted output
+	$input_file
+	-o $output
+
+	## -d receives either the key chosen from pepxml_databases.loc or the uploaded FASTA dataset
+	#if $database.source_select=="built_in":
+	-d $database.dbkey
+	#else
+	-d $database.fasta_file
+	#end if
+
+	## The enzyme option is emitted only when "Specify Enzyme" is ticked
+	#if $explicit_enzyme.explicit_enzyme_use
+	--enzyme $explicit_enzyme.enzyme
+	#end if
+
+	## Expands to the shortid flag when ticked, otherwise to an empty string
+	$shortid
+
+</command>
+<inputs>
+
+
+	<param name="input_file" type="data" format="mascotdat" multiple="false" label="Input File" help="Mascot results file"/>
+
+	<!-- Protein database for the conversion: either an entry from pepxml_databases.loc or a
+	     FASTA dataset from the history. The command template passes the choice with -d. -->
+	<conditional name="database">
+		<param name="source_select" type="select" label="Database source" help="A local copy of the database used in the Mascot search">
+			<option value="built_in">Built-In</option>
+			<option value="input_ref">Uploaded fasta file</option>
+		</param>
+		<when value="built_in">
+			<!-- Display text is read from column 0 of the .loc file, the submitted value from column 2. -->
+			<param name="dbkey" type="select" format="text" label="Database">
+				<options from_file="pepxml_databases.loc">
+					<column name="name" index="0" />
+					<column name="value" index="2" />
+				</options>
+			</param>
+		</when>
+		<when value="input_ref">
+			<param name="fasta_file" type="data" format="fasta" label="Uploaded FASTA file" />
+		</when>
+	</conditional>
+
+	<!-- Checkbox that expands to the shortid flag on the command line when ticked. -->
+	<param name="shortid" type="boolean" label="Short ID" help="Use protein id from the Mascot result file instead of reading from the fasta database." truevalue="--shortid" falsevalue="" />
+
+	<!-- Optional enzyme override; the enzyme text box is only shown, and only passed to the
+	     script, when the checkbox is ticked. -->
+	<conditional name="explicit_enzyme">
+		<param name="explicit_enzyme_use" type="boolean" label="Specify Enzyme" help="If left unchecked the Enzyme will be read from the input file" truevalue="true" falsevalue="false" />
+		<when value="false" />
+		<when value="true">
+			<param name="enzyme" type="text" label="Enzyme" size="80" value="trypsin" help="Semi-cleavage can be specified as semisample_enzyme eg semitrypsin"/>
+		</when>
+	</conditional>
+
+
+
+</inputs>
+<!-- Single pepXML dataset, labelled with the display name of the input file plus ".pepXML". -->
+<outputs>
+	<data format="raw_pepxml" metadata_source="input_file" name="output" label="${input_file.display_name}.pepXML" />
+</outputs>
+
+	<!-- Functional test: convert F002832.dat against the uploaded bsa.fasta and compare the
+	     output size with the reference file (sim_size, delta 600). -->
+	<tests>
+		<test>
+			<param name="source_select" value="input_ref"/>
+			<param name="fasta_file" value="bsa.fasta"/>
+			<param name="input_file" value="F002832.dat"/>
+			<output name="output" file="bsa_mascot2xml.pep.xml" compare="sim_size" delta="600" />
+		</test>
+	</tests>
+
+<help>
+	Convert mascot results from mascotdat to pepXML
+</help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Fri Jun 13 18:26:31 2014 -0400
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<!-- Declares that this repository depends on proteomics_datatypes (owner iracooke),
+     pinned to the changeset revision given below. -->
+<repositories description="Proteomics datatypes">
+    <repository changeset_revision="f66f8ca7b7b9" name="proteomics_datatypes" owner="iracooke" toolshed="http://toolshed.g2.bx.psu.edu" />
+ </repositories>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/mascot_databases.loc.sample	Fri Jun 13 18:26:31 2014 -0400
@@ -0,0 +1,13 @@
+#This file lists the names of protein databases installed on Mascot
+#
+#In order to use InterProphet to combine results from different search engines,
+#all searches must be performed on the same database.
+#You should therefore ensure that each database installed on Mascot has an equivalent
+#database installed in the Protk databases directory (databases used by OMSSA and X!Tandem).
+#The mascot_to_pepxml tool will ask for this database when performing the conversion.
+#
+# Entries should be structured as follows (tab-separated)
+# Display_name dbkey dbNameOnMascot dbkey
+#
+Swissprot	spall_	SwissProt	spall_
+Swissprot Human	sphuman_	SPHuman	sphuman_
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/mascot_mods.loc.sample	Fri Jun 13 18:26:31 2014 -0400
@@ -0,0 +1,77 @@
+#This file lists the names of chemical modifications acceptable for proteomics search engines
+#
+#
+Acetyl (K)	acetyl_k_	Acetyl (K)	acetyl_k_
+Acetyl (N-term)	acetyl_n-term_	Acetyl (N-term)	acetyl_n-term_
+Acetyl (Protein N-term)	acetyl_proteinn-term_	Acetyl (Protein N-term)	acetyl_proteinn-term_
+Amidated (C-term)	amidated_c-term_	Amidated (C-term)	amidated_c-term_
+Amidated (Protein C-term)	amidated_proteinc-term_	Amidated (Protein C-term)	amidated_proteinc-term_
+Ammonia-loss (N-term C)	ammonia-loss_n-termc_	Ammonia-loss (N-term C)	ammonia-loss_n-termc_
+Biotin (K)	biotin_k_	Biotin (K)	biotin_k_
+Biotin (N-term)	biotin_n-term_	Biotin (N-term)	biotin_n-term_
+Carbamidomethyl (C)	carbamidomethyl_c_	Carbamidomethyl (C)	carbamidomethyl_c_
+Carbamyl (K)	carbamyl_k_	Carbamyl (K)	carbamyl_k_
+Carbamyl (N-term)	carbamyl_n-term_	Carbamyl (N-term)	carbamyl_n-term_
+Carboxymethyl (C)	carboxymethyl_c_	Carboxymethyl (C)	carboxymethyl_c_
+Cation:Na (C-term)	cation_na_c-term_	Cation:Na (C-term)	cation_na_c-term_
+Cation:Na (DE)	cation_na_de_	Cation:Na (DE)	cation_na_de_
+Deamidated (NQ)	deamidated_nq_	Deamidated (NQ)	deamidated_nq_
+Deamidated-N (N)	deamidated-n_n_	Deamidated-N (N)	deamidated-n_n_
+Dehydrated (N-term C)	dehydrated_n-termc_	Dehydrated (N-term C)	dehydrated_n-termc_
+Dehydro (C)	dehydro_c_	Dehydro (C)	dehydro_c_
+Dioxidation (M)	dioxidation_m_	Dioxidation (M)	dioxidation_m_
+Ethanolyl (C)	ethanolyl_c_	Ethanolyl (C)	ethanolyl_c_
+ExacTagAmine (K)	exactagamine_k_	ExacTagAmine (K)	exactagamine_k_
+ExacTagThiol (C)	exactagthiol_c_	ExacTagThiol (C)	exactagthiol_c_
+Formyl (N-term)	formyl_n-term_	Formyl (N-term)	formyl_n-term_
+Formyl (Protein N-term)	formyl_proteinn-term_	Formyl (Protein N-term)	formyl_proteinn-term_
+Gln->pyro-Glu (N-term Q)	gln_pyro-glu_n-termq_	Gln->pyro-Glu (N-term Q)	gln_pyro-glu_n-termq_
+Glu->pyro-Glu (N-term E)	glu_pyro-glu_n-terme_	Glu->pyro-Glu (N-term E)	glu_pyro-glu_n-terme_
+Guanidinyl (K)	guanidinyl_k_	Guanidinyl (K)	guanidinyl_k_
+ICAT-C (C)	icat-c_c_	ICAT-C (C)	icat-c_c_
+ICAT-C:13C(9) (C)	icat-c_13c_9__c_	ICAT-C:13C(9) (C)	icat-c_13c_9__c_
+ICPL (K)	icpl_k_	ICPL (K)	icpl_k_
+ICPL (Protein N-term)	icpl_proteinn-term_	ICPL (Protein N-term)	icpl_proteinn-term_
+ICPL:13C(6) (K)	icpl_13c_6__k_	ICPL:13C(6) (K)	icpl_13c_6__k_
+ICPL:13C(6) (Protein N-term)	icpl_13c_6__proteinn-term_	ICPL:13C(6) (Protein N-term)	icpl_13c_6__proteinn-term_
+ICPL:13C(6)2H(4) (K)	icpl_13c_6_2h_4__k_	ICPL:13C(6)2H(4) (K)	icpl_13c_6_2h_4__k_
+ICPL:13C(6)2H(4) (N-term)	icpl_13c_6_2h_4__n-term_	ICPL:13C(6)2H(4) (N-term)	icpl_13c_6_2h_4__n-term_
+ICPL:13C(6)2H(4) (Protein N-term)	icpl_13c_6_2h_4__proteinn-term_	ICPL:13C(6)2H(4) (Protein N-term)	icpl_13c_6_2h_4__proteinn-term_
+ICPL:2H(4) (K)	icpl_2h_4__k_	ICPL:2H(4) (K)	icpl_2h_4__k_
+ICPL:2H(4) (Protein N-term)	icpl_2h_4__proteinn-term_	ICPL:2H(4) (Protein N-term)	icpl_2h_4__proteinn-term_
+iTRAQ4plex (K)	itraq4plex_k_	iTRAQ4plex (K)	itraq4plex_k_
+iTRAQ4plex (N-term)	itraq4plex_n-term_	iTRAQ4plex (N-term)	itraq4plex_n-term_
+iTRAQ4plex (Y)	itraq4plex_y_	iTRAQ4plex (Y)	itraq4plex_y_
+iTRAQ8plex (K)	itraq8plex_k_	iTRAQ8plex (K)	itraq8plex_k_
+iTRAQ8plex (N-term)	itraq8plex_n-term_	iTRAQ8plex (N-term)	itraq8plex_n-term_
+iTRAQ8plex (Y)	itraq8plex_y_	iTRAQ8plex (Y)	itraq8plex_y_
+Label:18O(1) (C-term)	label_18o_1__c-term_	Label:18O(1) (C-term)	label_18o_1__c-term_
+Label:18O(2) (C-term)	label_18o_2__c-term_	Label:18O(2) (C-term)	label_18o_2__c-term_
+Met->Hse (C-term M)	met_hse_c-termm_	Met->Hse (C-term M)	met_hse_c-termm_
+Met->Hsl (C-term M)	met_hsl_c-termm_	Met->Hsl (C-term M)	met_hsl_c-termm_
+Methyl (C-term)	methyl_c-term_	Methyl (C-term)	methyl_c-term_
+Methyl (DE)	methyl_de_	Methyl (DE)	methyl_de_
+Methylthio (C)	methylthio_c_	Methylthio (C)	methylthio_c_
+mTRAQ (K)	mtraq_k_	mTRAQ (K)	mtraq_k_
+mTRAQ (N-term)	mtraq_n-term_	mTRAQ (N-term)	mtraq_n-term_
+mTRAQ (Y)	mtraq_y_	mTRAQ (Y)	mtraq_y_
+mTRAQ:13C(3)15N(1) (K)	mtraq_13c_3_15n_1__k_	mTRAQ:13C(3)15N(1) (K)	mtraq_13c_3_15n_1__k_
+mTRAQ:13C(3)15N(1) (N-term)	mtraq_13c_3_15n_1__n-term_	mTRAQ:13C(3)15N(1) (N-term)	mtraq_13c_3_15n_1__n-term_
+mTRAQ:13C(3)15N(1) (Y)	mtraq_13c_3_15n_1__y_	mTRAQ:13C(3)15N(1) (Y)	mtraq_13c_3_15n_1__y_
+NIPCAM (C)	nipcam_c_	NIPCAM (C)	nipcam_c_
+Oxidation (HW)	oxidation_hw_	Oxidation (HW)	oxidation_hw_
+Oxidation (M)	oxidation_m_	Oxidation (M)	oxidation_m_
+Phospho (ST)	phospho_st_	Phospho (ST)	phospho_st_
+Phospho (Y)	phospho_y_	Phospho (Y)	phospho_y_
+Propionamide (C)	propionamide_c_	Propionamide (C)	propionamide_c_
+Pyridylethyl (C)	pyridylethyl_c_	Pyridylethyl (C)	pyridylethyl_c_
+Pyro-carbamidomethyl (N-term C)	pyro-carbamidomethyl_n-termc_	Pyro-carbamidomethyl (N-term C)	pyro-carbamidomethyl_n-termc_
+Sulfo (S)	sulfo_s_	Sulfo (S)	sulfo_s_
+Sulfo (T)	sulfo_t_	Sulfo (T)	sulfo_t_
+Sulfo (Y)	sulfo_y_	Sulfo (Y)	sulfo_y_
+TMT (K)	tmt_k_	TMT (K)	tmt_k_
+TMT (N-term)	tmt_n-term_	TMT (N-term)	tmt_n-term_
+TMT2plex (K)	tmt2plex_k_	TMT2plex (K)	tmt2plex_k_
+TMT2plex (N-term)	tmt2plex_n-term_	TMT2plex (N-term)	tmt2plex_n-term_
+TMT6plex (K)	tmt6plex_k_	TMT6plex (K)	tmt6plex_k_
+TMT6plex (N-term)	tmt6plex_n-term_	TMT6plex (N-term)	tmt6plex_n-term_
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/pepxml_databases.loc.sample	Fri Jun 13 18:26:31 2014 -0400
@@ -0,0 +1,13 @@
+#This file lists the names of protein databases installed locally in protk. 
+# These are used by omssa and x!tandem as well as the "mascot to pepxml" tool
+# In order to combine search results with InterProphet, all searches must be run against an identical database
+#
+# Entries should be structured as follows (tab-separated)
+# Display_name omssa_tandem_dbname dbkey
+#
+#
+Swissprot	spall_	SwissProt	spall_
+Combined PlasmboDB (falciparum) and Swissprot Human	plasmodb_pfalciparum_sphuman_	plasmodb_pfalciparum_sphuman	plasmodb_pfalciparum_sphuman_
+Swissprot Human	sphuman_	sphuman	sphuman_
+Combined Swissprot/TRembl Human	sptrhuman_	sptrhuman	sptrhuman_
+Swissprot Mouse	spmouse_	spmouse	spmouse_