changeset 17:40ec8770780d

* Added support for pepxml (and more specifically for ProteomeDiscoverer 1.4). Tested with ProteomeDiscoverer 1.4 pepxml. * Improved HTML report of NapQ tool. * Fixed issue that was preventing SEDMAT matching from running in parallel/multi-threaded.
author pieter.lukasse@wur.nl
date Mon, 14 Apr 2014 17:11:33 +0200
parents d56c18ed0f77
children ad911e9aaf33
files Csv2Apml.jar MsFilt.jar NapQ.jar PRIMS.jar ProgenesisConv.jar Quantifere.jar Quantiline.jar README.rst SedMat_cli.jar csv2apml.xml msfilt.xml napq.xml quantifere.xml sedmat.xml
diffstat 14 files changed, 86 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
Binary file Csv2Apml.jar has changed
Binary file MsFilt.jar has changed
Binary file NapQ.jar has changed
Binary file PRIMS.jar has changed
Binary file ProgenesisConv.jar has changed
Binary file Quantifere.jar has changed
Binary file Quantiline.jar has changed
--- a/README.rst	Fri Mar 28 14:43:46 2014 +0100
+++ b/README.rst	Mon Apr 14 17:11:33 2014 +0200
@@ -20,6 +20,11 @@
 ============== ======================================================================
 Date            Changes
 -------------- ----------------------------------------------------------------------
+April  2014    * Added support for pepxml (and more specifically for 
+                 ProteomeDiscoverer 1.4). Tested with ProteomeDiscoverer 1.4 pepxml. 
+               * Improved HTML report of NapQ tool.
+               * Fixed issue that was preventing SEDMAT matching from running  
+                 in parallel/multi-threaded.  
 January 2014   * first release via Tool Shed
 November 2013  * multiple tools used internally at PRI 
 end 2011       * first tool
Binary file SedMat_cli.jar has changed
--- a/csv2apml.xml	Fri Mar 28 14:43:46 2014 +0100
+++ b/csv2apml.xml	Mon Apr 14 17:11:33 2014 +0200
@@ -22,7 +22,7 @@
 		       label="Column name for precursor m/z" />     			
 
 		<param name="rt" type="text" optional="false" size="30" 
-		       label="Column name for precursor rt" />
+		       label="Column name for precursor rt  (nb: rt in minutes)" />
      	
 		<param name="charge" type="text" optional="false" size="30" 
 		       label="Column name for precursor charge (z)" />
--- a/msfilt.xml	Fri Mar 28 14:43:46 2014 +0100
+++ b/msfilt.xml	Mon Apr 14 17:11:33 2014 +0200
@@ -1,4 +1,4 @@
-<tool name="MsFilt" id="msfilt" version="1.0.3">
+<tool name="MsFilt" id="msfilt" version="1.0.4">
 	<description>Filters annotations based MS/MS peptide identification and annotation quality measures</description>
 	<!-- 
 	   For remote debugging start you listener on port 8000 and use the following as command interpreter:
@@ -27,6 +27,10 @@
 	    -addRawIntensityInfo $addRawIntensityInfo
     	-outReport $htmlReportFile
 	    -outReportPicturesPath $htmlReportFile.files_path
+	    #if str($containsPepxml.pepxmlInSet) == "Yes"
+        	-pepxmlDataType $containsPepxml.pepxmlDataType
+			-pepxmlGeneratedBy $containsPepxml.pepxmlGeneratedBy
+        #end if
 	</command>
 	
 	<inputs>
@@ -37,9 +41,35 @@
    		               SEDMAT or Quantiline tools." />
    		
    		<repeat name="annotationSourceFiles" title="(Optional) Peptide identification files" help="Full set of MS/MS peptide identification files, including peptides that could not be quantified.">
-   			<param name="identificationsFile" type="data" format="apml,mzidentml,prims.fileset.zip" label="Identifications file (APML or MZIDENTML or MZIDENTML fileSet)" />
+   			<param name="identificationsFile" type="data" format="apml,pepxml,mzidentml,prims.fileset.zip" label="Identifications file (APML, pepxml, MZIDENTML or MZIDENTML fileSet)" />
    		</repeat>
    		
+   		<!-- ================== PEPXML specific ================== -->
+   		<conditional name="containsPepxml">
+	  		<param name="pepxmlInSet" type="boolean" truevalue="Yes" falsevalue="No" checked="false" 
+	    		label="Identifications set contains one or more files in pepxml format"
+	    		help="Indicate whether one or more (Optional) Peptide identification files is in pepxml format. Support for pepxml is still considered 'beta'."/> 
+   			<when value="Yes">
+	      		<param name="pepxmlDataType" type="select" label=">> Type of data stored in the pepxml"
+	      		       help="Options marked with (*) are ProteomeDiscoverer specific scenarios">
+			    	<option value="" selected="true">--Please select--</option>
+			    	<option value="single_2d" >2D LC-MS runs, one per msms_run_summary</option>
+			    	<option value="multi_2d">(*) 2D LC-MS runs, multiple runs (e.g. rx.F1 to rx.FN) merged as a 'single' msms_run_summary</option>
+			    	<option value="single_1d">1D LC-MS runs, one per msms_run_summary</option>
+				</param>
+				<param name="pepxmlGeneratedBy" type="select" label=">> pepxml generated by"
+					help="Some tools, like ProteomeDiscoverer 1.4, have specific issues in their pepxml generation logic. Correctly indicating the tool used here will ensure known issues are taken 
+					     into consideration when the file is parsed." >
+					<option value="" selected="true">--Please select--</option>
+					<option value="proteome_discoverer_v1.4">ProteomeDiscoverer 1.4</option>
+			    	<option value="other">Other</option> 
+				</param>
+	      	</when>
+	      	<when value="No">
+     		</when>
+     	</conditional>
+   		<!-- ================== END - PEPXML specific ================== -->
+   		
      	<!-- 
      	<param name="maxNrRankings" type="integer" size="10" value="0" label="Maximum nr. of items to leave in the final ranking (set=0 for no limit) " />
      	-->
@@ -62,13 +92,16 @@
 &#xd;&#xa;qmBCOS =&gt; best correlation with other sibling peptide (correl),1
 "/>
 
-		<param name="statisticalMeasuresConfig" type="text" area="true" size="6x70" label="Statistical measures configuration" 
+		<param name="statisticalMeasuresConfig" type="text" area="true" size="8x70" label="Statistical measures configuration" 
 		help="Here you may specify the statistical measures that are found in the ms/ms results (e.g. p or e-values). 
 		The format is: SM alias => SM name,type,mode[min/max]. "
 value="smXTD =&gt; MS:1001330,XSLASH!Tandem:expect,min
 &#xd;&#xa;pvCSVEX =&gt; p_value,CSV_EXPORT,min
 &#xd;&#xa;smAUTO_LIKELIHOOD =&gt; AUTOMOD_LOGLIKELIHOOD,PLGS/Auto-mod,max
 &#xd;&#xa;smLIKELIHOOD =&gt; LOGLIKELIHOOD,PLGS/Databank-search,max
+&#xd;&#xa;smPercoProb =&gt; Percolator: probability,Percolator probability,max
+&#xd;&#xa;smPercoPEP =&gt; Percolator: PEP,Percolator PEP,min
+&#xd;&#xa;smPercoQval =&gt; Percolator: q-Value,Percolator q-Value,min
 "/>
 
      	<param name="filterOutUnannotatedAlignments" type="boolean" checked="true" 
@@ -117,7 +150,9 @@
 		#for $i, $s in enumerate( $annotationSourceFiles )
 			${s.identificationsFile}
 			## also print out the datatype in the next line, based on previously configured datatype
-			#if isinstance( $s.identificationsFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__):
+			#if isinstance( $s.identificationsFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('pepxml').__class__):
+				pepxml
+			#elif isinstance( $s.identificationsFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__):
 				apml
 			#else:
         		mzid
--- a/napq.xml	Fri Mar 28 14:43:46 2014 +0100
+++ b/napq.xml	Mon Apr 14 17:11:33 2014 +0200
@@ -1,4 +1,4 @@
-<tool name="NapQ" id="napq" version="0.0.1">
+<tool name="NapQ" id="napq" version="0.0.3">
 	<description>'no alignment' (alignment-free) peptide quantification</description>
 	<!-- 
 	   For remote debugging start you listener on port 8000 and use the following as command interpreter:
--- a/quantifere.xml	Fri Mar 28 14:43:46 2014 +0100
+++ b/quantifere.xml	Mon Apr 14 17:11:33 2014 +0200
@@ -1,4 +1,4 @@
-<tool name="Quantifere" id="quantifere1" version="1.0.2">
+<tool name="Quantifere" id="quantifere1" version="1.0.3">
 	<description>Protein Inference by Peptide Quantification patterns</description>
 	<!-- 
 	   For remote debugging start you listener on port 8000 and use the following as command interpreter:
@@ -61,7 +61,7 @@
      		</when>
      	</conditional>
    		
-   		<param name="statisticalMeasuresConfig" type="text" area="true" size="6x70" label="Statistical measures configuration" 
+   		<param name="statisticalMeasuresConfig" type="text" area="true" size="8x70" label="Statistical measures configuration" 
 				help="Here you may specify the statistical measures that are found in the ms/ms results (e.g. p or e-values). 
 				The format is: SM alias => SM name,type,mode[min/max]. Leaving this configuration out while these are present in the
 				dataset will have the effect that they will be wrongly used as a regular scoring scheme, having effect on for example
@@ -70,6 +70,9 @@
 &#xd;&#xa;pvCSVEX =&gt; p_value,CSV_EXPORT,min
 &#xd;&#xa;smAUTO_LIKELIHOOD =&gt; AUTOMOD_LOGLIKELIHOOD,PLGS/Auto-mod,max
 &#xd;&#xa;smLIKELIHOOD =&gt; LOGLIKELIHOOD,PLGS/Databank-search,max
+&#xd;&#xa;smPercoProb =&gt; Percolator: probability,Percolator probability,max
+&#xd;&#xa;smPercoPEP =&gt; Percolator: PEP,Percolator PEP,min
+&#xd;&#xa;smPercoQval =&gt; Percolator: q-Value,Percolator q-Value,min
 "/>
 <!-- keep value attribute above aligned like this to avoid white spaces in the value -->				
    		<param name="quantificationDataToUse" type="select" 
--- a/sedmat.xml	Fri Mar 28 14:43:46 2014 +0100
+++ b/sedmat.xml	Mon Apr 14 17:11:33 2014 +0200
@@ -1,4 +1,4 @@
-<tool name="SedMat" id="sedmat1" version="1.0.2">
+<tool name="SedMat" id="sedmat1" version="1.0.3">
 	<description>Matches MS and MS/MS results</description>
 	<!-- 
 	   For remote debugging start you listener on port 8000 and use the following as command interpreter:
@@ -12,10 +12,14 @@
 	    -ppidsFileGrouping $fileType.type 
 	    -ppidsInputFormat $fileType.inputFormatType.ppidsInputFormat
 	    -ppidsFileDescription "$fileType.inputFormatType.ppidsFile.name"  
-	    #if $fileType.inputFormatType.ppidsInputFormat == "mzid"
+	    #if $fileType.inputFormatType.ppidsInputFormat == "MZML_MZIDENTML_COMBI"
 			-spectraDataFile $fileType.inputFormatType.spectraDataFile
-		#end if 
-	    -out $outputData 
+		#end if
+		#if $fileType.inputFormatType.ppidsInputFormat == "pepxml"
+			-pepxmlDataType $fileType.inputFormatType.pepxmlDataType
+			-pepxmlGeneratedBy $fileType.inputFormatType.pepxmlGeneratedBy
+		#end if
+		-out $outputData 
 	    -outUnmatchedMS2 $outUnmatchedMS2
 	    -mtol $mtol 
 	    -rttol $rttol 
@@ -52,10 +56,11 @@
 		    <when value="single">
 		      <conditional name="inputFormatType">
 		      	<param name="ppidsInputFormat" type="select" label="MS/MS input format">
-			    	<option value="mzid" selected="true">mzIdentML on mzML</option>
+			    	<option value="MZML_MZIDENTML_COMBI" selected="true">mzIdentML on mzML</option>
 			    	<option value="apml">APML</option>
+			    	<option value="pepxml">pepxml (beta support)</option>
 				</param>
-				<when value="mzid">
+				<when value="MZML_MZIDENTML_COMBI">
 		      		<param name="spectraDataFile" type="data" format="mzml" label="MS/MS spectra file (mzml)"/>
 		      		<param name="ppidsFile" type="data" format="mzid" label="MS/MS peptide identifications file (mzidentml)"/>
 		      	</when>
@@ -65,6 +70,25 @@
 		      			<validator type="expression" message="You already selected this file as the MS data file.">value.id == inputMS,{"inputMS":$inputMS},{}</validator>-->
 		      		</param>
 		      	</when>
+		      	<when value="pepxml">
+		      		<param name="ppidsFile" type="data" format="pepxml" label="MS/MS peptide identifications file (pepxml)"/>
+		      		<param name="pepxmlDataType" type="select" label=">> Type of data stored in the pepxml"
+		      		       help="Options marked with (*) are ProteomeDiscoverer specific scenarios">
+				    	<option value="" selected="true">--Please select--</option>
+			    		<option value="single_2d" >2D LC-MS runs, one per msms_run_summary</option>
+				    	<option value="multi_2d">(*) 2D LC-MS runs, multiple runs (e.g. rx.F1 to rx.FN) merged as a 'single' msms_run_summary</option>
+				    	<option value="single_1d">1D LC-MS runs, one per msms_run_summary</option>
+					</param>
+					<param name="pepxmlGeneratedBy" type="select" label=">> pepxml generated by"
+						help="Some tools, like ProteomeDiscoverer 1.4, have specific issues in their pepxml generation logic. Correctly indicating the tool used here will ensure known issues are taken 
+						     into consideration when the file is parsed." >
+						<option value="" selected="true">--Please select--</option>
+			    		<option value="proteome_discoverer_v1.4">ProteomeDiscoverer 1.4</option>
+				    	<option value="other">Other</option> 
+					</param>
+					
+		      	</when>
+		      	
 		      </conditional>
 		    </when>
 		    <when value="fileSet">
@@ -79,7 +103,7 @@
 		      </conditional>
 		    </when>
 		</conditional>
-		<param name="mtol" type="integer" size="10" value="50" label="m/z tolerance (ppm) " />
+		<param name="mtol" type="float" size="10" value="50.0" label="m/z tolerance (ppm) " />
 		<param name="rttol" type="integer" size="10" value="150" label="Rention time tolerance (seconds) " />
 		<param name="rtShiftDetectionWindow" type="integer" size="10" value="20" label="Rention time shift detection window (seconds) " help="Size of the window to use for average rt shift calculations"/>
 
@@ -87,7 +111,7 @@
      	<conditional name="matchOnNamingConvention">
      		<param name="match" type="boolean" truevalue="Yes" falsevalue="No" checked="false" label="Match using naming convention" help="Use a list of codes that occur in the file names and that link them together."/>
      		<when value="Yes">
-     			<param name="namingConventionCodesForMatching" type="text" size="100" value="" label="List of codes in naming convention" help="Add the CSV list of codes that occur in the file names and that link them together. E.g. '_F1,_F2,_F3,etc.'"/>
+     			<param name="namingConventionCodesForMatching" type="text" size="100" value="" label=">> List of codes in naming convention" help="Add the CSV list of codes that occur in the file names and that link them together. E.g. '_F1,_F2,_F3,_F4,_F5,_F6,_F7,etc.'"/>
      		</when>
      		<when value="No">
      		</when>
@@ -98,7 +122,9 @@
 	      	<option value="2" selected="true">2</option>
 	      	<option value="3" selected="true">3</option>
 	      	<option value="4" selected="true">4</option>
-	      	<option value="5">5</option>
+	      	<option value="5" selected="true">5</option>
+	      	<option value="6" >6</option>
+	      	<option value="7" >7</option>
 		</param>
 
    		<param name="summaryReport" type="boolean" checked="true" label="Generate summary report" help="NB: this will increase the processing time"/>