diff msfilt.xml @ 17:40ec8770780d

* Added support for pepxml (and more specifically for ProteomeDiscoverer 1.4). Tested with ProteomeDiscoverer 1.4 pepxml. * Improved HTML report of NapQ tool. * Fixed issue that was preventing SEDMAT matching from running in parallel/multi-threaded.
author pieter.lukasse@wur.nl
date Mon, 14 Apr 2014 17:11:33 +0200
parents 72d4a37869ee
children ad911e9aaf33
line wrap: on
line diff
--- a/msfilt.xml	Fri Mar 28 14:43:46 2014 +0100
+++ b/msfilt.xml	Mon Apr 14 17:11:33 2014 +0200
@@ -1,4 +1,4 @@
-<tool name="MsFilt" id="msfilt" version="1.0.3">
+<tool name="MsFilt" id="msfilt" version="1.0.4">
 	<description>Filters annotations based MS/MS peptide identification and annotation quality measures</description>
 	<!-- 
 	   For remote debugging start you listener on port 8000 and use the following as command interpreter:
@@ -27,6 +27,10 @@
 	    -addRawIntensityInfo $addRawIntensityInfo
     	-outReport $htmlReportFile
 	    -outReportPicturesPath $htmlReportFile.files_path
+	    #if $containsPepxml.pepxmlInSet
+        	-pepxmlDataType $containsPepxml.pepxmlDataType
+			-pepxmlGeneratedBy $containsPepxml.pepxmlGeneratedBy
+        #end if
 	</command>
 	
 	<inputs>
@@ -37,9 +41,35 @@
    		               SEDMAT or Quantiline tools." />
    		
    		<repeat name="annotationSourceFiles" title="(Optional) Peptide identification files" help="Full set of MS/MS peptide identification files, including peptides that could not be quantified.">
-   			<param name="identificationsFile" type="data" format="apml,mzidentml,prims.fileset.zip" label="Identifications file (APML or MZIDENTML or MZIDENTML fileSet)" />
+   			<param name="identificationsFile" type="data" format="apml,pepxml,mzidentml,prims.fileset.zip" label="Identifications file (APML, pepxml, MZIDENTML or MZIDENTML fileSet)" /> <!-- pepxml files also need the containsPepxml conditional set to Yes; only then are -pepxmlDataType and -pepxmlGeneratedBy added to the command -->
    		</repeat>
    		
+   		<!-- ================== PEPXML specific: the 'Yes' case holds the two selects that the command template passes as -pepxmlDataType and -pepxmlGeneratedBy; the 'No' case is intentionally empty ================== -->
+   		<conditional name="containsPepxml">
+	  		<param name="pepxmlInSet" type="boolean" truevalue="Yes" falsevalue="No" checked="false" 
+	    		label="Identifications set contains one or more files in pepxml format"
+	    		help="Indicate whether one or more (Optional) Peptide identification files is in pepxml format. Support for pepxml is still considered 'beta'."/> 
+   			<when value="Yes">
+	      		<param name="pepxmlDataType" type="select" label=">> Type of data stored in the pepxml"
+	      		       help="Options marked with (*) are ProteomeDiscoverer specific scenarios">
+			    	<option value="" selected="true">--Please select--</option>
+			    	<option value="single_2d" >2D LC-MS runs, one per msms_run_summary</option>
+			    	<option value="multi_2d">(*) 2D LC-MS runs, multiple runs (e.g. rx.F1 to rx.FN) merged as a 'single' msms_run_summary</option>
+			    	<option value="single_1d">1D LC-MS runs, one per msms_run_summary</option>
+				</param>
+				<param name="pepxmlGeneratedBy" type="select" label=">> pepxml generated by"
+					help="Some tools, like ProteomeDiscoverer 1.4, have specific issues in their pepxml generation logic. Correctly indicating the tool used here will ensure known issues are taken 
+					     into consideration when the file is parsed." >
+					<option value="" selected="true">--Please select--</option>
+					<option value="proteome_discoverer_v1.4">ProteomeDiscoverer 1.4</option>
+			    	<option value="other">Other</option> 
+				</param>
+	      	</when>
+	      	<when value="No">
+     		</when>
+     	</conditional>
+   		<!-- ================== END - PEPXML specific ================== -->
+   		
      	<!-- 
      	<param name="maxNrRankings" type="integer" size="10" value="0" label="Maximum nr. of items to leave in the final ranking (set=0 for no limit) " />
      	-->
@@ -62,13 +92,16 @@
 &#xd;&#xa;qmBCOS =&gt; best correlation with other sibling peptide (correl),1
 "/>
 
-		<param name="statisticalMeasuresConfig" type="text" area="true" size="6x70" label="Statistical measures configuration" 
+		<param name="statisticalMeasuresConfig" type="text" area="true" size="8x70" label="Statistical measures configuration" 
 		help="Here you may specify the statistical measures that are found in the ms/ms results (e.g. p or e-values). 
 		The format is: SM alias => SM name,type,mode[min/max]. "
 value="smXTD =&gt; MS:1001330,XSLASH!Tandem:expect,min
 &#xd;&#xa;pvCSVEX =&gt; p_value,CSV_EXPORT,min
 &#xd;&#xa;smAUTO_LIKELIHOOD =&gt; AUTOMOD_LOGLIKELIHOOD,PLGS/Auto-mod,max
 &#xd;&#xa;smLIKELIHOOD =&gt; LOGLIKELIHOOD,PLGS/Databank-search,max
+&#xd;&#xa;smPercoProb =&gt; Percolator: probability,Percolator probability,max
+&#xd;&#xa;smPercoPEP =&gt; Percolator: PEP,Percolator PEP,min
+&#xd;&#xa;smPercoQval =&gt; Percolator: q-Value,Percolator q-Value,min
 "/>
 
      	<param name="filterOutUnannotatedAlignments" type="boolean" checked="true" 
@@ -117,7 +150,9 @@
 		#for $i, $s in enumerate( $annotationSourceFiles )
 			${s.identificationsFile}
 			## also print out the datatype in the next line, based on previously configured datatype
-			#if isinstance( $s.identificationsFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__):
+			#if isinstance( $s.identificationsFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('pepxml').__class__):
+				pepxml
+			#elif isinstance( $s.identificationsFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__):
 				apml
 			#else:
         		mzid