view sedmat.xml @ 28:6107b74eeb11 default tip

improved documentation
author pieter.lukasse@wur.nl
date Fri, 30 Jan 2015 14:53:10 +0100
parents 40ec8770780d
children
line wrap: on
line source

<tool name="SedMat" id="sedmat1" version="1.0.3">
	<description>Matches MS and MS/MS results</description>
	<!-- 
	   For remote debugging, start your listener on port 8000 and use the following as command interpreter:
	       java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 
	    -->
	<!--
	   Command line for SedMat_cli.jar, rendered as a Cheetah template.
	   Optional arguments are emitted only when the matching input is active:
	     * -spectraDataFile only for the "MZML_MZIDENTML_COMBI" input format
	     * the pepxml arguments only for the "pepxml" input format
	     * the troubleshoot and naming convention arguments only when their
	       boolean switch is set
	   The two boolean switches declare truevalue="Yes" / falsevalue="No", so they
	   are tested on their string form instead of being compared with Python's True.
	   Free text values are double quoted so that a value containing spaces stays a
	   single argument.
	   NOTE(review): the fileSet case only offers the format value "mzid", so
	   -spectraDataFile is never emitted for file sets even though that case declares
	   a spectraDataFile input. Confirm this is intended.
	-->
	<command interpreter="java -jar ">
		SedMat_cli.jar
		-pl $inputMS
		-plInputFormat apml
		-ppids $fileType.inputFormatType.ppidsFile
		-ppidsFileGrouping $fileType.type
		-ppidsInputFormat $fileType.inputFormatType.ppidsInputFormat
		-ppidsFileDescription "$fileType.inputFormatType.ppidsFile.name"
		#if $fileType.inputFormatType.ppidsInputFormat == "MZML_MZIDENTML_COMBI"
			-spectraDataFile $fileType.inputFormatType.spectraDataFile
		#end if
		#if $fileType.inputFormatType.ppidsInputFormat == "pepxml"
			-pepxmlDataType $fileType.inputFormatType.pepxmlDataType
			-pepxmlGeneratedBy $fileType.inputFormatType.pepxmlGeneratedBy
		#end if
		-out $outputData
		-outUnmatchedMS2 $outUnmatchedMS2
		-mtol $mtol
		-rttol $rttol
		-rtShiftDetectionWindow $rtShiftDetectionWindow
		-matchOnSameSourceOnly $matchOnSameSourceOnly
		-chargeStatesToGenerate $chargeStatesToGenerate
		-outReport $htmlReportFile
		-outReportPicturesPath $htmlReportFile.files_path
		## The switch renders as "Yes" or "No" (its truevalue / falsevalue).
		#if str($troubleshoot1.troubleshootPeakLocations) == "Yes"
			-troubleshootPeakLocations YES
			-mStart $troubleshoot1.mStart
			-mEnd $troubleshoot1.mEnd
			-rtStart $troubleshoot1.rtStart
			-rtEnd $troubleshoot1.rtEnd
			-filterSourceName "$troubleshoot1.filterSourceName"
		#end if
		## The switch renders as "Yes" or "No" (its truevalue / falsevalue).
		#if str($matchOnNamingConvention.match) == "Yes"
			-matchOnNamingConvention YES
			-namingConventionCodesForMatching "$matchOnNamingConvention.namingConventionCodesForMatching"
		#end if
	</command>
	
	<inputs>
	 	
  		<param name="inputMS" type="data" format="apml" label="MS data (APML format)"/>
		<!-- Possible addition for inputMS:
		     <validator type="metadata" check="base_name" message="Metadata missing, click the pencil icon in the history item and set base_name."/> -->

		<!-- MS/MS identifications: choose between a single file and a file set first,
		     then pick the input format; the format decides which file inputs are shown. -->
		<conditional name="fileType">
			<param name="type" type="select" label="Peptide identification file grouping type">
				<option value="single" selected="true">single-File</option>
				<option value="fileSet">fileSet</option>
			</param>

			<!-- Single identification file -->
			<when value="single">
				<conditional name="inputFormatType">
					<param name="ppidsInputFormat" type="select" label="MS/MS input format">
						<option value="MZML_MZIDENTML_COMBI" selected="true">mzIdentML on mzML</option>
						<option value="apml">APML</option>
						<option value="pepxml">pepxml (beta support)</option>
					</param>

					<!-- mzIdentML identifications together with the mzML spectra they refer to -->
					<when value="MZML_MZIDENTML_COMBI">
						<param name="spectraDataFile" type="data" format="mzml" label="MS/MS spectra file (mzml)"/>
						<param name="ppidsFile" type="data" format="mzid" label="MS/MS peptide identifications file (mzidentml)"/>
					</when>

					<!-- APML identifications -->
					<when value="apml">
						<param name="ppidsFile" type="data" format="apml" label="MS/MS peptide identifications file (apml)">
							<!-- TODO: find out how to use this check against selecting the MS data file twice:
							<validator type="expression" message="You already selected this file as the MS data file.">value.id == inputMS,{"inputMS":$inputMS},{}</validator> -->
						</param>
					</when>

					<!-- pepxml identifications plus two hints about the pepxml content.
					     NOTE(review): both selects default to an empty value and have no validator,
					     so an unselected hint reaches the command line as an empty value. -->
					<when value="pepxml">
						<param name="ppidsFile" type="data" format="pepxml" label="MS/MS peptide identifications file (pepxml)"/>
						<param name="pepxmlDataType" type="select" label=">> Type of data stored in the pepxml"
							help="Options marked with (*) are ProteomeDiscoverer specific scenarios">
							<option value="" selected="true">--Please select--</option>
							<option value="single_2d">2D LC-MS runs, one per msms_run_summary</option>
							<option value="multi_2d">(*) 2D LC-MS runs, multiple runs (e.g. rx.F1 to rx.FN) merged as a 'single' msms_run_summary</option>
							<option value="single_1d">1D LC-MS runs, one per msms_run_summary</option>
						</param>
						<param name="pepxmlGeneratedBy" type="select" label=">> pepxml generated by"
						help="Some tools, like ProteomeDiscoverer 1.4, have specific issues in their pepxml generation logic. Correctly indicating the tool used here will ensure known issues are taken 
						     into consideration when the file is parsed.">
							<option value="" selected="true">--Please select--</option>
							<option value="proteome_discoverer_v1.4">ProteomeDiscoverer 1.4</option>
							<option value="other">Other</option>
						</param>
					</when>
				</conditional>
			</when>

			<!-- Zipped file sets holding N spectra files and N identification files -->
			<when value="fileSet">
				<conditional name="inputFormatType">
					<param name="ppidsInputFormat" type="select" label="inputFormat">
						<option value="mzid" selected="true">mzIdentML on mzML</option>
					</param>
					<when value="mzid">
						<param name="spectraDataFile" type="data" format="prims.fileset.zip" label="MS/MS spectra fileSet (N mzml files)"/>
						<param name="ppidsFile" type="data" format="prims.fileset.zip" label="MS/MS peptide identifications fileSet (N mzidentml files)"/>
					</when>
				</conditional>
			</when>
		</conditional>
		<!-- Matching tolerances: m/z in ppm, retention time values in seconds. -->
		<param name="mtol" type="float" size="10" value="50.0" label="m/z tolerance (ppm) " />
		<param name="rttol" type="integer" size="10" value="150" label="Retention time tolerance (seconds) " />
		<param name="rtShiftDetectionWindow" type="integer" size="10" value="20" label="Retention time shift detection window (seconds) " help="Size of the window to use for average rt shift calculations"/>

		<!-- Restrict matching to peaks that come from the same source file. -->
		<param name="matchOnSameSourceOnly" type="boolean" checked="false"
			label="Match peaks from same source only"
			help="If you want this, you might have to inform how to match the source files"/>

		<!-- Optional naming convention: the list of codes is only asked for when the switch is "Yes". -->
		<conditional name="matchOnNamingConvention">
			<param name="match" type="boolean" truevalue="Yes" falsevalue="No" checked="false"
				label="Match using naming convention"
				help="Use a list of codes that occur in the file names and that link them together."/>
			<when value="Yes">
				<param name="namingConventionCodesForMatching" type="text" size="100" value=""
					label=">> List of codes in naming convention"
					help="Add the CSV list of codes that occur in the file names and that link them together. E.g. '_F1,_F2,_F3,_F4,_F5,_F6,_F7,etc.'"/>
			</when>
			<when value="No"/>
		</conditional>

 		<!-- Charge states 1 to 5 are preselected; 6 and 7 can be ticked in addition. -->
		<param name="chargeStatesToGenerate" type="select" display="checkboxes" multiple="true"
			label="Generate extra charge states"
			help="The selected charge states will be generated for each MS2 feature ">
			<option value="1" selected="true">1</option>
			<option value="2" selected="true">2</option>
			<option value="3" selected="true">3</option>
			<option value="4" selected="true">4</option>
			<option value="5" selected="true">5</option>
			<option value="6">6</option>
			<option value="7">7</option>
		</param>

		<!-- Controls whether the HTML report output is created (see the filter on htmlReportFile). -->
		<param name="summaryReport" type="boolean" checked="true"
			label="Generate summary report"
			help="NB: this will increase the processing time"/>
     	
     	<!-- Optional trial run on a restricted m/z and retention time window.
     	     The window and source name inputs are only shown when the switch is "Yes". -->
     	<conditional name="troubleshoot1">
     		<param name="troubleshootPeakLocations" type="boolean" truevalue="Yes" falsevalue="No" checked="false" label="Troubleshoot ms1/ms2 peak locations" help="Small trial run to check if the MS and MS/MS peak lists in their current states can easily be matched "/>
     		<when value="Yes">
     			<param name="mStart" optional="false" type="integer" size="10" value="100" label="Set m/z start " />
     			<param name="mEnd" optional="false" type="integer" size="10" value="1000" label="Set m/z end " />
				<param name="rtStart" optional="false" type="integer" size="10" value="10" label="Set retention time start (minutes) " />
				<param name="rtEnd" optional="false" type="integer" size="10" value="20" label="Set retention time end (minutes) " />
				<param name="filterSourceName" type="text" size="100" value="" label="Restrict matching to a specific subset of the files " help="Part of a file name that occurs in both a ms1 and ms2 file (e.g. 'RibO_1_msE1')"/>
     		</when>
     		<when value="No">
     		</when>
     	</conditional>
     	
	</inputs>
	<outputs>
		<!-- Annotated MS peaks (APML); metadata is copied from the MS input. -->
		<data name="outputData" format="apml"
			label="${inputMS.metadata.base_name} - ${tool.name} on ${on_string}: APML"
			metadata_source="inputMS"/>
		<!-- MS2 features that could not be matched (CSV); metadata is copied from the MS input. -->
		<data name="outUnmatchedMS2" format="csv"
			label="${inputMS.metadata.base_name} - ${tool.name} on ${on_string}: unmatched MS2 features CSV"
			metadata_source="inputMS"/>
		<!-- HTML report; only created when the summaryReport input is checked. -->
		<data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - HTML report">
			<filter>( summaryReport == True )</filter>
		</data>
	</outputs>
	<tests>
		<!-- TODO: placeholder only, this test defines no inputs and no expected outputs yet. -->
		<test/>
	</tests>
  <help>
  
.. class:: infomark
  
This tool matches MS and MS/MS results. SEDMAT stands for "Single Experiment Data Matching Tool".
It can match peaks found in the MS spectra with the peptides found using the MS/MS spectra.
The result is the list of MS peaks annotated with peptides and proteins.

-----

**Output example**

This tool returns APML output, a Cytoscape network (.xgmml) of the matches and Retention Time plots (.pdf). 

  </help>
</tool>