view csv2apml.xml @ 5:5d99c9d0615d

performance improvements for MsFilt
author pieter.lukasse@wur.nl
date Mon, 27 Jan 2014 12:11:29 +0100
parents d50f079096ee
children 40ec8770780d
line wrap: on
line source

<tool name="Csv2Apml" id="csv2apml" version="1.0.2">
	<description>Converts MS/MS data in CSV format to APML format</description>
	<!-- 
	   For remote debugging, start your listener on port 8000 and use the following as the command interpreter:
	       java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 
	                    //////////////////////////
	    -->
	<!--
	    Command line: runs Csv2Apml.jar through the "java -jar" interpreter and hands it
	    the input CSV, the generated attributes-mapping config file and the output path.
	    Each substituted path is single-quoted so that a path containing whitespace or
	    shell metacharacters reaches the jar as a single argument.
	    NOTE(review): the jar name is kept as the first, unquoted token because Galaxy is
	    understood to resolve that token against the tool directory when an interpreter
	    is set; confirm before reordering the arguments.
	-->
	<command interpreter="java -jar ">
	    Csv2Apml.jar 
	    -peptideAndProteinMatchListCSV '$peptideAndProteinMatchListCSV'
	    -attributesMappingCSV '$attributesMappingCSV'
	    -apmlFile '$apmlFile'
	</command>
	
	<inputs>

		<!-- The identification table that is converted to APML. -->
		<param name="peptideAndProteinMatchListCSV"
			type="data" format="csv"
			label="MS/MS CSV file"
			help="MS/MS CSV file containing peptide identifications and protein matches"/>

		<!--
		    Column-name parameters. Each value entered here is written to the
		    attributesMappingCSV config file as the right-hand side of a
		    "generic name,column name" row. Parameters marked optional="true"
		    only produce a row when a value is present.
		-->

		<!-- Precursor properties (required). -->
		<param name="mz"
			type="text" size="30" optional="false"
			label="Column name for precursor m/z"/>

		<param name="rt"
			type="text" size="30" optional="false"
			label="Column name for precursor rt"/>

		<param name="charge"
			type="text" size="30" optional="false"
			label="Column name for precursor charge (z)"/>

		<!-- Peptide identification (sequence and score required, the rest optional). -->
		<param name="pepSequence"
			type="text" size="30" optional="false"
			label="Column name for peptide sequence"/>

		<param name="ppidScore"
			type="text" size="30" optional="false"
			label="Column name for peptide identification score"/>

		<param name="scoringSchemeName"
			type="text" size="30" optional="true"
			label="(Optional) Column name containing scoring scheme name"/>

		<param name="statisticalMeasure"
			type="text" size="30" optional="true"
			label="(Optional) Column name for reported statistical measure values"
			help="(e.g. column containing p-values or e-values)"/>

		<param name="ppidTheoreticalMz"
			type="text" size="30" optional="true"
			label="(Optional) Column name for peptide theoretical m/z"/>

		<param name="modifications"
			type="text" size="30" optional="true"
			label="(Optional) Column name for reported modifications"/>

		<!-- Protein match (accession required, the rest optional). -->
		<param name="proteinAccession"
			type="text" size="30" optional="false"
			label="Column name for protein accession code"/>

		<param name="protSequenceLength"
			type="text" size="30" optional="true"
			label="(Optional) Column name for protein sequence length"/>

		<param name="pepProtStart"
			type="text" size="30" optional="true"
			label="(Optional) Column name for protein match location start"
			help="Where peptide sequence starts in protein"/>

		<param name="pepProtEnd"
			type="text" size="30" optional="true"
			label="(Optional) Column name for protein match location end"
			help="Where peptide sequence ends in protein"/>

		<!-- Sample annotation (optional). -->
		<param name="sourceName"
			type="text" size="30" optional="true"
			label="(Optional) Column name for sample names"/>

	</inputs>
	<configfiles>
		<!--
		    attributesMappingCSV: a two-column CSV (header row first) that maps each
		    generic attribute name to the column name the user entered for it.
		    The six required attributes are always written. An optional attribute is
		    written only when its parameter has a value: the guard skips both an empty
		    string and the text "None".
		    NOTE(review): whether an unset optional text field renders as "" or as
		    "None" is understood to depend on the Galaxy version; both are skipped so
		    that no row with a blank column name is emitted. Confirm against the
		    target Galaxy release.
		    The template body is whitespace-sensitive: it starts directly after the
		    opening tag and the Cheetah directives stay at column 0.
		-->
		<configfile name="attributesMappingCSV">Generic name,name in S1 table CSV
mz,${mz}
rt,${rt}
charge,${charge}
pepSequence,${pepSequence}
ppidScore,${ppidScore}
proteinAccession,${proteinAccession}
#if str($ppidTheoreticalMz) not in ("", "None")
ppidTheoreticalMz,${ppidTheoreticalMz}
#end if
#if str($modifications) not in ("", "None")
modifications,${modifications}
#end if
#if str($scoringSchemeName) not in ("", "None")
scoringSchemeName,${scoringSchemeName}
#end if
#if str($statisticalMeasure) not in ("", "None")
statisticalMeasure,${statisticalMeasure}
#end if
#if str($protSequenceLength) not in ("", "None")
protSequenceLength,${protSequenceLength}
#end if
#if str($pepProtStart) not in ("", "None")
pepProtStart,${pepProtStart}
#end if
#if str($pepProtEnd) not in ("", "None")
pepProtEnd,${pepProtEnd}
#end if
#if str($sourceName) not in ("", "None")
sourceName,${sourceName}
#end if</configfile>
	</configfiles>
	
	<outputs>
		<!-- Single output dataset; its path is handed to the jar through the -apmlFile option. -->
		<data name="apmlFile" format="apml" label="${tool.name} on ${on_string}: APML"/>
	</outputs>
	<!-- No functional tests are defined for this tool. -->
	<tests/>
  <help>
  
.. class:: infomark
  
This tool converts a CSV file containing MS/MS peptide identifications and their respective protein matches
to the APML xml format. 
The identifications in APML format can be used for example to annotate unidentified MS features via SEDMAT(*).
This format is also compatible with what is expected by other post-processing tools like Quantifere (for 
protein inference). 

(*)SEDMAT can use MS2 identification data
and couple it to this MS1 data, thereby annotating the MS1 feature list with identifications.

-----

**Output**

This tool returns the input data in APML XML format. 

  </help>
</tool>