Mercurial > repos > pieterlukasse > prims_proteomics

<tool name="Csv2Apml" id="csv2apml" version="1.0.2">
	<description>Converts MS/MS data in CSV format to APML format</description>
	<!--
	   For remote debugging, start your listener on port 8000 and use the following as command interpreter:
	       java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000
	                    //////////////////////////
	    -->
	<!--
	    Runs Csv2Apml.jar through the "java -jar" interpreter with three arguments:
	    the input CSV dataset, the generated attributesMappingCSV config file and
	    the APML output dataset. Every substituted path is single-quoted so that a
	    path containing spaces or shell metacharacters is passed as one argument.
	-->
	<command interpreter="java -jar ">
	    Csv2Apml.jar
	    -peptideAndProteinMatchListCSV '$peptideAndProteinMatchListCSV'
	    -attributesMappingCSV '$attributesMappingCSV'
	    -apmlFile '$apmlFile'
	</command>

	<inputs>
		<!--
		    The first parameter is the CSV dataset to convert. Every following text
		    parameter holds a column name of that CSV; the attributesMappingCSV
		    config file pairs each one with its generic attribute name.
		-->
		<param
			name="peptideAndProteinMatchListCSV"
			type="data"
			format="csv"
			label="MS/MS CSV file"
			help="MS/MS CSV file containing peptide identifications and protein matches"/>

		<param
			name="mz"
			type="text"
			size="30"
			optional="false"
			label="Column name for precursor m/z"/>

		<param
			name="rt"
			type="text"
			size="30"
			optional="false"
			label="Column name for precursor rt"/>

		<param
			name="charge"
			type="text"
			size="30"
			optional="false"
			label="Column name for precursor charge (z)"/>

		<param
			name="pepSequence"
			type="text"
			size="30"
			optional="false"
			label="Column name for peptide sequence"/>

		<param
			name="ppidScore"
			type="text"
			size="30"
			optional="false"
			label="Column name for peptide identification score"/>

		<param
			name="scoringSchemeName"
			type="text"
			size="30"
			optional="true"
			label="(Optional) Column name containing scoring scheme name"/>

		<param
			name="statisticalMeasure"
			type="text"
			size="30"
			optional="true"
			label="(Optional) Column name for reported statistical measure values"
			help="(e.g. column containing p-values or e-values)"/>

		<param
			name="ppidTheoreticalMz"
			type="text"
			size="30"
			optional="true"
			label="(Optional) Column name for peptide theoretical m/z"/>

		<param
			name="modifications"
			type="text"
			size="30"
			optional="true"
			label="(Optional) Column name for reported modifications"/>

		<param
			name="proteinAccession"
			type="text"
			size="30"
			optional="false"
			label="Column name for protein accession code"/>

		<param
			name="protSequenceLength"
			type="text"
			size="30"
			optional="true"
			label="(Optional) Column name for protein sequence length"/>

		<param
			name="pepProtStart"
			type="text"
			size="30"
			optional="true"
			label="(Optional) Column name for protein match location start"
			help="Where peptide sequence starts in protein"/>

		<param
			name="pepProtEnd"
			type="text"
			size="30"
			optional="true"
			label="(Optional) Column name for protein match location end"
			help="Where peptide sequence ends in protein"/>

		<param
			name="sourceName"
			type="text"
			size="30"
			optional="true"
			label="(Optional) Column name for sample names"/>
	</inputs>
	<configfiles>
		<!--
		    Cheetah template for the attributes-mapping CSV that the command passes
		    to the jar as -attributesMappingCSV. After the header row, each row pairs
		    a generic attribute name with the column name entered by the user.
		    Rows for the optional parameters are written only when a value was
		    supplied. An unset optional text parameter may render either as "None"
		    or as an empty string (TODO confirm for the target Galaxy version), so
		    both are treated as "not supplied" to avoid rows with a blank column name.
		-->
		<configfile name="attributesMappingCSV">Generic name,name in S1 table CSV
mz,${mz}
rt,${rt}
charge,${charge}
pepSequence,${pepSequence}
ppidScore,${ppidScore}
proteinAccession,${proteinAccession}
#if str($ppidTheoreticalMz).strip() not in ("", "None")
ppidTheoreticalMz,${ppidTheoreticalMz}
#end if
#if str($modifications).strip() not in ("", "None")
modifications,${modifications}
#end if
#if str($scoringSchemeName).strip() not in ("", "None")
scoringSchemeName,${scoringSchemeName}
#end if
#if str($statisticalMeasure).strip() not in ("", "None")
statisticalMeasure,${statisticalMeasure}
#end if
#if str($protSequenceLength).strip() not in ("", "None")
protSequenceLength,${protSequenceLength}
#end if
#if str($pepProtStart).strip() not in ("", "None")
pepProtStart,${pepProtStart}
#end if
#if str($pepProtEnd).strip() not in ("", "None")
pepProtEnd,${pepProtEnd}
#end if
#if str($sourceName).strip() not in ("", "None")
sourceName,${sourceName}
#end if</configfile>
	</configfiles>

	<outputs>
		<!-- The single APML dataset; the command passes its path as -apmlFile. -->
		<data
			name="apmlFile"
			format="apml"
			label="${tool.name} on ${on_string}: APML"/>
	</outputs>
	<!-- No functional tests are defined for this tool. -->
	<tests/>
  <help>

.. class:: infomark

This tool converts a CSV file containing MS/MS peptide identifications and their respective protein matches
to the APML XML format.
The identifications in APML format can be used for example to annotate unidentified MS features via SEDMAT(*).
This format is also compatible with what is expected by other post-processing tools like Quantifere (for
protein inference).

(*) SEDMAT can take MS2 identification data
and couple it to MS1 data, thereby annotating the MS1 feature list with identifications.

-----

**Output**

This tool returns the input data in APML XML format.

  </help>
</tool>
author	pieter.lukasse@wur.nl
date	Fri, 28 Mar 2014 14:43:46 +0100
parents	d50f079096ee
children	40ec8770780d