prims_proteomics: quantifere.xml comparison

comparison quantifere.xml @ 0:d50f079096ee

Push to main toolshed

author	pieter.lukasse@wur.nl
date	Wed, 08 Jan 2014 11:39:16 +0100
parents
children	73c7c6589202

comparison

equal deleted inserted replaced

--1:000000000000
+:d50f079096ee
+<tool name="Quantifere" id="quantifere1" version="1.0.2">
+	<description>Protein Inference by Peptide Quantification patterns</description>
+	<!--
+	   For remote debugging, start your listener on port 8000 and use the following as command interpreter:
+	       java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000
+	                    //////////////////////////
+	    -->
+	<!--
+	   Command line for Quantifere.jar. The annotatedQuantificationFilesList,
+	   identificationFilesList and statisticalMeasuresConfigFile arguments receive the
+	   config files declared under configfiles; the remaining arguments carry the tool
+	   parameters and the output datasets.
+	   The fraction naming codes are appended only for 2D LC-MS data. The boolean is
+	   compared through its string form against its declared truevalue ("Yes"), and the
+	   free-text codes are quoted so that an empty value or embedded spaces still reach
+	   the tool as a single argument.
+	-->
+	<command interpreter="java -jar ">
+	    Quantifere.jar
+	    -annotatedQuantificationFilesList $annotatedQuantificationFilesList
+	    -identificationFilesList $identificationFilesList
+	    -statisticalMeasuresConfigFile $statisticalMeasuresConfigFile
+	    -quantificationDataToUse $quantificationDataToUse
+	    -minCorrel $minCorrel
+	    -minProtCoverage $minProtCoverage
+	    -minAboveAverageHits $minAboveAverageHits
+	    -minNrIdsForInferencePeptide $minNrIdsForInferencePeptide
+	    -refineModel $refineModel
+	    -functionalAnnotationCSV $functionalAnnotationCSV
+	    -outputCSV $outputCSV
+	    -outputInferenceLogCSV $outputInferenceLogCSV
+	    -outputSummaryAnnotationCSV $outputSummaryAnnotationCSV
+	    -outReport $htmlReportFile
+	    -outReportPicturesPath $htmlReportFile.files_path
+	    #if str($is2D_LC_MS.fractions) == "Yes"
+	        -namingConventionCodesForFractions "$is2D_LC_MS.namingConventionCodesForFractions"
+	    #end if
+	</command>
+	<inputs>
+		<repeat name="annotatedQuantificationFiles" title="Peptide (filtered) quantification files (APML)"
+		help="The APML contents as aligned, annotated and scored feature lists,
+		as produced by MsFilt tool. Select one or more files. For 2D-LC-MS we expect one file per fraction."> <!-- Repeatable group of APML inputs; each selected file is listed in the annotatedQuantificationFilesList config file. -->
+			<param name="annotatedQuantificationFile" size="50" type="data" format="apml" label="File (APML format)" />
+		</repeat>
+		<repeat name="identificationFiles" title="Peptide (filtered) identification files (MS/MS identifications)"
+		help="Full set of MS/MS peptide identification files, including peptides that could not be quantified.
+		This set of identifications is ideally filtered on some quality and
+		statistical measures (e.g. as is done by MsFilt). Tip: to base the inference only on the
+		selected peptide quantification files, you
+		can select the same quantification files here as well. Select one or more files."> <!-- Repeatable group of identification inputs; each selected file and its type (apml or mzid) are listed in the identificationFilesList config file. -->
+			<param name="identificationFile" size="50" type="data" format="apml,mzid" label="File (APML or MZIDENTML format)" />
+		</repeat>
+		<!--
+		   Switch for 2D LC-MS data. The boolean test parameter yields "Yes" or "No";
+		   only the "Yes" case asks for the fraction naming codes, which the command
+		   section passes to Quantifere.jar as -namingConventionCodesForFractions.
+		   The "No" case is declared explicitly (and empty) so every value of the test
+		   parameter has a matching when block.
+		-->
+		<conditional name="is2D_LC_MS">
+			<param name="fractions" type="boolean" truevalue="Yes" falsevalue="No" checked="false"
+				label="Data is from 2D LC-MS"
+				help="Data acquisition was done in multiple fractions."/>
+			<when value="Yes">
+				<!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->
+				<param name="namingConventionCodesForFractions" type="text" size="100" value=""
+					label="Part of run/file name that identifies the 2D LC-MS fraction"
+					help="Add the CSV list of codes that occur in the file names
+				and that stand for a fraction code. E.g. '_F1,_F2,_F3,etc.' In this
+				way different peptide identifications from the same sample but measured
+				in different fractions can be merged together. Otherwise each (fraction) file
+				is seen as a separate sample."/>
+			</when>
+			<when value="No"/>
+		</conditional>
+		<param name="statisticalMeasuresConfig" type="text" area="true" size="6x70" label="Statistical measures configuration"
+				help="Here you may specify the statistical measures that are found in the ms/ms results (e.g. p or e-values).
+				The format is: SM alias => SM name,type,mode[min/max]. Leaving this configuration out while these are present in the
+				dataset will have the effect that they will be wrongly used as a regular scoring scheme, having effect on for example
+				the filter criteria below like 'Minimum number of peptide matches with a score above average' ."
+value="smXTD =&gt; MS:1001330,XSLASH!Tandem:expect,min
+&#xd;&#xa;pvCSVEX =&gt; p_value,CSV_EXPORT,min
+&#xd;&#xa;smAUTO_LIKELIHOOD =&gt; AUTOMOD_LOGLIKELIHOOD,PLGS/Auto-mod,max
+&#xd;&#xa;smLIKELIHOOD =&gt; LOGLIKELIHOOD,PLGS/Databank-search,max
+"/>
+<!-- keep value attribute above aligned like this to avoid white spaces in the value.
+     The default value holds four "alias => name,type,mode" entries separated by CR LF
+     character references. The parameter text is substituted into the
+     statisticalMeasuresConfigFile config file. -->
+		<param name="quantificationDataToUse" type="select"
+		label="Quantification data to use"
+		help="Quantification data to use for the pattern clustering and inference steps. NB: check if the chosen data is also
+		      present in your file, or choose 'auto' to let Quantifere check which quantification type is present in most peptides."> <!-- The selected option value is passed to Quantifere.jar as -quantificationDataToUse; "auto" is the default selection. -->
+	    	<option value="auto" selected="true">auto</option>
+	    	<option value="getIntensity">(TODO)raw intensities</option>
+	    	<option value="getApexIntensity">(TODO)apex intensities</option>
+	    	<option value="getNormalizedIntensity">(TODO)normalized intensities</option>
+		</param>
+		<!-- minCorrel is passed to Quantifere.jar as -minCorrel.
+		     TODO let minCorrel default value vary according to quantification type chosen above -->
+		<param name="minCorrel" type="float" size="10" value="0.85" label="Minimum correlation in a cluster" help="Features will be grouped by their protein annotation and
+		sample intensity values correlation. Set here the minimum correlation expected between grouped members. This is used to guide the clustering algorithm."/>
+		<!--  simple extra heuristics to remove some "noise" protein hits;
+		      each of the three parameters below is passed to Quantifere.jar under a flag of the same name  -->
+		<param name="minProtCoverage" type="float" size="10" value="5.0" label="Minimum protein coverage (%)" help="This will remove proteins that have a too small
+		portion of their sequence covered by peptide matches."/>
+		<param name="minAboveAverageHits" type="integer" size="10" value="1" label="Minimum number of different peptide matches with a score above average"
+		help="This will remove proteins that do not have enough reasonable peptides hits."/>
+		<param name="minNrIdsForInferencePeptide" type="integer" size="10" value="1" label="Minimum number of peptide identifications for inference peptides"
+		help="Minimum number of peptide identifications a peptide needs to be used as inference peptide for secondary proteins."/>
+	<!-- Optional accession-to-annotation mapping; the summary annotation output is only created when it is supplied. -->
+	<param
+		name="functionalAnnotationCSV"
+		type="data"
+		format="csv,txt,tsv"
+		optional="true"
+		label="(Functional)annotation mapping file (csv or tsv format)"
+		help="Optional file that maps protein accessions to a network, pathway or other higher level annotations. In this file a header line is expected with these 2 columns (names and lower case is important): accession,annotation"/>
+	<!-- Switch handed to Quantifere.jar as -refineModel; enabled by default. -->
+	<param
+		name="refineModel"
+		type="boolean"
+		checked="true"
+		label="Refine matches model"
+		help="This will let the algorithm search for a reduced set of secondary protein matches that still explains the variation in the peptide quantification patterns"/>
+	<!-- Controls whether the HTML report output is created; enabled by default. -->
+	<param
+		name="summaryReport"
+		type="boolean"
+		checked="true"
+		label="Generate summary report"/>
+	</inputs>
+	<configfiles> <!-- Config files whose names are referenced on the Quantifere.jar command line:
+	     annotatedQuantificationFilesList: one entry per selected quantification file;
+	     identificationFilesList: per selected identification file, the file followed by a
+	     line holding apml or mzid, chosen by testing the dataset's datatype against the
+	     registered apml datatype class;
+	     statisticalMeasuresConfigFile: the text of the statisticalMeasuresConfig parameter. -->
+		<configfile name="annotatedQuantificationFilesList">## start comment
+		## iterate over the selected files and store their names in the config file
+		#for $i, $s in enumerate( $annotatedQuantificationFiles )
+			${s.annotatedQuantificationFile}
+		#end for
+		## end comment</configfile>
+		<configfile name="identificationFilesList">## start comment
+		## iterate over the selected files and store their names in the config file
+		#for $i, $s in enumerate( $identificationFiles )
+			${s.identificationFile}
+			## also print out the datatype in the next line, based on previously configured datatype
+			#if isinstance( $s.identificationFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__):
+				apml
+			#else:
+		mzid
+		#end if
+		#end for
+		## end comment</configfile>
+		<configfile name="statisticalMeasuresConfigFile">## start comment
+			${statisticalMeasuresConfig}
+		</configfile>
+	</configfiles>
+	<!-- Result datasets; each name matches the placeholder used in the command section. -->
+	<outputs>
+		<!-- Always produced: the protein list and the inference log. -->
+		<data
+			name="outputCSV"
+			format="csv"
+			label="${tool.name} on ${on_string}: Proteins list (CSV)"/>
+		<data
+			name="outputInferenceLogCSV"
+			format="csv"
+			label="${tool.name} on ${on_string}: Inference log (CSV)"/>
+		<!-- Produced only while the filter expression holds, i.e. the summary report is requested. -->
+		<data
+			name="htmlReportFile"
+			format="html"
+			label="${tool.name} on ${on_string} - HTML report">
+			<filter>( summaryReport == True )</filter>
+		</data>
+		<!-- Produced only while the filter expression holds, i.e. an annotation mapping file was given. -->
+		<data
+			name="outputSummaryAnnotationCSV"
+			format="csv"
+			label="${tool.name} on ${on_string} - Functional annotation summary (CSV)">
+			<filter>( functionalAnnotationCSV != None )</filter>
+		</data>
+	</outputs>
+	<!-- No functional tests are defined. -->
+	<tests/>
+<help>
+.. class:: infomark
+This tool takes Peptide Quantification patterns and uses this to do Protein Inference of both Primary Protein
+identifications as well as Secondary Protein identifications. This last class of protein identifications
+can not be done by traditional protein inference methods that look only at peptide identifications and
+their quality parameters.
+-----
+**List of definitions**
+Primary Protein identification: protein identification belonging to the minimum set of proteins needed
+to account for the observed peptides.
+Secondary Protein identification: extra protein identifications that do not belong to the minimum set
+of proteins mentioned above.
+raw intensities : is the intensity value resulting from the integration of the feature peak area
+apex intensities: is the intensity value as on the highest point of the feature peak
+normalized intensities : is the intensity normalized by some means
+-----
+**Minimum correlation in a cluster**
+TODO - add doc.
+-----
+**Output details**
+*Proteins list (CSV)*
+This is the list of primary and secondary proteins and their calculated inference score. Proteins
+with exactly the same peptide hits are also grouped together and labeled as primary_group and secondary_group
+instead of simply primary and secondary.
+*Inference log (CSV)*
+This CSV table shows all data, both inferred and ruled out proteins. This can be used by the user to
+troubleshoot the inference process and understand why certain proteins might have been ruled out.
+The CSV is provided in such a format that the data can easily be explored in a Cytoscape network.
+The figure below shows an example of the data being explored in Cytoscape using also the
+`Cytoscape chartplugin`_ to visualize the quantification data when selecting the peptide nodes.
+.. image:: $PATH_TO_IMAGES/quantifere_cyto_out.png
+.. _Cytoscape chartplugin: http://apps.cytoscape.org/apps/chartplugin
+</help>
+</tool>

Mercurial > repos > pieterlukasse > prims_proteomics

comparison quantifere.xml @ 0:d50f079096ee