view napq.xml @ 17:40ec8770780d

* Added support for pepxml (and more specifically for ProteomeDiscoverer 1.4). Tested with ProteomeDiscoverer 1.4 pepxml. * Improved HTML report of NapQ tool. * Fixed issue that was preventing SEDMAT matching from running in parallel/multi-threaded.
author pieter.lukasse@wur.nl
date Mon, 14 Apr 2014 17:11:33 +0200
parents d56c18ed0f77
children d31c6978d9d0
line wrap: on
line source

<tool name="NapQ" id="napq" version="0.0.3">
	<description>'no alignment' (alignment-free) peptide quantification</description>
	<!-- 
	   For remote debugging, start your listener on port 8000 and use the following as command interpreter:
	       java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 
	                    //////////////////////////
	    -->
	<!--
	   Command template (Cheetah). Runs NapQ.jar through "java -jar" and passes:
	     * the generated identifications config file (see the configfile named identificationsConfigFile),
	     * the sample naming codes entered by the user,
	     * the fraction naming codes, only when the "Data is from 2D LC-MS" checkbox is ticked,
	     * the three output locations (APML, TSV, HTML report) plus the report's extra files directory.
	   The checkbox is a boolean parameter declared with truevalue="Yes" / falsevalue="No", so it is
	   tested through its rendered string value rather than compared with the Python constant True.
	   Path arguments are double-quoted, like the free-text arguments, so they survive shell word splitting.
	-->
	<command interpreter="java -jar ">
	    NapQ.jar 
	    -identificationsConfigFile "$identificationsConfigFile"
	    -namingConventionCodesForSamples "$namingConventionCodesForSamples"
	    #if str($is2D_LC_MS.fractions) == "Yes"
	        -namingConventionCodesForFractions "$is2D_LC_MS.namingConventionCodesForFractions"
	    #end if
	    -outputApml "$outputApml"
	    -outputTsv "$outputTsv"
	    -outReport "$htmlReportFile"
	    -outReportPicturesPath "$htmlReportFile.files_path"
	</command>
	
	<inputs>

		<!-- One entry per identification result file. min="1" makes the form require at least one
		     file, so the generated identifications config file can no longer be empty. -->
   		<repeat name="identificationFileList" min="1" title="Peptide identification files" help="Set of MS/MS peptide identification files that have some form of quantification data coupled to it (e.g. MSE identifications&amp;intensity).">
   			<param name="identificationsFile" type="data" format="apml,mzidentml,prims.fileset.zip" label="Identifications file (APML or MZIDENTML or MZIDENTML fileSet)" />
   		</repeat>

		<!-- Comma-separated codes found in the file names that identify the sample of each file. -->
		<param name="namingConventionCodesForSamples" type="text" size="100" value="" 
		label="Part of run/file name that identifies the sample" 
		help="Add the CSV list of codes that occur in the file names 
			and that stand for a sample code. E.g. '_S1,_S2,_S3,etc.' "/> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->

   		
		<!-- Optional 2D LC-MS settings: the fraction codes field is only shown when the checkbox is ticked ("Yes"). -->
   		<conditional name="is2D_LC_MS">
     		<param name="fractions" type="boolean" truevalue="Yes" falsevalue="No" checked="false" 
     		label="Data is from 2D LC-MS"
     		help="Data acquisition was done in multiple fractions."/>
     		<when value="Yes"> 
     			<param name="namingConventionCodesForFractions" type="text" size="100" value="" 
     			label="Part of run/file name that identifies the 2D LC-MS fraction" 
     			help="Add the CSV list of codes that occur in the file names 
     				and that stand for a fraction code. E.g. '_F1,_F2,_F3,etc.' Use this to avoid
     				that each (fraction) file is seen as a separate run."/> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->
     		</when>
     		<when value="No">
     		</when>
     	</conditional>     	
     	
	</inputs>
	<configfiles>
		<!--
		   identificationsConfigFile: Cheetah template whose rendered output is handed to NapQ.jar
		   through the -identificationsConfigFile argument.
		   For every entry of the identificationFileList repeat it emits two lines:
		     1. the rendered value of the selected identificationsFile dataset
		        (presumably its file path; confirm against the Galaxy version in use),
		     2. a format tag: "apml" when the dataset's datatype is an instance of the class
		        registered for the 'apml' extension, otherwise "mzid".
		   NOTE(review): every non-APML dataset, including prims.fileset.zip, is tagged "mzid".
		   The template body is kept as is because its whitespace ends up in the generated file.
		-->
		<configfile name="identificationsConfigFile">## start comment
		## iterate over the selected files and store their names in the config file
		#for $i, $s in enumerate( $identificationFileList )
			${s.identificationsFile}
			## also print out the datatype in the next line, based on previously configured datatype
			#if isinstance( $s.identificationsFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__):
				apml
			#else:
        		mzid
      		#end if
		#end for
		## end comment</configfile>
	</configfiles>
	<outputs>
		<!-- Peptide quantifications in APML format. -->
		<data format="apml"
		      name="outputApml"
		      label="${tool.name} on ${on_string}: peptide quantifications (APML)"/>
		<!-- Peptide quantifications as a tab-separated table. Author's note on possible columns:
		     pep, avg_m/z, avg rt, m/z window, rt window, i_s1, i_s2, ... -->
		<data format="tabular"
		      name="outputTsv"
		      label="${tool.name} on ${on_string}: peptide quantifications (TSV)"/>
		<!-- HTML report. Author's note: it shows the samples extracted and the files used to
		     'build up' each sample. -->
		<data format="html"
		      name="htmlReportFile"
		      label="${tool.name} on ${on_string} - HTML report"/>
	</outputs>
	<!-- No functional tests are defined for this tool. -->
	<tests>
	</tests>
  <help>
  
.. class:: infomark
  
This tool takes in multiple peptide identification result files that have peptide identifications 
coupled to some quantification (e.g. precursor intensity information or for example data coming 
from MS^E acquisition where peptide identification and quantification are done in the same run and reported together). 
Then, based on the given experiment design parameters (i.e. how the result files relate back to 
replicate runs and samples), it produces a new file in which the peptides are reported with 
their calculated quantifications at the sample level. 

The figure below explains this: 

.. image:: $PATH_TO_IMAGES/napq_overview.png 

.




  </help>
</tool>