view napq.xml @ 5:5d99c9d0615d

performance improvements for MsFilt
author pieter.lukasse@wur.nl
date Mon, 27 Jan 2014 12:11:29 +0100
parents 73c7c6589202
children d1edc7971d48
line wrap: on
line source

<tool name="NapQ" id="napq" version="0.0.1">
	<description>'no alignment' (alignment-free) peptide quantification</description>
	<!-- 
	   For remote debugging, start your listener on port 8000 and use the following as the command interpreter:
	       java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 
	                    //////////////////////////
	    -->
	<!--
	    Command line template (Cheetah). NapQ.jar is started through the
	    "java -jar" interpreter and receives:
	      * the generated config file listing the identification/spectra files,
	      * the sample naming codes (and, only for 2D LC-MS data, the fraction codes),
	      * the APML, TSV and HTML report output locations.
	    The boolean "fractions" parameter is tested by truthiness rather than being
	    compared to the Python literal True: the template receives a Galaxy parameter
	    wrapper, not a plain bool, so an equality test against True is unreliable.
	    Substituted values are single-quoted so that free-text codes containing
	    spaces reach the jar as a single argument.
	-->
	<command interpreter="java -jar ">
	    NapQ.jar 
	    -identificationsConfigFile '$identificationsConfigFile'
	    -namingConventionCodesForSamples '$namingConventionCodesForSamples'
	    #if $is2D_LC_MS.fractions
	        -namingConventionCodesForFractions '$is2D_LC_MS.namingConventionCodesForFractions'
	    #end if
	    -outputApml '$outputApml'
	    -outputTsv '$outputTsv'
	    -outReport '$htmlReportFile'
	    -outReportPicturesPath '$htmlReportFile.files_path'
	</command>
	
	<inputs>

		<!-- One entry per identification file; each entry may carry an accompanying spectra file. -->
   		<repeat name="identificationFileList" title="Peptide identification files" help="Full set of MS/MS peptide identification files, including peptides that could not be quantified.">
   			<param name="identificationsFile" type="data" format="apml,mzidentml,prims.fileset.zip" label="Identifications file (APML or MZIDENTML or MZIDENTML fileSet)" />
   			<!-- NOTE(review): the label mentions mzml, but the accepted formats list mzidentml; confirm which is intended. -->
   			<param name="spectraFile" type="data" format="mzidentml,prims.fileset.zip" optional="true" label="(Optional) Spectra fileSet (mzml file or fileSet)"
   				   help="Select this in case your Identifications file is MZIDENTML or MZIDENTML fileSet" />
   		</repeat>

		<!-- Comma-separated codes used to recognise the sample a run/file belongs to. -->
		<param name="namingConventionCodesForSamples" type="text" size="100" value="" 
		label="Part of run/file name that identifies the sample" 
		help="Add the CSV list of codes that occur in the file names 
			and that stand for a sample code. E.g. '_S1,_S2,_S3,etc.' "/> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->

   		
   		<!-- The fraction codes are only asked for when the data comes from 2D LC-MS. -->
   		<conditional name="is2D_LC_MS">
     		<param name="fractions" type="boolean" truevalue="Yes" falsevalue="No" checked="false" 
     		label="Data is from 2D LC-MS"
     		help="Data acquisition was done in multiple fractions."/>
     		<when value="Yes"> 
     			<param name="namingConventionCodesForFractions" type="text" size="100" value="" 
     			label="Part of run/file name that identifies the 2D LC-MS fraction" 
     			help="Add the CSV list of codes that occur in the file names 
     				and that stand for a fraction code. E.g. '_F1,_F2,_F3,etc.' Use this to avoid
     				that each (fraction) file is seen as a separate run."/> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->
     		</when>
     		<!-- Explicit empty case for the unchecked (default) state, so every value of the test parameter has a matching case. -->
     		<when value="No"/>
     	</conditional>     	
     	
	</inputs>
	<configfiles>
		<!--
		    Cheetah template for the file handed to NapQ.jar via -identificationsConfigFile.
		    For every entry of the identificationFileList repeat it emits one line
		    "identificationsFile|spectraFile" followed by one line naming the type:
		    "apml" when the identifications dataset's datatype is an instance of the
		    class registered for the 'apml' extension, "mzid" otherwise.
		    NOTE(review): the loop index $i is not used in the body.
		    NOTE(review): the emitted lines keep the template's leading whitespace;
		    presumably the jar trims it. Confirm before re-indenting.
		-->
		<configfile name="identificationsConfigFile">## start comment
		## loop over all entries of the identificationFileList repeat and write their file references to the config file
		#for $i, $s in enumerate( $identificationFileList )
			${s.identificationsFile}|${s.spectraFile}
			## on the next line, write the type keyword derived from the datatype of the identifications dataset
			#if isinstance( $s.identificationsFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__):
				apml
			#else:
        		mzid
      		#end if
		#end for
		## end comment</configfile>
	</configfiles>
	<outputs>
	  <!-- Peptide quantifications written in APML format. -->
	  <data format="apml"
	        name="outputApml"
	        label="${tool.name} on ${on_string}: peptide quantifications (APML)" />
	  <!-- Peptide quantifications as a tab-separated table.
	       Possible columns: pep, avg_m/z, avg rt, m/z window, rt window, i_s1, i_s2, ... -->
	  <data format="tabular"
	        name="outputTsv"
	        label="${tool.name} on ${on_string}: peptide quantifications (TSV)" />
	  <!-- HTML report listing the extracted samples and the files used to 'build up' each sample. -->
	  <data format="html"
	        name="htmlReportFile"
	        label="${tool.name} on ${on_string} - HTML report" />
	</outputs>
	<!-- No functional tests are defined for this tool yet. -->
	<tests>
	</tests>
  <help>
  
.. class:: infomark
  
This tool takes in multiple peptide identification result files that have peptide identifications 
coupled to some quantification (e.g. precursor intensity information or for example data coming 
from MS^E acquisition where peptide identification and quantification are done in the same run and reported together). 
Then, based on the given experiment design parameters (i.e. how the result files relate back to 
replicate runs and samples), it produces a new file in which the peptides are reported with 
their calculated quantifications at the sample level. 

The figure below explains this: 

.. image:: $PATH_TO_IMAGES/napq_overview.png 

.




  </help>
</tool>