view napq.xml @ 28:6107b74eeb11 default tip

improved documentation
author pieter.lukasse@wur.nl
date Fri, 30 Jan 2015 14:53:10 +0100
parents 34c4e7e0f23b
children
line wrap: on
line source

<tool name="NapQ" id="napq" version="0.0.3">
	<description>'no alignment' (alignment-free) peptide quantification</description>
	<!--
	    Command line for NapQ.jar. Galaxy fills in the Cheetah variables below with the generated
	    config file, the user-supplied naming codes and the output locations.

	    For remote debugging start your listener on port 8000 and use the following as command interpreter:
	        java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000
	                     //////////////////////////
	-->
	<command interpreter="java -jar ">
	    NapQ.jar
	    -identificationsConfigFile "$identificationsConfigFile"
	    -namingConventionCodesForSamples "$namingConventionCodesForSamples"
	    ## 'fractions' is a boolean param declared with truevalue="Yes" / falsevalue="No";
	    ## compare its string form with the truevalue instead of testing against Python's True.
	    #if str($is2D_LC_MS.fractions) == "Yes"
	        -namingConventionCodesForFractions "$is2D_LC_MS.namingConventionCodesForFractions"
	    #end if
	    -outputApml "$outputApml"
	    -outputTsv "$outputTsv"
	    -outReport "$htmlReportFile"
	    -outReportPicturesPath "${htmlReportFile.files_path}"
	</command>
	
	<inputs>

   		<!-- Repeatable list of identification inputs: each entry selects one dataset of format
   		     apml or prims.fileset.zip. The identificationsConfigFile template iterates over this list. -->
   		<repeat name="identificationFileList" title="(Filtered) Peptide identification files" help="Set of MS/MS peptide identification files that have some form of 
   		quantification data coupled to it (e.g. MSE identifications&amp;intensity). This list of peptide identifications is 
   		preferably pre-processed by a tool (e.g. MsFilt) that filters out as much as possible false-positive peptide identifications.">
   			<param name="identificationsFile" type="data" format="apml,prims.fileset.zip" label="Identifications file ([APML] or [MZIDENTML fileSet])" 
   			     help="When using MsFilt, select MsFilt's APML output file here."/>
   		</repeat>

		<!-- Free-text CSV list of sample codes; passed to NapQ as the value of
		     -namingConventionCodesForSamples, wrapped in double quotes on the command line. -->
		<param name="namingConventionCodesForSamples" type="text" size="100" value="" 
		label="Part of run/file name that identifies the sample" 
		help="Add the CSV list of codes that occur in the file names 
			and that stand for a sample code. E.g. '_S1,_S2,_S3,etc.' "> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->
			<sanitizer>
				<!-- adding more characters to the set of "valid" ones: -->
				<!-- NOTE(review): the string.printable preset presumably already contains the characters
				     added one by one below, as well as quote and backtick characters. Because this value
				     is placed inside double quotes in the command template, confirm that such a broad
				     whitelist is acceptable. -->
				<valid>
					<add preset="string.printable"/>
					<add value="#"/>
					<add value="@"/>
					<add value="$"/>
					<add value="%"/>
					<add value="&amp;"/>
					<add value="*"/>
					<add value="["/>
					<add value="]"/>
					<add value="|"/>
					<add value="{"/>
					<add value="}"/>
				</valid>
			</sanitizer>
		</param>
   		
   		<!-- Optional 2D LC-MS settings: the boolean 'fractions' switch (rendered as "Yes"/"No") selects
   		     whether the fraction naming codes are asked for. The command template wraps
   		     -namingConventionCodesForFractions in an #if on this switch. -->
   		<conditional name="is2D_LC_MS">
     		<param name="fractions" type="boolean" truevalue="Yes" falsevalue="No" checked="false" 
     		label="Data is from 2D LC-MS"
     		help="Data acquisition was done in multiple fractions."/>
     		<when value="Yes"> 
     			<param name="namingConventionCodesForFractions" type="text" size="100" value="" 
     			label="Part of run/file name that identifies the 2D LC-MS fraction" 
     			help="Add the CSV list of codes that occur in the file names 
     				and that stand for a fraction code. E.g. '_F1,_F2,_F3,etc.' Use this to avoid
     				that each (fraction) file is seen as a separate run."> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->
     					<sanitizer>
							<!-- adding more characters to the set of "valid" ones: -->
							<!-- NOTE(review): the string.printable preset presumably already contains the
							     characters added one by one below, as well as quote and backtick characters.
							     Because this value is placed inside double quotes in the command template,
							     confirm that such a broad whitelist is acceptable. -->
							<valid>
								<add preset="string.printable"/>
								<add value="#"/>
								<add value="@"/>
								<add value="$"/>
								<add value="%"/>
								<add value="&amp;"/>
								<add value="*"/>
								<add value="["/>
								<add value="]"/>
								<add value="|"/>
								<add value="{"/>
								<add value="}"/>
							</valid>
						</sanitizer>
					</param>
     		
     		</when>
     		<!-- Nothing extra to ask for when the data is not fractionated. -->
     		<when value="No">
     		</when>
     	</conditional>     	
     	
	</inputs>
	<configfiles>
		<!-- For every entry of the identificationFileList repeat this template writes the selected
		     file on one line and a type keyword ("apml" or "mzid") on the next line. The resulting
		     file is handed to NapQ through -identificationsConfigFile.
		     The loop iterates over the repeat directly; the former enumerate() index was never used. -->
		<configfile name="identificationsConfigFile">## start comment
		## iterate over the selected files and store their names in the config file
		#for $s in $identificationFileList
			${s.identificationsFile}
			## also print out the datatype in the next line, based on previously configured datatype
			#if isinstance( $s.identificationsFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__):
				apml
			#else:
        		mzid
      		#end if
		#end for
		## end comment</configfile>
	</configfiles>
	<outputs>
		<!-- Peptide quantifications in APML format. -->
		<data format="apml" name="outputApml" label="${tool.name} on ${on_string}: peptide quantifications (APML)"/>
		<!-- Peptide quantifications as a table; possible columns include:
		     pep, avg_m/z, avg rt, m/z window, rt window, i_s1, i_s2, ... -->
		<data format="tabular" name="outputTsv" label="${tool.name} on ${on_string}: peptide quantifications (TSV)"/>
		<!-- HTML report showing the samples extracted and the files used to 'build up' each sample. -->
		<data format="html" name="htmlReportFile" label="${tool.name} on ${on_string} - HTML report"/>
	</outputs>
	<tests>
		<!-- No functional tests are defined for this tool yet. -->
	</tests>
  <help>
  
.. class:: infomark
  
This tool takes in multiple peptide identification result files that have peptide identifications 
coupled to some quantification (e.g. precursor intensity information or for example data coming 
from MS^E acquisition where peptide identification and quantification are done in the same run and reported together). 
Then, based on the given experiment design parameters (i.e. how the result files relate back to 
replicate runs and samples), it produces a new file in which the peptides are reported with 
their calculated quantifications at the sample level. 

The figure below explains this: 

.. image:: $PATH_TO_IMAGES/napq_overview.png 

.




  </help>
</tool>