view napq.xml @ 19:d31c6978d9d0

fixes for NapQ
author pieter.lukasse@wur.nl
date Mon, 26 Jan 2015 06:24:15 +0100
parents 40ec8770780d
children 46f568202d46
line wrap: on
line source

<tool name="NapQ" id="napq" version="0.0.3">
	<description>'no alignment' (alignment-free) peptide quantification</description>
	<!-- 
	   For remote debugging start your listener on port 8000 and use the following as command interpreter:
	       java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 
	                    //////////////////////////
	    -->
	<!--
	   Builds the NapQ.jar command line, which is run through the "java -jar" interpreter.
	   The fraction naming codes are only passed when the "Data is from 2D LC-MS" checkbox
	   is ticked. That checkbox is a boolean param declared with truevalue="Yes", so its
	   rendered string value is compared to "Yes"; comparing the parameter wrapper to the
	   Python constant True is not a reliable test.
	   File path arguments are single-quoted so paths survive shell word splitting.
	   NOTE(review): the two naming convention values are free user text placed inside
	   double quotes, while their sanitizers whitelist all printable characters; shell
	   metacharacters such as $ or ` can therefore reach the shell. Consider tightening.
	-->
	<command interpreter="java -jar ">
	    NapQ.jar 
	    -identificationsConfigFile '$identificationsConfigFile'
	    -namingConventionCodesForSamples "$namingConventionCodesForSamples"
	    #if str($is2D_LC_MS.fractions) == "Yes"
	        -namingConventionCodesForFractions "$is2D_LC_MS.namingConventionCodesForFractions"
	    #end if
	    -outputApml '$outputApml'
	    -outputTsv '$outputTsv'
	    -outReport '$htmlReportFile'
	    -outReportPicturesPath '$htmlReportFile.files_path'
	</command>
	
	<inputs>

   		<!-- Repeatable group: each entry contributes one identification dataset to the run. -->
   		<repeat name="identificationFileList"
   		        title="Peptide identification files"
   		        help="Set of MS/MS peptide identification files that have some form of quantification data coupled to it (e.g. MSE identifications&amp;intensity).">
   			<!-- Accepted dataset formats are listed in the format attribute below. -->
   			<param name="identificationsFile"
   			       type="data"
   			       format="apml,mzidentml,prims.fileset.zip"
   			       label="Identifications file (APML or MZIDENTML or MZIDENTML fileSet)"/>
   		</repeat>

		<!-- Free-text CSV list of sample codes; passed to NapQ as -namingConventionCodesForSamples. -->
		<param name="namingConventionCodesForSamples" type="text" size="100" value="" 
		label="Part of run/file name that identifies the sample" 
		help="Add the CSV list of codes that occur in the file names 
			and that stand for a sample code. E.g. '_S1,_S2,_S3,etc.' "> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->
			<sanitizer>
				<!-- adding more characters to the set of "valid" ones: -->
				<valid>
					<add preset="string.printable"/>
					<add value="#"/>
					<add value="@"/>
					<add value="$"/>
					<add value="%"/>
					<!-- a literal ampersand must be written as an entity, otherwise the file is not well-formed XML -->
					<add value="&amp;"/>
					<add value="*"/>
					<add value="["/>
					<add value="]"/>
					<add value="|"/>
					<add value="{"/>
					<add value="}"/>
				</valid>
			</sanitizer>
		</param>
   		
   		<conditional name="is2D_LC_MS">
     		<!-- Checkbox that switches the fraction naming input on ("Yes") or off ("No"). -->
     		<param name="fractions" type="boolean" truevalue="Yes" falsevalue="No" checked="false" 
     		label="Data is from 2D LC-MS"
     		help="Data acquisition was done in multiple fractions."/>
     		<when value="Yes"> 
     			<!-- Free-text CSV list of fraction codes; passed to NapQ as -namingConventionCodesForFractions. -->
     			<param name="namingConventionCodesForFractions" type="text" size="100" value="" 
     			label="Part of run/file name that identifies the 2D LC-MS fraction" 
     			help="Add the CSV list of codes that occur in the file names 
     				and that stand for a fraction code. E.g. '_F1,_F2,_F3,etc.' Use this to avoid
     				that each (fraction) file is seen as a separate run."> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->
     					<sanitizer>
							<!-- adding more characters to the set of "valid" ones: -->
							<valid>
								<add preset="string.printable"/>
								<add value="#"/>
								<add value="@"/>
								<add value="$"/>
								<add value="%"/>
								<!-- a literal ampersand must be written as an entity, otherwise the file is not well-formed XML -->
								<add value="&amp;"/>
								<add value="*"/>
								<add value="["/>
								<add value="]"/>
								<add value="|"/>
								<add value="{"/>
								<add value="}"/>
							</valid>
						</sanitizer>
					</param>
     		
     		</when>
     		<!-- No extra inputs are needed when the data is not fractionated. -->
     		<when value="No">
     		</when>
     	</conditional>     	
     	
	</inputs>
	<configfiles>
		<!--
		   Generates the file handed to NapQ through -identificationsConfigFile.
		   For every entry of the identificationFileList repeat the template emits the
		   dataset path on one line, followed by a line holding either "apml" or "mzid".
		   "apml" is chosen when the dataset's datatype is an instance of the class
		   registered for the 'apml' extension; every other datatype yields "mzid".
		   The loop index $i produced by enumerate() is not used in the template.
		-->
		<configfile name="identificationsConfigFile">## start comment
		## iterate over the selected files and store their names in the config file
		#for $i, $s in enumerate( $identificationFileList )
			${s.identificationsFile}
			## also print out the datatype in the next line, based on previously configured datatype
			#if isinstance( $s.identificationsFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__):
				apml
			#else:
        		mzid
      		#end if
		#end for
		## end comment</configfile>
	</configfiles>
	<outputs>
		<!-- Peptide quantifications in APML format (written via -outputApml). -->
		<data name="outputApml"
		      format="apml"
		      label="${tool.name} on ${on_string}: peptide quantifications (APML)"/>
		<!-- Same quantifications as a table (written via -outputTsv).
		     Possible columns: pep, avg_m/z, avg rt, m/z window, rt window, i_s1, i_s2, ... -->
		<data name="outputTsv"
		      format="tabular"
		      label="${tool.name} on ${on_string}: peptide quantifications (TSV)"/>
		<!-- HTML report (written via -outReport): shows the samples extracted and the
		     files used to 'build up' each sample. -->
		<data name="htmlReportFile"
		      format="html"
		      label="${tool.name} on ${on_string} - HTML report"/>
	</outputs>
	<tests>
		<!-- No functional tests are defined for this tool. -->
	</tests>
  <help>
  
.. class:: infomark
  
This tool takes in multiple peptide identification result files that have peptide identifications 
coupled to some quantification (e.g. precursor intensity information or for example data coming 
from MS^E acquisition where peptide identification and quantification are done in the same run and reported together). 
Then, based on the given experiment design parameters (i.e. how the result files relate back to 
replicate runs and samples), it produces a new file in which the peptides are reported with 
their calculated quantifications at the sample level. 

The figure below explains this: 

.. image:: $PATH_TO_IMAGES/napq_overview.png 

.




  </help>
</tool>