Mercurial > repos > pieterlukasse > prims_metabolomics

diff msclust.xml @ 62:9bd2597c8851 default tip
r
author: pieter.lukasse@wur.nl
date: Fri, 06 Feb 2015 15:49:26 +0100
parents: d685210eef3e
--- a/msclust.xml	Fri Dec 19 15:30:13 2014 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,356 +0,0 @@
-<tool name="MsClust" id="msclust2" version="2.0.7">
-	<description>Extracts fragmentation spectra from aligned data</description>
-	<!-- Command line for MsClust.jar, assembled from the inputs and outputs declared in this file. Both branches of the advancedSettings #if pass "-advancedSettings YES"; the #else branch passes fixed SIM_INTENSITY / CORRELATION_BASED values.
-	   For remote debugging, start your listener on port 8000 and use the following as command interpreter:
-	       java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 
-	                    //////////////////////////
-	       NOTE(review): -outReport / -outReportPicturesPath are always passed, yet the htmlReportFile output is filtered on summaryReport; confirm this works when the report is disabled.
-	       TODO in command below: add conditionals according to options of using or NOT the tolerances/thresholds from previous steps 
-	    -->
-	<command interpreter="java -jar ">
-	    MsClust.jar 
-	   	-peaksFileName $inputPeaks 
-	   	-dataType $dataType
-        -imputationMethod $imputationMethod.type
-        #if $imputationMethod.type == "valueRange"
-        	-rangeUpperLimit $imputationMethod.rangeUpperLimit
-        #end if
-		-plInputFormat $plInputFormat 
-		-potDensFuncType $potDensFuncType.type 
-		-centerSelectionType $centerSelectionType.type 
-		-clusteringType $clusteringType.type 
-		-neighborhoodWindowSize $potDensFuncType.pdf_neighborhoodWindowSize 
-		-clusterSearchStopCriterium $centerSelectionType.cs_stop_criterion
-		-pearsonDistTreshold $potDensFuncType.pdf_pears_treshold
-		-pearsonTresholdConfidence $potDensFuncType.pdf_pears_conf
-		-pearsonPDReductionThreshold $centerSelectionType.cs_pears_pd_reductionTreshold
-		-pearsonPDReductionSlope $centerSelectionType.cs_pears_pd_reductionSlope
-		-rtDistTolUnit $potDensFuncType.rt_dist_tol_unit.type
-		-rtDistTol $potDensFuncType.rt_dist_tol_unit.pdf_rt_toler
-		-rtDistanceConfidence $potDensFuncType.pdf_scan_conf
-		#if $clusteringType.type == "original"
-			-clustMembershipCutoff $clusteringType.clust_membership_cutoff
-		#end if  
-		-centrotypesOut $centrotypesOut 
-		-simOut $simOut
-		-micOut $micOut
-		-mspOut $mspOut 
-		-classOut $classOut
-		-outReport $htmlReportFile
-	    -outReportPicturesPath $htmlReportFile.files_path
-        #if $clusteringType.type == "fuzzyCMeans"
-        	-fcmMembershipWeightingExponent $clusteringType.fcmMembershipWeightingExponent 
-			-fcmStopCriterion $clusteringType.fcmStopCriterion
-			-fcmCorrelationWeight $clusteringType.fcmCorrelationWeight
-			-fcmFinalAssemblyType $clusteringType.finalClusterAssembly.type
-			#if $clusteringType.finalClusterAssembly.type == "membershipBased"
-				-fcmMembershipCutoff $clusteringType.finalClusterAssembly.fcmMembershipCutoff
-			#end if
-        #end if
-		-verbose "false"
-	    #if $advancedSettings.settings == True
-	    	-advancedSettings YES
-	    	-saturationLimit $advancedSettings.saturationLimit
-	    	-sampleSelectionSortType $advancedSettings.sampleSelectionSortType
-	    	-simSelectionAlgorithm $advancedSettings.simSelectionAlgorithm
-	    	-simMassFilter "$advancedSettings.simMassFilter"
-	    	-simMembershipThreshold $advancedSettings.simMembershipThreshold
-	    	-simSaturationThreshold $advancedSettings.simSaturationThreshold
-	    	-simAbsenseThreshold $advancedSettings.simAbsenseThreshold
-	    	-micMembershipThreshold $advancedSettings.micMembershipThreshold
-	    	-peakIntensityCorrectionAlgorithm $advancedSettings.peakIntensityCorrectionAlgorithm
-        #else
-        	-advancedSettings YES
-        	-sampleSelectionSortType SIM_INTENSITY
-        	-peakIntensityCorrectionAlgorithm CORRELATION_BASED
-        #end if
-	    
-	</command>
-	<inputs>
-
-	 	<param name="inputPeaks" type="data" format="txt" label="Ion-wise aligned data (e.g. MetAlign or XCMS/metaMS output data)" /> <!-- passed to MsClust.jar as -peaksFileName -->
-	 	<param name="plInputFormat" type="select" size="30" label="Data format"> <!-- passed to MsClust.jar as -plInputFormat -->
-			<option value="metalign"  selected="true">MetAlign</option>
-			<option value="xcms">XCMS/metaMS (beta)</option>
-		</param>
-		<param name="dataType" type="select" size="30" label="Data type"> <!-- passed to MsClust.jar as -dataType -->
-			<option value="gcms"  selected="true">GC-MS</option>
-			<option value="lcms">LC-MS</option>
-		</param>
-	 	<conditional name="imputationMethod"> <!-- type is passed as -imputationMethod; rangeUpperLimit is passed (as -rangeUpperLimit) only when type is "valueRange" -->
-			<param name="type" type="select" size="30" label="Select the approach used for imputing missing values (optional)" help="select how you generated the values to fill in the data gaps">
-				<option value="none" >none</option>
-				<option value="metot" selected="true">MeTot</option>
-				<option value="valueRange">Values range</option>
-			</param>
-			<when value="valueRange">
-				<param name="rangeUpperLimit" type="integer" size="10" value="0" label="Range upper limit" help="values up to this limit will be considered 'generated' values"  />
-			</when>
-			<when value="metot"> <!-- no extra parameters -->
-     		</when>
-     		<when value="none"> <!-- no extra parameters -->
-     		</when>
-		</conditional>		    
-	 	<conditional name="potDensFuncType"> <!-- only one option ("original"); its parameters supply -neighborhoodWindowSize, -rtDistTolUnit, -rtDistTol, -rtDistanceConfidence, -pearsonDistTreshold and -pearsonTresholdConfidence -->
-		    <param name="type" type="select" size="30" label="Select PD function type =====================================================">
-		      <option value="original" selected="true">Original</option>
-		    </param>
-		    <when value="original">
-		      <param name="pdf_neighborhoodWindowSize" type="integer" size="10" value="200" label="Effective Peaks"  />
-		      <conditional name="rt_dist_tol_unit"> <!-- the selected unit decides the label and default value of pdf_rt_toler -->
-		      	<param name="type" type="select" size="30" label="Peak time unit">
-		      		<option value="1" selected="true">scan nr (MetAlign)</option>
-		      		<option value="2" >(average) micro minutes (MetAlign)</option>
-		      		<option value="3" >(average) minutes (XCMS)</option>
-		      	</param>
-		      	<when value="1">
-		      		<param name="pdf_rt_toler" type="float" size="10" value="10" label="Peak Width, in scans"  />
-		      	</when>
-		      	<when value="2">
-		      		<param name="pdf_rt_toler" type="float" size="10" value="100000" label="Peak Width, in micro minutes" help="e.g. 100,000=6 seconds" />
-		      	</when>
-		      	<when value="3">
-		      		<param name="pdf_rt_toler" type="float" size="10" value="0.1" label="Peak Width, in minutes" help="e.g. 0.1=6 seconds" />
-		      	</when>
-		      </conditional>
-		      <param name="pdf_scan_conf" type="float" size="10" value="80" label="Peak Width confidence (0.0 to 99.99)" help="example: 0[no confidence]...50[good guess]...99.9[quite certain])" />
-		      <param name="pdf_pears_treshold" type="float" size="10" value="0.8" label="Correlation threshold (0.0 - 1.0)" />
-		      <param name="pdf_pears_conf" type="float" size="10" value="98.0" label="Correlation threshold confidence (0.0 to 99.99)" help="example: 0[no confidence]...50[good guess]...99.9[quite certain])" />
-		    </when>
-		</conditional>
-		<conditional name="centerSelectionType"> <!-- only one option ("original"); its parameters supply -pearsonPDReductionThreshold, -pearsonPDReductionSlope and -clusterSearchStopCriterium -->
-		    <param name="type" type="select" label="Initial Centers selection type ==================================================" >
-		      <option value="original" selected="true">Original - Subtractive potential reductions with stop criterion and REUSE tolerances (from PD function)</option>
-		    </param>
-		    <when value="original">
-		      <param name="cs_pears_pd_reductionTreshold" type="float" size="10" value="0.8" label="Potential Density reduction (0.0 - 1.0)"  />
-		      <param name="cs_pears_pd_reductionSlope" type="float" size="10" value="0.01" label="Potential Density reduction softness "  />
-		      <param name="cs_stop_criterion" type="float" size="10" value="2" label="Stop Criterion "  />
-			</when>
-		</conditional>
-		<conditional name="clusteringType"> <!-- "original" exposes clust_membership_cutoff (passed as -clustMembershipCutoff); "fuzzyCMeans" exposes the fcm* parameters -->
-		    <param name="type" type="select" label="Classify using ===========================================================">
-		      <option value="original" selected="true">Original - Fuzzy clustering, keep original centers and REUSE (scan distance) tolerances</option>
-		      <option value="fuzzyCMeans">(experimental) Fuzzy C-Means - Fuzzy clustering, optimize centers</option>
-		    </param>
-		    <when value="original">
-		    	<param name="clust_membership_cutoff" type="float" size="10" value=""
-		    	       label="Membership cutoff (0.0 - 1.0)"
-		    	       help="Items with membership below this value are NOT added to the cluster"/> <!-- NOTE(review): value is empty although the parameter is not marked optional; confirm the user is meant to always enter a value -->
-			</when>
-		    <!-- one idea would be to have clustering specific tolerance values, not reusing the centrotype selection ones
-		    <when value="originalNewTol">
-		      <param name="clust_scan_toler" type="float" size="10" value="10" label="Peak Width, in scans"  />
-		      <param name="clust_scan_slope" type="float" size="10" value="2" label="Peak Width margin softness"  />
-		    </when>
-		    -->
-		    <when value="fuzzyCMeans">
-				<param name="fcmMembershipWeightingExponent" type="float" size="10" value="2.0" label="Membership Weighting Exponent" help="Influences cluster center repositioning in the iterations 1.1 (exploratory) to around 3.0 (conservative)" />
-				<param name="fcmStopCriterion" type="float" size="10" value="0.05" label="Stop Criterion" help="When convergence is 'reached' (e.g. 0.05 means memberships only changed with 5% in last iteration)" />
-				<param name="fcmCorrelationWeight" type="float" size="10" value="2" label="Correlation weight factor" help="Increase this if you think the correlation is reliable (e.g. you have a high number of samples)" />
-				<conditional name="finalClusterAssembly"> <!-- fcmMembershipCutoff is only offered, and only passed on the command line, for "membershipBased" -->
-					<param name="type" type="select" label="Final cluster assembly" >
-				      <option value="original" selected="true">Original - distance based</option>
-				      <option value="membershipBased">Membership based</option>
-				    </param>
-					<when value="membershipBased">
-						<param name="fcmMembershipCutoff" type="select" label="Maximum allowed peak overlap" >
-							<option value="0.05" >~7 clusters</option>
-							<option value="0.10" >~5 clusters</option>
-							<option value="0.20" >~3 clusters</option>
-						</param>
-					</when>
-					<when value="original">
-					    <!-- nothing -->
-					</when>
-				</conditional>
-		    </when>
-		</conditional>
-		
-		<param name="summaryReport" type="boolean" checked="true" label="Generate summary report" help="NB: this will increase the processing time (in some cases up to a few extra minutes)"/> <!-- not passed on the command line; only referenced by the filter on the htmlReportFile output -->
-     	
-        <conditional name="advancedSettings"> <!-- the parameters of the "Yes" branch are passed by the command template inside its "#if $advancedSettings.settings" block; the "No" branch has no parameters -->
-     		<param name="settings" type="boolean" truevalue="Yes" falsevalue="No" checked="false" label="Advanced settings ========================================================"/>
-     		<when value="Yes">
-     			<param name="saturationLimit" optional="true" type="integer" size="10" label="Saturation limit (optional)" help="fill in if you have saturation problems in your data"  />
-	 			<param name="sampleSelectionSortType"  type="select" label="Sample selection scheme for spectrum peak intensity correction algorithm (optional/experimental)" help="The intensity values to use to select the samples for each cluster/metabolite in which it is most intense/abundant. These samples are used in the peak intensity correction (see parameter below). Use this option to try to avoid samples that have insufficient signal or saturation."  >
-     				<option value="None">None</option>
-     				<!-- in order of best FORWARD scoring when tested on /test/data/report_test_sets/(P2) Relative peak heights in spectra/Input (Test set 1) -->
-     				<option value="SIM_INTENSITY" selected="true">SIM intensities</option>
-		    		<option value="MAX_INTENSITY">Maximum intensities</option>
-     				<option value="CENTROTYPE_INTENSITY">Centrotype peak intensities</option>
-		    		<option value="MIC_INTENSITY">MIC intensities</option>		    		
-     			</param>
-     			<param name="peakIntensityCorrectionAlgorithm"  type="select" label="Spectrum peak intensity correction algorithm (optional/experimental)" help="Whether spectrum peak heights should be adjusted according to their membership to the cluster or to their correlation to the cluster's centrotype ion"  >
-     				<option value="MEMBERSHIP_BASED">Membership based (msclust 1.0 mode)</option>
-		    		<option value="CORRELATION_BASED" selected="true">Correlation based</option>
-     			</param>     			
-     			<param name="simSelectionAlgorithm" type="select" label="SIM selection algorithm (experimental)" help="Set this if you want to deviate from the standard which is: allow shared SIM peaks for GC-MS data, and force unique SIM peaks for LC-MS data">
-     				<option value="" selected="true"></option>
-     				<option value="uniqueSIM">Unique SIM peak</option>
-		    		<option value="sharedSIM">Shared SIM peak</option>
-     			</param>
-     			<param name="simMassFilter" type="text" optional="true" size="30" label="SIM mass exclusion list" help="Comma-separated list of masses NOT to use as SIM peaks. E.g. '73,147,...' " />
-     			<param name="simMembershipThreshold" optional="true" type="float" size="10" label="SIM membership threshold" help="Minimum membership a peak should have to qualify as a SIM candidate. E.g. 0.8 " />
-     			<param name="simSaturationThreshold" optional="true" type="float" size="10" label="SIM saturation threshold (%)" help="Maximum % of samples in which a SIM candidate peak may be saturated. If the candidate peak exceeds this threshold, then another peak is chosen. If no peak can be found meeting this criteria, mass 0 is reported" /> <!-- help wording aligned with simAbsenseThreshold below -->
-     			<param name="simAbsenseThreshold" optional="true" type="float" size="10" label="SIM absence threshold (%)" help="Maximum % of samples in which a SIM candidate peak may be absent. If the candidate peak exceeds this threshold, then another peak is chosen. If no peak can be found meeting this criteria, mass 0 is reported" />
-     			
-     			<param name="micMembershipThreshold" optional="true" type="float" size="10" label="MIC membership threshold" help="Minimum membership a peak should have to be counted in the MIC sum. E.g. 0.8 " />
-     			
-     		</when>
-     		<when value="No">
-			</when>
-     	</conditional>	
-
-     	
-	</inputs>
-	<outputs> <!-- each dataset name below is referenced as a $variable in the command template -->
-	  <data name="centrotypesOut" format="msclust.csv" label="${tool.name} on ${on_string} - centrotypes file"/>
-	  <data name="simOut" format="msclust.csv" label="${tool.name} on ${on_string} - SIM file"/>
-	  <data name="micOut" format="msclust.csv" label="${tool.name} on ${on_string} - MIC file"/>
-	   <data name="mspOut" format="msp" label="${tool.name} on ${on_string} - SPECTRA file"/>
-	  <data name="classOut" format="msclust.csv" label="${tool.name} on ${on_string} - Classification file"/>
-	  <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - HTML report">
-	 	<!-- If the filter expression is false (i.e. the summaryReport checkbox is off), the file is not created -->
-	  	<filter>( summaryReport == True )</filter>
-	  </data>
-	</outputs>
-	<tests>
-	  <!--  TODO: find out how to use Galaxy tool tests and add some here -->
-	</tests>
-  <help>
-
-<!-- see also http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#hyperlink-targets -->
-  
-.. class:: infomark
-  
-This tool extracts spectra from ion-wise aligned MS(/MS) results. It uses expression profiles and 
-retention times of the putative ions to cluster them. Each cluster is then used to generate 
-one spectrum containing the clustered ions (peaks). 
-
-.. image:: msclust_summary.png 
-
-
------
-
-**Input**
-
-The input file should contain the following columns (in this order), followed by the sample intensity columns (one column with the
-intensity value for each sample):
-
-*ScanNR*
-	
-*Ret(umin)*
-	
-*Mass(uD)*
-	
-*(Optional)retentionMean*
-
-*(only required if retentionMean is present)retentionSD*
-
-*N sample intensity columns...*
-
-
------
-
-**Output**
-
-This tool returns a number of output files and a small report. 
-
-**Parameters index**
-
-
-*Select the approach used for imputing missing values:* only select this if you have used a specific method to 
-fill in the data gaps in the input file. One example is replacing zero values by some randomly generated low value.
-If MeTot is chosen, then a value is considered generated if: the value contains a dot '.' and some number 
-other than 0 (zero) after the dot. 
-
-*Effective Peaks:* Neighborhood window size to consider when calculating density. Smaller values increase 
-performance but are less reliable.
-
-*Peak Width, in scans:* Width of the scan window within which peaks are considered 'close'. One can see this as the 
-'tolerated variation in scans' for the apex positions of the fragment peaks composing a cluster. 
-Note: if MetAlign was used, this is the variation *after* pre-processing by MetAlign.   
-
-*Peak Width confidence:* The higher the confidence, the stricter the threshold.
-
-*Correlation threshold (0.0 - 1.0):* Tolerance center for pearson distance calculation. The higher this value, 
-the higher the correlation between 2 items has to be for them to be considered 'close'. 
-
-*Correlation threshold confidence:* The higher the confidence, the stricter the threshold. `More...`__
-
-*Potential Density reduction (0.0 - 1.0):* Reduction tolerance center for pearson distance calculation. 
-The higher this value, the less the low correlated items get reduced, getting a chance to form a cluster of their own. 
-
-*Potential Density reduction softness:* Reduction curve slope for pearson distance tolerance. Lower 
-values = stricter separation at the value determined in 'Potential Density reduction' above  
-(TODO review this comment). 
-
-*Stop Criterion:* When to stop reducing and looking for new clusters. Lower values = more iterations 
-
-.. __: javascript:window.open('.. image:: confidence_and_slope_params_explain.png'.replace('.. image:: ', ''),'popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
-
-
------
-
-**Output files described below**
-
------
-
-*SPECTRA:* this file can be submitted to NIST for identification of the spectra.
-
-`Click here for more details on the Sample selection and Spectrum peak intensity correction algorithm parameters related to SPECTRA generation`_  
-
-.. _Click here for more details on the Sample selection and Spectrum peak intensity correction algorithm parameters related to SPECTRA generation: javascript:window.open('.. image:: sample_sel_and_peak_height_correction.png'.replace('.. image:: ', ''),'popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
-
------
-
-*MIC:* stands for Measured Ions Count -> it contains, for each cluster, the sum of the ion count 
-values (corrected by their membership) for all MEASURED cluster ions in the given sample.
-
-The MIC for a **cluster i** in **sample s**, where **cluster i** has **n** members, is thus: 
-
-sum over all members j = 1..n of ( [intensity of member j in **sample s**] x [membership value of member j in **cluster i**] )
-
------
-
-*SIM:* stands for Selective Ion Mode ->  it contains, for each cluster, the intensity values of the 
-most representative member ion peak of this cluster. The most representative member peak is the one with the 
-highest membership*average_intensity. This definition leads to conflicts as a peak can have a 
-membership in two or more clusters. The assignment of a SIM peak to a cluster depends on 
-the configured data type (LC or GC-MS). NB: this can be overruled in the "advanced settings":
-
-(1) LC-MS SIM: select SIM peak only once and for the centrotype in which this specific mass has its 
-highest membership; for neighboring centrotypes use its "second best SIM", etcetera. In other words,
-if the SIM peak has been identified as the SIM in more than 1 cluster, assign as SIM to the cluster 
-with highest membership. Continue searching for other SIM peaks to assign to the other clusters until 
-all ambiguities are solved.
-
-(2) GC-MS SIM: the SIM peak can be "shared" by multiple clusters. However, the intensity values are corrected
-by the membership value of the peak in the cluster in case the SIM peak is "shared". If the SIM peak is not 
-"shared" then the "raw" intensity values of the SIM peak are recorded in the SIM file. 
-
-`Click here for more details on the SIM output file`_  
-
-.. _Click here for more details on the SIM output file: javascript:window.open('.. image:: sample_SIM.png'.replace('.. image:: ', ''),'popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
-
-
-**References**
-
-If you use this Galaxy tool in work leading to a scientific publication please
-cite the following papers:
-
-Y. M. Tikunov, S. Laptenok, R. D. Hall, A. Bovy, and R. C. H. de Vos (2012).
-MSClust: a tool for unsupervised mass spectra extraction of 
-chromatography-mass spectrometry ion-wise aligned data
-http://dx.doi.org/10.1007%2Fs11306-011-0368-2
-
-  </help>
-  <citations>
-        <citation type="doi">10.1007/s11306-011-0368-2</citation> <!-- plain (not URL-encoded) DOI;
-        citations is a direct child of tool, placed after help rather than nested inside it. See also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set
-        -->
-   </citations>
-
-
-</tool>
author	pieter.lukasse@wur.nl
date	Fri, 06 Feb 2015 15:49:26 +0100
parents	d685210eef3e
children