Mercurial > repos > pieterlukasse > prims_metabolomics
changeset 3:2c1c9f0d8aa6
using normal versioning
author | pieter.lukasse@wur.nl |
---|---|
date | Fri, 17 Jan 2014 12:39:28 +0100 |
parents | a35b55bfe96c |
children | 80075a4c6543 |
files | msclust.xml msclust2.0.1.xml |
diffstat | 2 files changed, 289 insertions(+), 289 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/msclust.xml Fri Jan 17 12:39:28 2014 +0100 @@ -0,0 +1,289 @@ +<tool name="MsClust" id="msclust2" version="2.0.2"> + <description>Extracts fragmentation spectra from aligned data</description> + <!-- + For remote debugging start your listener on port 8000 and use the following as command interpreter: + java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 + ////////////////////////// + + TODO in command below: add conditionals according to options of using or NOT the tolerances/thresholds from previous steps + --> + <command interpreter="java -jar "> + MsClust.jar + -peaksFileName $inputPeaks + -dataType $dataType + -imputationMethod $imputationMethod.type + #if $imputationMethod.type == "valueRange" + -rangeUpperLimit $imputationMethod.rangeUpperLimit + #end if + -plInputFormat "metalign" + -potDensFuncType $potDensFuncType.type + -centerSelectionType $centerSelectionType.type + -clusteringType $clusteringType.type + -neighborhoodWindowSize $potDensFuncType.pdf_neighborhoodWindowSize + -clusterSearchStopCriterium $centerSelectionType.cs_stop_criterion + -pearsonDistTreshold $potDensFuncType.pdf_pears_treshold + -pearsonTresholdConfidence $potDensFuncType.pdf_pears_conf + -pearsonPDReductionThreshold $centerSelectionType.cs_pears_pd_reductionTreshold + -pearsonPDReductionSlope $centerSelectionType.cs_pears_pd_reductionSlope + -scanDistTol $potDensFuncType.pdf_scan_toler + -scanDistanceConfidence $potDensFuncType.pdf_scan_conf + -centrotypesOut $centrotypesOut + -simOut $simOut + -micOut $micOut + -mspOut $mspOut + -classOut $classOut + -outReport $htmlReportFile + -outReportPicturesPath $htmlReportFile.files_path + #if $clusteringType.type == "fuzzyCMeans" + -fcmMembershipWeightingExponent $clusteringType.fcmMembershipWeightingExponent + -fcmStopCriterion $clusteringType.fcmStopCriterion + -fcmCorrelationWeight $clusteringType.fcmCorrelationWeight + -fcmFinalAssemblyType 
$clusteringType.finalClusterAssembly.type + #if $clusteringType.finalClusterAssembly.type == "membershipBased" + -fcmMembershipCutoff $clusteringType.finalClusterAssembly.fcmMembershipCutoff + #end if + #end if + -verbose "false" + #if $advancedSettings.settings == True + -advancedSettings YES + -saturationLimit $advancedSettings.saturationLimit + -sampleSelectionSortType $advancedSettings.sampleSelectionSortType + -simSelectionAlgorithm $advancedSettings.simSelectionAlgorithm + -simMassFilter "$advancedSettings.simMassFilter" + -simMembershipThreshold $advancedSettings.simMembershipThreshold + -simSaturationThreshold $advancedSettings.simSaturationThreshold + -simAbsenseThreshold $advancedSettings.simAbsenseThreshold + -micMembershipThreshold $advancedSettings.micMembershipThreshold + -peakIntensityCorrectionAlgorithm $advancedSettings.peakIntensityCorrectionAlgorithm + #else + -advancedSettings YES + -sampleSelectionSortType SIM_INTENSITY + -peakIntensityCorrectionAlgorithm CORRELATION_BASED + #end if + + </command> + <inputs> + <!-- <param name="rankingWeightConfig" type="text" area="true" size="11x70" label="NB - TEST VERSION" +value="VERSION BEING TESTED AT THIS MOMENT...NOT READY FOR USE..."/> + --> + <param name="inputPeaks" type="data" format="txt" label="Ion-wise aligned data (e.g. 
MetAlign output data)" /> + <param name="dataType" type="select" size="30" label="Data type"> + <option value="gcms" selected="true">GC-MS</option> + <option value="lcms">LC-MS</option> + </param> + <conditional name="imputationMethod"> + <param name="type" type="select" size="30" label="Select the approach used for imputing missing values (optional)" help="select how you generated the values to fill in the data gaps"> + <option value="none" >none</option> + <option value="metot" selected="true">MeTot</option> + <option value="valueRange">Values range</option> + </param> + <when value="valueRange"> + <param name="rangeUpperLimit" type="integer" size="10" value="0" label="Range upper limit" help="values up to this limit will be considered 'generated' values" /> + </when> + </conditional> + <conditional name="potDensFuncType"> + <param name="type" type="select" size="30" label="Select PD function type ====================================================="> + <option value="original" selected="true">Original</option> + </param> + <when value="original"> + <param name="pdf_neighborhoodWindowSize" type="integer" size="10" value="200" label="Effective Peaks" /> + <param name="pdf_scan_toler" type="float" size="10" value="10" label="Peak Width, in scans" /> + <param name="pdf_scan_conf" type="float" size="10" value="80" label="Peak Width confidence (0.0 to 99.99)" help="example: 0[no confidence]...50[good guess]...99.9[quite certain])" /> + <param name="pdf_pears_treshold" type="float" size="10" value="0.8" label="Correlation threshold (0.0 - 1.0)" /> + <param name="pdf_pears_conf" type="float" size="10" value="98.0" label="Correlation threshold confidence (0.0 to 99.99)" help="example: 0[no confidence]...50[good guess]...99.9[quite certain])" /> + </when> + </conditional> + <conditional name="centerSelectionType"> + <param name="type" type="select" label="Initial Centers selection type ==================================================" > + <option value="original" 
selected="true">Original - Subtractive potential reductions with stop criterion and REUSE tolerances (from PD function)</option> + </param> + <when value="original"> + <param name="cs_pears_pd_reductionTreshold" type="float" size="10" value="0.8" label="Potential Density reduction (0.0 - 1.0)" /> + <param name="cs_pears_pd_reductionSlope" type="float" size="10" value="0.01" label="Potential Density reduction softness " /> + <param name="cs_stop_criterion" type="float" size="10" value="2" label="Stop Criterion " /> + </when> + </conditional> + <conditional name="clusteringType"> + <param name="type" type="select" label="Classify using ==========================================================="> + <option value="original" selected="true">Original - Fuzzy clustering, keep original centers and REUSE (scan distance) tolerances</option> + <option value="fuzzyCMeans">(experimental) Fuzzy C-Means - Fuzzy clustering, optimize centers</option> + </param> + <when value="original"> + <!-- nothing --> + </when> + <when value="originalNewTol"> + <param name="clust_scan_toler" type="float" size="10" value="10" label="Peak Width, in scans" /> + <param name="clust_scan_slope" type="float" size="10" value="2" label="Peak Width margin softness" /> + </when> + <when value="fuzzyCMeans"> + <param name="fcmMembershipWeightingExponent" type="float" size="10" value="2.0" label="Membership Weighting Exponent" help="Influences cluster center repositioning in the iterations 1.1 (exploratory) to around 3.0 (conservative)" /> + <param name="fcmStopCriterion" type="float" size="10" value="0.05" label="Stop Criterion" help="When convergence is 'reached' (e.g. 0.05 means memberships only changed with 5% in last iteration)" /> + <param name="fcmCorrelationWeight" type="float" size="10" value="2" label="Correlation weight factor" help="Increase this if you think the correlation is reliable (e.g. 
you have a high number of samples)" /> + <conditional name="finalClusterAssembly"> + <param name="type" type="select" label="Final cluster assembly" > + <option value="original" selected="true">Original - distance based</option> + <option value="membershipBased">Membership based</option> + </param> + <when value="membershipBased"> + <param name="fcmMembershipCutoff" type="select" label="Maximum allowed peak overlap" > + <option value="0.05" >~7 clusters</option> + <option value="0.10" >~5 clusters</option> + <option value="0.20" >~3 clusters</option> + </param> + </when> + <when value="original"> + <!-- nothing --> + </when> + </conditional> + </when> + </conditional> + + <param name="summaryReport" type="boolean" checked="true" label="Generate summary report" help="NB: this will increase the processing time (in some cases up to a few extra minutes)"/> + + <conditional name="advancedSettings"> + <param name="settings" type="boolean" truevalue="Yes" falsevalue="No" checked="false" label="Advanced settings ========================================================"/> + <when value="Yes"> + <param name="saturationLimit" optional="true" type="integer" size="10" label="Saturation limit (optional)" help="fill in if you have saturation problems in your data" /> + <param name="sampleSelectionSortType" type="select" label="Sample selection scheme for spectrum peak intensity correction algorithm (optional/experimental)" help="The intensity values to use to select the samples for each cluster/metabolite in which it is most intense/abundant. These samples are used in the peak intensity correction (see parameter below). Use this option to try to avoid samples that have insufficient signal or saturation." 
> + <option value="None">None</option> + <!-- in order of best FORWARD scoring when tested on /test/data/report_test_sets/(P2) Relative peak heights in spectra/Input (Test set 1) --> + <option value="SIM_INTENSITY" selected="true">SIM intensities</option> + <option value="MAX_INTENSITY">Maximum intensities</option> + <option value="CENTROTYPE_INTENSITY">Centrotype peak intensities</option> + <option value="MIC_INTENSITY">MIC intensities</option> + </param> + <param name="peakIntensityCorrectionAlgorithm" type="select" label="Spectrum peak intensity correction algorithm (optional/experimental)" help="Whether spectrum peak heights should be adjusted according to their membership to the cluster or to their correlation to the cluster's centrotype ion" > + <option value="MEMBERSHIP_BASED">Membership based (msclust 1.0 mode)</option> + <option value="CORRELATION_BASED" selected="true">Correlation based</option> + </param> + <param name="simSelectionAlgorithm" type="select" label="SIM selection algorithm (experimental)" help="Set this if you want to deviate from the standard which is: allow shared SIM peaks for GC-MS data, and force unique SIM peaks for LC-MS data"> + <option value="" selected="true"></option> + <option value="uniqueSIM">Unique SIM peak</option> + <option value="sharedSIM">Shared SIM peak</option> + </param> + <param name="simMassFilter" type="text" optional="true" size="30" label="SIM mass exclusion list" help="Comma-separated list of masses NOT to use as SIM peaks. E.g. '73,147,...' " /> + <param name="simMembershipThreshold" optional="true" type="float" size="10" label="SIM membership threshold" help="Minimum membership a peak should have to qualify as a SIM candidate. E.g. 0.8 " /> + <param name="simSaturationThreshold" optional="true" type="float" size="10" label="SIM saturation threshold (%)" help="Maximum % of samples in which a SIM candidate peak may be saturated. If the candidate peak exceeds this threshold, then another peak is chosen. 
If no peak can be found meeting this criterion, mass 0 is reported" /> + <param name="simAbsenseThreshold" optional="true" type="float" size="10" label="SIM absence threshold (%)" help="Maximum % of samples in which a SIM candidate peak may be absent. If the candidate peak exceeds this threshold, then another peak is chosen. If no peak can be found meeting this criterion, mass 0 is reported" /> + + <param name="micMembershipThreshold" optional="true" type="float" size="10" label="MIC membership threshold" help="Minimum membership a peak should have to be counted in the MIC sum. E.g. 0.8 " /> + + </when> + </conditional> + + + </inputs> + <outputs> + <data name="centrotypesOut" format="msclust.csv" label="${tool.name} on ${on_string} - centrotypes file"/> + <data name="simOut" format="msclust.csv" label="${tool.name} on ${on_string} - SIM file"/> + <data name="micOut" format="msclust.csv" label="${tool.name} on ${on_string} - MIC file"/> + <data name="mspOut" format="msp" label="${tool.name} on ${on_string} - SPECTRA file"/> + <data name="classOut" format="msclust.csv" label="${tool.name} on ${on_string} - Classification file"/> + <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - HTML report"> + <!-- If the expression is false, the file is not created --> + <filter>( summaryReport == True )</filter> + </data> + </outputs> + <tests> + <!-- find out how to use --> + </tests> + <help> + +<!-- see also http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#hyperlink-targets --> + +.. class:: infomark + +This tool extracts spectra from ion-wise aligned MS(/MS) results. It uses expression profiles and +retention times of the putative ions to cluster them. Each cluster is then used to generate +one spectrum containing the clustered ions (peaks). + +.. image:: $PATH_TO_IMAGES/msclust_summary.png + + +----- + +**Output** + +This tool returns a number of output files and a small report. 
+ +**Parameters index** + + +*Select the approach used for imputing missing values:* only select this if you have used a specific method to +fill in the data gaps in the input file. One example is replacing zero values by some randomly generated low value. +If MeTot is chosen, then a value is considered generated if: the value contains a dot '.' and some number +other than 0 (zero) after the dot. + +*Effective Peaks:* Neighborhood window size to consider when calculating density. Smaller values increase +performance but are less reliable. + +*Peak Width, in scans:* Scan window width of scans to consider 'close'. One can see this as the +'tolerated variation in scans' for the apex positions of the fragment peaks composing a cluster. +Note: if MetAlign was used, this is the variation *after* pre-processing by MetAlign. + +*Peak Width confidence:* The higher the confidence, the stricter the threshold. + +*Correlation threshold (0.0 - 1.0):* Tolerance center for pearson distance calculation. The higher this value, +the higher the correlation between 2 items has to be for them to be considered 'close'. + +*Correlation threshold confidence:* The higher the confidence, the stricter the threshold. `More...`__ + +*Potential Density reduction (0.0 - 1.0):* Reduction tolerance center for pearson distance calculation. +The higher this value, the less the low correlated items get reduced, getting a chance to form a cluster of their own. + +*Potential Density reduction softness:* Reduction curve slope for pearson distance tolerance. Lower +values = stricter separation at the value determined in 'Potential Density reduction' above +(TODO review this comment). + +*Stop Criterion:* When to stop reducing and looking for new clusters. Lower values = more iterations + +.. 
__: javascript:window.open('$PATH_TO_IMAGES/confidence_and_slope_params_explain.png','popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes') + + +----- + +**Output files described below** + +----- + +*SPECTRA:* this file can be submitted to NIST for identification of the spectra. + +`Click here for more details on the Sample selection and Spectrum peak intensity correction algorithm parameters related to SPECTRA generation`_ + +.. _Click here for more details on the Sample selection and Spectrum peak intensity correction algorithm parameters related to SPECTRA generation: javascript:window.open('$PATH_TO_IMAGES/sample_sel_and_peak_height_correction.png','popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes') + +----- + +*MIC:* stands for Measured Ions Count -> it contains, for each cluster, the sum of the ion count +values (corrected by their membership) for all MEASURED cluster ions in the given sample. + +The MIC for a **cluster i** in **sample s**, where **cluster i** has **n** members is thus: + +sum ( [intensity of member n in **sample s**] x [membership value of member n in **cluster i** ] ) + +----- + +*SIM:* stands for Selective Ion Mode -> it contains, for each cluster, the intensity values of the +most representative member ion peak of this cluster. The most representative member peak is the one with the +highest membership*average_intensity. This definition leads to conflicts as a peak can have a +membership in two or more clusters. The assignment of a SIM peak to a cluster depends on +the configured data type (LC or GC-MS). NB: this can be overruled in the "advanced settings": + +(1) LC-MS SIM: select SIM peak only once and for the centrotype in which this specific mass has its +highest membership; for neighboring centrotypes use its "second best SIM", etcetera. 
In other words, +if the SIM peak has been identified as the SIM in more than 1 cluster, assign as SIM to the cluster +with highest membership. Continue searching for other SIM peaks to assign to the other clusters until +all ambiguities are solved. + +(2) GC-MS SIM: the SIM peak can be "shared" by multiple clusters. However, the intensity values are corrected +by the membership value of the peak in the cluster in case the SIM peak is "shared". If the SIM peak is not +"shared" then the "raw" intensity values of the SIM peak are recorded in the SIM file. + +`Click here for more details on the SIM output file`_ + +.. _Click here for more details on the SIM output file: javascript:window.open('$PATH_TO_IMAGES/sample_SIM.png','popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes') + + + + </help> +</tool>
--- a/msclust2.0.1.xml Thu Jan 16 13:12:07 2014 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,289 +0,0 @@ -<tool name="MsClust" id="msclust2" version="2.0.1"> - <description>Extracts fragmentation spectra from aligned data</description> - <!-- - For remote debugging start you listener on port 8000 and use the following as command interpreter: - java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 - ////////////////////////// - - TODO in command below: add conditionals according to options of using or NOT the tolerances/thresholds from previous steps - --> - <command interpreter="java -jar "> - MsClust.jar - -peaksFileName $inputPeaks - -dataType $dataType - -imputationMethod $imputationMethod.type - #if $imputationMethod.type == "valueRange" - -rangeUpperLimit $imputationMethod.rangeUpperLimit - #end if - -plInputFormat "metalign" - -potDensFuncType $potDensFuncType.type - -centerSelectionType $centerSelectionType.type - -clusteringType $clusteringType.type - -neighborhoodWindowSize $potDensFuncType.pdf_neighborhoodWindowSize - -clusterSearchStopCriterium $centerSelectionType.cs_stop_criterion - -pearsonDistTreshold $potDensFuncType.pdf_pears_treshold - -pearsonTresholdConfidence $potDensFuncType.pdf_pears_conf - -pearsonPDReductionThreshold $centerSelectionType.cs_pears_pd_reductionTreshold - -pearsonPDReductionSlope $centerSelectionType.cs_pears_pd_reductionSlope - -scanDistTol $potDensFuncType.pdf_scan_toler - -scanDistanceConfidence $potDensFuncType.pdf_scan_conf - -centrotypesOut $centrotypesOut - -simOut $simOut - -micOut $micOut - -mspOut $mspOut - -classOut $classOut - -outReport $htmlReportFile - -outReportPicturesPath $htmlReportFile.files_path - #if $clusteringType.type == "fuzzyCMeans" - -fcmMembershipWeightingExponent $clusteringType.fcmMembershipWeightingExponent - -fcmStopCriterion $clusteringType.fcmStopCriterion - -fcmCorrelationWeight $clusteringType.fcmCorrelationWeight - -fcmFinalAssemblyType 
$clusteringType.finalClusterAssembly.type - #if $clusteringType.finalClusterAssembly.type == "membershipBased" - -fcmMembershipCutoff $clusteringType.finalClusterAssembly.fcmMembershipCutoff - #end if - #end if - -verbose "false" - #if $advancedSettings.settings == True - -advancedSettings YES - -saturationLimit $advancedSettings.saturationLimit - -sampleSelectionSortType $advancedSettings.sampleSelectionSortType - -simSelectionAlgorithm $advancedSettings.simSelectionAlgorithm - -simMassFilter "$advancedSettings.simMassFilter" - -simMembershipThreshold $advancedSettings.simMembershipThreshold - -simSaturationThreshold $advancedSettings.simSaturationThreshold - -simAbsenseThreshold $advancedSettings.simAbsenseThreshold - -micMembershipThreshold $advancedSettings.micMembershipThreshold - -peakIntensityCorrectionAlgorithm $advancedSettings.peakIntensityCorrectionAlgorithm - #else - -advancedSettings YES - -sampleSelectionSortType SIM_INTENSITY - -peakIntensityCorrectionAlgorithm CORRELATION_BASED - #end if - - </command> - <inputs> - <!-- <param name="rankingWeightConfig" type="text" area="true" size="11x70" label="NB - TEST VERSION" -value="VERSION BEING TESTED AT THIS MOMENT...NOT READY FOR USE..."/> - --> - <param name="inputPeaks" type="data" format="txt" label="Ion-wise aligned data (e.g. 
MetAlign output data)" /> - <param name="dataType" type="select" size="30" label="Data type"> - <option value="gcms" selected="true">GC-MS</option> - <option value="lcms">LC-MS</option> - </param> - <conditional name="imputationMethod"> - <param name="type" type="select" size="30" label="Select the approach used for imputing missing values (optional)" help="select how you generated the values to fill in the data gaps"> - <option value="none" >none</option> - <option value="metot" selected="true">MeTot</option> - <option value="valueRange">Values range</option> - </param> - <when value="valueRange"> - <param name="rangeUpperLimit" type="integer" size="10" value="0" label="Range upper limit" help="values up to this limit will be considered 'generated' values" /> - </when> - </conditional> - <conditional name="potDensFuncType"> - <param name="type" type="select" size="30" label="Select PD function type ====================================================="> - <option value="original" selected="true">Original</option> - </param> - <when value="original"> - <param name="pdf_neighborhoodWindowSize" type="integer" size="10" value="200" label="Effective Peaks" /> - <param name="pdf_scan_toler" type="float" size="10" value="10" label="Peak Width, in scans" /> - <param name="pdf_scan_conf" type="float" size="10" value="80" label="Peak Width confidence (0.0 to 99.99)" help="example: 0[no confidence]...50[good guess]...99.9[quite certain])" /> - <param name="pdf_pears_treshold" type="float" size="10" value="0.8" label="Correlation threshold (0.0 - 1.0)" /> - <param name="pdf_pears_conf" type="float" size="10" value="98.0" label="Correlation threshold confidence (0.0 to 99.99)" help="example: 0[no confidence]...50[good guess]...99.9[quite certain])" /> - </when> - </conditional> - <conditional name="centerSelectionType"> - <param name="type" type="select" label="Initial Centers selection type ==================================================" > - <option value="original" 
selected="true">Original - Subtractive potential reductions with stop criterion and REUSE tolerances (from PD function)</option> - </param> - <when value="original"> - <param name="cs_pears_pd_reductionTreshold" type="float" size="10" value="0.8" label="Potential Density reduction (0.0 - 1.0)" /> - <param name="cs_pears_pd_reductionSlope" type="float" size="10" value="0.01" label="Potential Density reduction softness " /> - <param name="cs_stop_criterion" type="float" size="10" value="2" label="Stop Criterion " /> - </when> - </conditional> - <conditional name="clusteringType"> - <param name="type" type="select" label="Classify using ==========================================================="> - <option value="original" selected="true">Original - Fuzzy clustering, keep original centers and REUSE (scan distance) tolerances</option> - <option value="fuzzyCMeans">(experimental) Fuzzy C-Means - Fuzzy clustering, optimize centers</option> - </param> - <when value="original"> - <!-- nothing --> - </when> - <when value="originalNewTol"> - <param name="clust_scan_toler" type="float" size="10" value="10" label="Peak Width, in scans" /> - <param name="clust_scan_slope" type="float" size="10" value="2" label="Peak Width margin softness" /> - </when> - <when value="fuzzyCMeans"> - <param name="fcmMembershipWeightingExponent" type="float" size="10" value="2.0" label="Membership Weighting Exponent" help="Influences cluster center repositioning in the iterations 1.1 (exploratory) to around 3.0 (conservative)" /> - <param name="fcmStopCriterion" type="float" size="10" value="0.05" label="Stop Criterion" help="When convergence is 'reached' (e.g. 0.05 means memberships only changed with 5% in last iteration)" /> - <param name="fcmCorrelationWeight" type="float" size="10" value="2" label="Correlation weight factor" help="Increase this if you think the correlation is reliable (e.g. 
you have a high number of samples)" /> - <conditional name="finalClusterAssembly"> - <param name="type" type="select" label="Final cluster assembly" > - <option value="original" selected="true">Original - distance based</option> - <option value="membershipBased">Membership based</option> - </param> - <when value="membershipBased"> - <param name="fcmMembershipCutoff" type="select" label="Maximum allowed peak overlap" > - <option value="0.05" >~7 clusters</option> - <option value="0.10" >~5 clusters</option> - <option value="0.20" >~3 clusters</option> - </param> - </when> - <when value="original"> - <!-- nothing --> - </when> - </conditional> - </when> - </conditional> - - <param name="summaryReport" type="boolean" checked="true" label="Generate summary report" help="NB: this will increase the processing time (in some cases up to a few extra minutes)"/> - - <conditional name="advancedSettings"> - <param name="settings" type="boolean" truevalue="Yes" falsevalue="No" checked="false" label="Advanced settings ========================================================"/> - <when value="Yes"> - <param name="saturationLimit" optional="true" type="integer" size="10" label="Saturation limit (optional)" help="fill in if you have saturation problems in your data" /> - <param name="sampleSelectionSortType" type="select" label="Sample selection scheme for spectrum peak intensity correction algorithm (optional/experimental)" help="The intensity values to use to select the samples for each cluster/metabolite in which it is most intense/abundant. These samples are used in the peak intensity correction (see parameter below). Use this option to try to avoid samples that have insufficient signal or saturation." 
> - <option value="None">None</option> - <!-- in order of best FORWARD scoring when tested on /test/data/report_test_sets/(P2) Relative peak heights in spectra/Input (Test set 1) --> - <option value="SIM_INTENSITY" selected="true">SIM intensities</option> - <option value="MAX_INTENSITY">Maximum intensities</option> - <option value="CENTROTYPE_INTENSITY">Centrotype peak intensities</option> - <option value="MIC_INTENSITY">MIC intensities</option> - </param> - <param name="peakIntensityCorrectionAlgorithm" type="select" label="Spectrum peak intensity correction algorithm (optional/experimental)" help="Whether spectrum peak heights should be adjusted according to their membership to the cluster or to their correlation to the cluster's centrotype ion" > - <option value="MEMBERSHIP_BASED">Membership based (msclust 1.0 mode)</option> - <option value="CORRELATION_BASED" selected="true">Correlation based</option> - </param> - <param name="simSelectionAlgorithm" type="select" label="SIM selection algorithm (experimental)" help="Set this if you want to deviate from the standard which is: allow shared SIM peaks for GC-MS data, and force unique SIM peaks for LC-MS data"> - <option value="" selected="true"></option> - <option value="uniqueSIM">Unique SIM peak</option> - <option value="sharedSIM">Shared SIM peak</option> - </param> - <param name="simMassFilter" type="text" optional="true" size="30" label="SIM mass exclusion list" help="Comma-separated list of masses NOT to use as SIM peaks. E.g. '73,147,...' " /> - <param name="simMembershipThreshold" optional="true" type="float" size="10" label="SIM membership threshold" help="Minimum membership a peak should have to qualify as a SIM candidate. E.g. 0.8 " /> - <param name="simSaturationThreshold" optional="true" type="float" size="10" label="SIM saturation threshold (%)" help="Maximum % of samples in which a SIM candidate peak may be saturated. If the candidate peak exceeds this threshold, then another peak is chosen. 
If no peak can be found this criteria, mass 0 is reported" /> - <param name="simAbsenseThreshold" optional="true" type="float" size="10" label="SIM absence threshold (%)" help="Maximum % of samples in which a SIM candidate peak may be absent. If the candidate peak exceeds this threshold, then another peak is chosen. If no peak can be found meeting this criteria, mass 0 is reported" /> - - <param name="micMembershipThreshold" optional="true" type="float" size="10" label="MIC membership threshold" help="Minimum membership a peak should have to be counted in the MIC sum. E.g. 0.8 " /> - - </when> - </conditional> - - - </inputs> - <outputs> - <data name="centrotypesOut" format="msclust.csv" label="${tool.name} on ${on_string} - centrotypes file"/> - <data name="simOut" format="msclust.csv" label="${tool.name} on ${on_string} - SIM file"/> - <data name="micOut" format="msclust.csv" label="${tool.name} on ${on_string} - MIC file"/> - <data name="mspOut" format="msp" label="${tool.name} on ${on_string} - SPECTRA file"/> - <data name="classOut" format="msclust.csv" label="${tool.name} on ${on_string} - Classification file"/> - <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - HTML report"> - <!-- If the expression is false, the file is not created --> - <filter>( summaryReport == True )</filter> - </data> - </outputs> - <tests> - <!-- find out how to use --> - </tests> - <help> - -<!-- see also http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#hyperlink-targets --> - -.. class:: infomark - -This tool extracts spectra from ion-wise aligned MS(/MS) results. It uses expression profiles and -retention times of the putative ions to cluster them. Each cluster is then used to generate -one spectrum containing the clustered ions (peaks). - -.. image:: $PATH_TO_IMAGES/msclust_summary.png - - ------ - -**Output** - -This tools returns a number of ouptut files and a small report. 
- -**Parameters index** - - -*Select the approach used for imputing missing values:* only select this if you have used a specific method to -fill in the data gaps in the input file. One example is replacing zero values by some randomly generated low value. -If MeTot is chosen, then a value is considered generated if: the value contains a dot '.' and some number -other than 0 (zero) after the dot. - -*Effective Peaks:* Neighborhood window size to consider when calculating density. Smaller values increase -performance but are less reliable. - -*Peak Width, in scans:* Scan window width of scans to consider 'close'. One can see this as the -'tolerated variation in scans' for the apex positions of the fragment peaks composing a cluster. -Note: if MetAlign was used, this is the variation *after* pre-processing by MetAlign. - -*Peak Width confidence:* The higher the confidence, the stricter the threshold. - -*Correlation threshold (0.0 - 1.0):* Tolerance center for pearson distance calculation. The higher this value, -the higher the correlation between 2 items has to be for them to be considered 'close'. - -*Correlation threshold confidence:* The higher the confidence, the stricter the threshold. `More...`__ - -*Potential Density reduction (0.0 - 1.0):* Reduction tolerance center for pearson distance calculation. -The higher this value, the less the low correlated items get reduced, getting a chance to form a cluster of their own. - -*Potential Density reduction softness:* Reduction curve slope for pearson distance tolerance. Lower -values = stricter separation at the value determined in 'Potential Density reduction' above -(TODO review this comment). - -*Stop Criterion:* When to stop reducing and looking for new clusters. Lower values = more iterations - -.. 
__: javascript:window.open('$PATH_TO_IMAGES/confidence_and_slope_params_explain.png','popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes') - - ------ - -**Output files described below** - ------ - -*SPECTRA:* this file can be submitted to NIST for identification of the spectra. - -`Click here for more details on the Sample selection and Spectrum peak intensity correction algorithm parameters related to SPECTRA generation`_ - -.. _Click here for more details on the Sample selection and Spectrum peak intensity correction algorithm parameters related to SPECTRA generation: javascript:window.open('$PATH_TO_IMAGES/sample_sel_and_peak_height_correction.png','popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes') - ------ - -*MIC:* stands for Measured Ions Count -> it contains, for each cluster, the sum of the ion count -values (corrected by their membership) for all MEASURED cluster ions in the given sample. - -The MIC for a **cluster i** in **sample s**, where **cluster i** has **n** members is thus: - -sum ( [intensity of member n in **sample s**] x [membership value of member n in **cluster i** ] ) - ------ - -*SIM:* stands for Selective Ion Mode -> it contains, for each cluster, the intensity values of the -most representative member ion peak of this cluster. The most representative member peak is the one with the -highest membership*average_intensity. This definition leads to conflicts as a peak can have a -membership in two or more clusters. The assignment of a SIM peak to a cluster depends on -the configured data type (LC or GC-MS). NB: this can be overruled in the "advanced settings": - -(1) LC-MS SIM: select SIM peak only once and for the centrotype in which this specific mass has its -highest membership; for neighboring centrotypes use its "second best SIM", etcetera. 
In other words, -if the SIM peak has been identified as the SIM in more than 1 cluster, assign as SIM to the cluster -with highest membership. Continue searching for other SIM peaks to assign to the other clusters until -all ambiguities are solved. - -(2) GC-MS SIM: the SIM peak can be "shared" by multiple clusters. However, the intensity values are corrected -by the membership value of the peak in the cluster in case the SIM peak is "shared". If the SIM peak is not -"shared" then the "raw" intensity values of the SIM peak are recorded in the SIM file. - -`Click here for more details on the SIM output file`_ - -.. _Click here for more details on the SIM output file: javascript:window.open('$PATH_TO_IMAGES/sample_SIM.png','popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes') - - - - </help> -</tool>