Mercurial > repos > pieterlukasse > prims_proteomics
diff quantifere.xml @ 0:d50f079096ee
Push to main toolshed
author | pieter.lukasse@wur.nl |
---|---|
date | Wed, 08 Jan 2014 11:39:16 +0100 |
parents | |
children | 73c7c6589202 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/quantifere.xml Wed Jan 08 11:39:16 2014 +0100
@@ -0,0 +1,219 @@
+<tool name="Quantifere" id="quantifere1" version="1.0.2">
+  <description>Protein Inference by Peptide Quantification patterns</description>
+  <!--
+    For remote debugging start your listener on port 8000 and use the following as command interpreter:
+    java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000
+    //////////////////////////
+  -->
+  <!--
+    Command line: Quantifere.jar is started through "java -jar" and receives every input,
+    config file and output declared in this tool as a named argument.
+    The namingConventionCodesForFractions argument is only appended when
+    "Data is from 2D LC-MS" is ticked. That boolean declares truevalue="Yes", so the test
+    compares the string form of the parameter with "Yes" instead of with the Python constant True.
+  -->
+  <command interpreter="java -jar ">
+    Quantifere.jar
+    -annotatedQuantificationFilesList $annotatedQuantificationFilesList
+    -identificationFilesList $identificationFilesList
+    -statisticalMeasuresConfigFile $statisticalMeasuresConfigFile
+    -quantificationDataToUse $quantificationDataToUse
+    -minCorrel $minCorrel
+    -minProtCoverage $minProtCoverage
+    -minAboveAverageHits $minAboveAverageHits
+    -minNrIdsForInferencePeptide $minNrIdsForInferencePeptide
+    -refineModel $refineModel
+    -functionalAnnotationCSV $functionalAnnotationCSV
+    -outputCSV $outputCSV
+    -outputInferenceLogCSV $outputInferenceLogCSV
+    -outputSummaryAnnotationCSV $outputSummaryAnnotationCSV
+    -outReport $htmlReportFile
+    -outReportPicturesPath $htmlReportFile.files_path
+    #if str($is2D_LC_MS.fractions) == "Yes"
+      -namingConventionCodesForFractions $is2D_LC_MS.namingConventionCodesForFractions
+    #end if
+  </command>
+
+  <inputs>
+
+    <repeat name="annotatedQuantificationFiles" title="Peptide (filtered) quantification files (APML)"
+            help="The APML contents as aligned, annotated and scored feature lists,
+            as produced by MsFilt tool. Select one or more files. For 2D-LC-MS we expect one file per fraction.">
+      <param name="annotatedQuantificationFile" size="50" type="data" format="apml" label="File (APML format)" />
+    </repeat>
+
+    <repeat name="identificationFiles" title="Peptide (filtered) identification files (MS/MS identifications)"
+            help="Full set of MS/MS peptide identification files, including peptides that could not be quantified.
+            This set of identifications is ideally filtered on some quality and
+            statistical measures (e.g. as is done by MsFilt). Tip: to base the inference only on the
+            selected peptide quantification files, you
+            can select the same quantification files here as well. Select one or more files.">
+      <param name="identificationFile" size="50" type="data" format="apml,mzid" label="File (APML or MZIDENTML format)" />
+    </repeat>
+
+    <conditional name="is2D_LC_MS">
+      <param name="fractions" type="boolean" truevalue="Yes" falsevalue="No" checked="false"
+             label="Data is from 2D LC-MS"
+             help="Data acquisition was done in multiple fractions."/>
+      <when value="Yes">
+        <param name="namingConventionCodesForFractions" type="text" size="100" value=""
+               label="Part of run/file name that identifies the 2D LC-MS fraction"
+               help="Add the CSV list of codes that occur in the file names
+               and that stand for a fraction code. E.g. '_F1,_F2,_F3,etc.' In this
+               way different peptide identifications from the same sample but measured
+               in different fractions can be merged together. Otherwise each (fraction) file
+               is seen as a separate sample."/> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->
+      </when>
+    </conditional>
+
+    <param name="statisticalMeasuresConfig" type="text" area="true" size="6x70" label="Statistical measures configuration"
+           help="Here you may specify the statistical measures that are found in the ms/ms results (e.g. p or e-values).
+           The format is: SM alias => SM name,type,mode[min/max]. Leaving this configuration out while these are present in the
+           dataset will have the effect that they will be wrongly used as a regular scoring scheme, having effect on for example
+           the filter criteria below like 'Minimum number of peptide matches with a score above average' ."
+           value="smXTD => MS:1001330,XSLASH!Tandem:expect,min&#10;pvCSVEX => p_value,CSV_EXPORT,min&#10;smAUTO_LIKELIHOOD => AUTOMOD_LOGLIKELIHOOD,PLGS/Auto-mod,max&#10;smLIKELIHOOD => LOGLIKELIHOOD,PLGS/Databank-search,max&#10;"/>
+    <!-- The value attribute above is kept on a single line: each &#10; is the line break
+         between two entries. A literal line break inside an attribute value would be
+         turned into a space by the XML parser. -->
+    <param name="quantificationDataToUse" type="select"
+           label="Quantification data to use"
+           help="Quantification data to use for the pattern clustering and inference steps. NB: check if the chosen data is also
+           present in your file, or choose 'auto' to let Quantifere check which quantification type is present in most peptides.">
+      <option value="auto" selected="true">auto</option>
+      <option value="getIntensity">(TODO)raw intensities</option>
+      <option value="getApexIntensity">(TODO)apex intensities</option>
+      <option value="getNormalizedIntensity">(TODO)normalized intensities</option>
+    </param>
+    <!-- TODO let minCorrel default value vary according to quantification type chosen above -->
+    <param name="minCorrel" type="float" size="10" value="0.85" label="Minimum correlation in a cluster" help="Features will be grouped by their protein annotation and
+           sample intensity values correlation. Set here the minimum correlation expected between grouped members. This is used to guide the clustering algorithm."/>
+
+    <!-- simple extra heuristics to remove some "noise" protein hits -->
+    <param name="minProtCoverage" type="float" size="10" value="5.0" label="Minimum protein coverage (%)" help="This will remove proteins that have a too small
+           portion of their sequence covered by peptide matches."/>
+
+    <param name="minAboveAverageHits" type="integer" size="10" value="1" label="Minimum number of different peptide matches with a score above average"
+           help="This will remove proteins that do not have enough reasonable peptides hits."/>
+
+    <param name="minNrIdsForInferencePeptide" type="integer" size="10" value="1" label="Minimum number of peptide identifications for inference peptides"
+           help="Minimum number of peptide identifications a peptide needs to be used as inference peptide for secondary proteins."/>
+
+
+    <param name="functionalAnnotationCSV" type="data" format="csv,txt,tsv" optional="true"
+           label="(Functional)annotation mapping file (csv or tsv format)"
+           help="Optional file that maps protein accessions to a network, pathway or other higher level annotations. In this file a header line is expected with these 2 columns (names and lower case is important): accession,annotation"/>
+
+    <param name="refineModel" type="boolean" checked="true" label="Refine matches model"
+           help="This will let the algorithm search for a reduced set of secondary protein matches that still explains the variation in the peptide quantification patterns"/>
+
+
+    <param name="summaryReport" type="boolean" checked="true" label="Generate summary report"/>
+
+  </inputs>
+  <!--
+    The config files below are rendered from Cheetah templates; their paths are the
+    values of the matching arguments in the command above.
+    annotatedQuantificationFilesList: one selected quantification file per line.
+    identificationFilesList: each selected identification file, followed by a line
+    holding its type (apml or mzid).
+    statisticalMeasuresConfigFile: the text of the statistical measures field.
+  -->
+  <configfiles>
+    <configfile name="annotatedQuantificationFilesList">## start comment
+      ## iterate over the selected files and store their names in the config file
+      #for $i, $s in enumerate( $annotatedQuantificationFiles )
+        ${s.annotatedQuantificationFile}
+      #end for
+      ## end comment</configfile>
+
+    <configfile name="identificationFilesList">## start comment
+      ## iterate over the selected files and store their names in the config file
+      #for $i, $s in enumerate( $identificationFiles )
+        ${s.identificationFile}
+        ## also print out the datatype in the next line, based on previously configured datatype
+        #if isinstance( $s.identificationFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__):
+          apml
+        #else:
+          mzid
+        #end if
+      #end for
+      ## end comment</configfile>
+    <configfile name="statisticalMeasuresConfigFile">## start comment
+      ${statisticalMeasuresConfig}
+    </configfile>
+  </configfiles>
+  <outputs>
+    <data name="outputCSV" format="csv" label="${tool.name} on ${on_string}: Proteins list (CSV)" />
+    <data name="outputInferenceLogCSV" format="csv" label="${tool.name} on ${on_string}: Inference log (CSV)"/>
+    <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - HTML report">
+      <!-- If the expression is false, the file is not created -->
+      <filter>( summaryReport == True )</filter>
+    </data>
+    <data name="outputSummaryAnnotationCSV" format="csv" label="${tool.name} on ${on_string} - Functional annotation summary (CSV)">
+      <!-- If the expression is false, the file is not created -->
+      <filter>( functionalAnnotationCSV != None )</filter>
+    </data>
+  </outputs>
+  <tests>
+  </tests>
+  <help>
+
+.. class:: infomark
+
+This tool takes Peptide Quantification patterns and uses this to do Protein Inference of both Primary Protein
+identifications as well as Secondary Protein identifications. This last class of protein identifications
+can not be done by traditional protein inference methods that look only at peptide identifications and
+their quality parameters.
+
+
+-----
+
+**List of definitions**
+
+Primary Protein identification: protein identification belonging to the minimum set of proteins needed
+to account for the observed peptides.
+
+Secondary Protein identification: extra protein identifications that do not belong to the minimum set
+of proteins mentioned above.
+
+raw intensities : is the intensity value resulting from the integration of the feature peak area
+
+apex intensities: is the intensity value as on the highest point of the feature peak
+
+normalized intensities : is the intensity normalized by some means
+
+-----
+
+**Minimum correlation in a cluster**
+
+TODO - add doc.
+
+-----
+
+**Output details**
+
+*Proteins list (CSV)*
+
+This is the list of primary and secondary proteins and their calculated inference score. Proteins
+with exactly the same peptide hits are also grouped together and labeled as primary_group and secondary_group
+instead of simply primary and secondary.
+
+
+*Inference log (CSV)*
+
+This CSV table shows all data, both inferred and ruled out proteins. This can be used by the user to
+troubleshoot the inference process and understand why certain proteins might have been ruled out.
+The CSV is provided in such a format that the data can easily be explored in a Cytoscape network.
+
+The figure below shows an example of the data being explored in Cytoscape using also the
+`Cytoscape chartplugin`_ to visualize the quantification data when selecting the peptide nodes.
+
+.. image:: $PATH_TO_IMAGES/quantifere_cyto_out.png
+
+
+.. _Cytoscape chartplugin: http://apps.cytoscape.org/apps/chartplugin
+
+
+
+  </help>
+</tool>