Mercurial > repos > pieterlukasse > prims_proteomics
changeset 17:40ec8770780d
* Added support for pepxml (more specifically, for pepxml files generated by
ProteomeDiscoverer 1.4). Tested with ProteomeDiscoverer 1.4 pepxml.
* Improved HTML report of NapQ tool.
* Fixed an issue that prevented SEDMAT matching from running
in parallel (multi-threaded).
author | pieter.lukasse@wur.nl |
---|---|
date | Mon, 14 Apr 2014 17:11:33 +0200 |
parents | d56c18ed0f77 |
children | ad911e9aaf33 |
files | Csv2Apml.jar MsFilt.jar NapQ.jar PRIMS.jar ProgenesisConv.jar Quantifere.jar Quantiline.jar README.rst SedMat_cli.jar csv2apml.xml msfilt.xml napq.xml quantifere.xml sedmat.xml |
diffstat | 14 files changed, 86 insertions(+), 17 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Fri Mar 28 14:43:46 2014 +0100 +++ b/README.rst Mon Apr 14 17:11:33 2014 +0200 @@ -20,6 +20,11 @@ ============== ====================================================================== Date Changes -------------- ---------------------------------------------------------------------- +April 2014 * Added support for pepxml (and more specifically for + ProteomeDiscoverer 1.4). Tested with ProteomeDiscoverer 1.4 pepxml. + * Improved HTML report of NapQ tool. + * Fixed issue that was preventing SEDMAT matching from running + in parallel/multi-threaded. January 2014 * first release via Tool Shed November 2013 * multiple tools used internally at PRI end 2011 * first tool
--- a/csv2apml.xml Fri Mar 28 14:43:46 2014 +0100 +++ b/csv2apml.xml Mon Apr 14 17:11:33 2014 +0200 @@ -22,7 +22,7 @@ label="Column name for precursor m/z" /> <param name="rt" type="text" optional="false" size="30" - label="Column name for precursor rt" /> + label="Column name for precursor rt (nb: rt in minutes)" /> <param name="charge" type="text" optional="false" size="30" label="Column name for precursor charge (z)" />
--- a/msfilt.xml Fri Mar 28 14:43:46 2014 +0100 +++ b/msfilt.xml Mon Apr 14 17:11:33 2014 +0200 @@ -1,4 +1,4 @@ -<tool name="MsFilt" id="msfilt" version="1.0.3"> +<tool name="MsFilt" id="msfilt" version="1.0.4"> <description>Filters annotations based MS/MS peptide identification and annotation quality measures</description> <!-- For remote debugging start you listener on port 8000 and use the following as command interpreter: @@ -27,6 +27,10 @@ -addRawIntensityInfo $addRawIntensityInfo -outReport $htmlReportFile -outReportPicturesPath $htmlReportFile.files_path + #if $containsPepxml.pepxmlInSet == True + -pepxmlDataType $containsPepxml.pepxmlDataType + -pepxmlGeneratedBy $containsPepxml.pepxmlGeneratedBy + #end if </command> <inputs> @@ -37,9 +41,35 @@ SEDMAT or Quantiline tools." /> <repeat name="annotationSourceFiles" title="(Optional) Peptide identification files" help="Full set of MS/MS peptide identification files, including peptides that could not be quantified."> - <param name="identificationsFile" type="data" format="apml,mzidentml,prims.fileset.zip" label="Identifications file (APML or MZIDENTML or MZIDENTML fileSet)" /> + <param name="identificationsFile" type="data" format="apml,pepxml,mzidentml,prims.fileset.zip" label="Identifications file (APML, pepxml, MZIDENTML or MZIDENTML fileSet)" /> </repeat> + <!-- ================== PEPXML specific ================== --> + <conditional name="containsPepxml"> + <param name="pepxmlInSet" type="boolean" truevalue="Yes" falsevalue="No" checked="false" + label="Identifications set contains one or more files in pepxml format" + help="Indicate whether one or more (Optional) Peptide identification files is in pepxml format. 
Support for pepxml is still considered 'beta'."/> + <when value="Yes"> + <param name="pepxmlDataType" type="select" label=">> Type of data stored in the pepxml" + help="Options marked with (*) are ProteomeDiscoverer specific scenarios"> + <option value="" selected="true">--Please select--</option> + <option value="single_2d" >2D LC-MS runs, one per msms_run_summary</option> + <option value="multi_2d">(*) 2D LC-MS runs, multiple runs (e.g. rx.F1 to rx.FN) merged as a 'single' msms_run_summary</option> + <option value="single_1d">1D LC-MS runs, one per msms_run_summary</option> + </param> + <param name="pepxmlGeneratedBy" type="select" label=">> pepxml generated by" + help="Some tools, like ProteomeDiscoverer 1.4, have specific issues in their pepxml generation logic. Correctly indicating the tool used here will ensure known issues are taken + into consideration when the file is parsed." > + <option value="" selected="true">--Please select--</option> + <option value="proteome_discoverer_v1.4">ProteomeDiscoverer 1.4</option> + <option value="other">Other</option> + </param> + </when> + <when value="No"> + </when> + </conditional> + <!-- ================== END - PEPXML specific ================== --> + <!-- <param name="maxNrRankings" type="integer" size="10" value="0" label="Maximum nr. of items to leave in the final ranking (set=0 for no limit) " /> --> @@ -62,13 +92,16 @@ 
qmBCOS => best correlation with other sibling peptide (correl),1 "/> - <param name="statisticalMeasuresConfig" type="text" area="true" size="6x70" label="Statistical measures configuration" + <param name="statisticalMeasuresConfig" type="text" area="true" size="8x70" label="Statistical measures configuration" help="Here you may specify the statistical measures that are found in the ms/ms results (e.g. p or e-values). The format is: SM alias => SM name,type,mode[min/max]. " value="smXTD => MS:1001330,XSLASH!Tandem:expect,min 
pvCSVEX => p_value,CSV_EXPORT,min 
smAUTO_LIKELIHOOD => AUTOMOD_LOGLIKELIHOOD,PLGS/Auto-mod,max 
smLIKELIHOOD => LOGLIKELIHOOD,PLGS/Databank-search,max +
smPercoProb => Percolator: probability,Percolator probability,max +
smPercoPEP => Percolator: PEP,Percolator PEP,min +
smPercoQval => Percolator: q-Value,Percolator q-Value,max "/> <param name="filterOutUnannotatedAlignments" type="boolean" checked="true" @@ -117,7 +150,9 @@ #for $i, $s in enumerate( $annotationSourceFiles ) ${s.identificationsFile} ## also print out the datatype in the next line, based on previously configured datatype - #if isinstance( $s.identificationsFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__): + #if isinstance( $s.identificationsFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('pepxml').__class__): + pepxml + #elif isinstance( $s.identificationsFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__): apml #else: mzid
--- a/napq.xml Fri Mar 28 14:43:46 2014 +0100 +++ b/napq.xml Mon Apr 14 17:11:33 2014 +0200 @@ -1,4 +1,4 @@ -<tool name="NapQ" id="napq" version="0.0.1"> +<tool name="NapQ" id="napq" version="0.0.3"> <description>'no alignment' (alignment-free) peptide quantification</description> <!-- For remote debugging start you listener on port 8000 and use the following as command interpreter:
--- a/quantifere.xml Fri Mar 28 14:43:46 2014 +0100 +++ b/quantifere.xml Mon Apr 14 17:11:33 2014 +0200 @@ -1,4 +1,4 @@ -<tool name="Quantifere" id="quantifere1" version="1.0.2"> +<tool name="Quantifere" id="quantifere1" version="1.0.3"> <description>Protein Inference by Peptide Quantification patterns</description> <!-- For remote debugging start you listener on port 8000 and use the following as command interpreter: @@ -61,7 +61,7 @@ </when> </conditional> - <param name="statisticalMeasuresConfig" type="text" area="true" size="6x70" label="Statistical measures configuration" + <param name="statisticalMeasuresConfig" type="text" area="true" size="8x70" label="Statistical measures configuration" help="Here you may specify the statistical measures that are found in the ms/ms results (e.g. p or e-values). The format is: SM alias => SM name,type,mode[min/max]. Leaving this configuration out while these are present in the dataset will have the effect that they will be wrongly used as a regular scoring scheme, having effect on for example @@ -70,6 +70,9 @@ 
pvCSVEX => p_value,CSV_EXPORT,min 
smAUTO_LIKELIHOOD => AUTOMOD_LOGLIKELIHOOD,PLGS/Auto-mod,max 
smLIKELIHOOD => LOGLIKELIHOOD,PLGS/Databank-search,max +
smPercoProb => Percolator: probability,Percolator probability,max +
smPercoPEP => Percolator: PEP,Percolator PEP,min +
smPercoQval => Percolator: q-Value,Percolator q-Value,max "/> <!-- keep value attribute above aligned like this to avoid white spaces in the value --> <param name="quantificationDataToUse" type="select"
--- a/sedmat.xml Fri Mar 28 14:43:46 2014 +0100 +++ b/sedmat.xml Mon Apr 14 17:11:33 2014 +0200 @@ -1,4 +1,4 @@ -<tool name="SedMat" id="sedmat1" version="1.0.2"> +<tool name="SedMat" id="sedmat1" version="1.0.3"> <description>Matches MS and MS/MS results</description> <!-- For remote debugging start you listener on port 8000 and use the following as command interpreter: @@ -12,10 +12,14 @@ -ppidsFileGrouping $fileType.type -ppidsInputFormat $fileType.inputFormatType.ppidsInputFormat -ppidsFileDescription "$fileType.inputFormatType.ppidsFile.name" - #if $fileType.inputFormatType.ppidsInputFormat == "mzid" + #if $fileType.inputFormatType.ppidsInputFormat == "MZML_MZIDENTML_COMBI" -spectraDataFile $fileType.inputFormatType.spectraDataFile - #end if - -out $outputData + #end if + #if $fileType.inputFormatType.ppidsInputFormat == "pepxml" + -pepxmlDataType $fileType.inputFormatType.pepxmlDataType + -pepxmlGeneratedBy $fileType.inputFormatType.pepxmlGeneratedBy + #end if + -out $outputData -outUnmatchedMS2 $outUnmatchedMS2 -mtol $mtol -rttol $rttol @@ -52,10 +56,11 @@ <when value="single"> <conditional name="inputFormatType"> <param name="ppidsInputFormat" type="select" label="MS/MS input format"> - <option value="mzid" selected="true">mzIdentML on mzML</option> + <option value="MZML_MZIDENTML_COMBI" selected="true">mzIdentML on mzML</option> <option value="apml">APML</option> + <option value="pepxml">pepxml (beta support)</option> </param> - <when value="mzid"> + <when value="MZML_MZIDENTML_COMBI"> <param name="spectraDataFile" type="data" format="mzml" label="MS/MS spectra file (mzml)"/> <param name="ppidsFile" type="data" format="mzid" label="MS/MS peptide identifications file (mzidentml)"/> </when> @@ -65,6 +70,25 @@ <validator type="expression" message="You already selected this file as the MS data file.">value.id == inputMS,{"inputMS":$inputMS},{}</validator>--> </param> </when> + <when value="pepxml"> + <param name="ppidsFile" type="data" format="pepxml" 
label="MS/MS peptide identifications file (pepxml)"/> + <param name="pepxmlDataType" type="select" label=">> Type of data stored in the pepxml" + help="Options marked with (*) are ProteomeDiscoverer specific scenarios"> + <option value="" selected="true">--Please select--</option> + <option value="single_2d" >2D LC-MS runs, one per msms_run_summary</option> + <option value="multi_2d">(*) 2D LC-MS runs, multiple runs (e.g. rx.F1 to rx.FN) merged as a 'single' msms_run_summary</option> + <option value="single_1d">1D LC-MS runs, one per msms_run_summary</option> + </param> + <param name="pepxmlGeneratedBy" type="select" label=">> pepxml generated by" + help="Some tools, like ProteomeDiscoverer 1.4, have specific issues in their pepxml generation logic. Correctly indicating the tool used here will ensure known issues are taken + into consideration when the file is parsed." > + <option value="" selected="true">--Please select--</option> + <option value="proteome_discoverer_v1.4">ProteomeDiscoverer 1.4</option> + <option value="other">Other</option> + </param> + + </when> + </conditional> </when> <when value="fileSet"> @@ -79,7 +103,7 @@ </conditional> </when> </conditional> - <param name="mtol" type="integer" size="10" value="50" label="m/z tolerance (ppm) " /> + <param name="mtol" type="float" size="10" value="50.0" label="m/z tolerance (ppm) " /> <param name="rttol" type="integer" size="10" value="150" label="Rention time tolerance (seconds) " /> <param name="rtShiftDetectionWindow" type="integer" size="10" value="20" label="Rention time shift detection window (seconds) " help="Size of the window to use for average rt shift calculations"/> @@ -87,7 +111,7 @@ <conditional name="matchOnNamingConvention"> <param name="match" type="boolean" truevalue="Yes" falsevalue="No" checked="false" label="Match using naming convention" help="Use a list of codes that occur in the file names and that link them together."/> <when value="Yes"> - <param 
name="namingConventionCodesForMatching" type="text" size="100" value="" label="List of codes in naming convention" help="Add the CSV list of codes that occur in the file names and that link them together. E.g. '_F1,_F2,_F3,etc.'"/> + <param name="namingConventionCodesForMatching" type="text" size="100" value="" label=">> List of codes in naming convention" help="Add the CSV list of codes that occur in the file names and that link them together. E.g. '_F1,_F2,_F3,_F4,_F5,_F6,_F7,etc.'"/> </when> <when value="No"> </when> @@ -98,7 +122,9 @@ <option value="2" selected="true">2</option> <option value="3" selected="true">3</option> <option value="4" selected="true">4</option> - <option value="5">5</option> + <option value="5" selected="true">5</option> + <option value="6" >6</option> + <option value="7" >7</option> </param> <param name="summaryReport" type="boolean" checked="true" label="Generate summary report" help="NB: this will increase the processing time"/>