Mercurial > repos > galaxyp > pyteomics_mztab2tsv
changeset 0:84e4b5d4b7ad draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pyteomics commit 49b21b01937067ffc7cf088e615d68177644640b"
author | galaxyp |
---|---|
date | Fri, 15 Jan 2021 15:58:54 +0000 |
parents | |
children | a475c1906e0b |
files | mztab2tsv.xml mztab_reader.py test-data/1.mztab test-data/2.mztab |
diffstat | 4 files changed, 357 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- mztab2tsv.xml: Galaxy tool wrapper around mztab_reader.py.
     The script writes one TSV file per mzTab table into the job working
     directory; each selected table is then collected as a tabular output. -->
<tool id="mztab2tsv" name="pyteomics" version="@TOOL_VERSION@" profile="20.01" license="MIT">
    <description>convert mztab to tabular</description>
    <!-- The "output" macro declares one tabular data set per table key; the
         filter keeps it only when that key is ticked in out_select. -->
    <macros>
        <token name="@TOOL_VERSION@">4.4.1</token>
        <xml name="output" token_type="" token_label="">
            <data name="out_@TYPE@" format="tabular" from_work_dir="@TYPE@.tsv" label="${tool.name} on ${on_string}: @LABEL@">
                <filter>"@TYPE@" in out_select</filter>
            </data>
        </xml>
    </macros>
    <xrefs>
        <xref type="bio.tools">pyteomics</xref>
    </xrefs>
    <edam_topics>
        <edam_topic>topic_0121</edam_topic><!-- proteomics -->
        <edam_topic>topic_3520</edam_topic><!-- proteomics experiment-->
    </edam_topics>
    <edam_operations>
        <edam_operation>operation_3434</edam_operation><!-- Convert a data set from one form to another -->
    </edam_operations>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">pyteomics</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## make sure that selected outputs exist even if the tool does not generate them
        #for o in $out_select
            touch '$o'.tsv &&
        #end for
        ## call the script through the interpreter so that it does not depend on
        ## the executable bit of the installed file
        python '$__tool_directory__/mztab_reader.py' --path_in '$path_in'
    ]]></command>
    <inputs>
        <param argument="--path_in" type="data" format="mztab,mztab2" label="mztab or mztab2 data set" help="" />
        <param name="out_select" type="select" label="Select desired tables" multiple="true" help="">
            <option value="mtd" selected="true">Metadata (v1,v2)</option>
            <option value="prt">Protein table (v1)</option>
            <option value="pep">Peptide table (v1)</option>
            <option value="psm">Peptide spectrum match table (v1)</option>
            <option value="sml">Small molecule table (v1,v2)</option>
            <option value="smf">Small molecule feature table (v2)</option>
            <option value="sme">Small molecule evidence table (v2)</option>
        </param>
    </inputs>
    <outputs>
        <expand macro="output" type="mtd" label="Metadata"/>
        <expand macro="output" type="prt" label="Proteins"/>
        <expand macro="output" type="pep" label="Peptides"/>
        <expand macro="output" type="psm" label="Peptide spectrum matches"/>
        <expand macro="output" type="sml" label="Small molecules"/>
        <expand macro="output" type="smf" label="Small molecule feature"/>
        <expand macro="output" type="sme" label="Small molecule evidence"/>
    </outputs>
    <tests>
        <!-- mzTab 1.0.0 input: 1.mztab has protein and PSM sections only, so
             the peptide and small molecule outputs are expected to hold just
             a pair of double quotes. -->
        <test expect_num_outputs="5">
            <param name="path_in" ftype="mztab" value="1.mztab"/>
            <param name="out_select" value="mtd,prt,pep,psm,sml"/>
            <output name="out_mtd" ftype="tabular">
                <assert_contents><has_text text="mzTab-version"/></assert_contents>
            </output>
            <output name="out_prt" ftype="tabular">
                <assert_contents><has_text text="accession"/></assert_contents>
            </output>
            <output name="out_pep" ftype="tabular">
                <assert_contents><has_text text="&quot;&quot;"/></assert_contents>
            </output>
            <output name="out_psm" ftype="tabular">
                <assert_contents><has_text text="PSM_ID"/></assert_contents>
            </output>
            <output name="out_sml" ftype="tabular">
                <assert_contents><has_text text="&quot;&quot;"/></assert_contents>
            </output>
        </test>
        <!-- mzTab 2.0.0-M input: small molecule, feature and evidence tables. -->
        <test expect_num_outputs="4">
            <param name="path_in" ftype="mztab" value="2.mztab"/>
            <param name="out_select" value="mtd,sml,smf,sme"/>
            <output name="out_mtd" ftype="tabular">
                <assert_contents><has_text text="mzTab-version"/></assert_contents>
            </output>
            <output name="out_sml" ftype="tabular">
                <assert_contents><has_text text="SML_ID"/></assert_contents>
            </output>
            <output name="out_smf" ftype="tabular">
                <assert_contents><has_text text="SMF_ID"/></assert_contents>
            </output>
            <output name="out_sme" ftype="tabular">
                <assert_contents><has_text text="SME_ID"/></assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Convert a mztab or mztab2 file to tabular files using the pyteomics library https://pyteomics.readthedocs.io/en/latest/.

The specifications of the mztab and mztab2 formats can be found here https://github.com/HUPO-PSI/mzTab/.
    ]]></help>
    <citations>
        <citation type="doi">10.1007/s13361-012-0516-6</citation>
        <citation type="doi">10.1021/acs.jproteome.8b00717</citation>
    </citations>
</tool>
#!/usr/bin/env python
"""Convert an mzTab file into one tab-separated file per table.

This is mztab_reader.py.  The input file is parsed with
``pyteomics.mztab.MzTab``; the metadata and every table of the detected
variant are then written as ``<key>.tsv`` files into the output folder.
"""

import argparse
import os

import pandas as pd
from pyteomics.mztab import MzTab

# (output file stem, MzTab attribute name) pairs written for each variant,
# in the order in which the files are produced.
_TABLES_P = (
    ("prt", "protein_table"),
    ("pep", "peptide_table"),
    ("psm", "spectrum_match_table"),
    ("sml", "small_molecule_table"),
)
_TABLES_M = (
    ("sml", "small_molecule_table"),
    ("smf", "small_molecule_feature_table"),
    ("sme", "small_molecule_evidence_table"),
)


def _write_tables(mztab, output_path, tables):
    """
    Write the metadata and the given tables of *mztab* as TSV files.

    mztab: parsed mzTab object providing ``metadata`` and the attributes
        named in *tables*.
    output_path: folder receiving the files; created when it is missing.
    tables: iterable of ``(file stem, attribute name)`` pairs.
    """
    # An empty path means "current directory" for os.path.join, so there is
    # nothing to create in that case.
    if output_path and not os.path.isdir(output_path):
        os.makedirs(output_path)
    mtd = pd.DataFrame.from_dict(mztab.metadata, orient='index')
    mtd.to_csv(os.path.join(output_path, "mtd.tsv"), sep="\t")
    for stem, attribute in tables:
        table = getattr(mztab, attribute)
        table.to_csv(os.path.join(output_path, stem + ".tsv"), sep="\t")


def read_mztab(input_path, output_path):
    """
    Read mztab file and write its tables below *output_path*.

    input_path: path of the mzTab file to parse.
    output_path: folder in which the ``.tsv`` files are written.

    Raises ValueError when the parsed file reports a variant other than
    'P' or 'M', instead of finishing without writing anything.
    """
    mztab = MzTab(input_path)
    if mztab.variant == 'P':
        return read_mztab_p(mztab, output_path)
    if mztab.variant == 'M':
        return read_mztab_m(mztab, output_path)
    raise ValueError("unsupported mzTab variant: %r" % (mztab.variant,))


def read_mztab_p(mztab, output_path):
    """
    Processing mztab "P"

    Writes mtd.tsv, prt.tsv, pep.tsv, psm.tsv and sml.tsv.
    """
    _write_tables(mztab, output_path, _TABLES_P)


def read_mztab_m(mztab, output_path):
    """
    Processing mztab "M"

    Writes mtd.tsv, sml.tsv, smf.tsv and sme.tsv.
    """
    _write_tables(mztab, output_path, _TABLES_M)


def main():
    """Parse the command line and convert the requested mzTab file."""
    # Create the parser
    my_parser = argparse.ArgumentParser(description='List of paths')
    # Add the arguments
    my_parser.add_argument('--path_in',
                           metavar='path',
                           type=str,
                           required=True,
                           help='the path of input .mztab file')
    # The output folder defaults to the directory the script is started in.
    my_parser.add_argument('--path_out',
                           metavar='path',
                           type=str,
                           default=os.getcwd(),
                           help='the path of folder for output .tsv file')

    # Execute parse_args()
    args = my_parser.parse_args()

    read_mztab(args.path_in, args.path_out)


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/1.mztab Fri Jan 15 15:58:54 2021 +0000 @@ -0,0 +1,104 @@ +COM This line serves as a size and separator hint for spreadsheet applications. - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +COM Report of a minimal "Complete Quantification report" label free experiment, quantification on 2 study variables (control/treatment), 3+3 assays (replicates) reported,identifications reported. +MTD mzTab-version 1.0.0 +MTD mzTab-mode Complete +MTD mzTab-type Quantification +MTD description mzTab example file for reporting a summary report of quantification data quantified on the protein level +MTD protein_search_engine_score[1] [MS,MS:1001171,Mascot:score,] +MTD psm_search_engine_score[1] [MS,MS:1001171,Mascot:score,] +MTD ms_run[1]-location file://C:/path/to/my/file1.mzML +MTD ms_run[2]-location file://C:/path/to/my/file2.mzML +MTD ms_run[3]-location file://C:/path/to/my/file3.mzML +MTD ms_run[4]-location file://C:/path/to/my/file4.mzML +MTD ms_run[5]-location file://C:/path/to/my/file5.mzML +MTD ms_run[6]-location file://C:/path/to/my/file6.mzML +MTD protein-quantification_unit [PRIDE, PRIDE:0000393, Relative quantification unit,] +MTD software[1] [MS, MS:1000752, TOPP software,] +MTD fixed_mod[1] [UNIMOD, UNIMOD:4, Carbamidomethyl, ] +MTD variable_mod[1] [UNIMOD, UNIMOD:35, Oxidation, ] +MTD quantification_method [MS, MS:1002038, unlabeled sample, ] +MTD assay[1]-quantification_reagent [MS, MS:1002038, unlabeled sample, ] +MTD assay[2]-quantification_reagent [MS, MS:1002038, unlabeled sample, ] +MTD assay[3]-quantification_reagent [MS, MS:1002038, unlabeled sample, ] +MTD assay[4]-quantification_reagent [MS, MS:1002038, unlabeled sample, ] +MTD assay[5]-quantification_reagent [MS, MS:1002038, unlabeled sample, ] +MTD assay[6]-quantification_reagent [MS, MS:1002038, unlabeled sample, ] +MTD assay[1]-ms_run_ref ms_run[1] +MTD assay[2]-ms_run_ref ms_run[2] +MTD assay[3]-ms_run_ref ms_run[3] +MTD 
assay[4]-ms_run_ref ms_run[4] +MTD assay[5]-ms_run_ref ms_run[5] +MTD assay[6]-ms_run_ref ms_run[6] +MTD study_variable[1]-assay_refs assay[1], assay[2], assay[3] +MTD study_variable[2]-assay_refs assay[4], assay[5], assay[6] +MTD study_variable[1]-description heat shock response of control +MTD study_variable[2]-description heat shock response of treatment + +PRH accession description taxid species database database_version search_engine best_search_engine_score[1] search_engine_score[1]_ms_run[1] search_engine_score[1]_ms_run[2] search_engine_score[1]_ms_run[3] search_engine_score[1]_ms_run[4] search_engine_score[1]_ms_run[5] search_engine_score[1]_ms_run[6] num_psms_ms_run[1] num_psms_ms_run[2] num_psms_ms_run[3] num_psms_ms_run[4] num_psms_ms_run[5] num_psms_ms_run[6] num_peptides_distinct_ms_run[1] num_peptides_distinct_ms_run[2] num_peptides_distinct_ms_run[3] num_peptides_distinct_ms_run[4] num_peptides_distinct_ms_run[5] num_peptides_distinct_ms_run[6] num_peptides_unique_ms_run[1] num_peptides_unique_ms_run[2] num_peptides_unique_ms_run[3] num_peptides_unique_ms_run[4] num_peptides_unique_ms_run[5] num_peptides_unique_ms_run[6] ambiguity_members modifications protein_coverage protein_abundance_assay[1] protein_abundance_assay[2] protein_abundance_assay[3] protein_abundance_assay[4] protein_abundance_assay[5] protein_abundance_assay[6] protein_abundance_study_variable[1] protein_abundance_stdev_study_variable[1] protein_abundance_std_error_study_variable[1] protein_abundance_study_variable[2] protein_abundance_stdev_study_variable[2] protein_abundance_std_error_study_variable[2] +COM Accession Description Taxonomie ID Species Database Version Search Engine best Mascot score Mascot score (HSPControlRep1) Mascot score (HSPControlRep2) Mascot score (HSPControlRep3) Mascot score (HSPTreatmentRep1) Mascot score (HSPTreatmentRep2) Mascot score (HSPTreatmentRep3) PSMs (HSPControlRep1) PSMs (HSPControlRep2) PSMs (HSPControlRep3) PSMs (HSPTreatmentRep4) PSMs 
(HSPTreatmentRep5) PSMs (HSPTreatmentRep6) Distinct Peptides (HSPControlRep1) Distinct Peptides (HSPControlRep2) Distinct Peptides (HSPControlRep3) Distinct Peptides (HSPTreatmentRep4) Distinct Peptides (HSPTreatmentRep5) Distinct Peptides (HSPTreatmentRep6) Unique Peptides (HSPControlRep1) Unique Peptides (HSPControlRep2) Unique Peptides (HSPControlRep3) Unique Peptides (HSPTreatmentRep4) Unique Peptides (HSPTreatmentRep5) Unique Peptides (HSPTreatmentRep6) Ambiguity Members Modifications Protein Coverage (fraction) Abundance (HSPTreatmentRep1) Abundance (HSPTreatmentRep2) Abundance (HSPTreatmentRep3) Abundance (HSPTreatmentRep4) Abundance (HSPTreatmentRep5) Abundance (HSPTreatmentRep6) Abundance (HSPControl) Standard Deviation (HSPControl) Standard Error (HSPControl) Abundance (HSPTreatment) Standard Deviation (HSPTreatment) Standard Error (HSPTreatment) +PRT P63017 Heat shock cognate 71 kDa protein 10090 Mus musculus UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 46 46 26 36 -3 -1 null 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 1 0 null 0 0.34 34.3 40.43507695 41.12124635 266.9554147 234.4 271.0324163 38.61877444 3.755870949 2.168453103 257.4626103 20.07656548 11.59121048 +PRT P14602 Heat shock protein beta-1 10090 Mus musculus UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 100 100 100 -9 20 100 4 3 3 1 2 3 2 3 3 1 2 3 2 2 2 1 2 2 1 Q340U4,Q5K0U2,P8L901 0 0.12 98588.4 114212.9033 100070.7061 4709.411242 4345.7 6704.588342 104290.6698 8624.809914 4979.536326 5253.233195 1269.998146 733.2337713 +PRT Q8K0U4 Heat shock 70 kDa protein 12A 10090 Mus musculus UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 120 120 null -2 39 36 -7 1 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1 1 null 0 0.14 43.4 86.09123822 54.98032306 459.4934179 375.5 609.3477328 61.49052043 22.0776461 12.74653492 481.4470502 118.4595375 68.39264584 +PRT Q61699 Heat shock protein 105 kDa 10090 Mus musculus UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 36 30 31 36 -2 31 24 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 null 0 0.08 3432.54 3847.349077 
3838.448278 11372.30364 9587.5 10303.56594 3706.112452 236.9624883 136.8103564 10421.12319 898.190283 518.5704017 +PRT P07901 Heat shock protein HSP 90-alpha 10090 Mus musculus UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 45 45 6 -2 35 8 3 4 3 4 3 2 4 4 3 4 3 2 4 4 3 4 3 2 4 null 12-UNIMOD:35, 98-UNIMOD:35,727-UNIMOD:35 0.21 3242354.3 3284123.069 3404460.592 633072.591 552426.4 618457.6276 3310312.654 84166.6994 48593.66656 601318.8729 42968.06623 24807.6246 + +PSH sequence PSM_ID accession unique database database_version search_engine search_engine_score[1] modifications spectra_ref retention_time charge exp_mass_to_charge calc_mass_to_charge pre post start end +COM Sequence PSM identifier accession Unqiue Database Database Version Search Engine Mascot score Modifications Spectra Reference Retention Time Charge Experimental m/z Calculated m/z Pre Post Start End +PSM QTQTFTTYSDNQPGVL 1 P63017 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 46 null ms_run[1]:scan=1296 1336.62 3 600.6006697 600.6197 K I 424 439 +PSM AVVNGYSASDTVGAGFAQAK 2 Q8K0U4 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 120 null ms_run[1]:scan=1300 1327.08 2 956.9464833 956.9736 K E 261 281 +PSM ALLRLHQECEKLK 3 Q61699 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 30 9-UNIMOD:4 ms_run[1]:scan=845 885.62 3 527.6406579 527.6362 R K 262 274 +PSM DWYPAHSR 4 P14602 0 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 100 null ms_run[1]:scan=544 571.08 2 516.21 516.2383 R L 21 28 +PSM DWYPAHSR 4 Q340U4 0 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 100 null ms_run[1]:scan=544 571.08 2 516.21 516.2383 K E 143 150 +PSM DWYPAHSR 4 P16627 0 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 100 null ms_run[1]:scan=544 571.08 2 516.21 516.2383 R M 240 247 +PSM MNQSNASPTLDGLFR 5 P14602 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 14 null ms_run[1]:scan=1155 1195.62 3 550.9282794 550.935 - R 1 15 +PSM LWPFQVINEAGKPK 6 P14602 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 45 null ms_run[1]:scan=1064 1104.62 3 542.9688356 542.9716 K V 91 104 +PSM 
MIKLGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR 7 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 45 null ms_run[1]:scan=2849 2876.08 2 1974.400793 1974.3984 R M 692 728 +PSM LGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR 8 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 120 23-UNIMOD:35 ms_run[1]:scan=2584 2611.08 2 1788.281997 1788.2886 K M 695 728 +PSM TLTIVDTGIGMTK 9 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 76 11-UNIMOD:35 ms_run[1]:scan=1092 1132.62 3 450.5920214 450.583 R A 88 100 +PSM MPEETQTQDQPMEEEEVETFAFQAEIAQLMSLIINTFYSNK 10 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 87 0-UNIMOD:35 ms_run[1]:scan=3157 3184.08 2 2405.587318 2405.6084 - E 1 41 +PSM QTQTFTTYSDNQPGVL 11 P63017 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 26 null ms_run[2]:scan=1530 1336.62 3 600.6265518 600.6197 K I 424 439 +PSM ALLRLHQECEKLK 12 Q61699 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 36 9-UNIMOD:4 ms_run[2]:scan=1079 885.62 3 527.6362432 527.6362 R K 262 274 +PSM DWYPAHSR 13 P14602 0 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 100 null ms_run[2]:scan=778 571.08 2 516.21 516.2383 R L 21 28 +PSM DWYPAHSR 13 Q340U4 0 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 100 null ms_run[2]:scan=778 571.08 2 516.21 516.2383 K E 143 150 +PSM DWYPAHSR 13 P16627 0 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 100 null ms_run[2]:scan=778 571.08 2 516.21 516.2383 R M 240 247 +PSM MNQSNASPTLDGLFR 14 P14602 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 40 null ms_run[2]:scan=1389 1195.62 3 550.9468571 550.935 - R 1 15 +PSM LWPFQVINEAGKPK 15 P14602 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 16 null ms_run[2]:scan=1298 1104.62 3 542.9666503 542.9716 K V 91 104 +PSM MIKLGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR 16 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 6 null ms_run[2]:scan=3083 2876.08 2 1974.399035 1974.3984 R M 692 728 +PSM TLTIVDTGIGMTK 17 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 21 null ms_run[2]:scan=1326 1132.62 3 450.5400013 450.583 R A 88 100 +PSM MPEETQTQDQPMEEEEVETFAFQAEIAQLMSLIINTFYSNK 
18 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] -5 null ms_run[2]:scan=3391 3184.08 2 2405.599817 2405.6084 - E 1 41 +PSM QTQTFTTYSDNQPGVL 19 P63017 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 36 null ms_run[3]:scan=1062 1336.62 3 600.6484541 600.6197 K I 424 439 +PSM AVVNGYSASDTVGAGFAQAK 20 Q8K0U4 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] -2 null ms_run[3]:scan=1066 1327.08 2 956.9766608 956.9736 K E 261 281 +PSM ALLRLHQECEKLK 21 Q61699 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 36 9-UNIMOD:4 ms_run[3]:scan=611 885.62 3 527.6486368 527.6362 R K 262 274 +PSM MNQSNASPTLDGLFR 22 P14602 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] -9 null ms_run[3]:scan=921 1195.62 3 550.9336303 550.935 - R 1 15 +PSM MIKLGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR 23 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] -2 null ms_run[3]:scan=2615 2876.08 2 1974.392219 1974.3984 R M 692 728 +PSM LGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR 24 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] -3 23-UNIMOD:35 ms_run[3]:scan=2350 2611.08 2 1788.28771 1788.2886 K M 695 728 +PSM TLTIVDTGIGMTK 25 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 37 null ms_run[3]:scan=858 1132.62 3 450.5960917 450.583 R A 88 100 +PSM MPEETQTQDQPMEEEEVETFAFQAEIAQLMSLIINTFYSNK 26 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 6 12-UNIMOD:35 ms_run[3]:scan=2923 3184.08 2 2405.604605 2405.6084 - E 1 41 +PSM QTQTFTTYSDNQPGVL 27 P63017 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] -3 null ms_run[4]:scan=2731 1336.62 3 600.6123009 600.6197 K I 424 439 +PSM AVVNGYSASDTVGAGFAQAK 28 Q8K0U4 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 39 null ms_run[4]:scan=2735 1327.08 2 956.9765302 956.9736 K E 261 281 +PSM ALLRLHQECEKLK 29 Q61699 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] -2 9-UNIMOD:4 ms_run[4]:scan=2280 885.62 3 527.6343404 527.6362 R K 262 274 +PSM MNQSNASPTLDGLFR 30 P14602 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 20 null ms_run[4]:scan=2590 1195.62 3 550.9284574 550.935 - R 1 15 +PSM LWPFQVINEAGKPK 31 P14602 1 
UniProtKB 2013_08 [MS,MS:1001207,Mascot,] -3 null ms_run[4]:scan=2499 1104.62 3 542.9715699 542.9716 K V 91 104 +PSM MIKLGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR 32 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 35 null ms_run[4]:scan=4284 2876.08 2 1974.40429 1974.3984 R M 692 728 +PSM LGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR 33 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 0 23-UNIMOD:35 ms_run[4]:scan=4019 2611.08 2 1788.289062 1788.2886 K M 695 728 +PSM MPEETQTQDQPMEEEEVETFAFQAEIAQLMSLIINTFYSNK 34 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 11 0-UNIMOD:35 ms_run[4]:scan=4592 3184.08 2 2405.57421 2405.6084 - E 1 41 +PSM QTQTFTTYSDNQPGVL 35 P63017 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] -1 null ms_run[5]:scan=2031 1336.62 3 600.5900228 600.6197 K I 424 439 +PSM AVVNGYSASDTVGAGFAQAK 36 Q8K0U4 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 36 null ms_run[5]:scan=2035 1327.08 2 956.9477197 956.9736 K E 261 281 +PSM ALLRLHQECEKLK 37 Q61699 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 31 9-UNIMOD:4 ms_run[5]:scan=1580 885.62 3 527.6254449 527.6362 R K 262 274 +PSM DWYPAHSR 38 P14602 0 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 100 null ms_run[5]:scan=1279 571.08 2 516.21 516.2383 R L 21 28 +PSM DWYPAHSR 38 Q340U4 0 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 100 null ms_run[5]:scan=1279 571.08 2 516.21 516.2383 K E 143 150 +PSM DWYPAHSR 38 P16627 0 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 100 null ms_run[5]:scan=1279 571.08 2 516.21 516.2383 R M 240 247 +PSM MNQSNASPTLDGLFR 39 P14602 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 32 null ms_run[5]:scan=1890 1195.62 3 550.9120992 550.935 - R 1 15 +PSM LWPFQVINEAGKPK 40 P14602 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 21 null ms_run[5]:scan=1799 1104.62 3 542.9599424 542.9716 K V 91 104 +PSM TLTIVDTGIGMTK 41 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 8 11-UNIMOD:35 ms_run[5]:scan=1827 1132.62 3 450.5534561 450.583 R A 88 100 +PSM MPEETQTQDQPMEEEEVETFAFQAEIAQLMSLIINTFYSNK 42 P07901 1 UniProtKB 2013_08 
[MS,MS:1001207,Mascot,] -5 0-UNIMOD:35 ms_run[5]:scan=3892 3184.08 2 2405.594573 2405.6084 - E 1 41 +PSM AVVNGYSASDTVGAGFAQAK 43 Q8K0U4 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] -7 null ms_run[6]:scan=1331 1327.08 2 956.9880766 956.9736 K E 261 281 +PSM ALLRLHQECEKLK 44 Q61699 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 24 9-UNIMOD:4 ms_run[6]:scan=876 885.62 3 527.64539 527.6362 R K 262 274 +PSM DWYPAHSR 45 P14602 0 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 4 null ms_run[6]:scan=575 571.08 2 516.21 516.2383 R L 21 28 +PSM DWYPAHSR 45 Q340U4 0 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 40 null ms_run[6]:scan=575 571.08 2 516.21 516.2383 K E 143 150 +PSM DWYPAHSR 45 P16627 0 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 9 null ms_run[6]:scan=575 571.08 2 516.21 516.2383 R M 240 247 +PSM MNQSNASPTLDGLFR 46 P14602 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 32 null ms_run[6]:scan=1186 1195.62 3 550.9319012 550.935 - R 1 15 +PSM MIKLGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR 47 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 3 null ms_run[6]:scan=2880 2876.08 2 1974.377816 1974.3984 R M 692 728 +PSM LGLGIDEDDPTVDDTSAAVTEEMPPLEGDDDTSR 48 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 29 23-UNIMOD:35 ms_run[6]:scan=2615 2611.08 2 1788.294771 1788.2886 K M 695 728 +PSM TLTIVDTGIGMTK 49 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 39 11-UNIMOD:35 ms_run[6]:scan=1123 1132.62 3 450.6038036 450.583 R A 88 100 +PSM MPEETQTQDQPMEEEEVETFAFQAEIAQLMSLIINTFYSNK 50 P07901 1 UniProtKB 2013_08 [MS,MS:1001207,Mascot,] 33 0-UNIMOD:35 ms_run[6]:scan=3188 3184.08 2 2405.605739 2405.6084 - E 1 41 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/2.mztab Fri Jan 15 15:58:54 2021 +0000 @@ -0,0 +1,86 @@ +COM Meta data section +MTD mzTab-version 2.0.0-M +MTD mzTab-ID ISAS-2018-1234 +MTD description Minimal proposed sample file for identification and quantification of lipids +MTD publication[1] pubmed:29039908 | doi:10.1021/acs.analchem.7b03576 +MTD cv[1]-label MS +MTD cv[1]-full_name PSI-MS controlled vocabulary +MTD cv[1]-version 4.0.18 +MTD cv[1]-uri https://github.com/HUPO-PSI/psi-ms-CV/blob/master/psi-ms.obo +MTD cv[2]-label MSIO +MTD cv[2]-uri https://www.ebi.ac.uk/ols/ontologies/msio +MTD cv[2]-version 1.0.1 +MTD cv[2]-full_name Metabolomics Standards Initiative Ontology (MSIO) +MTD cv[3]-label UO +MTD cv[3]-full_name Units of Measurement Ontology +MTD cv[3]-version 2017-09-25 +MTD cv[3]-uri http://purl.obolibrary.org/obo/uo.owl +MTD quantification_method [MS, MS:1001838, SRM quantitation analysis, ] +MTD sample_processing[1] [MSIO, MSIO:0000148, high performance liquid chromatography, ] +MTD instrument[1]-name [MS, MS:1001911, Q Exactive , ] +MTD instrument[1]-source [MS, MS:1000073, electrospray ionization, ] +MTD instrument[1]-analyzer[1] [MS, MS:1000081, quadrupole, ] +MTD instrument[1]-analyzer[2] [MS, MS:1000484, orbitrap, ] +MTD instrument[1]-detector [MS, MS:1000624, inductive detector, ] +MTD software[1] [MS, MS:1000532, Xcalibur,2.8-280502/2.8.1.2806] +MTD software[1]-setting[1] ScheduledSRMWindow: 2 min +MTD software[1]-setting[2] CycleTime: 2 s +MTD software[2] [MS, MS:1000922, Skyline, 3.5.0.9319] +MTD software[2]-setting[1] MSMSmassrange: (50.0, 1800.0) +MTD sample[1] QEx-1273-prm-sp1 +MTD sample[1]-description Sphingolipids with concentration reported as picomolar per mg of protein, abundances are reported after calibration correction. 
+MTD ms_run[1]-location file:///C:/data/QEx-1273-prm-sp1.mzML +MTD ms_run[1]-format [MS, MS:1000584, mzML file, ] +MTD ms_run[1]-id_format [MS, MS:1000768, Thermo nativeID format, ] +MTD ms_run[1]-scan_polarity[1] [MS, MS:1000130, positive scan, ] +MTD ms_run[1]-instrument_ref instrument[1] +MTD assay[1] Description of assay 1 +MTD assay[1]-sample_ref sample[1] +MTD assay[1]-ms_run_ref ms_run[1] +MTD study_variable[1] Sphingolipid SRM Quantitation +MTD study_variable[1]-assay_refs assay[1] +MTD study_variable[1]-description sphingolipid srm quantitation +MTD study_variable[1]-average_function [MS, MS:1002883, median, ] +MTD study_variable[1]-variation_function [MS, MS:1002885, standard error, ] +MTD small_molecule-quantification_unit [UO, UO:0000072, picomolal, ] +MTD small_molecule_feature-quantification_unit [UO, UO:0000072, picomolal, ] +MTD small_molecule-identification_reliability [MS, MS:1002896, compound identification confidence level, ] +MTD database[1] [,, Pubchem, ] +MTD database[1]-prefix PUBCHEM-CPD +MTD database[1]-version 02.12.2017 +MTD database[1]-uri https://www.ncbi.nlm.nih.gov/pccompound +MTD database[2] [,, LipidMaps, ] +MTD database[2]-prefix LM +MTD database[2]-version 2017-12 +MTD database[2]-uri http://www.lipidmaps.org/ +MTD database[3] [,, LipidCreator Transitions, ] +MTD database[3]-prefix LCTR +MTD database[3]-version 2018-07 +MTD database[3]-uri https://lifs.isas.de/lipidcreator +COM MTD colunit-small_molecule retention_time=[UO, UO:0000010, second, ] +MTD colunit-small_molecule_evidence opt_global_mass_error=[UO, UO:0000169, parts per million, ] +MTD id_confidence_measure[1] [MS, MS:1002890, fragmentation score, ] +MTD external_study_uri[1] file:///C:/data/prm.sky.zip + +COM "MzTab 2.0.0-M ""proposed"" specification" +COM Summary rows. +COM Evidences (e.g. multiple modifications, adducts incl. charge variants are summarized). +COM For most use cases this summary lines may be sufficient. 
+COM Negative and positive scan polarities are currently not explicitly included, this is still under debate in the mzTAB community. +SMH SML_ID SMF_ID_REFS chemical_name database_identifier chemical_formula smiles inchi uri theoretical_neutral_mass adduct_ions reliability best_id_confidence_measure best_id_confidence_value abundance_assay[1] abundance_study_variable[1] abundance_variation_study_variable[1] opt_global_lipid_category opt_global_lipid_species opt_global_lipid_best_id_level +SML 1 1 | 2 | 3 | 4 Cer(d18:1/24:0) LM:LMSP02010012 C42H83NO3 CCCCCCCCCCCCCCCCCCCCCCCC(=O)N[C@@H](CO)[C@H](O)/C=C/CCCCCCCCCCCCC InChI=1S/C42H83NO3/c1-3-5-7-9-11-13-15-17-18-19-20-21-22-23-24-26-28-30-32-34-36-38-42(46)43-40(39-44)41(45)37-35-33-31-29-27-25-16-14-12-10-8-6-4-2/h35,37,40-41,44-45H,3-34,36,38-39H2,1-2H3,(H,43,46)/b37-35+/t40-,41+/m0/s1 http://www.lipidmaps.org/data/LMSDRecord.php?LM_ID=LMSP02010012 649.6373 [M+H]+ 2 [,, qualifier ions exact mass,] 0.958 4.448784E-05 4.448784E-05 0 Sphingolipids Cer 42:1 Cer d18:1/24:0 + +COM MS feature rows , used to report m/z and individual abundance information for quantification +SFH SMF_ID SME_ID_REFS SME_ID_REF_ambiguity_code adduct_ion isotopomer exp_mass_to_charge charge retention_time_in_seconds retention_time_in_seconds_start retention_time_in_seconds_end abundance_assay[1] opt_global_quantifiers_SMF_ID_REFS +SMF 1 1 null [M+H]1+ null 650.6432 1 821.2341 756.0000 954.0000 4.448784E-05 3 +SMF 2 2 null null null 252.2677 1 821.2341 756.0000 954.0000 6.673176E-06 null +SMF 3 3 null null null 264.2689 1 821.2341 756.0000 954.0000 1.3346352E-05 null +SMF 4 4 null null null 282.2788 1 821.2341 756.0000 954.0000 9.831813E-06 null + +COM Evidence rows for parent / fragment ions. +COM Primary use case: report single hits from spectral library or accurate mass searches without quantification. 
-> Qualification +SEH SME_ID evidence_input_id database_identifier chemical_formula smiles inchi chemical_name uri derivatized_form adduct_ion exp_mass_to_charge charge theoretical_mass_to_charge opt_global_mass_error spectra_ref identification_method ms_level id_confidence_measure[1] rank opt_global_qualifiers_evidence_grouping_ID_REFS +SME 1 1 LM:LMSP0501AB02 C42H83NO3 CCCCCCCCCCCCCCCCCCCCCCCC(=O)N[C@@H](CO)[C@H](O)/C=C/CCCCCCCCCCCCC InChI=1S/C42H83NO3/c1-3-5-7-9-11-13-15-17-18-19-20-21-22-23-24-26-28-30-32-34-36-38-42(46)43-40(39-44)41(45)37-35-33-31-29-27-25-16-14-12-10-8-6-4-2/h35,37,40-41,44-45H,3-34,36,38-39H2,1-2H3,(H,43,46)/b37-35+/t40-,41+/m0/s1 LacCer d18:1/12:0 http://www.lipidmaps.org/data/LMSDRecord.php?LM_ID=LMSP02010012 null [M+H]1+ 650.6432 1 650.6446 -2.1517 ms_run[1]:controllerType=0 controllerNumber=1 scan=731 [,, qualifier ions exact mass,] [MS,MS:1000511, ms level, 1] 0.958 1 2 +SME 2 2 LCTR:LCTR0809812 C17H33N null null Cer d18:1/24:0 W' - CHO null null null 252.2677 1 252.2686 -3.5676 ms_run[1]:controllerType=0 controllerNumber=1 scan=732 [,, exact mass, ] [MS,MS:1000511, ms level, 2] 0.9780 1 null +SME 3 2 LCTR:LCTR0871245 C18H33N null null Cer d18:1/24:0 W'' null null null 264.2689 1 264.2686 -1.1352 ms_run[1]:controllerType=0 controllerNumber=1 scan=732 [,, exact mass, ] [MS,MS:1000511, ms level, 2] 0.7500 1 null +SME 4 2 LCTR:LCTR0809711 C18H35NO null null Cer d18:1/24:0 W' null null null 282.2788 1 282.2791 -1.0628 ms_run[1]:controllerType=0 controllerNumber=1 scan=732 [,, exact mass, ] [MS,MS:1000511, ms level, 2] 0.8760 1 null \ No newline at end of file