Mercurial > repos > computational-metabolomics > sirius_csifingerid
diff sirius_csifingerid.py @ 3:4cbfd3d0a4c4 draft
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit e4bc02f97a21da7556d1b76e5338ede3a9031fac"
| author | computational-metabolomics |
|---|---|
| date | Wed, 02 Feb 2022 17:29:46 +0000 |
| parents | 856b3761277d |
| children | 8fb51147d15e |
line wrap: on
line diff
--- a/sirius_csifingerid.py Thu Jul 02 11:01:45 2020 -0400 +++ b/sirius_csifingerid.py Wed Feb 02 17:29:46 2022 +0000 @@ -1,5 +1,3 @@ -from __future__ import absolute_import, print_function - import argparse import csv import glob @@ -11,11 +9,11 @@ import uuid from collections import defaultdict -import six parser = argparse.ArgumentParser() parser.add_argument('--input_pth') -parser.add_argument('--result_pth') +parser.add_argument('--canopus_result_pth') +parser.add_argument('--annotations_result_pth') parser.add_argument('--database') parser.add_argument('--profile') parser.add_argument('--candidates') @@ -28,8 +26,11 @@ parser.add_argument('--meta_select_col', default='all') parser.add_argument('--cores_top_level', default=1) parser.add_argument('--chunks', default=1) -parser.add_argument('--minMSMSpeaks', default=1) +parser.add_argument('--min_MSMS_peaks', default=1) parser.add_argument('--rank_filter', default=0) +parser.add_argument('--confidence_filter', default=0) +parser.add_argument('--backwards_compatible', + default=False, action='store_true') parser.add_argument('--schema', default='msp') parser.add_argument('-a', '--adducts', action='append', nargs=1, required=False, default=[], help='Adducts used') @@ -124,7 +125,7 @@ def parse_meta(meta_regex, meta_info=None): if meta_info is None: meta_info = {} - for k, regexes in six.iteritems(meta_regex): + for k, regexes in meta_regex.items(): for reg in regexes: m = re.search(reg, line, re.IGNORECASE) if m: @@ -203,7 +204,7 @@ adduct = meta_info['precursor_type'] else: if paramd["default_ion"]: - paramd["cli"]["--ion"] = paramd["default_ion"] + paramd["cli"]["--adduct"] = paramd["default_ion"] adduct = paramd["default_ion"] else: paramd["cli"]["--auto-charge"] = '' @@ -217,14 +218,24 @@ paramd['additional_details']['adduct'] = adduct # ============== Create CLI cmd for metfrag =============================== - cmd = "sirius --fingerid" - for k, v in six.iteritems(paramd["cli"]): - cmd += " {} {}".format(str(k), str(v)) + cmd = "sirius --no-citations --ms2 {} --adduct {} --precursor {} -o {} " \ + "formula -c {} --ppm-max {} --profile {} " \ + "structure --database {} canopus".format( + paramd["cli"]["--ms2"], + adduct, + paramd["cli"]["--precursor"], + paramd["cli"]["--output"], + paramd["cli"]["--candidates"], + paramd["cli"]["--ppm-max"], + paramd["cli"]["--profile"], + paramd["cli"]["--database"] + ) + print(cmd) paramds[paramd["SampleName"]] = paramd # =============== Run srius ============================================== # Filter before process with a minimum number of MS/MS peaks - if plinesread >= float(args.minMSMSpeaks): + if plinesread >= float(args.min_MSMS_peaks): if int(args.cores_top_level) == 1: os.system(cmd) @@ -331,57 +342,85 @@ pool.close() pool.join() + ###################################################################### # Concatenate and filter the output ###################################################################### # outputs might have different headers. Need to get a list of all the headers # before we start merging the files outfiles = [os.path.join(wd, f) for f in # glob.glob(os.path.join(wd, "*_metfrag_result.csv"))] -outfiles = glob.glob(os.path.join(wd, '*', '*', 'summary_csi_fingerid.csv')) +def concat_output(filename, result_pth, + rank_filter, confidence_filter, backwards_compatible): + outfiles = glob.glob(os.path.join(wd, '*', '*{}'.format(filename))) -# sort files nicely -outfiles.sort(key=lambda s: int(re.match(r'^.*/(' - r'\d+).*/.*/summary_csi_fingerid.csv', - s).group(1))) -print(outfiles) + # sort files nicely + outfiles.sort(key=lambda s: int(re.match(r'^.*/(' + r'\d+).*{}'.format(filename), + s).group(1))) + print(outfiles) + + if len(outfiles) == 0: + print('No results') + sys.exit() + + headers = [] -if len(outfiles) == 0: - print('No results') - sys.exit() + for fn in outfiles: + with open(fn, 'r') as infile: + reader = csv.reader(infile, delimiter='\t') + if sys.version_info >= (3, 0): + headers.extend(next(reader)) + else: + headers.extend(reader.next()) + break + + headers = list(paramd['additional_details'].keys()) + headers -headers = [] -c = 0 -for fn in outfiles: - with open(fn, 'r') as infile: - reader = csv.reader(infile, delimiter='\t') - if sys.version_info >= (3, 0): - headers.extend(next(reader)) - else: - headers.extend(reader.next()) - break + with open(result_pth, 'a') as merged_outfile: + dwriter = csv.DictWriter(merged_outfile, + fieldnames=headers, delimiter='\t') + dwriter.writeheader() + + for fn in sorted(outfiles): + print(fn) + + with open(fn) as infile: + reader = csv.DictReader(infile, delimiter='\t') -headers = list(paramd['additional_details'].keys()) + headers + ad = paramds[fn.split(os.sep)[-2]]['additional_details'] + + for line in reader: + if 'rank' in line and \ + 0 < int(rank_filter) < int(line['rank']): + # filter out those annotations greater than rank filter + # If rank_filter is zero then skip + continue -with open(args.result_pth, 'a') as merged_outfile: - dwriter = csv.DictWriter(merged_outfile, - fieldnames=headers, delimiter='\t') - dwriter.writeheader() + if 'ConfidenceScore' in line \ + and 0 < int(confidence_filter) < int(line['rank']): + # filter out those annotations greater than rank filter + # If rank_filter is zero then skip + continue + line.update(ad) - for fn in sorted(outfiles): - print(fn) - - with open(fn) as infile: - reader = csv.DictReader(infile, delimiter='\t') + dwriter.writerow(line) - ad = paramds[fn.split(os.sep)[-3]]['additional_details'] + if backwards_compatible: + # Headers required in this format for tools that used + # v4.9.3 of SIRIUS-CSI:FingerID + s1 = "sed 's/InChIkey2D/inchikey2d/g' {r} > {r}".format(r=result_pth) + os.system(s1) + s2 = "sed 's/CSI:FingerIDScore/Score/' {r} > {r}".format(r=result_pth) + os.system(s2) + - for line in reader: - if 0 < int(args.rank_filter) < int(line['rank']): - # filter out those annotations greater than rank filter - # If rank_filter is zero then skip - continue - line.update(ad) - # round score to 5 d.p. - line['score'] = round(float(line['score']), 5) - - dwriter.writerow(line) +concat_output('canopus_summary.tsv', + args.canopus_result_pth, + args.rank_filter, + args.confidence_filter, + args.backwards_compatible) +concat_output('compound_identifications.tsv', + args.annotations_result_pth, + 0, + 0, + False)
