Mercurial > repos > computational-metabolomics > sirius_csifingerid
diff sirius_csifingerid.py @ 2:856b3761277d draft
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 3e3dee9a853b6133cf089b3c063f53c52b39463d"
author | computational-metabolomics |
---|---|
date | Thu, 02 Jul 2020 11:01:45 -0400 |
parents | 9e6bf7278257 |
children | 4cbfd3d0a4c4 |
line wrap: on
line diff
--- a/sirius_csifingerid.py Thu Mar 19 07:30:54 2020 -0400 +++ b/sirius_csifingerid.py Thu Jul 02 11:01:45 2020 -0400 @@ -25,12 +25,15 @@ parser.add_argument('--out_dir') parser.add_argument('--tool_directory') parser.add_argument('--temp_dir') - parser.add_argument('--meta_select_col', default='all') parser.add_argument('--cores_top_level', default=1) parser.add_argument('--chunks', default=1) parser.add_argument('--minMSMSpeaks', default=1) +parser.add_argument('--rank_filter', default=0) parser.add_argument('--schema', default='msp') +parser.add_argument('-a', '--adducts', action='append', nargs=1, + required=False, default=[], help='Adducts used') + args = parser.parse_args() print(args) if os.stat(args.input_pth).st_size == 0: @@ -48,6 +51,15 @@ wd = os.path.join(td, str(uuid.uuid4())) os.mkdir(wd) +print(args.adducts) +if args.adducts: + adducts_from_cli = [ + a[0].replace('__ob__', '[').replace('__cb__', ']') for a in + args.adducts + ] +else: + adducts_from_cli = [] + ###################################################################### # Setup regular expressions for MSP parsing dictionary ###################################################################### @@ -62,6 +74,12 @@ r'^adduct(?:=|:)(.*)$', r'^ADDUCTIONNAME(?:=|:)(.*)$'] regex_msp['num_peaks'] = [r'^Num.*Peaks(?:=|:)\s*(\d*)$'] +regex_msp['retention_time'] = [r'^RETENTION.*TIME(?:=|:)\s*(.*)$', + r'^rt(?:=|:)\s*(.*)$', + r'^time(?:=|:)\s*(.*)$'] +# From example winter_pos.mspy from kristian +regex_msp['AlignmentID'] = [r'^AlignmentID(?:=|:)\s*(.*)$'] + regex_msp['msp'] = [r'^Name(?:=|:)(.*)$'] # Flag for standard MSP format regex_massbank = {} @@ -73,9 +91,12 @@ regex_massbank['precursor_type'] = \ [r'^MS\$FOCUSED_ION:\s+PRECURSOR_TYPE\s+(.*)$'] regex_massbank['num_peaks'] = [r'^PK\$NUM_PEAK:\s+(\d*)'] +regex_massbank['retention_time'] = [ + r'^AC\$CHROMATOGRAPHY:\s+RETENTION_TIME\s*(\d*\.?\d+).*'] regex_massbank['cols'] = [r'^PK\$PEAK:\s+(.*)'] regex_massbank['massbank'] = [r'^RECORD_TITLE:(.*)$'] # Flag for massbank + if args.schema == 'msp': meta_regex = regex_msp elif args.schema == 'massbank': @@ -141,6 +162,8 @@ # record name (i.e. when using msPurity and we have the columns # coded into the name) or just the spectra index (spectrac) paramd = init_paramd(args) + meta_info = {k: v for k, v in meta_info.items() if k + not in ['msp', 'massbank', 'cols']} if args.meta_select_col == 'name': # have additional column of just the name @@ -177,15 +200,22 @@ # Replace param details with details from MSP if required if 'precursor_type' in meta_info and meta_info['precursor_type']: paramd["cli"]["--ion"] = meta_info['precursor_type'] + adduct = meta_info['precursor_type'] else: if paramd["default_ion"]: paramd["cli"]["--ion"] = paramd["default_ion"] + adduct = paramd["default_ion"] else: paramd["cli"]["--auto-charge"] = '' if 'precursor_mz' in meta_info and meta_info['precursor_mz']: paramd["cli"]["--precursor"] = meta_info['precursor_mz'] + if not ('precursor_type' in paramd['additional_details'] or 'adduct' + in paramd['additional_details']): + # If possible always good to have the adduct in output as a column + paramd['additional_details']['adduct'] = adduct + # ============== Create CLI cmd for metfrag =============================== cmd = "sirius --fingerid" for k, v in six.iteritems(paramd["cli"]): @@ -248,11 +278,23 @@ elif plinesread and plinesread == pnumlines: # ======= Get sample name and additional details for output ======= - spectrac += 1 - paramd, cmd = run_sirius(meta_info, peaklist, args, wd, spectrac) + if adducts_from_cli: + for adduct in adducts_from_cli: + print(adduct) + spectrac += 1 + meta_info['precursor_type'] = adduct + paramd, cmd = run_sirius(meta_info, peaklist, args, wd, + spectrac) - paramds[paramd["SampleName"]] = paramd - cmds.append(cmd) + paramds[paramd["SampleName"]] = paramd + cmds.append(cmd) + else: + spectrac += 1 + paramd, cmd = run_sirius(meta_info, peaklist, args, wd, + spectrac) + + paramds[paramd["SampleName"]] = paramd + cmds.append(cmd) meta_info = {} pnumlines = 0 @@ -262,10 +304,23 @@ # run metfrag on still if plinesread and plinesread == pnumlines: - paramd, cmd = run_sirius(meta_info, peaklist, args, wd, spectrac + 1) + if adducts_from_cli: + for adduct in adducts_from_cli: + print(adduct) + spectrac += 1 + meta_info['precursor_type'] = adduct + paramd, cmd = run_sirius(meta_info, peaklist, args, wd, + spectrac) - paramds[paramd["SampleName"]] = paramd - cmds.append(cmd) + paramds[paramd["SampleName"]] = paramd + cmds.append(cmd) + else: + spectrac += 1 + paramd, cmd = run_sirius(meta_info, peaklist, args, wd, + spectrac) + + paramds[paramd["SampleName"]] = paramd + cmds.append(cmd) # Perform multiprocessing on command line call level if int(args.cores_top_level) > 1: @@ -321,6 +376,10 @@ ad = paramds[fn.split(os.sep)[-3]]['additional_details'] for line in reader: + if 0 < int(args.rank_filter) < int(line['rank']): + # filter out those annotations greater than rank filter + # If rank_filter is zero then skip + continue line.update(ad) # round score to 5 d.p. line['score'] = round(float(line['score']), 5)