Mercurial > repos > computational-metabolomics > sirius_csifingerid
comparison sirius_csifingerid.py @ 2:856b3761277d draft
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 3e3dee9a853b6133cf089b3c063f53c52b39463d"
| author | computational-metabolomics |
|---|---|
| date | Thu, 02 Jul 2020 11:01:45 -0400 |
| parents | 9e6bf7278257 |
| children | 4cbfd3d0a4c4 |
comparison
equal
deleted
inserted
replaced
| 1:1db83da40c54 | 2:856b3761277d |
|---|---|
| 23 parser.add_argument('--polarity') | 23 parser.add_argument('--polarity') |
| 24 parser.add_argument('--results_name') | 24 parser.add_argument('--results_name') |
| 25 parser.add_argument('--out_dir') | 25 parser.add_argument('--out_dir') |
| 26 parser.add_argument('--tool_directory') | 26 parser.add_argument('--tool_directory') |
| 27 parser.add_argument('--temp_dir') | 27 parser.add_argument('--temp_dir') |
| 28 | |
| 29 parser.add_argument('--meta_select_col', default='all') | 28 parser.add_argument('--meta_select_col', default='all') |
| 30 parser.add_argument('--cores_top_level', default=1) | 29 parser.add_argument('--cores_top_level', default=1) |
| 31 parser.add_argument('--chunks', default=1) | 30 parser.add_argument('--chunks', default=1) |
| 32 parser.add_argument('--minMSMSpeaks', default=1) | 31 parser.add_argument('--minMSMSpeaks', default=1) |
| 32 parser.add_argument('--rank_filter', default=0) | |
| 33 parser.add_argument('--schema', default='msp') | 33 parser.add_argument('--schema', default='msp') |
| 34 parser.add_argument('-a', '--adducts', action='append', nargs=1, | |
| 35 required=False, default=[], help='Adducts used') | |
| 36 | |
| 34 args = parser.parse_args() | 37 args = parser.parse_args() |
| 35 print(args) | 38 print(args) |
| 36 if os.stat(args.input_pth).st_size == 0: | 39 if os.stat(args.input_pth).st_size == 0: |
| 37 print('Input file empty') | 40 print('Input file empty') |
| 38 exit() | 41 exit() |
| 45 os.mkdir(wd) | 48 os.mkdir(wd) |
| 46 else: | 49 else: |
| 47 td = tempfile.mkdtemp() | 50 td = tempfile.mkdtemp() |
| 48 wd = os.path.join(td, str(uuid.uuid4())) | 51 wd = os.path.join(td, str(uuid.uuid4())) |
| 49 os.mkdir(wd) | 52 os.mkdir(wd) |
| 53 | |
| 54 print(args.adducts) | |
| 55 if args.adducts: | |
| 56 adducts_from_cli = [ | |
| 57 a[0].replace('__ob__', '[').replace('__cb__', ']') for a in | |
| 58 args.adducts | |
| 59 ] | |
| 60 else: | |
| 61 adducts_from_cli = [] | |
| 50 | 62 |
| 51 ###################################################################### | 63 ###################################################################### |
| 52 # Setup regular expressions for MSP parsing dictionary | 64 # Setup regular expressions for MSP parsing dictionary |
| 53 ###################################################################### | 65 ###################################################################### |
| 54 regex_msp = {} | 66 regex_msp = {} |
| 60 r'^precursor.*mz(?:=|:)\s*(\d*[.,]?\d*)$'] | 72 r'^precursor.*mz(?:=|:)\s*(\d*[.,]?\d*)$'] |
| 61 regex_msp['precursor_type'] = [r'^precursor.*type(?:=|:)(.*)$', | 73 regex_msp['precursor_type'] = [r'^precursor.*type(?:=|:)(.*)$', |
| 62 r'^adduct(?:=|:)(.*)$', | 74 r'^adduct(?:=|:)(.*)$', |
| 63 r'^ADDUCTIONNAME(?:=|:)(.*)$'] | 75 r'^ADDUCTIONNAME(?:=|:)(.*)$'] |
| 64 regex_msp['num_peaks'] = [r'^Num.*Peaks(?:=|:)\s*(\d*)$'] | 76 regex_msp['num_peaks'] = [r'^Num.*Peaks(?:=|:)\s*(\d*)$'] |
| 77 regex_msp['retention_time'] = [r'^RETENTION.*TIME(?:=|:)\s*(.*)$', | |
| 78 r'^rt(?:=|:)\s*(.*)$', | |
| 79 r'^time(?:=|:)\s*(.*)$'] | |
| 80 # From example winter_pos.mspy from kristian | |
| 81 regex_msp['AlignmentID'] = [r'^AlignmentID(?:=|:)\s*(.*)$'] | |
| 82 | |
| 65 regex_msp['msp'] = [r'^Name(?:=|:)(.*)$'] # Flag for standard MSP format | 83 regex_msp['msp'] = [r'^Name(?:=|:)(.*)$'] # Flag for standard MSP format |
| 66 | 84 |
| 67 regex_massbank = {} | 85 regex_massbank = {} |
| 68 regex_massbank['name'] = [r'^RECORD_TITLE:(.*)$'] | 86 regex_massbank['name'] = [r'^RECORD_TITLE:(.*)$'] |
| 69 regex_massbank['polarity'] = \ | 87 regex_massbank['polarity'] = \ |
| 71 regex_massbank['precursor_mz'] = \ | 89 regex_massbank['precursor_mz'] = \ |
| 72 [r'^MS\$FOCUSED_ION:\s+PRECURSOR_M/Z\s+(\d*[.,]?\d*)$'] | 90 [r'^MS\$FOCUSED_ION:\s+PRECURSOR_M/Z\s+(\d*[.,]?\d*)$'] |
| 73 regex_massbank['precursor_type'] = \ | 91 regex_massbank['precursor_type'] = \ |
| 74 [r'^MS\$FOCUSED_ION:\s+PRECURSOR_TYPE\s+(.*)$'] | 92 [r'^MS\$FOCUSED_ION:\s+PRECURSOR_TYPE\s+(.*)$'] |
| 75 regex_massbank['num_peaks'] = [r'^PK\$NUM_PEAK:\s+(\d*)'] | 93 regex_massbank['num_peaks'] = [r'^PK\$NUM_PEAK:\s+(\d*)'] |
| 94 regex_massbank['retention_time'] = [ | |
| 95 r'^AC\$CHROMATOGRAPHY:\s+RETENTION_TIME\s*(\d*\.?\d+).*'] | |
| 76 regex_massbank['cols'] = [r'^PK\$PEAK:\s+(.*)'] | 96 regex_massbank['cols'] = [r'^PK\$PEAK:\s+(.*)'] |
| 77 regex_massbank['massbank'] = [r'^RECORD_TITLE:(.*)$'] # Flag for massbank | 97 regex_massbank['massbank'] = [r'^RECORD_TITLE:(.*)$'] # Flag for massbank |
| 98 | |
| 78 | 99 |
| 79 if args.schema == 'msp': | 100 if args.schema == 'msp': |
| 80 meta_regex = regex_msp | 101 meta_regex = regex_msp |
| 81 elif args.schema == 'massbank': | 102 elif args.schema == 'massbank': |
| 82 meta_regex = regex_massbank | 103 meta_regex = regex_massbank |
| 139 # the msPurity pipeline) choose between getting additional details to | 160 # the msPurity pipeline) choose between getting additional details to |
| 140 # add as columns as either all meta data from msp, just details from the | 161 # add as columns as either all meta data from msp, just details from the |
| 141 # record name (i.e. when using msPurity and we have the columns | 162 # record name (i.e. when using msPurity and we have the columns |
| 142 # coded into the name) or just the spectra index (spectrac) | 163 # coded into the name) or just the spectra index (spectrac) |
| 143 paramd = init_paramd(args) | 164 paramd = init_paramd(args) |
| 165 meta_info = {k: v for k, v in meta_info.items() if k | |
| 166 not in ['msp', 'massbank', 'cols']} | |
| 144 | 167 |
| 145 if args.meta_select_col == 'name': | 168 if args.meta_select_col == 'name': |
| 146 # have additional column of just the name | 169 # have additional column of just the name |
| 147 paramd['additional_details'] = {'name': meta_info['name']} | 170 paramd['additional_details'] = {'name': meta_info['name']} |
| 148 elif args.meta_select_col == 'name_split': | 171 elif args.meta_select_col == 'name_split': |
| 175 | 198 |
| 176 # =============== Update param based on MSP metadata ====================== | 199 # =============== Update param based on MSP metadata ====================== |
| 177 # Replace param details with details from MSP if required | 200 # Replace param details with details from MSP if required |
| 178 if 'precursor_type' in meta_info and meta_info['precursor_type']: | 201 if 'precursor_type' in meta_info and meta_info['precursor_type']: |
| 179 paramd["cli"]["--ion"] = meta_info['precursor_type'] | 202 paramd["cli"]["--ion"] = meta_info['precursor_type'] |
| 203 adduct = meta_info['precursor_type'] | |
| 180 else: | 204 else: |
| 181 if paramd["default_ion"]: | 205 if paramd["default_ion"]: |
| 182 paramd["cli"]["--ion"] = paramd["default_ion"] | 206 paramd["cli"]["--ion"] = paramd["default_ion"] |
| 207 adduct = paramd["default_ion"] | |
| 183 else: | 208 else: |
| 184 paramd["cli"]["--auto-charge"] = '' | 209 paramd["cli"]["--auto-charge"] = '' |
| 185 | 210 |
| 186 if 'precursor_mz' in meta_info and meta_info['precursor_mz']: | 211 if 'precursor_mz' in meta_info and meta_info['precursor_mz']: |
| 187 paramd["cli"]["--precursor"] = meta_info['precursor_mz'] | 212 paramd["cli"]["--precursor"] = meta_info['precursor_mz'] |
| 213 | |
| 214 if not ('precursor_type' in paramd['additional_details'] or 'adduct' | |
| 215 in paramd['additional_details']): | |
| 216 # If possible always good to have the adduct in output as a column | |
| 217 paramd['additional_details']['adduct'] = adduct | |
| 188 | 218 |
| 189 # ============== Create CLI cmd for metfrag =============================== | 219 # ============== Create CLI cmd for metfrag =============================== |
| 190 cmd = "sirius --fingerid" | 220 cmd = "sirius --fingerid" |
| 191 for k, v in six.iteritems(paramd["cli"]): | 221 for k, v in six.iteritems(paramd["cli"]): |
| 192 cmd += " {} {}".format(str(k), str(v)) | 222 cmd += " {} {}".format(str(k), str(v)) |
| 246 | 276 |
| 247 peaklist.append(save_line) | 277 peaklist.append(save_line) |
| 248 | 278 |
| 249 elif plinesread and plinesread == pnumlines: | 279 elif plinesread and plinesread == pnumlines: |
| 250 # ======= Get sample name and additional details for output ======= | 280 # ======= Get sample name and additional details for output ======= |
| 251 spectrac += 1 | 281 if adducts_from_cli: |
| 252 paramd, cmd = run_sirius(meta_info, peaklist, args, wd, spectrac) | 282 for adduct in adducts_from_cli: |
| 253 | 283 print(adduct) |
| 254 paramds[paramd["SampleName"]] = paramd | 284 spectrac += 1 |
| 255 cmds.append(cmd) | 285 meta_info['precursor_type'] = adduct |
| 286 paramd, cmd = run_sirius(meta_info, peaklist, args, wd, | |
| 287 spectrac) | |
| 288 | |
| 289 paramds[paramd["SampleName"]] = paramd | |
| 290 cmds.append(cmd) | |
| 291 else: | |
| 292 spectrac += 1 | |
| 293 paramd, cmd = run_sirius(meta_info, peaklist, args, wd, | |
| 294 spectrac) | |
| 295 | |
| 296 paramds[paramd["SampleName"]] = paramd | |
| 297 cmds.append(cmd) | |
| 256 | 298 |
| 257 meta_info = {} | 299 meta_info = {} |
| 258 pnumlines = 0 | 300 pnumlines = 0 |
| 259 plinesread = 0 | 301 plinesread = 0 |
| 260 | 302 |
| 261 # end of file. Check if there is a MSP spectra to | 303 # end of file. Check if there is a MSP spectra to |
| 262 # run metfrag on still | 304 # run metfrag on still |
| 263 | 305 |
| 264 if plinesread and plinesread == pnumlines: | 306 if plinesread and plinesread == pnumlines: |
| 265 paramd, cmd = run_sirius(meta_info, peaklist, args, wd, spectrac + 1) | 307 if adducts_from_cli: |
| 266 | 308 for adduct in adducts_from_cli: |
| 267 paramds[paramd["SampleName"]] = paramd | 309 print(adduct) |
| 268 cmds.append(cmd) | 310 spectrac += 1 |
| 311 meta_info['precursor_type'] = adduct | |
| 312 paramd, cmd = run_sirius(meta_info, peaklist, args, wd, | |
| 313 spectrac) | |
| 314 | |
| 315 paramds[paramd["SampleName"]] = paramd | |
| 316 cmds.append(cmd) | |
| 317 else: | |
| 318 spectrac += 1 | |
| 319 paramd, cmd = run_sirius(meta_info, peaklist, args, wd, | |
| 320 spectrac) | |
| 321 | |
| 322 paramds[paramd["SampleName"]] = paramd | |
| 323 cmds.append(cmd) | |
| 269 | 324 |
| 270 # Perform multiprocessing on command line call level | 325 # Perform multiprocessing on command line call level |
| 271 if int(args.cores_top_level) > 1: | 326 if int(args.cores_top_level) > 1: |
| 272 cmds_chunks = [cmds[x:x + int(args.chunks)] | 327 cmds_chunks = [cmds[x:x + int(args.chunks)] |
| 273 for x in list(range(0, len(cmds), int(args.chunks)))] | 328 for x in list(range(0, len(cmds), int(args.chunks)))] |
| 319 reader = csv.DictReader(infile, delimiter='\t') | 374 reader = csv.DictReader(infile, delimiter='\t') |
| 320 | 375 |
| 321 ad = paramds[fn.split(os.sep)[-3]]['additional_details'] | 376 ad = paramds[fn.split(os.sep)[-3]]['additional_details'] |
| 322 | 377 |
| 323 for line in reader: | 378 for line in reader: |
| 379 if 0 < int(args.rank_filter) < int(line['rank']): | |
| 380 # filter out those annotations greater than rank filter | |
| 381 # If rank_filter is zero then skip | |
| 382 continue | |
| 324 line.update(ad) | 383 line.update(ad) |
| 325 # round score to 5 d.p. | 384 # round score to 5 d.p. |
| 326 line['score'] = round(float(line['score']), 5) | 385 line['score'] = round(float(line['score']), 5) |
| 327 | 386 |
| 328 dwriter.writerow(line) | 387 dwriter.writerow(line) |
