Mercurial > repos > computational-metabolomics > sirius_csifingerid
annotate sirius_csifingerid.py @ 0:9e6bf7278257 draft
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
| author | computational-metabolomics | 
|---|---|
| date | Wed, 05 Feb 2020 10:41:48 -0500 | 
| parents | |
| children | 856b3761277d | 
| rev | line source | 
|---|---|
| 0 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 1 from __future__ import absolute_import, print_function | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 2 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 3 import argparse | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 4 import csv | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 5 import glob | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 6 import multiprocessing | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 7 import os | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 8 import re | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 9 import sys | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 10 import tempfile | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 11 import uuid | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 12 from collections import defaultdict | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 13 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 14 import six | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 15 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
# Command-line interface: every option is received as a string unless a
# non-string default is used (the numeric defaults below stay ints when the
# option is omitted).
parser = argparse.ArgumentParser()
parser.add_argument('--input_pth')
parser.add_argument('--result_pth')
parser.add_argument('--database')
parser.add_argument('--profile')
parser.add_argument('--candidates')
parser.add_argument('--ppm_max')
parser.add_argument('--polarity')
parser.add_argument('--results_name')
parser.add_argument('--out_dir')
parser.add_argument('--tool_directory')
parser.add_argument('--temp_dir')

parser.add_argument('--meta_select_col', default='all')
parser.add_argument('--cores_top_level', default=1)
parser.add_argument('--chunks', default=1)
parser.add_argument('--minMSMSpeaks', default=1)
parser.add_argument('--schema', default='msp')
args = parser.parse_args()
print(args)

# Nothing to do for an empty input file: report it and stop with status 0.
if os.stat(args.input_pth).st_size == 0:
    print('Input file empty')
    # sys.exit() is always available, unlike the site-provided exit().
    sys.exit()

# Working directory: either "<temp_dir>/temp" when --temp_dir is given, or a
# uniquely named sub-directory of a freshly created temporary directory.
if args.temp_dir:
    wd = os.path.join(args.temp_dir, 'temp')
    # Only create the directory when it is missing; an unconditional mkdir
    # raises OSError if a previous run already created it.
    if not os.path.exists(wd):
        os.mkdir(wd)
else:
    td = tempfile.mkdtemp()
    wd = os.path.join(td, str(uuid.uuid4()))
    os.mkdir(wd)
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 50 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
######################################################################
# Setup regular expressions for MSP parsing dictionary
######################################################################
# Each dictionary maps a metadata field name to the list of regular
# expressions that may capture its value from a single line of the file.
regex_msp = {}
regex_msp['name'] = [r'^Name(?:=|:)(.*)$']
regex_msp['polarity'] = [r'^ion.*mode(?:=|:)(.*)$',
                         r'^ionization.*mode(?:=|:)(.*)$',
                         r'^polarity(?:=|:)(.*)$']
regex_msp['precursor_mz'] = [r'^precursor.*m/z(?:=|:)\s*(\d*[.,]?\d*)$',
                             r'^precursor.*mz(?:=|:)\s*(\d*[.,]?\d*)$']
regex_msp['precursor_type'] = [r'^precursor.*type(?:=|:)(.*)$',
                               r'^adduct(?:=|:)(.*)$',
                               r'^ADDUCTIONNAME(?:=|:)(.*)$']
regex_msp['num_peaks'] = [r'^Num.*Peaks(?:=|:)\s*(\d*)$']
regex_msp['msp'] = [r'^Name(?:=|:)(.*)$']  # Flag for standard MSP format

regex_massbank = {}
regex_massbank['name'] = [r'^RECORD_TITLE:(.*)$']
regex_massbank['polarity'] = \
    [r'^AC\$MASS_SPECTROMETRY:\s+ION_MODE\s+(.*)$']
regex_massbank['precursor_mz'] = \
    [r'^MS\$FOCUSED_ION:\s+PRECURSOR_M/Z\s+(\d*[.,]?\d*)$']
regex_massbank['precursor_type'] = \
    [r'^MS\$FOCUSED_ION:\s+PRECURSOR_TYPE\s+(.*)$']
regex_massbank['num_peaks'] = [r'^PK\$NUM_PEAK:\s+(\d*)']
regex_massbank['cols'] = [r'^PK\$PEAK:\s+(.*)']
regex_massbank['massbank'] = [r'^RECORD_TITLE:(.*)$']  # Flag for massbank

if args.schema == 'msp':
    meta_regex = regex_msp
elif args.schema == 'massbank':
    meta_regex = regex_massbank
elif args.schema == 'auto':
    # If auto we just check for all the available parameter names
    # and then determine if Massbank or MSP based on
    # the name parameter.
    # Copy each pattern list so that merging in the MSP patterns does not
    # modify regex_massbank itself.
    meta_regex = {}
    for field in regex_massbank:
        meta_regex[field] = list(regex_massbank[field])
    for field in ('name', 'polarity', 'precursor_mz',
                  'precursor_type', 'num_peaks'):
        meta_regex[field].extend(regex_msp[field])
    meta_regex['msp'] = regex_msp['msp']
else:
    # Fail with a clear message instead of a NameError on the first use of
    # meta_regex further down.
    raise ValueError(
        "Unknown --schema value %r (expected 'msp', 'massbank' or 'auto')"
        % (args.schema,))

print(meta_regex)

# this dictionary will store the meta data results from the MSP file
meta_info = {}
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 100 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 101 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 102 # function to extract the meta data using the regular expressions | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
def parse_meta(meta_regex, meta_info=None, text=None):
    """Extract metadata fields from one line of a spectral library file.

    Every pattern registered under each key of ``meta_regex`` is searched
    (case-insensitively) in the line.  When a pattern matches, its captured
    groups are joined with ``-``, stripped of surrounding whitespace and
    stored in ``meta_info`` under that key.  If several patterns of the
    same key match, the value from the last matching pattern is kept.

    :param meta_regex: dict mapping a field name to a list of regular
        expression strings.
    :param meta_info: dict updated in place with the extracted values; a
        new dict is created when omitted.
    :param text: the line to parse.  When omitted, the module-level
        variable ``line`` is used, so existing two-argument callers keep
        working unchanged.
    :return: ``meta_info`` containing any fields found on the line.
    """
    if meta_info is None:
        meta_info = {}
    if text is None:
        # Legacy call style: read the current line from the global loop
        # variable set by the caller.
        text = line
    # dict.items() behaves the same on Python 2 and 3 for this small dict.
    for key, patterns in meta_regex.items():
        for pattern in patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                meta_info[key] = '-'.join(match.groups()).strip()
    return meta_info
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 112 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 113 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 114 ###################################################################### | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 115 # Setup parameter dictionary | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 116 ###################################################################### | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
def init_paramd(args):
    """Build the base parameter dictionary for a single SIRIUS run.

    Args:
        args: parsed command line arguments. The attributes ``database``,
            ``profile``, ``candidates``, ``ppm_max`` and ``polarity`` are
            read here.

    Returns:
        dict with two entries:

        * ``"cli"`` - a fresh dict mapping SIRIUS command line flags
          (``--database``, ``--profile``, ``--candidates``, ``--ppm-max``)
          to the corresponding values taken from *args*.
        * ``"default_ion"`` - ``"[M+H]+"`` for positive polarity,
          ``"[M-H]-"`` for negative polarity and ``''`` for anything else.
    """
    # Fallback adduct derived from the requested polarity; an empty string
    # means that no default ion is available.
    if args.polarity == 'positive':
        default_ion = "[M+H]+"
    elif args.polarity == 'negative':
        default_ion = "[M-H]-"
    else:
        default_ion = ''

    # A plain dict is used on purpose: the previous ``defaultdict()`` had no
    # default factory, so it never supplied defaults for missing keys.
    return {
        "cli": {
            "--database": args.database,
            "--profile": args.profile,
            "--candidates": args.candidates,
            "--ppm-max": args.ppm_max,
        },
        "default_ion": default_ion,
    }
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 132 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 133 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 134 ###################################################################### | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 135 # Function to run sirius once all metadata and spectra have been obtained | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 136 ###################################################################### | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
def run_sirius(meta_info, peaklist, args, wd, spectrac):
    """Prepare, record and (when single-core) run SIRIUS for one spectrum.

    The peak list is written to ``<wd>/<spectrac>_tmpspec.txt``, a
    ``sirius --fingerid`` command line is assembled from the parameters, and
    the parameter dictionary is stored in the module-level ``paramds``
    dictionary under its ``"SampleName"``.

    Args:
        meta_info: mapping of metadata for the spectrum. The keys ``'name'``,
            ``'precursor_type'`` and ``'precursor_mz'`` are read here; with
            ``meta_select_col == 'all'`` the whole mapping is kept as the
            additional details.
        peaklist: sequence of ``(m/z, intensity)`` pairs of strings.
        args: parsed command line arguments. ``meta_select_col``,
            ``minMSMSpeaks`` and ``cores_top_level`` are read here, plus the
            attributes consumed by ``init_paramd``.
        wd: directory in which the temporary spectrum file and the SIRIUS
            output folder are placed.
        spectrac: index of the spectrum, used to build unique file names.

    Returns:
        tuple ``(paramd, cmd)`` - the parameter dictionary and the shell
        command string for this spectrum.
    """
    # Get sample details (if possible to extract) e.g. if created as part of
    # the msPurity pipeline) choose between getting additional details to
    # add as columns as either all meta data from msp, just details from the
    # record name (i.e. when using msPurity and we have the columns
    # coded into the name) or just the spectra index (spectrac)
    paramd = init_paramd(args)

    if args.meta_select_col == 'name':
        # have additional column of just the name
        paramd['additional_details'] = {'name': meta_info['name']}
    elif args.meta_select_col == 'name_split':
        # have additional columns split by "|" and
        # then on ":" e.g. MZ:100.2 | RT:20 | xcms_grp_id:1
        details = {}
        for sm in meta_info['name'].split("|"):
            # Split on the first ":" only so that values which themselves
            # contain ":" are kept whole; segments without a ":" (e.g. an
            # empty segment after a trailing "|") carry no key/value pair
            # and are skipped instead of raising IndexError.
            key, sep, value = sm.partition(":")
            if not sep:
                continue
            details[key.strip()] = value.strip()
        paramd['additional_details'] = details
    elif args.meta_select_col == 'all':
        # have additional columns based on all
        # the meta information extracted from the MSP
        paramd['additional_details'] = meta_info
    else:
        # Just have an index of the spectra in the MSP file
        paramd['additional_details'] = {'spectra_idx': spectrac}

    sample_name = "{}_sirius_result".format(spectrac)
    paramd["SampleName"] = sample_name
    paramd["cli"]["--output"] = os.path.join(wd, sample_name)

    # =============== Output peaks to txt file ==============================
    paramd["cli"]["--ms2"] = os.path.join(wd,
                                          "{}_tmpspec.txt".format(spectrac))

    # write spec file
    with open(paramd["cli"]["--ms2"], 'w') as outfile:
        for p in peaklist:
            outfile.write(p[0] + "\t" + p[1] + "\n")

    # =============== Update param based on MSP metadata ======================
    # Replace param details with details from MSP if required
    if 'precursor_type' in meta_info and meta_info['precursor_type']:
        paramd["cli"]["--ion"] = meta_info['precursor_type']
    elif paramd["default_ion"]:
        paramd["cli"]["--ion"] = paramd["default_ion"]
    else:
        # No ion known from the metadata or the polarity: pass the bare
        # --auto-charge flag (it takes no value).
        paramd["cli"]["--auto-charge"] = ''

    if 'precursor_mz' in meta_info and meta_info['precursor_mz']:
        paramd["cli"]["--precursor"] = meta_info['precursor_mz']

    # ============== Create CLI cmd for SIRIUS ================================
    # The command is executed through a shell (os.system), and several values
    # come straight from the input file (e.g. "[M+H]+") or from file paths,
    # so every value is shell-quoted. Flags with an empty value are emitted
    # on their own.
    cmd_parts = ["sirius", "--fingerid"]
    for k, v in six.iteritems(paramd["cli"]):
        cmd_parts.append(str(k))
        value = str(v)
        if value:
            cmd_parts.append(six.moves.shlex_quote(value))
    cmd = " ".join(cmd_parts)

    # Register the parameters in the module-level lookup of all runs.
    paramds[paramd["SampleName"]] = paramd

    # =============== Run SIRIUS ==============================================
    # Filter before process with a minimum number of MS/MS peaks. The number
    # of peaks is taken from the peak list itself rather than from a
    # module-level counter.
    # NOTE(review): cmd is still returned when the spectrum is below the
    # threshold; confirm the caller does not queue it for parallel execution.
    if len(peaklist) >= float(args.minMSMSpeaks):

        if int(args.cores_top_level) == 1:
            os.system(cmd)

    return paramd, cmd
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 203 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 204 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
def work(cmds):
    """Run every shell command in *cmds* in order.

    Args:
        cmds: iterable of shell command strings.

    Returns:
        list holding the ``os.system`` result of each command, in the same
        order as the commands were given.
    """
    exit_statuses = []
    for shell_cmd in cmds:
        exit_statuses.append(os.system(shell_cmd))
    return exit_statuses
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 207 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 208 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 209 ###################################################################### | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 210 # Parse MSP file and run SIRIUS CLI | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 211 ###################################################################### | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 212 # keep list of commands if performing in CLI in parallel | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 213 cmds = [] | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 214 # keep a dictionary of all params | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 215 paramds = {} | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 216 # keep count of spectra (for uid) | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 217 spectrac = 0 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 218 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 219 with open(args.input_pth, "r") as infile: | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 220 # number of lines for the peaks | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 221 pnumlines = 0 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 222 # number of lines read for the peaks | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 223 plinesread = 0 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 224 for line in infile: | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 225 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 226 line = line.strip() | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 227 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 228 if pnumlines == 0: | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 229 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 230 # ============== Extract metadata from MSP ======================== | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 231 meta_info = parse_meta(meta_regex, meta_info) | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 232 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 233 if ('massbank' in meta_info and 'cols' in meta_info) or \ | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 234 ('msp' in meta_info and 'num_peaks' in meta_info): | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 235 pnumlines = int(meta_info['num_peaks']) | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 236 peaklist = [] | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 237 plinesread = 0 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 238 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 239 elif plinesread < pnumlines: | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 240 # =============== Extract peaks from MSP ========================== | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 241 # .split() will split on any empty space (i.e. tab and space) | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 242 line = tuple(line.split()) | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 243 # Keep only m/z and intensity, not relative intensity | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 244 save_line = tuple(line[0].split() + line[1].split()) | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 245 plinesread += 1 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 246 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 247 peaklist.append(save_line) | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 248 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 249 elif plinesread and plinesread == pnumlines: | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 250 # ======= Get sample name and additional details for output ======= | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 251 spectrac += 1 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 252 paramd, cmd = run_sirius(meta_info, peaklist, args, wd, spectrac) | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 253 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 254 paramds[paramd["SampleName"]] = paramd | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 255 cmds.append(cmd) | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 256 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 257 meta_info = {} | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 258 pnumlines = 0 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 259 plinesread = 0 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 260 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 261 # end of file. Check if there is an MSP spectrum to | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 262 # run SIRIUS on still | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 263 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 264 if plinesread and plinesread == pnumlines: | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 265 paramd, cmd = run_sirius(meta_info, peaklist, args, wd, spectrac + 1) | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 266 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 267 paramds[paramd["SampleName"]] = paramd | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 268 cmds.append(cmd) | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 269 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
# Run the queued command lines through a pool of worker processes, but only
# when more than one top-level core was requested.
n_workers = int(args.cores_top_level)
if n_workers > 1:
    batch_size = int(args.chunks)
    # Consecutive slices of at most batch_size commands; pool.map passes
    # each slice to work() as a single argument.
    cmds_chunks = [cmds[start:start + batch_size]
                   for start in range(0, len(cmds), batch_size)]
    pool = multiprocessing.Pool(processes=n_workers)
    pool.map(work, cmds_chunks)
    # No more tasks will be submitted; wait for the workers to exit.
    pool.close()
    pool.join()
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 278 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
######################################################################
# Concatenate and filter the output
######################################################################
# Collect every summary_csi_fingerid.csv located exactly two directory
# levels below the working directory.
summary_glob = os.path.join(wd, '*', '*', 'summary_csi_fingerid.csv')
outfiles = glob.glob(summary_glob)

# Sort numerically on the digits captured by the pattern below (a number
# that directly follows a '/' and is followed by at least two more path
# components), so that e.g. 2 is ordered before 10.
# NOTE(review): the pattern hard-codes '/' as the path separator.
index_re = re.compile(r'^.*/(\d+).*/.*/summary_csi_fingerid.csv')
outfiles.sort(key=lambda path: int(index_re.match(path).group(1)))
print(outfiles)

# Nothing to merge: report it and stop (sys.exit() without an argument
# ends the script with the default exit status).
if not outfiles:
    print('No results')
    sys.exit()
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 296 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
# Column names of the merged table. Only the header row of the first
# summary file is read; the remaining files are not inspected here.
headers = []
c = 0  # not used anywhere in this part of the script
for fn in outfiles[:1]:
    with open(fn, 'r') as infile:
        reader = csv.reader(infile, delimiter='\t')
        # The builtin next() works on the csv reader under both Python 2
        # and Python 3, so no version switch is needed.
        headers.extend(next(reader))

# The keys of the most recently built paramd's 'additional_details' are
# placed in front of the summary columns.
detail_columns = list(paramd['additional_details'].keys())
headers = detail_columns + headers
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
changeset | 309 | 
| 
9e6bf7278257
"planemo upload for repository https://github.com/computational-metabolomics/sirius_csifingerid_galaxy commit 1d1b37a070f895c94069819237199c768da27258"
 computational-metabolomics parents: diff
# Merge all summary files into one tab-separated table at args.result_pth.
# NOTE: the result file is opened in append mode, so any content already
# present is kept and the header row is written after it.
with open(args.result_pth, 'a') as merged_outfile:
    dwriter = csv.DictWriter(merged_outfile,
                             fieldnames=headers, delimiter='\t')
    dwriter.writeheader()

    # outfiles was sorted numerically when it was collected. Iterate it as
    # is: wrapping it in sorted() would re-order the paths as plain strings
    # and put e.g. "10..." ahead of "2..." in the merged output.
    for fn in outfiles:
        print(fn)

        # The directory two levels above the summary file is the key under
        # which this spectrum's parameters were stored in paramds.
        ad = paramds[fn.split(os.sep)[-3]]['additional_details']

        with open(fn) as infile:
            reader = csv.DictReader(infile, delimiter='\t')

            for line in reader:
                # Add the spectrum's additional details to every result row
                line.update(ad)
                # round score to 5 d.p.
                line['score'] = round(float(line['score']), 5)

                dwriter.writerow(line)
