Mercurial > repos > pieterlukasse > prims_metabolomics
annotate export_to_metexp_tabular.py @ 43:eb0e25d06060
updated dependency
| author | pieter.lukasse@wur.nl |
|---|---|
| date | Fri, 07 Nov 2014 15:55:06 +0100 |
| parents | 19d8fd10248e |
| children |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/bin/env python |
| 2 # encoding: utf-8 | |
| 3 ''' | |
| 4 Module to combine output from the GCMS Galaxy tools RankFilter, CasLookup and MsClust | |
| 5 into a tabular file that can be uploaded to the MetExp database. | |
| 6 | |
| 7 RankFilter, CasLookup are already combined by combine_output.py so here we will use | |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
8 this result. Furthermore here one of the MsClust |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
9 quantification files containing the respective spectra details are to be combined as well. |
| 0 | 10 |
| 11 Extra calculations performed: | |
| 12 - The column MW is also added here and is derived from the column FORMULA found | |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
13 in RankFilter, CasLookup combined result. |
| 0 | 14 |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
15 So in total here we merge 2 files and calculate one new column. |
| 0 | 16 ''' |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
17 from pkg_resources import resource_filename # @UnresolvedImport # pylint: disable=E0611 |
| 0 | 18 import csv |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
19 import re |
| 0 | 20 import sys |
| 21 from collections import OrderedDict | |
| 22 | |
| 23 __author__ = "Pieter Lukasse" | |
| 24 __contact__ = "pieter.lukasse@wur.nl" | |
| 25 __copyright__ = "Copyright, 2013, Plant Research International, WUR" | |
| 26 __license__ = "Apache v2" | |
| 27 | |
| 28 def _process_data(in_csv, delim='\t'): | |
| 29 ''' | |
| 30 Generic method to parse a tab-separated file returning a dictionary with named columns | |
| 31 @param in_csv: input filename to be parsed | |
| 32 ''' | |
| 33 data = list(csv.reader(open(in_csv, 'rU'), delimiter=delim)) | |
| 34 header = data.pop(0) | |
| 35 # Create dictionary with column name as key | |
| 36 output = OrderedDict() | |
| 37 for index in xrange(len(header)): | |
| 38 output[header[index]] = [row[index] for row in data] | |
| 39 return output | |
| 40 | |
| 41 ONE_TO_ONE = 'one_to_one' | |
| 42 N_TO_ONE = 'n_to_one' | |
| 43 | |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
44 def _merge_data(set1, link_field_set1, set2, link_field_set2, compare_function, merge_function, metadata, relation_type=ONE_TO_ONE): |
| 0 | 45 ''' |
| 46 Merges data from both input dictionaries based on the link fields. This method will | |
| 47 build up a new list containing the merged hits as the items. | |
| 48 @param set1: dictionary holding set1 in the form of N lists (one list per attribute name) | |
| 49 @param set2: dictionary holding set2 in the form of N lists (one list per attribute name) | |
| 50 ''' | |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
51 # TODO test for correct input files -> same link_field values should be there |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
52 # (test at least number of unique link_field values): |
| 0 | 53 # |
| 54 # if (len(set1[link_field_set1]) != len(set2[link_field_set2])): | |
| 55 # raise Exception('input files should have the same nr of key values ') | |
| 56 | |
| 57 | |
| 58 merged = [] | |
| 59 processed = {} | |
| 60 for link_field_set1_idx in xrange(len(set1[link_field_set1])): | |
| 61 link_field_set1_value = set1[link_field_set1][link_field_set1_idx] | |
| 62 if not link_field_set1_value in processed : | |
| 63 # keep track of processed items to not repeat them | |
| 64 processed[link_field_set1_value] = link_field_set1_value | |
| 65 | |
| 66 # Get the indices for current link_field_set1_value in both data-structures for proper matching | |
| 67 set1index = [index for index, value in enumerate(set1[link_field_set1]) if value == link_field_set1_value] | |
| 68 set2index = [index for index, value in enumerate(set2[link_field_set2]) if compare_function(value, link_field_set1_value)==True ] | |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
69 # Validation : |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
70 if len(set2index) == 0: |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
71 # means that corresponding data could not be found in set2, then throw error |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
72 raise Exception("Datasets not compatible, merge not possible. " + link_field_set1 + "=" + |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
73 link_field_set1_value + " only found in first dataset. ") |
| 0 | 74 |
| 75 merged_hits = [] | |
| 76 # Combine hits | |
| 77 for hit in xrange(len(set1index)): | |
| 78 # Create records of hits to be merged ("keys" are the attribute names, so what the lines below do | |
| 79 # is create a new "dict" item with same "keys"/attributes, with each attribute filled with its | |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
80 # corresponding value in the sets; i.e. |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
81 # set1[key] => returns the list/array with size = nrrows, with the values for the attribute |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
82 # represented by "key". |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
83 # set1index[hit] => points to the row nr=hit (hit is a rownr/index) |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
84 # So set1[x][set1index[n]] = set1.attributeX.instanceN |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
85 # |
| 0 | 86 # It just ensures the entry is made available as a plain named array for easy access. |
| 87 rf_record = OrderedDict(zip(set1.keys(), [set1[key][set1index[hit]] for key in set1.keys()])) | |
| 88 if relation_type == ONE_TO_ONE : | |
| 89 cl_record = OrderedDict(zip(set2.keys(), [set2[key][set2index[hit]] for key in set2.keys()])) | |
| 90 else: | |
| 91 # is N to 1: | |
| 92 cl_record = OrderedDict(zip(set2.keys(), [set2[key][set2index[0]] for key in set2.keys()])) | |
| 93 | |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
94 merged_hit = merge_function(rf_record, cl_record, metadata) |
| 0 | 95 merged_hits.append(merged_hit) |
| 96 | |
| 97 merged.append(merged_hits) | |
| 98 | |
| 99 return merged, len(set1index) | |
| 100 | |
| 101 | |
| 102 def _compare_records(key1, key2): | |
| 103 ''' | |
| 104 in this case the compare method is really simple as both keys are expected to contain | |
| 105 same value when records are the same | |
| 106 ''' | |
| 107 if key1 == key2: | |
| 108 return True | |
| 109 else: | |
| 110 return False | |
| 111 | |
| 112 | |
| 113 | |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
114 def _merge_records(rank_caslookup_combi, msclust_quant_record, metadata): |
| 0 | 115 ''' |
| 116 Combines single records from both the RankFilter+CasLookup combi file and from MsClust file | |
| 117 | |
| 118 @param rank_caslookup_combi: rankfilter and caslookup combined record (see combine_output.py) | |
| 119 @param msclust_quant_record: msclust quantification + spectrum record | |
| 120 ''' | |
| 121 record = [] | |
| 122 for column in rank_caslookup_combi: | |
| 123 record.append(rank_caslookup_combi[column]) | |
| 124 | |
| 125 for column in msclust_quant_record: | |
| 126 record.append(msclust_quant_record[column]) | |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
127 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
128 for column in metadata: |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
129 record.append(metadata[column]) |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
130 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
131 # add MOLECULAR MASS (MM) |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
132 molecular_mass = get_molecular_mass(rank_caslookup_combi['FORMULA']) |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
133 # limit to two decimals: |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
134 record.append("{0:.2f}".format(molecular_mass)) |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
135 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
136 # add MOLECULAR WEIGHT (MW) - TODO - calculate this |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
137 record.append('0.0') |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
138 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
139 # level of identification and Location of reference standard |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
140 record.append('0') |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
141 record.append('') |
| 0 | 142 |
| 143 return record | |
| 144 | |
| 145 | |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
146 def get_molecular_mass(formula): |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
147 ''' |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
148 Calculates the molecular mass (MM). |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
149 E.g. MM of H2O = (relative)atomic mass of H x2 + (relative)atomic mass of O |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
150 ''' |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
151 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
152 # Each element is represented by a capital letter, followed optionally by |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
153 # lower case, with one or more digits as for how many elements: |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
154 element_pattern = re.compile("([A-Z][a-z]?)(\d*)") |
| 0 | 155 |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
156 total_mass = 0 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
157 for (element_name, count) in element_pattern.findall(formula): |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
158 if count == "": |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
159 count = 1 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
160 else: |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
161 count = int(count) |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
162 element_mass = float(elements_and_masses_map[element_name]) # "found: Python's built-in float type has double precision " (? check if really correct ?) |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
163 total_mass += element_mass * count |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
164 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
165 return total_mass |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
166 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
167 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
168 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
169 def _save_data(data, headers, out_csv): |
| 0 | 170 ''' |
| 171 Writes tab-separated data to file | |
| 172 @param data: dictionary containing merged dataset | |
| 173 @param out_csv: output csv file | |
| 174 ''' | |
| 175 | |
| 176 # Open output file for writing | |
| 177 outfile_single_handle = open(out_csv, 'wb') | |
| 178 output_single_handle = csv.writer(outfile_single_handle, delimiter="\t") | |
| 179 | |
| 180 # Write headers | |
| 181 output_single_handle.writerow(headers) | |
| 182 | |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
183 # Write |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
184 for item_idx in xrange(len(data)): |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
185 for hit in data[item_idx]: |
| 0 | 186 output_single_handle.writerow(hit) |
| 187 | |
| 188 | |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
189 def _get_map_for_elements_and_masses(elements_and_masses): |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
190 ''' |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
191 This method will read out the column 'Chemical symbol' and make a map |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
192 of this, storing the column 'Relative atomic mass' as its value |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
193 ''' |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
194 resultMap = {} |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
195 index = 0 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
196 for entry in elements_and_masses['Chemical symbol']: |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
197 resultMap[entry] = elements_and_masses['Relative atomic mass'][index] |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
198 index += 1 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
199 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
200 return resultMap |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
201 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
202 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
203 def init_elements_and_masses_map(): |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
204 ''' |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
205 Initializes the lookup map containing the elements and their respective masses |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
206 ''' |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
207 elements_and_masses = _process_data(resource_filename(__name__, "static_resources/elements_and_masses.tab")) |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
208 global elements_and_masses_map |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
209 elements_and_masses_map = _get_map_for_elements_and_masses(elements_and_masses) |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
210 |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
211 |
| 0 | 212 def main(): |
| 213 ''' | |
| 214 Combine Output main function | |
| 215 | |
| 216 RankFilter, CasLookup are already combined by combine_output.py so here we will use | |
| 217 this result. Furthermore here the MsClust spectra file (.MSP) and one of the MsClust | |
| 218 quantification files are to be combined with combine_output.py result as well. | |
| 219 ''' | |
| 220 rankfilter_and_caslookup_combined_file = sys.argv[1] | |
| 221 msclust_quantification_and_spectra_file = sys.argv[2] | |
| 222 output_csv = sys.argv[3] | |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
223 # metadata |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
224 metadata = OrderedDict() |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
225 metadata['organism'] = sys.argv[4] |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
226 metadata['tissue'] = sys.argv[5] |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
227 metadata['experiment_name'] = sys.argv[6] |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
228 metadata['user_name'] = sys.argv[7] |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
229 metadata['column_type'] = sys.argv[8] |
| 0 | 230 |
| 231 # Read RankFilter and CasLookup output files | |
| 232 rankfilter_and_caslookup_combined = _process_data(rankfilter_and_caslookup_combined_file) | |
| 233 msclust_quantification_and_spectra = _process_data(msclust_quantification_and_spectra_file, ',') | |
| 234 | |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
235 # Read elements and masses to use for the MW/MM calculation : |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
236 init_elements_and_masses_map() |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
237 |
| 0 | 238 merged, nhits = _merge_data(rankfilter_and_caslookup_combined, 'Centrotype', |
|
21
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
239 msclust_quantification_and_spectra, 'centrotype', |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
240 _compare_records, _merge_records, metadata, |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
241 N_TO_ONE) |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
242 headers = rankfilter_and_caslookup_combined.keys() + msclust_quantification_and_spectra.keys() + metadata.keys() + ['MM','MW', 'Level of identification', 'Location of reference standard'] |
|
19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
pieter.lukasse@wur.nl
parents:
0
diff
changeset
|
243 _save_data(merged, headers, output_csv) |
| 0 | 244 |
| 245 | |
| 246 if __name__ == '__main__': | |
| 247 main() |
