Mercurial > repos > pieterlukasse > prims_metabolomics
comparison combine_output.py @ 21:19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
* Improved quantification output files of MsClust, among other things by
sorting the mass list based on intensity (last two columns of the
quantification files)
* Added Molecular Mass calculation method
author | pieter.lukasse@wur.nl |
---|---|
date | Wed, 05 Mar 2014 17:20:11 +0100 |
parents | 9d5f4f5f764b |
children |
comparison
equal
deleted
inserted
replaced
20:24fb75fedee0 | 21:19d8fd10248e |
---|---|
153 ''' | 153 ''' |
154 Writes tab-separated data to file | 154 Writes tab-separated data to file |
155 @param data: dictionary containing merged dataset | 155 @param data: dictionary containing merged dataset |
156 @param out_csv: output csv file | 156 @param out_csv: output csv file |
157 ''' | 157 ''' |
158 header = ['Centrotype', | 158 # Columns we don't repeat: |
159 header_part1 = ['Centrotype', | |
159 'cent.Factor', | 160 'cent.Factor', |
160 'scan nr.', | 161 'scan nr.', |
161 'R.T. (umin)', | 162 'R.T. (umin)', |
162 'nr. Peaks', | 163 'nr. Peaks', |
163 'R.T.', | 164 'R.T.'] |
165 # These are the headers/columns we repeat in case of | |
166 # combining hits in one line (see alternative_headers method below): | |
167 header_part2 = [ | |
164 'Name', | 168 'Name', |
165 'FORMULA', | 169 'FORMULA', |
166 'Library', | 170 'Library', |
167 'CAS', | 171 'CAS', |
168 'Forward', | 172 'Forward', |
188 outfile_multi_handle = open(out_csv_multi, 'wb') | 192 outfile_multi_handle = open(out_csv_multi, 'wb') |
189 output_single_handle = csv.writer(outfile_single_handle, delimiter="\t") | 193 output_single_handle = csv.writer(outfile_single_handle, delimiter="\t") |
190 output_multi_handle = csv.writer(outfile_multi_handle, delimiter="\t") | 194 output_multi_handle = csv.writer(outfile_multi_handle, delimiter="\t") |
191 | 195 |
192 # Write headers | 196 # Write headers |
193 output_single_handle.writerow(header) | 197 output_single_handle.writerow(header_part1 + header_part2) |
194 output_multi_handle.writerow(header * nhits) | 198 output_multi_handle.writerow(header_part1 + header_part2 + alternative_headers(header_part2, nhits-1)) |
195 # Combine all hits for each centrotype into one line | 199 # Combine all hits for each centrotype into one line |
196 line = [] | 200 line = [] |
197 for centrotype_idx in xrange(len(data)): | 201 for centrotype_idx in xrange(len(data)): |
202 i = 0 | |
198 for hit in data[centrotype_idx]: | 203 for hit in data[centrotype_idx]: |
199 line.extend(hit) | 204 if i==0: |
205 line.extend(hit) | |
206 else: | |
207 line.extend(hit[6:]) | |
208 i = i+1 | |
209 # small validation (if error, it is a programming error): | |
210 if i > nhits: | |
211 raise Exception('Error: more hits that expected for centrotype_idx ' + centrotype_idx) | |
200 output_multi_handle.writerow(line) | 212 output_multi_handle.writerow(line) |
201 line = [] | 213 line = [] |
202 | 214 |
203 # Write one line for each centrotype | 215 # Write one line for each centrotype |
204 for centrotype_idx in xrange(len(data)): | 216 for centrotype_idx in xrange(len(data)): |
205 for hit in data[centrotype_idx]: | 217 for hit in data[centrotype_idx]: |
206 output_single_handle.writerow(hit) | 218 output_single_handle.writerow(hit) |
207 | 219 |
220 def alternative_headers(header_part2, nr_alternative_hits): | |
221 ''' | |
222 This method will iterate over the header names and add the string 'ALT#_' before each, | |
223 where # is the number of the alternative, according to number of alternative hits we want to add | |
224 to final csv/tsv | |
225 ''' | |
226 result = [] | |
227 for i in xrange(nr_alternative_hits): | |
228 for header_name in header_part2: | |
229 result.append("ALT" + str(i+1) + "_" + header_name) | |
230 return result | |
208 | 231 |
209 def main(): | 232 def main(): |
210 ''' | 233 ''' |
211 Combine Output main function | 234 Combine Output main function |
212 It will merge the result files from "RankFilter" and "Lookup RI for CAS numbers" | 235 It will merge the result files from "RankFilter" and "Lookup RI for CAS numbers" |