Mercurial > repos > pieterlukasse > prims_metabolomics
diff combine_output.py @ 21:19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode
* Improved the quantification output files of MsClust, among other things by
sorting the mass list by intensity (last two columns of the quantification
files)
* Added Molecular Mass calculation method
author | pieter.lukasse@wur.nl |
---|---|
date | Wed, 05 Mar 2014 17:20:11 +0100 |
parents | 9d5f4f5f764b |
children |
line wrap: on
line diff
--- a/combine_output.py Tue Feb 11 12:29:50 2014 +0100 +++ b/combine_output.py Wed Mar 05 17:20:11 2014 +0100 @@ -155,12 +155,16 @@ @param data: dictionary containing merged dataset @param out_csv: output csv file ''' - header = ['Centrotype', + # Columns we don't repeat: + header_part1 = ['Centrotype', 'cent.Factor', 'scan nr.', 'R.T. (umin)', 'nr. Peaks', - 'R.T.', + 'R.T.'] + # These are the headers/columns we repeat in case of + # combining hits in one line (see alternative_headers method below): + header_part2 = [ 'Name', 'FORMULA', 'Library', @@ -190,13 +194,21 @@ output_multi_handle = csv.writer(outfile_multi_handle, delimiter="\t") # Write headers - output_single_handle.writerow(header) - output_multi_handle.writerow(header * nhits) + output_single_handle.writerow(header_part1 + header_part2) + output_multi_handle.writerow(header_part1 + header_part2 + alternative_headers(header_part2, nhits-1)) # Combine all hits for each centrotype into one line line = [] for centrotype_idx in xrange(len(data)): + i = 0 for hit in data[centrotype_idx]: - line.extend(hit) + if i==0: + line.extend(hit) + else: + line.extend(hit[6:]) + i = i+1 + # small validation (if error, it is a programming error): + if i > nhits: + raise Exception('Error: more hits that expected for centrotype_idx ' + centrotype_idx) output_multi_handle.writerow(line) line = [] @@ -205,6 +217,17 @@ for hit in data[centrotype_idx]: output_single_handle.writerow(hit) +def alternative_headers(header_part2, nr_alternative_hits): + ''' + This method will iterate over the header names and add the string 'ALT#_' before each, + where # is the number of the alternative, according to number of alternative hits we want to add + to final csv/tsv + ''' + result = [] + for i in xrange(nr_alternative_hits): + for header_name in header_part2: + result.append("ALT" + str(i+1) + "_" + header_name) + return result def main(): '''