Mercurial > repos > pieterlukasse > prims_metabolomics

diff combine_output.py @ 21:19d8fd10248e
* Added interface to METEXP data store, including tool to fire queries in batch mode * Improved quantification output files of MsClust, a.o. sorting mass list based on intensity (last two columns of quantification files) * Added Molecular Mass calculation method
author: pieter.lukasse@wur.nl
date: Wed, 05 Mar 2014 17:20:11 +0100
parents: 9d5f4f5f764b
--- a/combine_output.py	Tue Feb 11 12:29:50 2014 +0100
+++ b/combine_output.py	Wed Mar 05 17:20:11 2014 +0100
@@ -155,12 +155,16 @@
     @param data: dictionary containing merged dataset
     @param out_csv: output csv file
     '''
-    header = ['Centrotype',
+    # Columns we don't repeat:
+    header_part1 = ['Centrotype',
               'cent.Factor',
               'scan nr.',
               'R.T. (umin)',
               'nr. Peaks',
-              'R.T.',
+              'R.T.']
+    # These are the headers/columns we repeat in case of 
+    # combining hits in one line (see alternative_headers method below):
+    header_part2 = [
               'Name',
               'FORMULA',
               'Library',
@@ -190,13 +194,21 @@
     output_multi_handle = csv.writer(outfile_multi_handle, delimiter="\t")
 
     # Write headers
-    output_single_handle.writerow(header)
-    output_multi_handle.writerow(header * nhits)
+    output_single_handle.writerow(header_part1 + header_part2)
+    output_multi_handle.writerow(header_part1 + header_part2 + alternative_headers(header_part2, nhits-1))
     # Combine all hits for each centrotype into one line
     line = []
     for centrotype_idx in xrange(len(data)):
+        i = 0
         for hit in data[centrotype_idx]:
-            line.extend(hit)
+            if i==0:
+                line.extend(hit)
+            else:
+                line.extend(hit[6:])
+            i = i+1
+        # small validation (if error, it is a programming error):
+        if i > nhits:
+            raise Exception('Error: more hits that expected for  centrotype_idx ' + centrotype_idx)
         output_multi_handle.writerow(line)
         line = []
 
@@ -205,6 +217,17 @@
         for hit in data[centrotype_idx]:
             output_single_handle.writerow(hit)
 
+def alternative_headers(header_part2, nr_alternative_hits):
+    ''' 
+    This method will iterate over the header names and add the string 'ALT#_' before each, 
+    where # is the number of the alternative, according to number of alternative hits we want to add
+    to final csv/tsv
+    '''
+    result = []
+    for i in xrange(nr_alternative_hits): 
+        for header_name in header_part2:
+            result.append("ALT" + str(i+1) + "_" + header_name) 
+    return result
 
 def main():
     '''
author	pieter.lukasse@wur.nl
date	Wed, 05 Mar 2014 17:20:11 +0100
parents	9d5f4f5f764b
children