# HG changeset patch
# User pieter.lukasse@wur.nl
# Date 1427096442 -3600
# Node ID fe4682eb938c021813b11eed26e2030cb6562404
# Parent  05ff1c55db84776913547e407dc832d16e40ed34
small improvement

diff -r 05ff1c55db84 -r fe4682eb938c GCMS/combine_output.py
--- a/GCMS/combine_output.py	Fri Mar 20 17:11:04 2015 +0100
+++ b/GCMS/combine_output.py	Mon Mar 23 08:40:42 2015 +0100
@@ -5,7 +5,6 @@
 '''
 
 import csv
-import re
 import sys
 import math
 import pprint
@@ -81,13 +80,15 @@
     # The ID in the RankFilter output contains the following 5 fields:
     rf_id = rankfilter['ID'].split('-')
     try:
+        if 'Formula' not in rankfilter:
+            raise Exception("Error: old Rankfilter format detected (the selected Rankfilter data does not contain the column 'Formula'). Solution: rerun Rankfilter again.")
         hit = [rf_id[0], # Centrotype
                rf_id[1], # cent.Factor
                rf_id[2], # scan nr
                rf_id[3], # R.T. (umin)
                rf_id[4], # nr. Peaks
+               rankfilter['R.T.'],
                # Appending other fields
-               rankfilter['R.T.'],
                rankfilter['Name'],
                rankfilter['Formula'],
                rankfilter['Library'].strip(),
diff -r 05ff1c55db84 -r fe4682eb938c GCMS/combine_output.xml
--- a/GCMS/combine_output.xml	Fri Mar 20 17:11:04 2015 +0100
+++ b/GCMS/combine_output.xml	Mon Mar 23 08:40:42 2015 +0100
@@ -15,13 +15,13 @@
     <data format="tabular" label="${tool.name} (Multi) on ${on_string}" name="out_multi" />
   </outputs>
   <help>
-Performs a combination of output files from the 'RankFilter' and 'Lookup RI for CAS' tools into two tab-separated files.
+Combines the given 'RankFilter' and 'Lookup RI for CAS' files into two tab-separated files.
 
-Merges data from both input dictionaries based on the Centrotype field.
+This combination is a merge of the given files based on the Centrotype field.
 In the 'RIQC-RankFilter output' the centrotype is found in the 'ID' field (first part before the "-"). In the 'RIQC-Lookup RI for CAS output'
 the centrotype is found in the 'Centrotype' field. 
 
-The files produced are contain either all hits for a compound on a single line (Single) or on separate lines 
+The files produced contain either all hits for a compound on a single line (Single) or on separate lines 
 (Multi). 
 
 .. class:: infomark
diff -r 05ff1c55db84 -r fe4682eb938c rankfilter_GCMS/rankfilter.py
--- a/rankfilter_GCMS/rankfilter.py	Fri Mar 20 17:11:04 2015 +0100
+++ b/rankfilter_GCMS/rankfilter.py	Mon Mar 23 08:40:42 2015 +0100
@@ -142,6 +142,8 @@
     # Convert 'Name' data to list in order to be indexed
     # library_data['Name']=list(library_data['Name'])
 
+    # Try to match on CAS first. If that is not possible (CAS is 'undef'
+    # or not found), fall back to matching on name:
     for hit_cas, hit_name in zip(hit_list['CAS'], hit_list['Name']):
         index = 0
         if hit_cas != 'undef':