# HG changeset patch # User pieter.lukasse@wur.nl # Date 1398331718 -7200 # Node ID 637830ac8bcd4c185054ae50183979b0a62c3645 # Parent ab7f9ec70ffc71edbea8ec0eb6b7ef2f21299e50 added validation in metexp to tabular tool; added workaround/fix for L and D compound types diff -r ab7f9ec70ffc -r 637830ac8bcd MsClust.jar Binary file MsClust.jar has changed diff -r ab7f9ec70ffc -r 637830ac8bcd export_to_metexp_tabular.xml --- a/export_to_metexp_tabular.xml Fri Apr 04 10:25:19 2014 +0200 +++ b/export_to_metexp_tabular.xml Thu Apr 24 11:28:38 2014 +0200 @@ -1,6 +1,6 @@ + version="0.2.0"> Create tabular file for loading into METabolomics EXPlorer database export_to_metexp_tabular.py $rankfilter_and_caslookup_combi $msclust_quant_file $output_result @@ -15,22 +15,33 @@ + help="Metadata information to accompany the results when stored in MetExp DB." > + + + + help="Metadata information to accompany the results when stored in MetExp DB." > + + + help="Name or code to store the results under. This can help you find the results back in MetExpDB." > + + + help="User name or code to store the results under. This can help you find the results back in MetExpDB." > + + + help="Column type to report with the results. This can help you find the results back in MetExpDB." > + + diff -r ab7f9ec70ffc -r 637830ac8bcd rankfilter_GCMS/pdfread.py --- a/rankfilter_GCMS/pdfread.py Fri Apr 04 10:25:19 2014 +0200 +++ b/rankfilter_GCMS/pdfread.py Thu Apr 24 11:28:38 2014 +0200 @@ -52,8 +52,9 @@ for line in hit_list: line = line.strip().translate(None, '\r') if line != '': - hits = line.replace('\n', ' ').replace('\x0c', '').replace('^L', '').split('Hit') - + hits = line.replace('\n', ' ').replace('\x0c', '').replace('^L', '').split('Hit') #solution? : if we wouldn't replace the \n by ' ' but by some special sign, then reading formula would be simpler! + #strange....code seems fine actually...debug! 
See test/data/download.pdf + # strange thing is that it looks like the new line does not end up in the text file, even though it looks like there is a new line in the pdf...perhaps a bug in the pdf2text command in linux? spec_id = hits.pop(0).split(' ')[1] j = 0 for hh in hits: @@ -69,8 +70,13 @@ name_tmp = ':'.join(cell[0].split(':')[1:]) else: name_tmp = cell[0].split(':')[1] + + # ugly workaround for the cases where there ends up being no space between the name and the formula: exhaustive + # replacement of known cases by the same with a white space: name_tmp = name_tmp.replace('lC', 'l C').replace(']C', '] C').replace('sC', 's C').replace('9C', '9 C').replace('.C', '. C') name_tmp = name_tmp.replace(')C', ') C').replace('eC', 'e C').replace('yC', 'y C').replace('oC', 'o C').replace('-C', '- C').replace('dC', 'd C').replace('rC', 'r C') + name_tmp = name_tmp.replace('-, LC', '-, L C').replace('-, DC', '-, D C') + name.append((' '.join(name_tmp.split(' ')[0:len(name_tmp) - 1])).replace(" ", " ")) if name_tmp: if name_tmp.split(' ')[-1][0] == 'C' or name_tmp.split(' ')[-1][0] == 'F' or name_tmp.split(' ')[-1][0] == 'H': diff -r ab7f9ec70ffc -r 637830ac8bcd rankfilter_GCMS/test/test_pdfread.py --- a/rankfilter_GCMS/test/test_pdfread.py Fri Apr 04 10:25:19 2014 +0200 +++ b/rankfilter_GCMS/test/test_pdfread.py Thu Apr 24 11:28:38 2014 +0200 @@ -24,6 +24,13 @@ '18495-0.142537-21284-2.26544e+07-135', '22.6544', ' 714')) self.failUnless(expected_element in data) self.failUnless(len(hitlist_missed) != 0) + ''' + Check for last (dummy) hit: + Hit 6 : (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, LC21H52O6Si5;MF: 658; RMF: 658; Prob 15.6%; CAS: 30645-02-4; Lib: mainlib; ID: 37062. 
+ ''' + expected_element = set(['C21H52O6Si5', ' 30645-02-4', ' mainlib', '15.6', ' (dummy hit)Sorbopyranose, 1,2,3,4,5-pentakis-O-(trimethylsilyl)-, L C21H52O6Si5', '7298-1-9580-1.29014e+07-9', ' 658', '12.9014', '37062']) + self.failUnless(expected_element in data) + if __name__ == "__main__": #import sys;sys.argv = ['', 'Test.test_getPDF'] diff -r ab7f9ec70ffc -r 637830ac8bcd test/test_query_mass_repos.py --- a/test/test_query_mass_repos.py Fri Apr 04 10:25:19 2014 +0200 +++ b/test/test_query_mass_repos.py Thu Apr 24 11:28:38 2014 +0200 @@ -31,7 +31,7 @@ input_file = resource_filename(__name__, "data/service_query_tabular.txt") - molecular_mass_col = "MM" + molecular_mass_col = "mass (Da)" dblink_file = resource_filename(__name__, "data/MFSearcher ExactMassDB service.txt") output_result = resource_filename(__name__, outdir + "metexp_query_results_added.txt")