Mercurial > repos > pieterlukasse > prims_metabolomics
comparison match_library.py @ 5:b1d339e0147e
files in library reader
author | pieter.lukasse@wur.nl |
---|---|
date | Tue, 21 Jan 2014 15:47:39 +0100 |
parents | 9d5f4f5f764b |
children | eabfda6213ae |
comparison
equal
deleted
inserted
replaced
4:80075a4c6543 | 5:b1d339e0147e |
---|---|
15 ''' | 15 ''' |
16 Returns a Galaxy formatted list of tuples containing all possibilities for the | 16 Returns a Galaxy formatted list of tuples containing all possibilities for the |
17 GC-column types. Used by the library_lookup.xml tool | 17 GC-column types. Used by the library_lookup.xml tool |
18 @param library_file: given library file from which the list of GC-column types is extracted | 18 @param library_file: given library file from which the list of GC-column types is extracted |
19 ''' | 19 ''' |
20 (data, header) = read_library(library_file) | 20 if library_file == "": |
21 | 21 galaxy_output = [("", "", False)] |
22 if 'columntype' not in header: | 22 else: |
23 raise IOError('Missing columns in ', library_file) | 23 (data, header) = read_library(library_file) |
24 | 24 |
25 # Filter data on column type | 25 if 'columntype' not in header: |
26 column_type = header.index("columntype") | 26 raise IOError('Missing columns in ', library_file) |
27 amounts_in_list_dict = count_occurrence([row[column_type] for row in data]) | 27 |
28 galaxy_output = [(str(a) + "(" + str(b) + ")", a, False) for a, b in amounts_in_list_dict.items()] | 28 # Filter data on column type |
29 column_type = header.index("columntype") | |
30 amounts_in_list_dict = count_occurrence([row[column_type] for row in data]) | |
31 galaxy_output = [(str(a) + "(" + str(b) + ")", a, False) for a, b in amounts_in_list_dict.items()] | |
32 | |
29 return(galaxy_output) | 33 return(galaxy_output) |
30 | 34 |
31 | 35 |
32 def filter_column(library_file, column_type_name): | 36 def filter_column(library_file, column_type_name): |
33 ''' | 37 ''' |
34 Filters the Retention Index database on column type | 38 Filters the Retention Index database on column type |
35 @param library_file: file containing the database | 39 @param library_file: file containing the database |
36 @param column_type_name: column type to filter on | 40 @param column_type_name: column type to filter on |
37 ''' | 41 ''' |
38 (data, header) = read_library(library_file) | 42 if library_file == "": |
39 | 43 galaxy_output = [("", "", False)] |
40 if ('columntype' not in header or | 44 else: |
41 'columnphasetype' not in header): | 45 (data, header) = read_library(library_file) |
42 raise IOError('Missing columns in ', library_file) | 46 |
43 | 47 if ('columntype' not in header or |
44 column_type = header.index("columntype") | 48 'columnphasetype' not in header): |
45 statphase = header.index("columnphasetype") | 49 raise IOError('Missing columns in ', library_file) |
46 | 50 |
47 # Filter data on column type name | 51 column_type = header.index("columntype") |
48 statphase_list = [line[statphase] for line in data if line[column_type] == column_type_name] | 52 statphase = header.index("columnphasetype") |
49 amounts_in_list_dict = count_occurrence(statphase_list) | 53 |
50 galaxy_output = [(str(a) + "(" + str(b) + ")", a, False)for a, b in amounts_in_list_dict.items()] | 54 # Filter data on column type name |
55 statphase_list = [line[statphase] for line in data if line[column_type] == column_type_name] | |
56 amounts_in_list_dict = count_occurrence(statphase_list) | |
57 galaxy_output = [(str(a) + "(" + str(b) + ")", a, False)for a, b in amounts_in_list_dict.items()] | |
58 | |
51 return(sorted(galaxy_output)) | 59 return(sorted(galaxy_output)) |
52 | 60 |
53 | 61 |
54 def filter_column2(library_file, column_type_name, statphase): | 62 def filter_column2(library_file, column_type_name, statphase): |
55 ''' | 63 ''' |
56 Filters the Retention Index database on column type | 64 Filters the Retention Index database on column type |
57 @param library_file: file containing the database | 65 @param library_file: file containing the database |
58 @param column_type_name: column type to filter on | 66 @param column_type_name: column type to filter on |
59 @param statphase: stationary phase of the column to filter on | 67 @param statphase: stationary phase of the column to filter on |
60 ''' | 68 ''' |
61 (data, header) = read_library(library_file) | 69 if library_file == "": |
62 | 70 galaxy_output = [("", "", False)] |
63 if ('columntype' not in header or | 71 else: |
64 'columnphasetype' not in header or | 72 (data, header) = read_library(library_file) |
65 'columnname' not in header): | 73 |
66 raise IOError('Missing columns in ', library_file) | 74 if ('columntype' not in header or |
67 | 75 'columnphasetype' not in header or |
68 column_type_column = header.index("columntype") | 76 'columnname' not in header): |
69 statphase_column = header.index("columnphasetype") | 77 raise IOError('Missing columns in ', library_file) |
70 column_name_column = header.index("columnname") | 78 |
71 | 79 column_type_column = header.index("columntype") |
72 # Filter data on given column type name and stationary phase | 80 statphase_column = header.index("columnphasetype") |
73 statphase_list = [line[column_name_column] for line in data if line[column_type_column] == column_type_name and | 81 column_name_column = header.index("columnname") |
74 line[statphase_column] == statphase] | 82 |
75 amounts_in_list_dict = count_occurrence(statphase_list) | 83 # Filter data on given column type name and stationary phase |
76 galaxy_output = [(str(a) + "(" + str(b) + ")", a, False)for a, b in amounts_in_list_dict.items()] | 84 statphase_list = [line[column_name_column] for line in data if line[column_type_column] == column_type_name and |
85 line[statphase_column] == statphase] | |
86 amounts_in_list_dict = count_occurrence(statphase_list) | |
87 galaxy_output = [(str(a) + "(" + str(b) + ")", a, False)for a, b in amounts_in_list_dict.items()] | |
88 | |
77 return(sorted(galaxy_output)) | 89 return(sorted(galaxy_output)) |
78 | 90 |
79 | 91 |
80 def read_library(filename): | 92 def read_library(filename): |
81 ''' | 93 ''' |
96 fill a Galaxy drop-down combo box. | 108 fill a Galaxy drop-down combo box. |
97 | 109 |
98 ''' | 110 ''' |
99 files = glob.glob(dir_name + "/*.txt") | 111 files = glob.glob(dir_name + "/*.txt") |
100 if len(files) == 0: | 112 if len(files) == 0: |
101 raise Exception("Configuration error: no library files found in <galaxy-home-dir>/" + dir_name) | 113 # Configuration error: no library files found in <galaxy-home-dir>/" + dir_name : |
114 galaxy_output = [("Configuration error: no library files found", "", False)] | |
102 else: | 115 else: |
103 galaxy_output = [(str(get_file_name_no_ext(file_name)), str(os.path.abspath(file_name)), False) for file_name in files] | 116 galaxy_output = [(str(get_file_name_no_ext(file_name)), str(os.path.abspath(file_name)), False) for file_name in files] |
104 return(galaxy_output) | 117 return(galaxy_output) |
105 | 118 |
106 def get_file_name_no_ext(full_name): | 119 def get_file_name_no_ext(full_name): |