Mercurial > repos > proteore > proteore_filter_keywords_values
comparison filter_kw_val.py @ 5:33ca9ba2495a draft
planemo upload commit 395d6aa47cce1fb7642b7c06133636c43d80f3c7-dirty
author | proteore |
---|---|
date | Tue, 05 Mar 2019 07:37:10 -0500 |
parents | 2080e2a4f209 |
children | b4641c0f8a82 |
comparison
equal
deleted
inserted
replaced
4:2080e2a4f209 | 5:33ca9ba2495a |
---|---|
9 This option can be repeated: --kw "kw1,c1,true" --kw "kw2,c1,false" --kw "kw3,c2,true" | 9 This option can be repeated: --kw "kw1,c1,true" --kw "kw2,c1,false" --kw "kw3,c2,true" |
10 --kw_file A file that contains keywords to be filtered, the column where this filter applies and | 10 --kw_file A file that contains keywords to be filtered, the column where this filter applies and |
11 a boolean value indicating whether the keyword must match exactly ["filename,ncol,true/false"] | 11 a boolean value indicating whether the keyword must match exactly ["filename,ncol,true/false"] |
12 --value The value to be filtered, the column number where this filter applies and the | 12 --value The value to be filtered, the column number where this filter applies and the |
13 operation symbol ["value,ncol,=/>/>=/</<=/!="] | 13 operation symbol ["value,ncol,=/>/>=/</<=/!="] |
14 --values_range range of values to be kept, example: --values_range 5 20 c1 true | 14 --values_range range of values to be kept, example: --values_range 5 20 c1 true |
15 --operation 'keep' or 'discard' lines concerned by filter(s) | |
15 --operator The operator used to filter with several keywords/values : AND or OR | 16 --operator The operator used to filter with several keywords/values : AND or OR |
16 -o --output The output filename | 17 -o --output The output filename |
17 --filtered_file The file that contains the removed lines | 18 --discarded_lines The file that contains the removed lines |
18 -s --sort_col Column used to sort the file, followed by ",true" for reverse sorting or ",false" otherwise; example: c1,false | 19 -s --sort_col Column used to sort the file, followed by ",true" for reverse sorting or ",false" otherwise; example: c1,false |
19 """ | 20 """ |
20 parser = argparse.ArgumentParser() | 21 parser = argparse.ArgumentParser() |
21 parser.add_argument("-i", "--input", help="Input file", required=True) | 22 parser.add_argument("-i", "--input", help="Input file", required=True) |
22 parser.add_argument("--kw", nargs="+", action="append", help="") | 23 parser.add_argument("--kw", nargs="+", action="append", help="") |
23 parser.add_argument("--kw_file", nargs="+", action="append", help="") | 24 parser.add_argument("--kw_file", nargs="+", action="append", help="") |
24 parser.add_argument("--value", nargs="+", action="append", help="") | 25 parser.add_argument("--value", nargs="+", action="append", help="") |
25 parser.add_argument("--values_range", nargs="+", action="append", help="") | 26 parser.add_argument("--values_range", nargs="+", action="append", help="") |
27 parser.add_argument("--operation", default="keep", type=str, choices=['keep','discard'],help='') | |
26 parser.add_argument("--operator", default="OR", type=str, choices=['AND','OR'],help='') | 28 parser.add_argument("--operator", default="OR", type=str, choices=['AND','OR'],help='') |
27 parser.add_argument("-o", "--output", default="output.txt") | 29 parser.add_argument("-o", "--output", default="output.txt") |
28 parser.add_argument("--filtered_file", default="filtered_output.txt") | 30 parser.add_argument("--discarded_lines", default="filtered_output.txt") |
29 parser.add_argument("-s","--sort_col", help="") | 31 parser.add_argument("-s","--sort_col", help="") |
30 | 32 |
31 args = parser.parse_args() | 33 args = parser.parse_args() |
34 | |
32 filters(args) | 35 filters(args) |
33 | 36 |
34 def str_to_bool(v): | 37 def str_to_bool(v): |
35 if v.lower() in ('yes', 'true', 't', 'y', '1'): | 38 if v.lower() in ('yes', 'true', 't', 'y', '1'): |
36 return True | 39 return True |
60 def filters(args): | 63 def filters(args): |
61 filename = args.input.split(",")[0] | 64 filename = args.input.split(",")[0] |
62 header = str_to_bool(args.input.split(",")[1]) | 65 header = str_to_bool(args.input.split(",")[1]) |
63 csv_file = blank_to_NA(read_file(filename)) | 66 csv_file = blank_to_NA(read_file(filename)) |
64 results_dict = {} | 67 results_dict = {} |
68 operator_dict = { "Equal" : "=" , "Higher" : ">" , "Equal-or-higher" : ">=" , "Lower" : "<" , "Equal-or-lower" : "<=" , "Different" : "!=" } | |
65 | 69 |
66 if args.kw: | 70 if args.kw: |
67 keywords = args.kw | 71 keywords = args.kw |
68 for k in keywords: | 72 for k in keywords: |
69 results_dict=filter_keyword(csv_file, header, results_dict, k[0], k[1], k[2]) | 73 results_dict=filter_keyword(csv_file, header, results_dict, k[0], k[1], k[2]) |
77 results_dict=filter_keyword(csv_file, header, results_dict, keywords, kf[3], kf[4]) | 81 results_dict=filter_keyword(csv_file, header, results_dict, keywords, kf[3], kf[4]) |
78 | 82 |
79 if args.value: | 83 if args.value: |
80 for v in args.value: | 84 for v in args.value: |
81 v[0] = v[0].replace(",",".") | 85 v[0] = v[0].replace(",",".") |
86 v[2] = operator_dict[v[2]] | |
82 if is_number("float", v[0]): | 87 if is_number("float", v[0]): |
83 csv_file = comma_number_to_float(csv_file,column_from_txt(v[1]),header) | 88 csv_file = comma_number_to_float(csv_file,column_from_txt(v[1]),header) |
84 results_dict = filter_value(csv_file, header, results_dict, v[0], v[1], v[2]) | 89 results_dict = filter_value(csv_file, header, results_dict, v[0], v[1], v[2]) |
85 else: | 90 else: |
86 raise ValueError("Please enter a number in filter by value") | 91 raise ValueError("Please enter a number in filter by value") |
121 sort_col=args.sort_col.split(",")[0] | 126 sort_col=args.sort_col.split(",")[0] |
122 sort_col=column_from_txt(sort_col) | 127 sort_col=column_from_txt(sort_col) |
123 reverse=str_to_bool(args.sort_col.split(",")[1]) | 128 reverse=str_to_bool(args.sort_col.split(",")[1]) |
124 remaining_lines= sort_by_column(remaining_lines,sort_col,reverse,header) | 129 remaining_lines= sort_by_column(remaining_lines,sort_col,reverse,header) |
125 filtered_lines = sort_by_column(filtered_lines,sort_col,reverse,header) | 130 filtered_lines = sort_by_column(filtered_lines,sort_col,reverse,header) |
131 | |
132 #swap lists of lines (files) if 'keep' option selected | |
133 if args.operation == "keep" : | |
134 swap = remaining_lines, filtered_lines | |
135 remaining_lines = swap[1] | |
136 filtered_lines = swap[0] | |
126 | 137 |
127 # Write results to output | 138 # Write results to output |
128 with open(args.output,"w") as output : | 139 with open(args.output,"w") as output : |
129 writer = csv.writer(output,delimiter="\t") | 140 writer = csv.writer(output,delimiter="\t") |
130 writer.writerows(remaining_lines) | 141 writer.writerows(remaining_lines) |
131 | 142 |
132 # Write filtered lines to filtered_output | 143 # Write filtered lines to filtered_output |
133 with open(args.filtered_file,"w") as filtered_output : | 144 with open(args.discarded_lines,"w") as filtered_output : |
134 writer = csv.writer(filtered_output,delimiter="\t") | 145 writer = csv.writer(filtered_output,delimiter="\t") |
135 writer.writerows(filtered_lines) | 146 writer.writerows(filtered_lines) |
136 | 147 |
137 #function to sort the csv_file by value in a specific column | 148 #function to sort the csv_file by value in a specific column |
138 def sort_by_column(tab,sort_col,reverse,header): | 149 def sort_by_column(tab,sort_col,reverse,header): |