comparison filter_kw_val.py @ 5:33ca9ba2495a draft

planemo upload commit 395d6aa47cce1fb7642b7c06133636c43d80f3c7-dirty
author proteore
date Tue, 05 Mar 2019 07:37:10 -0500
parents 2080e2a4f209
children b4641c0f8a82
comparison
equal deleted inserted replaced
4:2080e2a4f209 5:33ca9ba2495a
9 This option can be repeated: --kw "kw1,c1,true" --kw "kw2,c1,false" --kw "kw3,c2,true" 9 This option can be repeated: --kw "kw1,c1,true" --kw "kw2,c1,false" --kw "kw3,c2,true"
10 --kw_file A file that contains keywords to be filtered, the column where this filter applies and 10 --kw_file A file that contains keywords to be filtered, the column where this filter applies and
11 a boolean value indicating whether the keyword should be matched exactly ["filename,ncol,true/false"] 11 a boolean value indicating whether the keyword should be matched exactly ["filename,ncol,true/false"]
12 --value The value to be filtered, the column number where this filter applies and the 12 --value The value to be filtered, the column number where this filter applies and the
13 operation symbol ["value,ncol,=/>/>=/</<=/!="] 13 operation symbol ["value,ncol,=/>/>=/</<=/!="]
14 --values_range Range of values to be kept, example: --values_range 5 20 c1 true 14 --values_range Range of values to be kept, example: --values_range 5 20 c1 true
15 --operation 'keep' or 'discard' lines concerned by filter(s)
15 --operator The operator used to filter with several keywords/values : AND or OR 16 --operator The operator used to filter with several keywords/values : AND or OR
16 -o --output The output filename 17 -o --output The output filename
17 --filtered_file The file that contains the removed lines 18 --discarded_lines The file that contains the removed lines
18 -s --sort_col Used column to sort the file, ",true" for reverse sorting, ",false" otherwise example : c1,false 19 -s --sort_col Used column to sort the file, ",true" for reverse sorting, ",false" otherwise example : c1,false
19 """ 20 """
20 parser = argparse.ArgumentParser() 21 parser = argparse.ArgumentParser()
21 parser.add_argument("-i", "--input", help="Input file", required=True) 22 parser.add_argument("-i", "--input", help="Input file", required=True)
22 parser.add_argument("--kw", nargs="+", action="append", help="") 23 parser.add_argument("--kw", nargs="+", action="append", help="")
23 parser.add_argument("--kw_file", nargs="+", action="append", help="") 24 parser.add_argument("--kw_file", nargs="+", action="append", help="")
24 parser.add_argument("--value", nargs="+", action="append", help="") 25 parser.add_argument("--value", nargs="+", action="append", help="")
25 parser.add_argument("--values_range", nargs="+", action="append", help="") 26 parser.add_argument("--values_range", nargs="+", action="append", help="")
27 parser.add_argument("--operation", default="keep", type=str, choices=['keep','discard'],help='')
26 parser.add_argument("--operator", default="OR", type=str, choices=['AND','OR'],help='') 28 parser.add_argument("--operator", default="OR", type=str, choices=['AND','OR'],help='')
27 parser.add_argument("-o", "--output", default="output.txt") 29 parser.add_argument("-o", "--output", default="output.txt")
28 parser.add_argument("--filtered_file", default="filtered_output.txt") 30 parser.add_argument("--discarded_lines", default="filtered_output.txt")
29 parser.add_argument("-s","--sort_col", help="") 31 parser.add_argument("-s","--sort_col", help="")
30 32
31 args = parser.parse_args() 33 args = parser.parse_args()
34
32 filters(args) 35 filters(args)
33 36
34 def str_to_bool(v): 37 def str_to_bool(v):
35 if v.lower() in ('yes', 'true', 't', 'y', '1'): 38 if v.lower() in ('yes', 'true', 't', 'y', '1'):
36 return True 39 return True
60 def filters(args): 63 def filters(args):
61 filename = args.input.split(",")[0] 64 filename = args.input.split(",")[0]
62 header = str_to_bool(args.input.split(",")[1]) 65 header = str_to_bool(args.input.split(",")[1])
63 csv_file = blank_to_NA(read_file(filename)) 66 csv_file = blank_to_NA(read_file(filename))
64 results_dict = {} 67 results_dict = {}
68 operator_dict = { "Equal" : "=" , "Higher" : ">" , "Equal-or-higher" : ">=" , "Lower" : "<" , "Equal-or-lower" : "<=" , "Different" : "!=" }
65 69
66 if args.kw: 70 if args.kw:
67 keywords = args.kw 71 keywords = args.kw
68 for k in keywords: 72 for k in keywords:
69 results_dict=filter_keyword(csv_file, header, results_dict, k[0], k[1], k[2]) 73 results_dict=filter_keyword(csv_file, header, results_dict, k[0], k[1], k[2])
77 results_dict=filter_keyword(csv_file, header, results_dict, keywords, kf[3], kf[4]) 81 results_dict=filter_keyword(csv_file, header, results_dict, keywords, kf[3], kf[4])
78 82
79 if args.value: 83 if args.value:
80 for v in args.value: 84 for v in args.value:
81 v[0] = v[0].replace(",",".") 85 v[0] = v[0].replace(",",".")
86 v[2] = operator_dict[v[2]]
82 if is_number("float", v[0]): 87 if is_number("float", v[0]):
83 csv_file = comma_number_to_float(csv_file,column_from_txt(v[1]),header) 88 csv_file = comma_number_to_float(csv_file,column_from_txt(v[1]),header)
84 results_dict = filter_value(csv_file, header, results_dict, v[0], v[1], v[2]) 89 results_dict = filter_value(csv_file, header, results_dict, v[0], v[1], v[2])
85 else: 90 else:
86 raise ValueError("Please enter a number in filter by value") 91 raise ValueError("Please enter a number in filter by value")
121 sort_col=args.sort_col.split(",")[0] 126 sort_col=args.sort_col.split(",")[0]
122 sort_col=column_from_txt(sort_col) 127 sort_col=column_from_txt(sort_col)
123 reverse=str_to_bool(args.sort_col.split(",")[1]) 128 reverse=str_to_bool(args.sort_col.split(",")[1])
124 remaining_lines= sort_by_column(remaining_lines,sort_col,reverse,header) 129 remaining_lines= sort_by_column(remaining_lines,sort_col,reverse,header)
125 filtered_lines = sort_by_column(filtered_lines,sort_col,reverse,header) 130 filtered_lines = sort_by_column(filtered_lines,sort_col,reverse,header)
131
132 #swap lists of lines (files) if 'keep' option selected
133 if args.operation == "keep" :
134 swap = remaining_lines, filtered_lines
135 remaining_lines = swap[1]
136 filtered_lines = swap[0]
126 137
127 # Write results to output 138 # Write results to output
128 with open(args.output,"w") as output : 139 with open(args.output,"w") as output :
129 writer = csv.writer(output,delimiter="\t") 140 writer = csv.writer(output,delimiter="\t")
130 writer.writerows(remaining_lines) 141 writer.writerows(remaining_lines)
131 142
132 # Write filtered lines to filtered_output 143 # Write filtered lines to filtered_output
133 with open(args.filtered_file,"w") as filtered_output : 144 with open(args.discarded_lines,"w") as filtered_output :
134 writer = csv.writer(filtered_output,delimiter="\t") 145 writer = csv.writer(filtered_output,delimiter="\t")
135 writer.writerows(filtered_lines) 146 writer.writerows(filtered_lines)
136 147
137 #function to sort the csv_file by value in a specific column 148 #function to sort the csv_file by value in a specific column
138 def sort_by_column(tab,sort_col,reverse,header): 149 def sort_by_column(tab,sort_col,reverse,header):