proteore_filter_keywords_values: filter_kw

comparison filter_kw_val.py @ 5:33ca9ba2495a draft

planemo upload commit 395d6aa47cce1fb7642b7c06133636c43d80f3c7-dirty

author	proteore
date	Tue, 05 Mar 2019 07:37:10 -0500
parents	2080e2a4f209
children	b4641c0f8a82

comparison

equal deleted inserted replaced

-:2080e2a4f209
+:33ca9ba2495a
 This option can be repeated: --kw "kw1,c1,true" --kw "kw2,c1,false" --kw "kw3,c2,true"
 --kw_file       A file that contains keywords to be filtered, the column where this filter applies and
 a boolean value indicating whether the keyword should be matched exactly ["filename,ncol,true/false"]
 --value         The value to be filtered, the column number where this filter applies and the
 operation symbol ["value,ncol,=/>/>=/</<=/!="]
 --values_range  Range of values to be kept, example: --values_range 5 20 c1 true
+--operation     Whether to 'keep' or 'discard' the lines matched by the filter(s)
 --operator      The operator used to filter with several keywords/values : AND or OR
 -o --output     The output filename
---filtered_file    The file contains removed lines
+--discarded_lines    The file that contains the removed lines
 -s --sort_col   Column used to sort the file, followed by ",true" for reverse sorting or ",false" otherwise, example: c1,false
 """
 parser = argparse.ArgumentParser()
 parser.add_argument("-i", "--input", help="Input file", required=True)
 parser.add_argument("--kw", nargs="+", action="append", help="")
 parser.add_argument("--kw_file", nargs="+", action="append", help="")
 parser.add_argument("--value", nargs="+", action="append", help="")
 parser.add_argument("--values_range", nargs="+", action="append", help="")
+parser.add_argument("--operation", default="keep", type=str, choices=['keep','discard'],help='')
 parser.add_argument("--operator", default="OR", type=str, choices=['AND','OR'],help='')
 parser.add_argument("-o", "--output", default="output.txt")
-parser.add_argument("--filtered_file", default="filtered_output.txt")
+parser.add_argument("--discarded_lines", default="filtered_output.txt")
 parser.add_argument("-s","--sort_col", help="")
 args = parser.parse_args()
 filters(args)
 def str_to_bool(v):
 if v.lower() in ('yes', 'true', 't', 'y', '1'):
 return True
 def filters(args):
 filename = args.input.split(",")[0]
 header = str_to_bool(args.input.split(",")[1])
 csv_file = blank_to_NA(read_file(filename))
 results_dict = {}
+operator_dict = { "Equal" : "=" , "Higher" : ">" , "Equal-or-higher" : ">=" , "Lower" : "<" , "Equal-or-lower" : "<=" , "Different" : "!=" }
 if args.kw:
 keywords = args.kw
 for k in keywords:
 results_dict=filter_keyword(csv_file, header, results_dict, k[0], k[1], k[2])
 results_dict=filter_keyword(csv_file, header, results_dict, keywords, kf[3], kf[4])
 if args.value:
 for v in args.value:
 v[0] = v[0].replace(",",".")
+v[2] = operator_dict[v[2]]
 if is_number("float", v[0]):
 csv_file = comma_number_to_float(csv_file,column_from_txt(v[1]),header)
 results_dict = filter_value(csv_file, header, results_dict, v[0], v[1], v[2])
 else:
 raise ValueError("Please enter a number in filter by value")
 sort_col=args.sort_col.split(",")[0]
 sort_col=column_from_txt(sort_col)
 reverse=str_to_bool(args.sort_col.split(",")[1])
 remaining_lines= sort_by_column(remaining_lines,sort_col,reverse,header)
 filtered_lines = sort_by_column(filtered_lines,sort_col,reverse,header)
+#swap lists of lines (files) if 'keep' option selected
+if args.operation == "keep" :
+swap = remaining_lines, filtered_lines
+remaining_lines = swap[1]
+filtered_lines = swap[0]
 # Write results to output
 with open(args.output,"w") as output :
 writer = csv.writer(output,delimiter="\t")
 writer.writerows(remaining_lines)
 # Write filtered lines to filtered_output
-with open(args.filtered_file,"w") as filtered_output :
+with open(args.discarded_lines,"w") as filtered_output :
 writer = csv.writer(filtered_output,delimiter="\t")
 writer.writerows(filtered_lines)
 #function to sort the csv_file by value in a specific column
 def sort_by_column(tab,sort_col,reverse,header):

Mercurial > repos > proteore > proteore_filter_keywords_values

comparison filter_kw_val.py @ 5:33ca9ba2495a draft