annotate filter_kw_val.py @ 7:6f32c1e12572 draft default tip

planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
author proteore
date Fri, 01 Jun 2018 11:10:47 -0400
parents c6ba1e6f6869
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
1 import argparse, re, csv
0
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
2
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
def options():
    """Parse the command line and run the filters.

    Every filter option may be repeated; each occurrence takes its fields
    as separate, space-separated tokens (they are read positionally).

    Options:
        -i, --input       Input file and header flag joined by a comma:
                          "filename,true/false".
        --kw              KEYWORDS COLUMN EXACT. KEYWORDS is a ";"-separated
                          list, COLUMN a column spec such as c1, EXACT
                          true/false for exact versus substring matching.
                          Example: --kw "kw1;kw2" c1 true --kw kw3 c2 false
        --kw_file         FILE COLUMN EXACT. Same as --kw, with one keyword
                          per line read from FILE.
        --value           VALUE COLUMN OPERATOR. OPERATOR is one of
                          = > >= < <= != (per the original usage notes).
        --values_range    BOTTOM TOP COLUMN INCLUSIVE.
                          Example: --values_range 5 20 c1 true
        --operator        AND or OR (default OR): how the verdicts of
                          several filters are combined for a row.
        -o, --output      Output file for the rows that are kept.
        --filtered_file   Output file for the rows that are removed.
        -s, --sort_col    Column used to sort both outputs plus a reverse
                          flag: "c1,false".
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i", "--input", required=True,
        help='Input file and header flag, e.g. "input.tsv,true"')
    parser.add_argument(
        "--kw", nargs="+", action="append",
        help='Keyword filter: KEYWORDS COLUMN EXACT, '
             'e.g. --kw "kw1;kw2" c1 true (repeatable)')
    parser.add_argument(
        "--kw_file", nargs="+", action="append",
        help="Keyword filter with keywords read from a file (one per line): "
             "FILE COLUMN EXACT (repeatable)")
    parser.add_argument(
        "--value", nargs="+", action="append",
        help="Value filter: VALUE COLUMN OPERATOR, "
             "OPERATOR among = > >= < <= != (repeatable)")
    parser.add_argument(
        "--values_range", nargs="+", action="append",
        help="Range filter: BOTTOM TOP COLUMN INCLUSIVE, "
             "e.g. --values_range 5 20 c1 true (repeatable)")
    parser.add_argument(
        "--operator", default="OR", type=str, choices=['AND', 'OR'],
        help="How to combine several filters for a row (default: OR)")
    parser.add_argument(
        "-o", "--output", default="output.txt",
        help="File receiving the rows that are kept")
    parser.add_argument(
        "--filtered_file", default="filtered_output.txt",
        help="File receiving the rows that are removed")
    parser.add_argument(
        "-s", "--sort_col",
        help='Sort column and reverse flag, e.g. "c1,false"')

    args = parser.parse_args()
    filters(args)
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
33
7
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
def str_to_bool(v):
    """Convert a textual yes/no flag into a bool.

    Accepted spellings (case-insensitive, surrounding whitespace ignored):
    yes/true/t/y/1 for True and no/false/f/n/0 for False. A value that is
    already a bool is returned unchanged.

    Raises:
        argparse.ArgumentTypeError: if the text is not a known spelling.
    """
    if isinstance(v, bool):
        return v

    flag = v.strip().lower()
    if flag in ('yes', 'true', 't', 'y', '1'):
        return True
    if flag in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
41
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
42 #Check if a variable is a float or an integer
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
def is_number(number_format, n):
    """Tell whether the string *n* is written as a number.

    Args:
        number_format: "int" for an optionally negative whole number, or
            "float" for an optionally negative decimal number. Whole
            numbers such as "5" also count as valid floats.
        n: the text to check.

    Returns:
        True when *n* matches the requested format, False otherwise
        (including when *number_format* is neither "int" nor "float").
    """
    patterns = {
        "int": r"^-?[0-9]+$",
        # The dot is escaped so that only a real decimal point is accepted,
        # and the fractional part is optional so that "5" is a valid float.
        "float": r"^-?[0-9]+(\.[0-9]+)?$",
    }
    pattern = patterns.get(number_format)
    if pattern is None:
        return False
    return re.match(pattern, n) is not None
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
53
7
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
54 #Filter the document
0
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
def filters(args):
    """Apply every requested filter to the input file and write the results.

    Reads the tab-separated input named by ``args.input``
    ("filename,true/false"), runs the keyword, keyword-file, value and
    value-range filters, then splits the rows between ``args.output``
    (rows kept) and ``args.filtered_file`` (rows removed). With the AND
    operator a row is removed only when every filter flagged it; otherwise
    (OR) when at least one did. A data row that received no verdict at all
    is kept. When the input has a header, it is written to both files.
    Both outputs are sorted when ``args.sort_col`` ("column,true/false")
    is given.

    Raises:
        ValueError: if ``args.input`` has no ",true/false" part, or if a
            value / range bound is not a number.
    """
    if "," not in args.input:
        raise ValueError('--input expects "filename,true/false"')
    # Split on the LAST comma so that a comma inside the path is preserved.
    filename, _, header_flag = args.input.rpartition(",")
    header = str_to_bool(header_flag)
    csv_file = read_file(filename)
    results_dict = {}

    for k in args.kw or []:
        results_dict = filter_keyword(csv_file, header, results_dict, k[0], k[1], k[2])

    for kf in args.kw_file or []:
        keywords = read_option(kf[0])
        results_dict = filter_keyword(csv_file, header, results_dict, keywords, kf[1], kf[2])

    for v in args.value or []:
        if not _is_numeric_text(v[0]):
            raise ValueError("Please enter a number in filter by value")
        results_dict = filter_value(csv_file, header, results_dict, v[0], v[1], v[2])

    for vr in args.values_range or []:
        # An invalid bound is reported instead of silently skipping the filter.
        if not (_is_numeric_text(vr[0]) and _is_numeric_text(vr[1])):
            raise ValueError("Please enter numbers in filter by values range")
        results_dict = filter_values_range(csv_file, header, results_dict, vr[0], vr[1], vr[2], vr[3])

    remaining_lines = []
    filtered_lines = []

    if header is True and csv_file:
        remaining_lines.append(csv_file[0])
        filtered_lines.append(csv_file[0])

    combine = all if args.operator == "AND" else any
    for id_line, line in enumerate(csv_file):
        if header is True and id_line == 0:
            continue  # the header was already copied to both outputs
        verdicts = results_dict.get(id_line)
        # The emptiness check matters: all([]) is True, and a row nobody
        # judged must be kept rather than dropped from both files.
        if verdicts and combine(verdicts):
            filtered_lines.append(line)
        else:
            remaining_lines.append(line)

    # Sort the results by column
    if args.sort_col:
        sort_spec = args.sort_col.split(",")
        sort_col = column_from_txt(sort_spec[0])
        reverse = str_to_bool(sort_spec[1])
        remaining_lines = sort_by_column(remaining_lines, sort_col, reverse, header)
        filtered_lines = sort_by_column(filtered_lines, sort_col, reverse, header)

    _write_tsv(args.output, remaining_lines)
    _write_tsv(args.filtered_file, filtered_lines)


def _is_numeric_text(text):
    """Return True when *text* is accepted as a float or as an int."""
    return bool(is_number("float", text) or is_number("int", text))


def _write_tsv(path, rows):
    """Write *rows* to *path* as tab-separated values."""
    with open(path, "w") as handle:
        csv.writer(handle, delimiter="\t").writerows(rows)
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
122
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
123 #function to sort the csv_file by value in a specific column
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
def sort_by_column(tab, sort_col, reverse, header):
    """Sort the rows of *tab* by the values found in one column.

    The column is sorted as integers when every data row holds an integer
    there, as floats when every data row holds a number, and as plain text
    otherwise. The decision looks at all rows, so a mixed column no longer
    raises part-way through the sort.

    Args:
        tab: list of rows (each row a list of strings).
        sort_col: zero-based index of the column to sort on.
        reverse: True for descending order.
        header: True when tab[0] is a header that must stay first.

    Returns:
        The sorted table, or *tab* itself when it holds at most one row.
    """
    if len(tab) <= 1:  # nothing but a header or a single row
        return tab

    if header is True:
        head, body = tab[0], tab[1:]
    else:
        head, body = None, tab

    if _all_convertible(body, sort_col, int):
        body = sorted(body, key=lambda row: int(row[sort_col]), reverse=reverse)
    elif _all_convertible(body, sort_col, float):
        body = sorted(body, key=lambda row: float(row[sort_col]), reverse=reverse)
    else:
        body = sorted(body, key=lambda row: row[sort_col], reverse=reverse)

    if header is True:
        return [head] + body
    return body


def _all_convertible(rows, sort_col, cast):
    """Return True when cast() accepts the sort cell of every row."""
    for row in rows:
        try:
            number = cast(row[sort_col])
        except ValueError:
            return False
        if number != number:  # NaN has no usable ordering: sort as text
            return False
    return True
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
141
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
142 #Read the keywords file to extract the list of keywords
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
def read_option(filename):
    """Read a keywords file and return its keywords as one ";"-joined string.

    Each non-blank line of the file is one keyword. Lines made only of
    whitespace (spaces, tabs, ...) are ignored; kept lines are returned
    as written.
    """
    with open(filename, "r") as f:
        keywords = [line for line in f.read().splitlines() if line.strip()]

    return ";".join(keywords)
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
150
7
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
151 # Read input file
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
def read_file(filename):
    """Read a tab-separated file and return its rows as lists of strings.

    Rows that are empty, or whose cells contain nothing but whitespace of
    any kind, are dropped.
    """
    with open(filename, "r") as f:
        reader = csv.reader(f, delimiter="\t")
        # strip() covers every whitespace character, not only the space.
        return [row for row in reader if "".join(row).strip()]
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
162
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
163 #seek for keywords in rows of csvfile, return a dictionary of boolean (true if keyword found, false otherwise)
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
def filter_keyword(csv_file, header, results_dict, keywords, ncol, match):
    """Flag the rows whose cell in one column contains a keyword.

    Args:
        csv_file: list of rows (each row a list of strings).
        header: True when row 0 is a header, which is then skipped.
        results_dict: maps a row index to the list of verdicts collected
            so far; it is updated in place and returned.
        keywords: ";"-separated keywords; matching is case-insensitive.
        ncol: column spec, converted by column_from_txt.
        match: textual boolean; true for exact matching of a cell term,
            false for substring matching.

    Returns:
        results_dict, with one more boolean appended for every data row
        (True when a keyword was found in the row).
    """
    match = str_to_bool(match)
    ncol = column_from_txt(ncol)

    # Build a fresh list instead of removing items while iterating: the
    # in-place removal skipped consecutive blanks, and a surviving ""
    # keyword is a substring of every cell.
    wanted = [k.strip() for k in keywords.upper().split(";")]
    wanted = [k for k in wanted if k]
    wanted_exact = set(wanted)

    for id_line, line in enumerate(csv_file):
        if header is True and id_line == 0:
            continue
        # A cell may itself hold several ";"-separated terms. They are
        # stripped like the keywords so that "a; b" matches keyword "b".
        cell_terms = [term.strip().upper()
                      for term in line[ncol].replace('"', "").split(";")]

        if match is True:
            found_in_line = any(term in wanted_exact for term in cell_terms)
        else:
            found_in_line = any(k in term for term in cell_terms for k in wanted)

        results_dict.setdefault(id_line, []).append(found_in_line)

    return results_dict
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
189
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
190 #filter by a given value in rows of csvfile, return a dictionary of booleans (true if value filtered, false otherwise)
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
def filter_value(csv_file, header, results_dict, filter_value, ncol, opt):
    """Flag the rows whose numeric cell satisfies a comparison.

    Args:
        csv_file: list of rows (each row a list of strings).
        header: True when row 0 is a header, which is then skipped.
        results_dict: maps a row index to the list of verdicts collected
            so far; it is updated in place and returned.
        filter_value: the reference number, as text.
        ncol: column spec, converted by column_from_txt.
        opt: comparison operator symbol, handed to value_compare.

    Returns:
        results_dict, with one more boolean appended for every data row.
        A row whose cell is not a number gets False, i.e. it is kept.
    """
    filter_value = float(filter_value)
    ncol = column_from_txt(ncol)

    for id_line, line in enumerate(csv_file):
        if header is True and id_line == 0:
            continue
        value = line[ncol].replace('"', "").strip()

        # float() also recognises negative and scientific-notation numbers,
        # which the former isdigit() test treated as text.
        try:
            number = float(value)
            is_numeric = number == number  # NaN cannot be compared
        except ValueError:
            is_numeric = False

        if is_numeric:
            # NOTE(review): value is passed as text, as before; this assumes
            # value_compare converts it with float() - confirm.
            to_filter = value_compare(value, filter_value, opt)
        else:
            # Always record a verdict so that the row is neither lost nor
            # given the verdict of the previous row.
            to_filter = False

        # adding the result to the dictionary
        results_dict.setdefault(id_line, []).append(to_filter)

    return results_dict
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
207
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
208 #filter by a range of values in rows of csvfile, return a dictionary of booleans (true if value filtered, false otherwise)
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
209 def filter_values_range(csv_file, header, results_dict, bottom_value, top_value, ncol, inclusive):
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
210 inclusive=str_to_bool(inclusive)
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
211 bottom_value = float(bottom_value)
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
212 top_value=float(top_value)
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
213 ncol=column_from_txt(ncol)
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
214
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
215 for id_line, line in enumerate(csv_file):
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
216 if header is True and id_line == 0 : continue
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
217 value = line[ncol].replace('"', "").strip()
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
218 if value.replace(".", "", 1).isdigit():
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
219 value=float(value)
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
220 if inclusive is True:
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
221 in_range = not (bottom_value <= value <= top_value)
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
222 else :
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
223 in_range = not (bottom_value < value < top_value)
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
224
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
225 #adding the result to the dictionary
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
226 if id_line in results_dict : results_dict[id_line].append(in_range)
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
227 else : results_dict[id_line]=[in_range]
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
228
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
229 return results_dict
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
230
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
def column_from_txt(ncol):
    """
    Convert a 1-based column designation into a 0-based column index.

    ncol  string holding the column number; every "c" character is removed
          before the conversion, so "c3" and "3" both give index 2

    Returns the 0-based index as an int.
    Raises ValueError if is_number("int", ...) rejects the text, or if the
    resulting index would be negative.
    """
    digits = ncol.replace("c", "")
    if not is_number("int", digits):
        raise ValueError("Please specify the column where "
                         "you would like to apply the filter "
                         "with valid format")

    index = int(digits) - 1
    # Columns are numbered from 1. Without this guard "c0" would become -1,
    # which Python accepts as an index and silently maps to the last column.
    if index < 0:
        raise ValueError("Please specify the column where "
                         "you would like to apply the filter "
                         "with valid format")
    return index
0
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
239
7
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
240 #return True if value is in the determined values, false otherwise
6f32c1e12572 planemo upload commit 72b345a7df2c87f07a9df71ecee1f252c9355337
proteore
parents: 6
diff changeset
def value_compare(value,filter_value,opt):
    """
    Compare a value with a threshold using the operator named by `opt`.

    value         value to test (anything float() accepts)
    filter_value  threshold (anything float() accepts, so a number given as
                  a string works as well)
    opt           one of "<", "<=", ">", ">=", "=", "!="

    Returns True if "value opt filter_value" holds, False otherwise.
    An unknown `opt` returns False without converting either argument.
    Raises ValueError if `opt` is known and an argument is not a number.
    """
    # Imported locally so the function does not rely on a file-level import.
    import operator

    comparators = {
        "<": operator.lt,
        "<=": operator.le,
        ">": operator.gt,
        ">=": operator.ge,
        "=": operator.eq,
        "!=": operator.ne,
    }

    compare = comparators.get(opt)
    if compare is None:
        return False

    # Both sides are converted so the comparison is always float against float.
    return compare(float(value), float(filter_value))
0
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
264
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
# Script entry point: when the file is executed directly (not imported),
# hand control to options(), whose docstring says it parses the options.
if __name__ == "__main__":
    options()