annotate filter_kw_val.py @ 6:c6ba1e6f6869 draft

planemo upload commit 74b6a02a2e64d02551c05b52d571b888ac73cac9
author proteore
date Fri, 20 Apr 2018 09:07:23 -0400
parents 1e9911190142
children 6f32c1e12572
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
1 import argparse
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
2 import re
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
3
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
4
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
def options():
    """Parse the command line and run the filters.

    Recognised options:
        -i, --input    Input file and header flag joined by a comma:
                       "filename,true" or "filename,false".
        --kw           Keyword filter given as three separate tokens:
                       KEYWORDS COLUMN EXACT.  KEYWORDS is a ";"-separated
                       list, COLUMN is a 1-based column such as "c1", and
                       EXACT is "true" (whole-identifier match) or "false"
                       (substring match).  Lines that match are removed.
                       May be repeated: --kw kw1 c1 true --kw kw2 c2 false
        --kw_file      Same as --kw, but the first token is the path of a
                       file holding one keyword per line:
                       FILENAME COLUMN EXACT.  May be repeated.
        --value        Numeric filter given as three separate tokens:
                       VALUE COLUMN OPERATOR, where OPERATOR is one of
                       = > >= < <=.  Lines whose numeric cell does not
                       satisfy "cell OPERATOR VALUE" are removed.
                       May be repeated.
        -o, --output   Output filename (default: output.txt).
        --trash_file   File receiving the removed lines
                       (default: trash_MQfilter.txt).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i", "--input", required=True,
        help='Input file and header flag: "filename,true" or "filename,false"')
    # nargs="+" with action="append" yields one token list per occurrence;
    # filters() reads the first three tokens of each list.
    parser.add_argument(
        "--kw", nargs="+", action="append",
        help='Keyword filter: KEYWORDS COLUMN EXACT, e.g. "kw1;kw2" c1 true '
             "(repeatable)")
    parser.add_argument(
        "--kw_file", nargs="+", action="append",
        help="Keyword-file filter: FILENAME COLUMN EXACT, one keyword per "
             "line in FILENAME (repeatable)")
    parser.add_argument(
        "--value", nargs="+", action="append",
        help="Value filter: VALUE COLUMN OPERATOR with OPERATOR one of "
             "= > >= < <= (repeatable)")
    parser.add_argument(
        "-o", "--output", default="output.txt",
        help="Output filename (default: output.txt)")
    parser.add_argument(
        "--trash_file", default="trash_MQfilter.txt",
        help="File receiving the removed lines (default: trash_MQfilter.txt)")

    args = parser.parse_args()

    filters(args)
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
31
1
d29e469b6b20 planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents: 0
diff changeset
def isnumber(number_format, n):
    """Tell whether the string ``n`` is a plain decimal number.

    Args:
        number_format: "int" to accept an optionally negative integer,
            "float" to accept an optionally negative integer or decimal
            (digits, optionally followed by "." and more digits).
        n: The string to check.

    Returns:
        True when ``n`` matches the requested format, False otherwise
        (including when ``number_format`` is neither "int" nor "float").
    """
    # The decimal point is escaped and the fractional part is optional, so
    # single-digit values such as "5" are accepted and "1a5" is rejected.
    patterns = {
        "int": r"^-?[0-9]+$",
        "float": r"^-?[0-9]+(?:\.[0-9]+)?$",
    }
    pattern = patterns.get(number_format)
    if pattern is None:
        return False
    return re.match(pattern, n) is not None
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
45
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
def filters(args):
    """Apply every requested filter to the input file and write the results.

    Args:
        args: Parsed command-line namespace providing ``input``
            ("filename,true/false"), ``kw``, ``kw_file``, ``value``
            (each None or a list of token lists), ``output`` and
            ``trash_file``.

    Raises:
        ValueError: If the value given to a ``--value`` filter is not a
            number.

    The kept lines are written to ``args.output`` and the removed lines to
    ``args.trash_file``.
    """
    input_parts = args.input.split(",")
    mq_filename = input_parts[0]
    # Without an explicit flag the file is treated as having no header.
    header = input_parts[1] if len(input_parts) > 1 else "false"

    # results[0] holds the kept lines, results[1] the removed lines
    # (None until the first filter has run).
    results = [readMQ(mq_filename), None]

    for keyword_args in args.kw or []:
        results = filter_keyword(results[0], header, results[1],
                                 keyword_args[0], keyword_args[1],
                                 keyword_args[2])

    for file_args in args.kw_file or []:
        ids = readOption(file_args[0])
        results = filter_keyword(results[0], header, results[1],
                                 ids, file_args[1], file_args[2])

    for value_args in args.value or []:
        if not isnumber("float", value_args[0]):
            raise ValueError("Please enter a number in filter by value")
        results = filter_value(results[0], header, results[1],
                               value_args[0], value_args[1], value_args[2])

    # Write results to output
    with open(args.output, "w") as output:
        output.write("".join(results[0]))

    # Write deleted lines to trash_file; when no filter ran there is
    # nothing to report, so an empty file is written.
    with open(args.trash_file, "w") as trash:
        trash.write("".join(results[1] or []))
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
80
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
def readOption(filename):
    """Read a keyword file and return its keywords joined with ";".

    Args:
        filename: Path of a text file holding one keyword per line.

    Returns:
        A single string with the non-blank lines joined by ";".  Blank
        lines are skipped so they cannot turn into empty keywords.
    """
    with open(filename, "r") as handle:
        file_content = handle.read()
    lines = file_content.split("\n")
    return ";".join(line for line in lines if line.strip())
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
91
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
def readMQ(MQfilename):
    """Read the input file and return its non-blank lines.

    Args:
        MQfilename: Path of the input text file.

    Returns:
        A list of the file's lines (line endings preserved) without the
        lines that are empty or contain only whitespace.
    """
    with open(MQfilename, "r") as mqfile:
        # Build a new list rather than removing while iterating, which
        # skipped every second line of a run of blank lines.
        return [line for line in mqfile if line.strip()]
1
d29e469b6b20 planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents: 0
diff changeset
99
0
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
def filter_keyword(MQfile, header, filtered_lines, ids, ncol, match):
    """Remove the lines whose identifiers match one of the keywords.

    Args:
        MQfile: List of input lines; it is updated in place and returned.
        header: "true" when the first line of ``MQfile`` is a header.
        filtered_lines: Lines already removed by earlier filters, or a
            falsy value when there are none yet.
        ids: Keywords separated by ";" (compared case-insensitively).
        ncol: 1-based column holding the identifiers, e.g. "c1" or "1".
        match: "false" for substring matching; any other value for
            whole-identifier matching.

    Returns:
        A ``(kept_lines, filtered_lines)`` tuple.  Removed lines always end
        with a newline; when the removed list is started here and the file
        has a header, the header is its first entry.

    Raises:
        ValueError: If ``ncol`` is not a valid column specification.
    """
    mq = MQfile
    column = ncol.replace("c", "")
    if not isnumber("int", column):
        raise ValueError("Please specify the column where "
                         "you would like to apply the filter "
                         "with valid format")
    id_index = int(column) - 1

    # Upper-case and trim the keywords, then drop the blank ones: an empty
    # keyword is a substring of every identifier and would remove all lines.
    keywords = [keyword.strip() for keyword in ids.upper().split(";")]
    keywords = [keyword for keyword in keywords if keyword]
    keyword_set = set(keywords)

    has_header = header == "true" and len(mq) > 0
    if has_header:
        header_line = mq[0]
        content = mq[1:]
    else:
        header_line = ""
        content = mq[:]

    if not filtered_lines:  # Otherwise keep the lines removed by other filters
        filtered_lines = []
        if header_line != "":
            filtered_lines.append(header_line)

    exact = match != "false"
    kept = [header_line] if has_header else []
    for line in content:
        text = line.replace("\n", "")
        # A cell may hold several identifiers separated by ";".
        cell = text.split("\t")[id_index].replace('"', "")
        candidates = [pid.upper() for pid in cell.split(";")]

        if exact:
            hit = any(pid in keyword_set for pid in candidates)
        else:
            hit = any(keyword in pid
                      for pid in candidates for keyword in keywords)

        if hit:
            filtered_lines.append(text + "\n")
        else:
            kept.append(line)

    # Replace the contents in place so the caller's list stays in sync.
    mq[:] = kept
    return mq, filtered_lines
1
d29e469b6b20 planemo upload commit 5774fd6a5a746f36f6bf4671a51a39ea2b978300-dirty
proteore
parents: 0
diff changeset
150
0
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
def filter_value(MQfile, header, filtered_prots, filter_value, ncol, opt):
    """Remove the lines whose numeric cell fails a comparison.

    Args:
        MQfile: List of input lines; it is updated in place and returned.
        header: "true" when the first line of ``MQfile`` is a header.
        filtered_prots: Lines already removed by earlier filters, or a
            falsy value when there are none yet.
        filter_value: Reference number (anything ``float()`` accepts).
        ncol: 1-based column holding the values, e.g. "c3" or "3".
        opt: Comparison a line must satisfy to be kept: "<", "<=", ">" or
            ">="; any other value is treated as equality.

    Returns:
        A ``(kept_lines, filtered_prots)`` tuple.  Lines whose cell is not
        a finite number are always kept.  Removed lines always end with a
        newline.

    Raises:
        ValueError: If ``ncol`` is not a valid column specification or
            ``filter_value`` is not a number.
    """
    mq = MQfile
    if not ncol or not isnumber("int", ncol.replace("c", "")):
        raise ValueError("Please specify the column where "
                         "you would like to apply the filter "
                         "with valid format")
    index = int(ncol.replace("c", "")) - 1
    threshold = float(filter_value)

    # Each entry tells when a value must be REMOVED for the given operator.
    removal_tests = {
        "<": lambda number: number >= threshold,
        "<=": lambda number: number > threshold,
        ">": lambda number: number <= threshold,
        ">=": lambda number: number < threshold,
    }
    must_remove = removal_tests.get(opt, lambda number: number != threshold)

    has_header = header == "true" and len(mq) > 0
    if has_header:
        header_line = mq[0]
        content = mq[1:]
    else:
        header_line = ""
        content = mq[:]

    if not filtered_prots:  # Otherwise keep the lines removed by other filters
        filtered_prots = []
        if header_line != "":
            filtered_prots.append(header_line)

    infinity = float("inf")
    kept = [header_line] if has_header else []
    for line in content:
        cell = line.replace("\n", "").split("\t")[index].replace('"', "")
        try:
            number = float(cell)
        except ValueError:
            # Non-numeric cells are left untouched.
            kept.append(line)
            continue
        # NaN and infinities are treated like non-numeric cells.
        if number != number or abs(number) == infinity:
            kept.append(line)
            continue

        if must_remove(number):
            filtered_prots.append(line if line.endswith("\n") else line + "\n")
        else:
            kept.append(line)

    # Replace the contents in place so the caller's list stays in sync.
    mq[:] = kept
    return mq, filtered_prots
0
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
197
6a45ccfc0e4c planemo upload commit abb24d36c776520e73220d11386252d848173697-dirty
proteore
parents:
diff changeset
# Run the command-line tool only when this file is executed as a script.
if __name__ == "__main__":
    options()