Mercurial > repos > fabio > btman
comparison query.py @ 19:7f712cc0d3d5 draft
Uploaded 20190304.2
author | fabio |
---|---|
date | Mon, 04 Mar 2019 08:31:28 -0500 |
parents | |
children | c619ad82600e |
comparison
equal
deleted
inserted
replaced
18:be864d79c9c7 | 19:7f712cc0d3d5 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import sys, os, optparse, shutil | |
4 | |
# version string printed by the --version command line option
__version__ = "1.0.0"

# characters kept when file names and sequence identifiers are cleaned up;
# every other character is dropped
VALID_CHARS = (
    '.-()[]'
    '0123456789'
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
)

# in the case of collections, exitcodes equal to 0 and 1 are not considered errors
OK_EXIT_CODE = 0
ERR_EXIT_CODE = 2
10 | |
def printLog( logfilepath, message, exitcode=OK_EXIT_CODE, exit=False ):
    """Print a message on stdout and append it to the log file.

    Parameters:
        logfilepath: path of the log file; it is opened in append mode.
        message: text to report; a newline is added when it is written to the log.
        exitcode: status handed to sys.exit() when exit is true.
        exit: when true, the interpreter is terminated after the message is logged.

    Raises:
        SystemExit: when exit is true.
    """
    # the call form with a single argument prints the same text on Python 2 and Python 3,
    # whereas the print statement is a syntax error on Python 3
    print( message )
    with open( logfilepath, 'a+' ) as out:
        out.write( message + '\n' )
    if exit:
        sys.exit( exitcode )
17 | |
def _sanitizeName( text ):
    """Return text with every character that is not in VALID_CHARS removed."""
    return ''.join( c for c in text if c in VALID_CHARS )


def _locateTreeFiles( tree_file_names, tree_file_paths ):
    """Return the paths registered under the names 'howde' and 'leafnames'.

    The two lists are parallel: the path of tree_file_names[ i ] is tree_file_paths[ i ].
    The result is a ( tree_def_filepath, leafnames_filepath ) tuple; an entry is None
    when the corresponding name is not in tree_file_names.
    """
    tree_def_filepath = None
    leafnames_filepath = None
    for idx, tree_file_name in enumerate( tree_file_names ):
        if tree_file_name == 'howde':
            tree_def_filepath = tree_file_paths[ idx ]
        elif tree_file_name == 'leafnames':
            leafnames_filepath = tree_file_paths[ idx ]
        if tree_def_filepath is not None and leafnames_filepath is not None:
            break
    return tree_def_filepath, leafnames_filepath


def _countLeafNames( leafnames_filepath ):
    """Return the number of non-blank lines in the leaf names file."""
    with open( leafnames_filepath ) as leafnames_file:
        return sum( 1 for line in leafnames_file if line.strip() )


def _writeQueryBatch( options, outlogfile, batch_file_name ):
    """Write every ( id, sequence ) pair found in the input files to the batch file.

    options.files and options.names are parallel comma separated lists. Each input file
    is read line by line; only lines made of exactly two tab separated fields are used,
    anything else is skipped. The batch file is always created, even when options.files
    is None, and it is closed even if reading an input file fails.
    """
    with open( batch_file_name, 'w' ) as batch_file:
        if options.files is None:
            return
        file_paths = options.files.split( ',' )
        file_names = options.names.split( ',' )
        for idx, file_path in enumerate( file_paths ):
            fixed_file_name = _sanitizeName( file_names[ idx ] )
            printLog( outlogfile, '> processing file ' + file_names[ idx ] + ' ( fixed_name=\"' + fixed_file_name + '\" ) ' )
            with open( file_path, 'r' ) as content_file:
                for line in content_file:
                    line_split = line.strip().split( "\t" )
                    # 0:id , 1:seq , otherwise skip line (blank lines yield a single field)
                    if len( line_split ) != 2:
                        continue
                    original_seq_id, seq_text = line_split
                    # fix seq_id using valid chars only
                    seq_id = _sanitizeName( original_seq_id )
                    printLog( outlogfile, '> sequence ' + original_seq_id + ' ( fixed_name=\"' + seq_id + '\" )' )
                    batch_file.write( '> ' + fixed_file_name + '_' + seq_id + '\n' + seq_text + '\n' )


def _splitQueryResults( query_res_file_path, output_dir_path, leafnames_counter ):
    """Split the query answer file into one result file per '*' header line.

    A line starting with '*' opens a new section: its first space separated field, with
    the asterisks removed, names the result file ( <name>_txt inside output_dir_path ) and
    its second field is parsed as an integer. Every following non-blank line is appended
    to that result file together with "<integer>/<leafnames_counter>" and the same ratio
    rounded to six decimals. The result file is created as soon as the header is read, so
    a section without any following line still produces an (empty) file.
    """
    file_path = ''
    theta_matches = 0
    with open( query_res_file_path ) as query_res_file:
        for line in query_res_file:
            line = line.strip()
            if not line:
                continue
            if line.startswith( '*' ):
                line_split = line.split( ' ' )
                theta_matches = int( line_split[ 1 ] )
                file_name = line_split[ 0 ].replace( '*', '' )
                file_path = os.path.join( output_dir_path, file_name + '_txt' )
                open( file_path, 'a' ).close()
            elif file_path:
                # lines that precede the first header have no result file to go to
                fraction = str( theta_matches ) + '/' + str( leafnames_counter )
                score = format( round( float( theta_matches ) / float( leafnames_counter ), 6 ), '6f' )
                with open( file_path, 'a+' ) as res_file:
                    res_file.write( line + '\t' + fraction + '\t' + score + '\n' )


def querySBT( options, args ):
    """Query the tree of the selected collection with the sequences of the input files.

    Parameters:
        options: parsed command line options; the attributes read here are outputdir,
            outfile, treep, treen, files, names, threshold and sort.
        args: positional command line arguments (unused).

    Side effects: copies the tree definition to 'howde.txt' and every collection file
    whose name ends with 'detbrief.rrr' to '<name>.bf' in the current directory, writes
    'queries.fa' and 'answer.txt' there, runs 'howdesbt query' through the shell and
    appends the results to files inside options.outputdir. Progress is reported with
    printLog on options.outfile.

    Raises:
        SystemExit: with ERR_EXIT_CODE (through printLog) when the collection has no
            'howde' or 'leafnames' entry, when the leaf names file has no non-blank
            line, or when the query succeeds without producing the answer file.
    """
    output_dir_path = options.outputdir
    outlogfile = options.outfile

    tree_file_paths = options.treep.split( ',' )
    tree_file_names = options.treen.split( ',' )
    tree_def_filepath, leafnames_filepath = _locateTreeFiles( tree_file_names, tree_file_paths )
    if tree_def_filepath is None or leafnames_filepath is None:
        printLog( outlogfile, 'The selected collection does not contain a valid tree', exitcode=ERR_EXIT_CODE, exit=True )
        return

    leafnames_counter = _countLeafNames( leafnames_filepath )
    if leafnames_counter <= 0:
        printLog( outlogfile, 'The selected collection does not contain a valid tree', exitcode=ERR_EXIT_CODE, exit=True )
        return

    printLog( outlogfile, 'The selected collection contains a valid tree' )
    # work on local copies of the tree files, named as laid out below
    shutil.copyfile( tree_def_filepath, 'howde.txt' )
    tree_def_filepath = 'howde.txt'
    for idx, tree_file_name in enumerate( tree_file_names ):
        if tree_file_name.endswith( 'detbrief.rrr' ):
            shutil.copyfile( tree_file_paths[ idx ], tree_file_name + '.bf' )

    printLog( outlogfile, 'Creating batch of queries' )
    batch_file_name = 'queries.fa'
    _writeQueryBatch( options, outlogfile, batch_file_name )

    # query the tree
    printLog( outlogfile, 'Querying the tree' )
    query_res_file_path = os.path.abspath( 'answer.txt' )
    command = 'howdesbt query --tree=' + os.path.abspath( tree_def_filepath ) + ' ' + os.path.abspath( batch_file_name ) + '=' + str( options.threshold ) + ' --out=' + query_res_file_path
    # the flag has to be part of the command line: appending it to the value returned
    # by os.system() never reaches howdesbt and fails with a TypeError
    if options.sort != 0:
        command += ' --sort'
    query_exitcode = os.system( command )
    if query_exitcode != 0:
        # the whole batch is queried at once, so no single sequence can be blamed
        printLog( outlogfile, '> ERROR: an error has occurred while querying the tree ( exit status: ' + str( query_exitcode ) + ' )' )
        return

    if not os.path.exists( query_res_file_path ):
        printLog( outlogfile, 'An error has occurred while querying the tree', exitcode=ERR_EXIT_CODE, exit=True )
        return

    _splitQueryResults( query_res_file_path, output_dir_path, leafnames_counter )
113 | |
def __main__():
    """Parse the command line, then print the version or run the query.

    With --version only the version string is printed. Otherwise the output directory
    given with --outputdir is created when it does not exist and querySBT is called
    with the parsed options and the remaining positional arguments.
    """
    # Parse the command line options; the usage line lists the options defined below
    usage = ( "Usage: query.py --files comma_sep_file_paths --names comma_sep_file_names "
              "--treep comma_sep_tree_file_paths --treen comma_sep_tree_file_names "
              "--threshold threshold --sort sort_flag "
              "--outputdir output_dir_path --outfile log_file_path" )
    parser = optparse.OptionParser( usage=usage )
    parser.add_option( "-v", "--version", action="store_true", dest="version",
                       default=False, help="display version and exit" )
    parser.add_option( "-f", "--files", type="string",
                       action="store", dest="files", help="comma separated files path" )
    parser.add_option( "-n", "--names", type="string",
                       action="store", dest="names", help="comma separated names associated to the files specified in --files" )
    parser.add_option( "-k", "--treep", type="string",
                       action="store", dest="treep", help="paths of files in collection" )
    parser.add_option( "-m", "--treen", type="string",
                       action="store", dest="treen", help="names of files in collection" )
    parser.add_option( "-t", "--threshold", type="float", default=0.7,
                       action="store", dest="threshold", help="search threshold" )
    parser.add_option( "-s", "--sort", type="int", default=1,
                       action="store", dest="sort", help="sort results" )
    parser.add_option( "-o", "--outputdir", type="string", default="output",
                       action="store", dest="outputdir", help="output directory (collection) path" )
    parser.add_option( "-r", "--outfile", type="string", default="query.txt",
                       action="store", dest="outfile", help="output log file path" )

    options, args = parser.parse_args()
    if options.version:
        # call form with a single argument: same output on Python 2 and Python 3
        print( __version__ )
        return

    # create output dir (collection)
    output_dir_path = options.outputdir
    if not os.path.exists( output_dir_path ):
        os.makedirs( output_dir_path )

    querySBT( options, args )
147 | |
# run the command line entry point only when the file is executed as a script
if __name__ == "__main__":
    __main__()