Mercurial > repos > fabio > btman
comparison query.py @ 19:7f712cc0d3d5 draft
Uploaded 20190304.2
| author | fabio |
|---|---|
| date | Mon, 04 Mar 2019 08:31:28 -0500 |
| parents | |
| children | c619ad82600e |
comparison
equal
deleted
inserted
replaced
| 18:be864d79c9c7 | 19:7f712cc0d3d5 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import sys, os, optparse, shutil | |
| 4 | |
# Tool version, printed by the --version command line switch
__version__ = "1.0.0"

# Whitelist of characters kept when file names and sequence ids are sanitised
VALID_CHARS = (
    '.-()[]'
    '0123456789'
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
)

# in the case of collections, exitcodes equal to 0 and 1 are not considered errors
OK_EXIT_CODE = 0
ERR_EXIT_CODE = 2
| 10 | |
def printLog( logfilepath, message, exitcode=OK_EXIT_CODE, exit=False ):
    """Echo a message on stdout and append it to the log file.

    logfilepath -- path of the log file; it is created when missing
    message     -- text to report, written without its trailing newline
    exitcode    -- status handed to sys.exit when exit is true
    exit        -- when true, terminate the process after logging
    """
    # the single-argument call form prints the same text on Python 2 and 3
    print( message )
    # append-only access is enough: the log is never read back here
    with open( logfilepath, 'a' ) as out:
        out.write( message + '\n' )
    if exit:
        sys.exit( exitcode )
| 17 | |
def _findTreeFiles( tree_file_names, tree_file_paths ):
    """Return the ( tree definition path, leaf names path ) pair of the collection.

    The tree definition is the element named 'howde' and the leaf list is the
    element named 'leafnames'; an element that is not found is reported as None.
    """
    tree_def_filepath = None
    leafnames_filepath = None
    for idx, tree_file_name in enumerate( tree_file_names ):
        if tree_file_name == 'howde':
            tree_def_filepath = tree_file_paths[ idx ]
        elif tree_file_name == 'leafnames':
            leafnames_filepath = tree_file_paths[ idx ]
        if tree_def_filepath is not None and leafnames_filepath is not None:
            break
    return tree_def_filepath, leafnames_filepath


def _countLeaves( leafnames_filepath ):
    """Return the number of non-blank lines in the leaf names file."""
    with open( leafnames_filepath ) as leafnames_file:
        return sum( 1 for line in leafnames_file if line.strip() )


def _keepValidChars( text ):
    """Return text stripped of every character that is not in VALID_CHARS."""
    return ''.join( c for c in text if c in VALID_CHARS )


def _writeQueryBatch( batch_file_name, file_paths, file_names, outlogfile ):
    """Merge the tab-separated ( id, sequence ) lines of every input file into one batch file."""
    with open( batch_file_name, 'w' ) as batch_file:
        for idx, file_path in enumerate( file_paths ):
            fixed_file_name = _keepValidChars( file_names[ idx ] )
            printLog( outlogfile, '> processing file ' + file_names[ idx ] + ' ( fixed_name=\"' + fixed_file_name + '\" ) ' )
            with open( file_path, 'r' ) as content_file:
                for line in content_file:
                    line_split = line.strip().split( "\t" ) # split on tab
                    # 0:id , 1:seq , otherwise (blank lines included) skip line
                    if len( line_split ) != 2:
                        continue
                    original_seq_id = line_split[ 0 ]
                    # fix seq_id using valid chars only
                    seq_id = _keepValidChars( original_seq_id )
                    printLog( outlogfile, '> sequence ' + original_seq_id + ' ( fixed_name=\"' + seq_id + '\" )' )
                    seq_text = line_split[ 1 ]
                    # write on batch file
                    batch_file.write( '> ' + fixed_file_name + '_' + seq_id + '\n' + seq_text + '\n' )


def _runQuery( tree_def_filepath, batch_file_name, threshold, sort, query_res_file_path ):
    """Run 'howdesbt query' on the batch file and return its exit code ( 0 on success )."""
    # imported locally so that this function does not depend on the module header
    import subprocess
    # an argument list avoids building a shell command line out of file paths
    command = [
        'howdesbt', 'query',
        '--tree=' + os.path.abspath( tree_def_filepath ),
        os.path.abspath( batch_file_name ) + '=' + str( threshold ),
        '--out=' + query_res_file_path,
    ]
    if sort != 0:
        command.append( '--sort' )
    try:
        return subprocess.call( command )
    except OSError:
        # the howdesbt executable could not be started at all
        return ERR_EXIT_CODE


def _splitQueryResults( query_res_file_path, output_dir_path, leafnames_counter ):
    """Split the query answer into one '<query name>_txt' file per query.

    A line starting with '*' opens a new query: its first space-separated
    field ( without the asterisks ) names the output file and its second field
    is the integer reported next to every following line, both as
    '<value>/<number of leaves>' and as the corresponding ratio.
    """
    res_file_path = None
    theta_matches = 0
    with open( query_res_file_path ) as query_res_file:
        for line in query_res_file:
            line = line.strip()
            if not line:
                continue
            if line.startswith( '*' ):
                line_split = line.split( ' ' )
                theta_matches = int( line_split[ 1 ] )
                file_name = line_split[ 0 ].replace( '*', '' )
                res_file_path = os.path.join( output_dir_path, file_name + '_txt' )
                # make sure the file exists even when the query has no result line
                with open( res_file_path, 'a' ):
                    pass
            elif res_file_path is not None:
                # result lines found before any '*' header belong to no query and are skipped
                fraction = str( theta_matches ) + '/' + str( leafnames_counter )
                score = format( round( float( theta_matches ) / float( leafnames_counter ) , 6 ), '6f' )
                with open( res_file_path, 'a' ) as res_file:
                    res_file.write( line + '\t' + fraction + '\t' + score + '\n' )


def querySBT( options, args ):
    """Query the tree of the selected collection with the sequences of the input files.

    options -- parsed command line options; reads outputdir, outfile, treep,
               treen, files, names, threshold and sort
    args    -- positional command line arguments ( unused )

    Progress and errors are reported through printLog on options.outfile. The
    process exits with ERR_EXIT_CODE when the collection does not hold a valid
    tree or when the query produces no answer file.
    """
    output_dir_path = options.outputdir
    outlogfile = options.outfile

    tree_file_paths = options.treep.split( ',' )
    tree_file_names = options.treen.split( ',' )
    tree_def_filepath, leafnames_filepath = _findTreeFiles( tree_file_names, tree_file_paths )
    if tree_def_filepath is None or leafnames_filepath is None:
        printLog( outlogfile, 'The selected collection does not contain a valid tree', exitcode=ERR_EXIT_CODE, exit=True )
        return

    leafnames_counter = _countLeaves( leafnames_filepath )
    if leafnames_counter == 0:
        printLog( outlogfile, 'The selected collection does not contain a valid tree', exitcode=ERR_EXIT_CODE, exit=True )
        return

    printLog( outlogfile, 'The selected collection contains a valid tree' )
    # work on local copies of the tree definition and of the node files
    shutil.copyfile( tree_def_filepath, 'howde.txt' )
    tree_def_filepath = 'howde.txt'
    for idx, tree_file_name in enumerate( tree_file_names ):
        if tree_file_name.endswith( 'detbrief.rrr' ):
            shutil.copyfile( tree_file_paths[ idx ], tree_file_name + '.bf' )

    printLog( outlogfile, 'Creating batch of queries' )
    # create tmp batch file
    batch_file_name = 'queries.fa'
    comma_sep_file_paths = options.files
    # check if options.files contains at least one file path
    if comma_sep_file_paths is None:
        # NOTE(review): without input files only an empty batch file is left
        # behind and the tree is not queried -- confirm this is the intended behaviour
        with open( batch_file_name, 'w' ):
            pass
        return

    file_paths = comma_sep_file_paths.split( "," )
    file_names = options.names.split( "," )
    _writeQueryBatch( batch_file_name, file_paths, file_names, outlogfile )

    # query the tree
    printLog( outlogfile, 'Querying the tree' )
    query_res_file_path = os.path.abspath( 'answer.txt' )
    query_exitcode = _runQuery( tree_def_filepath, batch_file_name, options.threshold, options.sort, query_res_file_path )
    if query_exitcode != 0:
        # the whole batch is queried at once, so the failure cannot be pinned on a single sequence
        printLog( outlogfile, '> ERROR: an error has occurred while querying the tree with the batch of sequences in ' + batch_file_name )
    elif os.path.exists( query_res_file_path ):
        _splitQueryResults( query_res_file_path, output_dir_path, leafnames_counter )
    else:
        printLog( outlogfile, 'An error has occurred while querying the tree', exitcode=ERR_EXIT_CODE, exit=True )
| 113 | |
def __main__():
    """Parse the command line, then print the version or run the query.

    The output directory ( collection ) is created when missing before the
    tree is queried. Missing mandatory options are reported through the
    option parser, which prints the usage and exits.
    """
    # Parse the command line options
    usage = "Usage: query.py --files comma_sep_file_paths --names comma_sep_file_names --treep comma_sep_tree_file_paths --treen comma_sep_tree_file_names --threshold threshold --sort sort_results --outputdir output_dir_path --outfile log_file_path"
    parser = optparse.OptionParser(usage = usage)
    parser.add_option("-v", "--version", action="store_true", dest="version",
                    default=False, help="display version and exit")
    parser.add_option("-f", "--files", type="string",
                    action="store", dest="files", help="comma separated files path")
    parser.add_option("-n", "--names", type="string",
                    action="store", dest="names", help="comma separated names associated to the files specified in --files")
    parser.add_option("-k", "--treep", type="string",
                    action="store", dest="treep", help="paths of files in collection")
    parser.add_option("-m", "--treen", type="string",
                    action="store", dest="treen", help="names of files in collection")
    parser.add_option("-t", "--threshold", type="float", default=0.7,
                    action="store", dest="threshold", help="search threshold")
    parser.add_option("-s", "--sort", type="int", default=1,
                    action="store", dest="sort", help="sort results")
    parser.add_option("-o", "--outputdir", type="string", default="output",
                    action="store", dest="outputdir", help="output directory (collection) path")
    parser.add_option("-r", "--outfile", type="string", default="query.txt",
                    action="store", dest="outfile", help="output log file path")

    (options, args) = parser.parse_args()
    if options.version:
        # the single-argument call form prints the same text on Python 2 and 3
        print( __version__ )
        return

    # these values are split on commas later on, so they cannot be left unset
    if options.treep is None or options.treen is None:
        parser.error( "--treep and --treen are both required" )
    if options.files is not None and options.names is None:
        parser.error( "--names is required when --files is specified" )

    # create output dir (collection)
    output_dir_path = options.outputdir
    if not os.path.exists(output_dir_path):
        os.makedirs(output_dir_path)

    querySBT( options, args )
| 147 | |
# Run the command line entry point only when the file is executed as a script
if __name__ == "__main__":
    __main__()
