# HG changeset patch # User fabio # Date 1551706203 18000 # Node ID be864d79c9c731ce3277eaa7935cf89762bbfe32 # Parent f02c2c58a6f9522fefc26791d522cbe2ce1b2557 Uploaded 20190304 diff -r f02c2c58a6f9 -r be864d79c9c7 .shed.yml --- a/.shed.yml Fri Jan 18 10:12:40 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -name: btman -owner: iuc -categories: - - Data Source - - Web Services -description: BloomTree Manager -long_description: | - A fast querying tool to identify all publicly available sequenced - samples which express a transcript of interest -remote_repository_url: https://github.com/fabio-cumbo/bloomtree-manager -homepage_url: https://github.com/fabio-cumbo/bloomtree-manager -type: unrestricted -auto_tool_repositories: - name_template: "{{ tool_id }}" - descriptor_template: "Wrapper for BloomTree Manager: {{ tool_name }}." -suite: - name: "btman_suite" - description: "A suite of Galaxy tools designed to work with the BloomTree Manager." - long_description: | - A fast querying tool to identify all publicly available sequenced - samples which express a transcript of interest diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/.shed.yml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/btman-1.0.0/.shed.yml Mon Mar 04 08:30:03 2019 -0500 @@ -0,0 +1,21 @@ +name: btman +owner: iuc +categories: + - Data Source + - Web Services +description: BloomTree Manager +long_description: | + A suite of tools to fast create and query Sequence Bloom Trees + supporting determined/how split filters +remote_repository_url: https://github.com/fabio-cumbo/bloomtree-manager +homepage_url: https://github.com/fabio-cumbo/bloomtree-manager +type: unrestricted +auto_tool_repositories: + name_template: "{{ tool_id }}" + descriptor_template: "Wrapper for BloomTree Manager: {{ tool_name }}." +suite: + name: "btman_suite" + description: "A suite of Galaxy tools designed to work with the BloomTree Manager." + long_description: | + A suite of tools to fast create and query Sequence Bloom Trees + supporting determined/how split filters diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/build.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/btman-1.0.0/build.sh Mon Mar 04 08:30:03 2019 -0500 @@ -0,0 +1,7 @@ +#!/bin/bash + +outExpDir=$1 + +cd ${outExpDir} + +howdesbt build --HowDe --tree=union.txt --outtree=howde.txt diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/cluster.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/btman-1.0.0/cluster.sh Mon Mar 04 08:30:03 2019 -0500 @@ -0,0 +1,10 @@ +#!/bin/bash + +outExpDir=$1 +bfsize=$2 + +cd ${outExpDir} + +ls *.bf > leafnames.txt +howdesbt cluster --list=leafnames.txt --bits=${bfsize} --tree=union.txt --nodename=node{number} --keepallnodes +#rm leafnames_txt_? diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/create.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/btman-1.0.0/create.py Mon Mar 04 08:30:03 2019 -0500 @@ -0,0 +1,233 @@ +#!/usr/bin/env python + +import sys, os, optparse, shutil, glob + +__version__ = "1.0.0" +# in the case of collections, exitcodes equal to 0 and 1 are not considered errors +ERR_EXIT_CODE = 2 +OK_EXIT_CODE = 0 +VALID_CHARS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + +def printLog( logfilepath, message, exitcode=OK_EXIT_CODE, exit=False ): + print message + with open( logfilepath, 'a+' ) as out: + out.write( message + '\n' ) + if exit: + sys.exit( exitcode ) + +def downloadAccessions(formats, filepaths, outlogfile, outdirpath): + downloaded_files = { } + for dataset_idx in range(0, len(formats)): + if formats[ dataset_idx ] == 'accessions': + print filepaths[ dataset_idx ] + with open( filepaths[ dataset_idx ] ) as accessions: + for line in accessions: + print line + accession = line.split( '\t' )[0].strip() + if accession: + printLog( outlogfile, 'Downloading \"' + accession.upper() + '\" with the fastq-dump tool (part of the sra-tools utility)' ) + fastq_dump_exitcode = os.system( 'fastq-dump --outdir ' + outdirpath + ' --fasta ' + accession.upper() ) + if fastq_dump_exitcode > 0: + printLog( outlogfile, '> FASTA file: FAILED ( \"' + accession.upper() + '\" will be excluded )' ) + else: + #os.rename( os.path.join( outdirpath, accession.upper() + '.fasta' ), os.path.join( outdirpath, accession.upper() + '_fasta' ) ) + printLog( outlogfile, '> FASTA file: \"' + accession.upper() + '.fasta\"' ) + accession_data = { + 'format': '.fasta', + 'filepath': os.path.join( outdirpath, accession.upper() + '.fasta' ), + 'filename': ''.join( c for c in accession.upper() if c in VALID_CHARS ) + } + downloaded_files[ accession.upper() ] = accession_data + return downloaded_files + +# format = { fasta, fastq, accession } +# this version skip the quality control procedure +def createSBT( options, args ): + outlogfile = str( options.outfile ) + outdirpath = str( options.outdir ) + if not outdirpath.endswith('/'): outdirpath += '/' + if not os.path.exists( outdirpath ): + os.mkdir( outdirpath ) + outdirpath = os.path.abspath( outdirpath ) + os.chdir( outdirpath ) + tooldirpath = os.path.abspath( str( options.tooldir ) ) + if not tooldirpath.endswith('/'): tooldirpath += '/' + + formats = [ fo for fo in str( options.formats ).split( '|' ) if fo.strip() ] + filepaths = [ filepath for filepath in str( options.filepaths ).split( '|' ) if filepath.strip() ] + filenames = [ filename for filename in str( options.filenames ).split( '|' ) if filename.strip() ] + compressed = [ True == int(c) for c in str( options.compressed ).split( '|' ) if c.strip() ] + minabundances = [ int(minab) for minab in str( options.minabundances ).split( '|' ) if minab.strip() ] + qualitythresholds = [ float(qthres) for qthres in str( options.qualitythresholds ).split( '|' ) if qthres.strip() ] + + klen = int( options.klen ) + bfsize = int( options.bfsize ) + + if len(formats) == len(filepaths) == len(filenames) == len(compressed) == len(minabundances) == len(qualitythresholds): + printLog( outlogfile, 'Retrieving experiments' ) + accessions = downloadAccessions( formats, filepaths, outlogfile, outdirpath ) + printLog( outlogfile, '> ' + str( len( accessions ) ) + ' experiments retrieved from the Sequence Read Archive' ) + acc_arr = [ a for a in accessions ] + print str( acc_arr ) + if bfsize < 0: # estimate bloom filter size + data_paths = ' '.join( accessions[ accession ][ 'filepath' ] for accession in accessions if 'filepath' in accessions[ accession ] ) + print data_paths + if len( data_paths ) > 0: + data_paths += ' ' + for dataset_idx in range(0, len(formats)): + if formats[ dataset_idx ] != 'accessions': + data_paths += ' '.join( path for path in filepaths[ dataset_idx ].split( ',' ) ) + # ntcard + printLog( outlogfile, 'Estimating the Bloom Filter size with ntcard' ) + if len( data_paths ) > 0: + ntcard_res_filepath = os.path.join( outdirpath, 'freq_k' + str( klen ) + '.hist' ) + ntcard_exitcode = os.system( 'ntcard --kmer=' + str( klen ) + ' ' + data_paths ) + print 'ntcard --kmer=' + str( klen ) + ' ' + data_paths + if ntcard_exitcode > 0: + printLog( outlogfile, '> [exitcode: ' + str(ntcard_exitcode) + '] an error with ntcard has occurred', exitcode=ERR_EXIT_CODE, exit=True ) + else: + if os.path.exists( ntcard_res_filepath ): + os.rename( ntcard_res_filepath, os.path.join( outdirpath, 'ntcard' + str( klen ) + '.txt' ) ) + ntcard_res_filepath = os.path.join( outdirpath, 'ntcard' + str( klen ) + '.txt' ) + var_F0 = None + var_f1 = None + with open( ntcard_res_filepath ) as ntcard_res: + for line in ntcard_res: + line = line.strip() + if line: + line_split = line.split( '\t' ) + if len(line_split) == 2: + if line_split[0] == 'F0': + var_F0 = int( line_split[1] ) + elif line_split[0] == 'f1': + var_f1 = int( line_split[1] ) + if var_F0 is not None and var_f1 is not None: + break + if var_F0 is not None and var_f1 is not None: + bfsize = var_F0 - var_f1 + printLog( outlogfile, '> estimated Bloom Filter size: ' + str(bfsize) ) + else: + printLog( outlogfile, '> an error has occurred while estimating the Bloom Filter size', exitcode=ERR_EXIT_CODE, exit=True ) + else: + printLog( outlogfile, '> an error with ntcard has occurred', exitcode=ERR_EXIT_CODE, exit=True ) + else: + printLog( outlogfile, '> unable to estimate the Bloom Filter size', exitcode=ERR_EXIT_CODE, exit=True ) + + if bfsize > 0: + for dataset_idx in range(0, len(formats)): + if formats[ dataset_idx ] == 'accessions': + with open( filepaths[ dataset_idx ] ) as accessions_file: + for line in accessions_file: + accession = line.split( '\t' )[0].strip().upper() + if accession in accessions: + curr_format = accessions[ accession ][ 'format' ] + curr_compressed = 'uncompress' + curr_filepath = accessions[ accession ][ 'filepath' ] + curr_filename = accessions[ accession ][ 'filename' ] + printLog( outlogfile, 'Processing \"' + accession + '\" ( format=\"' + curr_format + + '\", compressed=\"' + str(False) + '\", fixed_name=\"' + curr_filename + '\" )' ) + print 'sh ' + tooldirpath + 'makebf.sh ' + curr_filepath + ' ' + curr_filename + ' ' + curr_format + ' ' + str(curr_compressed) + ' ' + outdirpath + ' ' + str( klen ) + ' ' + str( minabundances[ dataset_idx ] ) + ' ' + str( bfsize ) + ' 1 1' + makebf_exitcode = os.system( 'sh ' + tooldirpath + 'makebf.sh ' + curr_filepath + ' ' + curr_filename + ' ' + + curr_format + ' ' + str(curr_compressed) + ' ' + outdirpath + ' ' + str( klen ) + ' ' + + str( minabundances[ dataset_idx ] ) + ' ' + str( bfsize ) + ' 1 1' ) + if makebf_exitcode > 0: + printLog( outlogfile, '> [exitcode: ' + str(makebf_exitcode) + '] Bloom Filter file: FAILED ( \"' + accession + '\" will be excluded )' ) + else: + printLog( outlogfile, '> Bloom Filter file: \"' + curr_filename + '.bf\"' ) + else: + curr_format = '.' + formats[ dataset_idx ].lower() + curr_compressed = '.gz' if compressed[ dataset_idx ] else 'uncompress' + curr_filepaths = filepaths[ dataset_idx ].split( ',' ) + curr_filenames = filenames[ dataset_idx ].split( ',' ) + for curr_idx in range(0, len(curr_formats)): + curr_filename_fixed = ''.join( c for c in curr_filenames[ curr_idx ] if c in VALID_CHARS ) + printLog( outlogfile, 'Processing \"' + curr_filenames[ curr_idx ] + '\" ( format=\"' + curr_format + + '\", compressed=\"' + str(compressed[ dataset_idx ]) + '\", fixed_name=\"' + curr_filename_fixed + '\" )' ) + if compressed[ dataset_idx ]: + makebf_exitcode = os.system( 'sh ' + tooldirpath + 'makebf.sh ' + curr_filepaths[ dataset_idx ] + ' ' + curr_filename_fixed + ' ' + + curr_format + ' ' + str(curr_compressed) + ' ' + outdirpath + ' ' + str( klen ) + ' ' + + str( minabundances[ dataset_idx ] ) + ' ' + str( bfsize ) + ' 0 1' ) + else: + makebf_exitcode = os.system( 'sh ' + tooldirpath + 'makebf.sh ' + curr_filepaths[ dataset_idx ] + ' ' + curr_filename_fixed + ' ' + + curr_format + ' ' + str(curr_compressed) + ' ' + outdirpath + ' ' + str( klen ) + ' ' + + str( minabundances[ dataset_idx ] ) + ' ' + str( bfsize ) + ' 0 0' ) + if makebf_exitcode > 0: + printLog( outlogfile, '> [exitcode: ' + str(makebf_exitcode) + '] Bloom Filter file: FAILED ( \"' + curr_filenames[ curr_idx ] + '\" will be excluded )' ) + else: + printLog( outlogfile, '> Bloom Filter file: \"' + curr_filename_fixed + '.bf\"' ) + # Create a tree topology + printLog( outlogfile, 'Creating a tree topology file' ) + bf_counter = len( glob.glob1( outdirpath, '*.bf' ) ) + if bf_counter > 0: + cluster_exitcode = os.system( 'sh ' + tooldirpath + 'cluster.sh ' + outdirpath + ' ' + str( bfsize ) ) + if cluster_exitcode > 0: + printLog( outlogfile, '> [exitcode: ' + str(cluster_exitcode) + '] an error has occurred during the creation of the topology file', exitcode=ERR_EXIT_CODE, exit=True ) + else: + # Build the HowDeSBT nodes + if os.path.exists( os.path.join( outdirpath, 'leafnames.txt' ) ): + printLog( outlogfile, 'Building the Bloom Filter files for the tree' ) + build_exitcode = os.system( 'sh ' + tooldirpath + 'build.sh ' + outdirpath ) + if build_exitcode > 0: + printLog( outlogfile, '> [exitcode: ' + str(build_exitcode) + '] an error has occurred during the creation of the Bloom Filter files for the tree', exitcode=ERR_EXIT_CODE, exit=True ) + else: + printLog( outlogfile, '> the tree has been successfully built: \"howde.txt\"', exitcode=OK_EXIT_CODE, exit=True ) + ''' + howde_filepath = os.path.join( outdirpath, 'howde.txt' ) + howde_galaxy_filepath = os.path.join( outdirpath, 'howde_galaxy.txt' ) + howde_galaxy = open( howde_galaxy_filepath, 'w' ) + with open( howde_filepath ) as howde_file: + for line in howde_file: + line = line.strip() + if line: + # trim stars * and get node name + # find galaxy file path to the node name + # rewrite path with stars + howde_galaxy.close() + ''' + else: + printLog( outlogfile, '> an error has occurred during the creation of the topology file', exitcode=ERR_EXIT_CODE, exit=True ) + else: + printLog( outlogfile, '> no Bloom Filter files found', exitcode=ERR_EXIT_CODE, exit=True ) + else: + printLog( outlogfile, '> ERROR: the Bloom Filter size is ' + str( bfsize ), exitcode=ERR_EXIT_CODE, exit=True ) + else: + printLog( outlogfile, 'Something went wrong with the input parameters', exitcode=ERR_EXIT_CODE, exit=True ) + +def __main__(): + # Parse the command line options + usage = ("Usage: create.py --formats file_formats --filepaths file_paths --filenames file_names " + "--compressed file_compressed --minabundance min_abundance --qualitythresholds quality_thresholds " + "--klen kmer_len --bfsize bloom_filter_size --outfile out_log_file_path --outdir out_dir_path") + parser = optparse.OptionParser(usage = usage) + parser.add_option("-v", "--version", action="store_true", dest="version", + default=False, help="display version and exit") + parser.add_option("-f", "--formats", type="string", + action="store", dest="formats", help="list of file formats separated by a tab character") + parser.add_option("-p", "--filepaths", type="string", + action="store", dest="filepaths", help="list of input file paths separated by a tab character") + parser.add_option("-n", "--filenames", type="string", + action="store", dest="filenames", help="list of input file names separated by a tab character") + parser.add_option("-c", "--compressed", type="string", + action="store", dest="compressed", help="list of compressed flags related to the imput files separated by a tab character") + parser.add_option("-m", "--minabundances", type="string", + action="store", dest="minabundances", help="list of blooom filter minimum abundances related to the imput files separated by a tab character") + parser.add_option("-q", "--qualitythresholds", type="string", + action="store", dest="qualitythresholds", help="list of quality thresholds related to the imput files separated by a tab character") + parser.add_option("-k", "--klen", type="int", default=21, + action="store", dest="klen", help="k-mer length") + parser.add_option("-b", "--bfsize", type="int", default=-1, + action="store", dest="bfsize", help="bloom filter size") + parser.add_option("-o", "--outfile", type="string", default="sbtres.txt", + action="store", dest="outfile", help="output log file path") + parser.add_option("-d", "--outdir", type="string", default="sbtres.txt", + action="store", dest="outdir", help="output directory path") + parser.add_option("-t", "--tooldir", type="string", default="./", + action="store", dest="tooldir", help="tool directory path") + + (options, args) = parser.parse_args() + if options.version: + print __version__ + else: + createSBT( options, args ) + +if __name__ == "__main__": __main__() diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/create.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/btman-1.0.0/create.xml Mon Mar 04 08:30:03 2019 -0500 @@ -0,0 +1,173 @@ + + + a Sequence Bloom Tree + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/dataset.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/btman-1.0.0/dataset.tsv Mon Mar 04 08:30:03 2019 -0500 @@ -0,0 +1,11 @@ +SRR833714 +SRR833713 +SRR833715 +SRR567161 +SRR567146 +SRR191393 +SRR191449 +SRR191448 +SRR191447 +SRR191446 +SRR191445 diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/btman-1.0.0/macros.xml Mon Mar 04 08:30:03 2019 -0500 @@ -0,0 +1,16 @@ + + + + python + sra-tools + ntcard + howdesbt + + + + + + 10.1101/090464 + + + \ No newline at end of file diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/makebf.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/btman-1.0.0/makebf.sh Mon Mar 04 08:30:03 2019 -0500 @@ -0,0 +1,33 @@ +#!/bin/bash + +expPath=$1 +expName=$2 +expFormat=$3 +expCompress=$4 + +outExpDir=$5 + +klen=$6 +minab=$7 +bfsize=$8 + +rmCompressed=$9 +rmSource=${10} + +cd ${outExpDir} + +if [ "${expCompress}" == ".gz" ]; then + gzip -dc ${expPath} > ${expName}${expFormat} + howdesbt makebf K=${klen} --min=${minab} --bits=${bfsize} ${expName}${expFormat} --out=${expName}.bf + if [ "${rmCompressed}" -eq "1" ]; then + rm ${expPath} + fi + if [ "${rmSource}" -eq "1" ]; then + rm ${expName}${expFormat} + fi +else + howdesbt makebf K=${klen} --min=${minab} --bits=${bfsize} ${expPath} --out=${expName}.bf + if [ "${rmSource}" -eq "1" ]; then + rm ${expPath} + fi +fi diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/query.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/btman-1.0.0/query.py Mon Mar 04 08:30:03 2019 -0500 @@ -0,0 +1,148 @@ +#!/usr/bin/env python + +import sys, os, optparse, shutil + +__version__ = "1.0.0" +VALID_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' +# in the case of collections, exitcodes equal to 0 and 1 are not considered errors +ERR_EXIT_CODE = 2 +OK_EXIT_CODE = 0 + +def printLog( logfilepath, message, exitcode=OK_EXIT_CODE, exit=False ): + print message + with open( logfilepath, 'a+' ) as out: + out.write( message + '\n' ) + if exit: + sys.exit( exitcode ) + +def querySBT( options, args ): + output_dir_path = options.outputdir + outlogfile = options.outfile + + tree_file_paths = options.treep.split( ',' ) + tree_file_names = options.treen.split( ',' ) + tree_def_filepath = None + leafnames_filepath = None + for idx, tree_file_name in enumerate( tree_file_names ): + if tree_file_name == 'howde': + tree_def_filepath = tree_file_paths[ idx ] + elif tree_file_name == 'leafnames': + leafnames_filepath = tree_file_paths[ idx ] + if tree_def_filepath is not None and leafnames_filepath is not None: + break + + if tree_def_filepath is not None and leafnames_filepath is not None: + leafnames_counter = 0 + with open( leafnames_filepath ) as leafnames_file: + for line in leafnames_file: + if line.strip(): + leafnames_counter += 1 + if leafnames_counter > 0: + printLog( outlogfile, 'The selected collection contains a valid tree' ) + shutil.copyfile( tree_def_filepath, 'howde.txt' ) + tree_def_filepath = 'howde.txt' + for idx, tree_file_name in enumerate( tree_file_names ): + if tree_file_name.endswith( 'detbrief.rrr' ): + shutil.copyfile( tree_file_paths[ idx ], tree_file_name + '.bf' ) + + printLog( outlogfile, 'Creating batch of queries' ) + # create tmp batch file + batch_file_name = 'queries.fa' + batch_file = open( batch_file_name, 'w' ) + + comma_sep_file_paths = options.files + # check if options.files contains at least one file path + if comma_sep_file_paths is not None: + # split file paths + file_paths = comma_sep_file_paths.split(",") + # split file names + file_names = options.names.split(",") + for idx, file_path in enumerate(file_paths): + fixed_file_name = ''.join( c for c in file_names[ idx ] if c in VALID_CHARS ) + printLog( outlogfile, '> processing file ' + file_names[ idx ] + ' ( fixed_name=\"' + fixed_file_name + '\" ) ' ) + with open(file_path, 'r') as content_file: + for line in content_file: + line = line.strip() + if line: + line_split = line.strip().split("\t") # split on tab + if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line + original_seq_id = line_split[0] + # fix seq_id using valid chars only + seq_id = ''.join( c for c in original_seq_id if c in VALID_CHARS ) + printLog( outlogfile, '> sequence ' + original_seq_id + ' ( fixed_name=\"' + seq_id + '\" )' ) + seq_text = line_split[1] + + # write on batch file + batch_file.write( '> ' + fixed_file_name + '_' + seq_id + '\n' + seq_text + '\n' ) + batch_file.close() + # query the tree + printLog( outlogfile, 'Querying the tree' ) + query_res_file_path = os.path.abspath( 'answer.txt' ) + sort_param = '--sort' + if options.sort == 0: + sort_param = '' + query_exitcode = os.system( 'howdesbt query --tree=' + os.path.abspath( tree_def_filepath ) + ' ' + os.path.abspath( batch_file_name ) + '=' + str(options.threshold) + ' --out=' + query_res_file_path ) + ' ' + sort_param + if query_exitcode > 0: + printLog( outlogfile, '> ERROR: an error has occurred while querying the tree with the sequence [id: ' + seq_id + '] in input file ' + file_names[ idx ] ) + else: + if os.path.exists( query_res_file_path ): + with open( query_res_file_path ) as query_res_file: + file_path = '' + theta_matches = 0 + for line in query_res_file: + line = line.strip() + if line: + if line.startswith( '*' ): + line_split = line.split( ' ' ) + theta_matches = int( line_split[ 1 ] ) + file_name = line_split[ 0 ].replace( '*', '' ) + file_path = os.path.join( output_dir_path, file_name + '_txt' ) + open( file_path, 'a' ).close() + else: + res_file = open( file_path, 'a+' ) + fraction = str( theta_matches ) + '/' + str( leafnames_counter ) + score = format( round( float( theta_matches ) / float( leafnames_counter ) , 6 ), '6f' ) + res_file.write( line + '\t' + fraction + '\t' + score + '\n' ) + res_file.close() + else: + printLog( outlogfile, 'An error has occurred while querying the tree', exitcode=ERR_EXIT_CODE, exit=True ) + else: + printLog( outlogfile, 'The selected collection does not contain a valid tree', exitcode=ERR_EXIT_CODE, exit=True ) + else: + printLog( outlogfile, 'The selected collection does not contain a valid tree', exitcode=ERR_EXIT_CODE, exit=True ) + +def __main__(): + # Parse the command line options + usage = "Usage: query.py --files comma_sep_file_paths --names comma_seq_file_names --sequences sequences_text --search search_mode --exact exact_alg --sthreshold threshold --outputdir output_dir_path" + parser = optparse.OptionParser(usage = usage) + parser.add_option("-v", "--version", action="store_true", dest="version", + default=False, help="display version and exit") + parser.add_option("-f", "--files", type="string", + action="store", dest="files", help="comma separated files path") + parser.add_option("-n", "--names", type="string", + action="store", dest="names", help="comma separated names associated to the files specified in --files") + parser.add_option("-k", "--treep", type="string", + action="store", dest="treep", help="paths of files in collection") + parser.add_option("-m", "--treen", type="string", + action="store", dest="treen", help="names of files in collection") + parser.add_option("-t", "--threshold", type="float", default=0.7, + action="store", dest="threshold", help="search threshold") + parser.add_option("-s", "--sort", type="int", default=1, + action="store", dest="sort", help="sort results") + parser.add_option("-o", "--outputdir", type="string", default="output", + action="store", dest="outputdir", help="output directory (collection) path") + parser.add_option("-r", "--outfile", type="string", default="query.txt", + action="store", dest="outfile", help="output log file path") + + (options, args) = parser.parse_args() + if options.version: + print __version__ + else: + # create output dir (collection) + output_dir_path = options.outputdir + if not os.path.exists(output_dir_path): + os.makedirs(output_dir_path) + + querySBT( options, args ) + +if __name__ == "__main__": __main__() diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/query.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/btman-1.0.0/query.tsv Mon Mar 04 08:30:03 2019 -0500 @@ -0,0 +1,3 @@ +0 CCAACCAAAGGGAAAACTTTTTTCCGACTTTGGCCTAAAGGGTTTAACGGCCAAGTCAGAAGGGAAAAAGTTGCGCCAAAAATGGCGTTAAAATGTGTAATCAGAGAAGCGACACGAAAAGGGGATCAGCTCTTGGCTGGCAATTGGTAGGTCAGAGGTGGATTGGGAAAAGGCAAGTCAGCAACTGTCGATGACGGCGACTGACTGTTAATGAAAATTGTTTTGGCTGTGTGGAAAAAAATACGCGGGAATCCGTGAATTTTCCGAGGAGCTGGTGGAGCGAAGAAAACGGGGTGCTGCTGTTGTAAATGATTGGTGAAAGTCACACGCCCGCAGCCTTGCCAAACTAATTAACGCCAAATGGAGCTAAGGCCTTTGAATGATGGCTGCAGGCTAGCTTATGAAAAGGGGTTGAAGAGAAGTGGAAAAATTGGTAGAAAGGGATTTGCTCAAGATGCC +1 TTAATGACAGGGCCACATGATGTGAAAAAAAATCAGAAACCGAGTCAACGTGAGAAGATAGTACGTACTACCGCAAATGAATGGCCATTTCATTTGCATGTTGGGAGCAACAGAAATGAGAGAGCATCCGAAGCTAACCACAAAAATGGACTTTGCTTCATTATGCACAAACACGCCAATAAATGTAACGAGAAAGATAGTAGGAGCGAAAGACGAGACGAGACAAACAGGAAGAAGACGAGTGGACGAGTGTTTTTTGTAACGAAACTCTTAATCGCTCCTTTGCAGGCTTAAGCTGATAGTTGCTACGTTTATGCCATGAATTTCAAGATCTCTCAAATGCGTGAAAATCCAGTTTATGCGACAGACAAATTCATGTATTTGAAAAATCTTAGCTGATAGAAATCAAAGGTGATT +2 CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC \ No newline at end of file diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/query.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/btman-1.0.0/query.xml Mon Mar 04 08:30:03 2019 -0500 @@ -0,0 +1,83 @@ + + + a Sequence Bloom Tree + + macros.xml + + + + + + + + + + + + + + + + + + + + + + diff -r f02c2c58a6f9 -r be864d79c9c7 create.py --- a/create.py Fri Jan 18 10:12:40 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,132 +0,0 @@ -#!/usr/bin/env python - -# https://github.com/ross/requests-futures -# http://docs.python-requests.org/en/master/user/quickstart/#more-complicated-post-requests - -import sys, os, uuid, optparse, requests, json, time -#from requests_futures.sessions import FuturesSession - -#### NN14 #### -SERVICE_URL = "http://nn14.galaxyproject.org:8080/"; -#service_url = "http://127.0.0.1:8082/"; -CREATE_URL = SERVICE_URL+"tree/create"; -STATUS_URL = SERVICE_URL+"status/"; -############## -# query delay in seconds -QUERY_DELAY = 30; -############## - -__version__ = "1.0.0"; -ERR_EXIT_CODE = 1; -OK_EXIT_CODE = 0; - -def raiseException( exitcode, message, errorfilepath ): - with open(errorfilepath, 'w') as out: - out.write(message); - sys.exit(exitcode); - -def create_request( options, args, data ): - outfilepath = options.outfile; - cluster_id_2_query_id = { }; - - for cluster_id in data: - payload = { }; - payload["accessions"] = data[cluster_id]; - # add additional parameters to the payload - payload["qualitycontrol"] = int(options.qualitycontrol); - payload["qualitythreshold"] = float(options.qualitythreshold); - payload["klen"] = int(options.klen); - payload["minabundance"] = int(options.minabundance); - # set the content type to application/json - headers = {'Content-type': 'application/json'}; - # create a session - session = requests.Session(); - # make a synchronous post request to the create route - req = session.post(CREATE_URL, headers=headers, json=payload); - resp_code = req.status_code; - #print(str(req.content)+"\n\n"); - if resp_code == requests.codes.ok: - resp_content = str(req.content); - # convert out to json - json_content = json.loads(resp_content); - # retrieve query id - query_id = json_content['query_id']; - cluster_id_2_query_id[cluster_id] = query_id; - else: - with open(outfilepath, 'a+') as outfile: - outfile.write( "An error has occurred while submitting data to the /tree/create endpoint for the cluster " + cluster_id + "\n\n" ); - - build_flags = [ ] - while len(build_flags) < len(cluster_id_2_query_id): - for idx, cluster_id in enumerate( cluster_id_2_query_id ): - if cluster_id not in build_flags: - query_id = cluster_id_2_query_id[ cluster_id ]; - # create a new session - session = requests.Session(); - # make a synchronous get request to the status route - status_query_url = STATUS_URL.replace("", query_id); - status_req = session.get(status_query_url); - status_resp_content = str(status_req.content); - #print(status_resp_content+"\n\n"); - # convert out to json - json_status_content = json.loads(status_resp_content); - # take a look at the state - # state attribute is always available - if json_status_content['state'] == 'SUCCESS': - build_flags.append( cluster_id ); - built_tree_id = json_status_content['results']['tree_id']; - with open(outfilepath, 'a+') as outfile: - outfile.write( "Query ID: " + str(query_id) + "\n" + "Query status: " + str(json_status_content['state']) + "\n" + "Cluster ID: " + cluster_id + "\n" + "Sequence Bloom Tree ID: " + built_tree_id + "\n\n" ); - elif json_status_content['state'] in ['FAILURE', 'REVOKED']: - build_flags.append( cluster_id ); - with open(outfilepath, 'a+') as outfile: - outfile.write( "Query ID: " + str(query_id) + "\n" + "Query status: " + str(json_status_content['state']) + "\n" + "Cluster ID: " + cluster_id + "\n\n" ); - if len(build_flags) < len(cluster_id_2_query_id): - time.sleep(QUERY_DELAY); # in seconds - return sys.exit(OK_EXIT_CODE); - -def create( options, args ): - multiple_data = {}; - experiment_list_file_path = options.explist; - with open(experiment_list_file_path) as explist: - for line in explist: - if line.strip() != "": - line_split = line.strip().split("\t"); # split on tab - if len(line_split) == 2: # 0:accession , 1:cluster_id , otherwise skip line - accession = line_split[0]; - cluster_id = line_split[1]; - if cluster_id in multiple_data: - multiple_data[cluster_id].append( accession ); - else: - multiple_data[cluster_id] = [ accession ]; - if len(multiple_data) > 0: - return create_request( options, args, multiple_data ); - else: - return raiseException( ERR_EXIT_CODE, "An error has occurred. Please be sure that your input file is valid.", options.outfile ); - -def __main__(): - # Parse the command line options - usage = "Usage: create.py --explist experiment_list --qualitycontrol quality_control --qualitythreshold quality_threshold --klen kmer_len --minabundance min_abundance --outfile output_file_path"; - parser = optparse.OptionParser(usage = usage); - parser.add_option("-v", "--version", action="store_true", dest="version", - default=False, help="display version and exit") - parser.add_option("-l", "--explist", type="string", - action="store", dest="explist", help="tabular file with a list of SRA accessions and their cluster label"); - parser.add_option("-q", "--qualitycontrol", type="int", default=0 - action="store", dest="qualitycontrol", help="flag to enable or disable the experiment quality control"); - parser.add_option("-t", "--qualitythreshold", type="float", default=0.0 - action="store", dest="qualitythreshold", help="quality threshold, if quality control is enabled only"); - parser.add_option("-k", "--klen", type="int", default=21, - action="store", dest="klen", help="k-mer length"); - parser.add_option("-m", "--minabundance", type="int", default=2, - action="store", dest="minabundance", help="minimum abundance"); - parser.add_option("-o", "--outfile", type="string", default="outfile_txt", - action="store", dest="outfile", help="output file path"); - - (options, args) = parser.parse_args(); - if options.version: - print __version__; - else: - return create( options, args ); - -if __name__ == "__main__": __main__() diff -r f02c2c58a6f9 -r be864d79c9c7 create.xml --- a/create.xml Fri Jan 18 10:12:40 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,104 +0,0 @@ - - - a Sequence Bloom Tree - - macros.xml - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r f02c2c58a6f9 -r be864d79c9c7 macros.xml --- a/macros.xml Fri Jan 18 10:12:40 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ - - - - python - requests - - - - - - 10.1101/090464 - - - \ No newline at end of file diff -r f02c2c58a6f9 -r be864d79c9c7 query.py --- a/query.py Fri Jan 18 10:12:40 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,221 +0,0 @@ -#!/usr/bin/env python - -# https://github.com/ross/requests-futures -# http://docs.python-requests.org/en/master/user/quickstart/#more-complicated-post-requests - -import sys, os, uuid, optparse, requests, json, time -#from requests_futures.sessions import FuturesSession - -#### NN14 #### -SERVICE_URL = "http://nn14.galaxyproject.org:8080/"; -#service_url = "http://127.0.0.1:8082/"; -QUERY_URL = SERVICE_URL+"tree//query"; -STATUS_URL = SERVICE_URL+"status/"; -############## -# query delay in seconds -QUERY_DELAY = 30; -############## - -__version__ = "1.0.0"; -VALID_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ ' -# in the case of collections, exitcodes equal to 0 and 1 are not considered errors -ERR_EXIT_CODE = 2; -OK_EXIT_CODE = 0; - -def raiseException( exitcode, message, output_dir_path, errorfilename ): - errorfilepath = os.path.join(output_dir_path, errorfilename+"_txt"); - with open(errorfilepath, 'w') as out: - out.write(message); - sys.exit(exitcode); - -def query_request( options, args, payload ): - output_dir_path = options.outputdir; - # add additional parameters to the payload - #payload["tree_id"] = str(options.treeid); - payload["search_mode"] = str(options.search); - payload["exact_algorithm"] = int(options.exact); - payload["search_threshold"] = float(options.sthreshold); - payload["sort"] = int(options.sortcontrol); - # set the content type to application/json - headers = {'Content-type': 'application/json'}; - - # create a session - session = requests.Session(); - # make a synchronous post request to the query route - req = session.post(QUERY_URL.replace("", str(options.treeid)), headers=headers, json=payload); - resp_code = req.status_code; - #print(str(req.content)+"\n\n"); - if resp_code == requests.codes.ok: - resp_content = str(req.content); - # convert out to json - json_content = json.loads(resp_content); - # retrieve query id - query_id = json_content['query_id']; - query_processed = False; - # results json content - json_status_content = None; - while query_processed is False: - # create a new session - session = requests.Session(); - # make a synchronous get request to the status route - status_query_url = STATUS_URL.replace("", query_id); - status_req = session.get(status_query_url); - status_resp_content = str(status_req.content); - #print(status_resp_content+"\n\n"); - # convert out to json - json_status_content = json.loads(status_resp_content); - # take a look at the state - # state attribute is always available - if json_status_content['state'] == 'SUCCESS': - query_processed = True; - break; - elif json_status_content['state'] in ['FAILURE', 'REVOKED']: - return raiseException( ERR_EXIT_CODE, "Query ID: "+str(query_id)+"\nQuery status: "+str(json_status_content['state']), output_dir_path, str(options.errorfile) ); - else: - time.sleep(QUERY_DELAY); # in seconds - - out_file_format = "tabular"; - for block in json_status_content['results']: - seq_id = block['sequence_id']; - # put response block in the output collection - output_file_path = os.path.join(output_dir_path, seq_id + "_" + out_file_format); - accessions_list = ""; - hits_block = block['hits']; - accessions_dict = { }; - is_sabutan = False; - for hit in hits_block: - if type(hit) is dict: # sabutan - #accessions_list = accessions_list + str(hit['accession_number']) + "\t" + str(hit['score']) + "\n"; - accession_number = hit['accession_number']; - #------------ - #score = hit['score']; - #score_split = score.split("/"); - #accessions_dict[accession_number] = "{0:.6f}".format(float(score_split[0])/float(score_split[1])); - #------------ - fraction = hit['fraction']; - score = hit['score']; - accession_scores = { - "fraction": str(fraction), - "score": float(score) - } - accessions_dict[accession_number] = accession_scores; - is_sabutan = True; - else: # all-some - accessions_list = accessions_list + str(hit) + "\n"; - if is_sabutan: - sorted_accessions = sorted(accessions_dict, key=lambda i: float(accessions_dict[i]["score"]), reverse=True); - for acc in sorted_accessions: - accessions_list = accessions_list + str(acc) + "\t" + str(accessions_dict[acc]["fraction"]) + "\t" + str(accessions_dict[acc]["score"]) + "\n"; - with open(output_file_path, 'w') as out: - out.write(accessions_list.strip()); - return sys.exit(OK_EXIT_CODE); - else: - return raiseException( ERR_EXIT_CODE, "Unable to query the remote server. Please try again in a while.", output_dir_path, str(options.errorfile) ); - -def query( options, args ): - output_dir_path = options.outputdir; - multiple_data = {}; - comma_sep_file_paths = options.files; - #print("files: "+str(comma_sep_file_paths)+" - "+str(type(comma_sep_file_paths))); - # check if options.files contains at least one file path - if comma_sep_file_paths is not None: - # split file paths - file_paths = comma_sep_file_paths.split(","); - # split file names - comma_sep_file_names = str(options.names); - #print("names: "+str(comma_sep_file_names)); - file_names = comma_sep_file_names.split(","); - for idx, file_path in enumerate(file_paths): - #file_name = file_names[idx]; - with open(file_path, 'r') as content_file: - for line in content_file: - if line.strip() != "": - line_split = line.strip().split("\t"); # split on tab - if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line - seq_id = line_split[0]; - # fix seq_id using valid chars only - seq_id = ''.join(e for e in seq_id if e in VALID_CHARS) - seq_text = line_split[1]; - if seq_id in multiple_data: - return raiseException( ERR_EXIT_CODE, "Error: the id '"+seq_id+"' is duplicated", output_dir_path, str(options.errorfile) ); - multiple_data[seq_id] = seq_text; - if len(multiple_data) > 0: - return query_request( options, args, multiple_data ); - #return echo( options, args ); - else: - return raiseException( ERR_EXIT_CODE, "An error has occurred. Please be sure that your input files are valid.", output_dir_path, str(options.errorfile) ); - else: - # try with the sequence in --sequence - text_content = options.sequences; - #print("sequences: "+text_content); - # check if options.sequences contains a list of sequences (one for each row) - if text_content is not None: - text_content = str(text_content); - if text_content.strip(): - # populate a dictionary with the files containing the sequences to query - text_content = text_content.strip().split("__cn__"); # split on new line - for line in text_content: - if line.strip() != "": - line_split = line.strip().split("__tc__"); # split on tab - if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line - seq_id = line_split[0]; - # fix seq_id using valid chars only - seq_id = ''.join(e for e in seq_id if e in VALID_CHARS) - seq_text = line_split[1]; - if seq_id in multiple_data: - return raiseException( ERR_EXIT_CODE, "Error: the id '"+seq_id+"' is duplicated", output_dir_path, str(options.errorfile) ); - multiple_data[seq_id] = seq_text; - if len(multiple_data) > 0: - return query_request( options, args, multiple_data ); - #return echo( options, args ); - else: - return raiseException( ERR_EXIT_CODE, "An error has occurred. Please be sure that your input files are valid.", output_dir_path, str(options.errorfile) ); - else: - return raiseException( ERR_EXIT_CODE, "You have to insert at least one row formatted as a tab delimited (ID, SEQUENCE) couple", output_dir_path, str(options.errorfile) ); - return ERR_EXIT_CODE; - -def __main__(): - # Parse the command line options - usage = "Usage: query.py --files comma_sep_file_paths --names comma_seq_file_names --sequences sequences_text --search search_mode --exact exact_alg --sthreshold threshold --outputdir output_dir_path"; - parser = optparse.OptionParser(usage = usage); - parser.add_option("-v", "--version", action="store_true", dest="version", - default=False, help="display version and exit") - parser.add_option("-f", "--files", type="string", - action="store", dest="files", help="comma separated files path"); - parser.add_option("-n", "--names", type="string", - action="store", dest="names", help="comma separated names associated to the files specified in --files"); - parser.add_option("-s", "--sequences", type="string", - action="store", dest="sequences", help="contains a list of sequences (one for each row)"); - parser.add_option("-a", "--fasta", type="string", - action="store", dest="fasta", help="contains the content of a fasta file"); - parser.add_option("-x", "--search", type="string", default="rrr", - action="store", dest="search", help="search mode"); - parser.add_option("-e", "--exact", type="int", default=0, - action="store", dest="exact", help="exact algorithm (required if search is 1 only)"); - parser.add_option("-k", "--tree", type="string", default=0, - action="store", dest="treeid", help="the id of the tree that will be queried"); - parser.add_option("-t", "--sthreshold", type="float", - action="store", dest="sthreshold", help="threshold applied to the search algrithm"); - parser.add_option("-z", "--sort", type="int", default=1, - action="store", dest="sortcontrol", help="boolean required to sort the result"); - parser.add_option("-o", "--outputdir", type="string", default="output", - action="store", dest="outputdir", help="output directory (collection) path"); - parser.add_option("-r", "--errorfile", type="string", default="error_txt", - action="store", dest="errorfile", help="error file name containing error messages"); - - # TEST - #sequences = 'NM_001169378.2__tc__atttcggatgctttggagggaggaactctagtgctgcattgattggggcgtgtgttaatgatattcccagttcgcatggcgagcatcgattcctggtacgtatgtgggccccttgactcccacttatcgcacttgtcgttcgcaatttgcatgaattccgcttcgtctgaaacgcacttgcgccagacttctccggctggtctgatctggtctgtgatccggtctggtggggcgccagttgcgtttcgagctcatcaccagtcactccgcagtcgcattctgccagaggtctccgatcaagagcgcttctccattcgagattcaaacgcagcgcggtctgacgccgccacatcgagtgaaatccatatcgatggccacattcacacaggacgagatcgacttcctgcgcagccatggcaacgagctgtgtgccaagacctggctgggattgtgggatccgaagcgggctgtgcaccagcaggagcagcgcgaactgatgatggacaagtatgagcggaagcgatactacctggagccggccagtcctcttaagtcgctggccaatgcggtcaacctgaagtcgtctgctccggcgacgaaccacactcagaatggccaccaaaatgggtatgccagcatccatttgacgcctcctgctgcccagcggacctcggccaatggattgcagaaggtggccaactcgtcgagtaactcttctggaaagacctcatcctcgatcagtaggccacactataatcaccagaacaacagccaaaacaacaatcacgatgcctttggcctgggtggcggattgagcagcctgaacagcgccggttccacatccactggagctctttccgacaccagcagttgtgctagcaatggcttcggtgcggactgcgactttgtggctgactttggctcggccaacattttcgacgccacatcggcgcgttccacaggatcgccggcggtgtcgtccgtgtcctcagtgggttccagcaatggctacgccaaggtgcagcccatccgggcagctcatctccagcagcaacagcagttgcagcagcagctgcatcagcagcagctcctcaatggcaatggtcatcagggcactgagaactttgccgacttcgatcacgctcccatctacaatgcagtggctccaccgacttttaacgattggatcagcgactggagcaggcggggcttccacgatcccttcgacgattgcgatgactcgccaccaggtgcccgccctccagcacctgcgccagctcctgctcaagttcccgcagtatcatcaccattgccaaccgtccgagaagaaccagagcttgcgtggaatttttgggaggacgagatgcgaatagaggcgcaggaaaaggagtcccaaactaaacagccggagttgggctactccttttcgattagtactactacgcccctttccccttcgaatcccttcctgccctaccttgtcagtgaggagcagcatcgaaatcatccagagaagccctccttttcgtattcgttgttcagctccatatcaaatagttcgcaagaagatcaggcggatgatcatgagatgaatgttttaaatgccaatttccatgatttctttacgtggagtgctcccttgcagaacggccatacgaccagtccgcccaagggcggaaatgcagcgatggcgcccagtgaggatcgatatgccgctcttaaggatctcgacgagcagctgcgagaactgaaggccagcgaaagcgccacagagacgcccacgcccaccagtggcaatgttcaggccacagatgcctttggtggagccctcaacaacaatccaaatcccttcaagggccagcaacagcagcagctcagcagccatgtggtgaatccattccagcagcagcaacagcagcagcaccagcagaatctctatggccagttgacgctcataccaaatgcctacggcagcagttcccagcagcagatggggcaccatctcctccagcagcagcagcagcaacagcagagcttcttcaacttcaacaacaacgggttcgccatctcgcagggtctgcccaacggctgcggcttcggcagcatgcaacccgctcctgtgatggccaacaatccctttgcagccagcggcgccatgaacaccaacaatccattcttatgagactcaacccgggagaatccgcctcgcgccacctggcagaggcgctgagccagcgaacaaagagcagacgcggaggaaccgaaccgaaattagtccattttactaacaatagcgttaatctatgtatacataatgcacgccggagagcactctttgtgtacatagcccaaatatgtacacccgaaaggctccacgctgacgctagtcctcgcggatggcggaggcggactggggcgttgatatattcttttacatggtaactctactctaacgtttacggatacggatatttgtatttgccgtttgccctagaactctatacttgtactaagcgcccatgaacacttcatccactaacatagctactaatcctcatcctagtggaggatgcagttggtccagacactctgttatttgttttatccatcctcgtacttgtctttgtcccatttagcactttcgttgcggataagaactttgtcagttattgattgtgtggccttaataagattataaaactaaatattataacgtacgactatacatatacggatacagatacagattcagacacagttagtacagatacagatatacatatacgcttttgtacctaatgaattgcttcttgtttccattgctaatcatctgcttttcgtgtgctaattttatacactagtacgtgcgatatcggccgtgcagatagattgctcagctcgcgagtcaagcctcttttggttgcacccacggcagacatttgtacatatactgtctgattgtaagcctcgtgtaatacctccattaacaccactcccccaccacccatccatcgaaccccgaatccatgactcaattcactgctcacatgtccatgcccatgccttaacgtgtcaaacattatcgaagccttaaagttatttaaaactacgaaatttcaataaaaacaaataagaacgctatc'; - #(options, args) = parser.parse_args(['-x', 'rrr', '-t', 0.5, '-s', sequences, '-o', 'collection_content']); - - (options, args) = parser.parse_args(); - if options.version: - print __version__; - else: - # create output dir (collection) - output_dir_path = options.outputdir; - if not os.path.exists(output_dir_path): - os.makedirs(output_dir_path); - - return query( options, args ); - -if __name__ == "__main__": __main__() diff -r f02c2c58a6f9 -r be864d79c9c7 query.xml --- a/query.xml Fri Jan 18 10:12:40 2019 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,94 +0,0 @@ - - - a Sequence Bloom Tree - - macros.xml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -