# HG changeset patch
# User fabio
# Date 1551706203 18000
# Node ID be864d79c9c731ce3277eaa7935cf89762bbfe32
# Parent f02c2c58a6f9522fefc26791d522cbe2ce1b2557
Uploaded 20190304
diff -r f02c2c58a6f9 -r be864d79c9c7 .shed.yml
--- a/.shed.yml Fri Jan 18 10:12:40 2019 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-name: btman
-owner: iuc
-categories:
- - Data Source
- - Web Services
-description: BloomTree Manager
-long_description: |
- A fast querying tool to identify all publicly available sequenced
- samples which express a transcript of interest
-remote_repository_url: https://github.com/fabio-cumbo/bloomtree-manager
-homepage_url: https://github.com/fabio-cumbo/bloomtree-manager
-type: unrestricted
-auto_tool_repositories:
- name_template: "{{ tool_id }}"
- descriptor_template: "Wrapper for BloomTree Manager: {{ tool_name }}."
-suite:
- name: "btman_suite"
- description: "A suite of Galaxy tools designed to work with the BloomTree Manager."
- long_description: |
- A fast querying tool to identify all publicly available sequenced
- samples which express a transcript of interest
diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/.shed.yml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/btman-1.0.0/.shed.yml Mon Mar 04 08:30:03 2019 -0500
@@ -0,0 +1,21 @@
+name: btman
+owner: iuc
+categories:
+ - Data Source
+ - Web Services
+description: BloomTree Manager
+long_description: |
+  A suite of tools to quickly create and query Sequence Bloom Trees
+  supporting determined/how split filters
+remote_repository_url: https://github.com/fabio-cumbo/bloomtree-manager
+homepage_url: https://github.com/fabio-cumbo/bloomtree-manager
+type: unrestricted
+auto_tool_repositories:
+ name_template: "{{ tool_id }}"
+ descriptor_template: "Wrapper for BloomTree Manager: {{ tool_name }}."
+suite:
+ name: "btman_suite"
+ description: "A suite of Galaxy tools designed to work with the BloomTree Manager."
+ long_description: |
+    A suite of tools to quickly create and query Sequence Bloom Trees
+    supporting determined/how split filters
diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/build.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/btman-1.0.0/build.sh Mon Mar 04 08:30:03 2019 -0500
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+# Build the HowDeSBT tree nodes from the topology file "union.txt".
+# Usage: build.sh outExpDir
+#   outExpDir  directory that contains "union.txt" and the leaf Bloom Filters;
+#              "howde.txt" is written there as well
+
+outExpDir=$1
+
+# stop here if the directory is not reachable, otherwise howdesbt would run
+# in whatever directory the script was started from
+cd "${outExpDir}" || exit 1
+
+howdesbt build --HowDe --tree=union.txt --outtree=howde.txt
diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/cluster.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/btman-1.0.0/cluster.sh Mon Mar 04 08:30:03 2019 -0500
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Create the tree topology file "union.txt" from the leaf Bloom Filters.
+# Usage: cluster.sh outExpDir bfsize
+#   outExpDir  directory that contains the "*.bf" leaf Bloom Filters
+#   bfsize     value passed to "howdesbt cluster" through --bits
+
+outExpDir=$1
+bfsize=$2
+
+# stop here if the directory is not reachable, otherwise the leaf list would
+# be built from whatever directory the script was started from
+cd "${outExpDir}" || exit 1
+
+# the list of leaves is the input of the clustering step
+ls *.bf > leafnames.txt || exit 1
+howdesbt cluster --list=leafnames.txt --bits="${bfsize}" --tree=union.txt --nodename="node{number}" --keepallnodes
diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/create.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/btman-1.0.0/create.py Mon Mar 04 08:30:03 2019 -0500
@@ -0,0 +1,233 @@
+#!/usr/bin/env python
+
+import sys, os, optparse, shutil, glob
+
+# version of the tool, printed by the --version command line option
+__version__ = "1.0.0"
+# in the case of collections, exitcodes equal to 0 and 1 are not considered errors
+ERR_EXIT_CODE = 2
+# exit code used when the process terminates without errors
+OK_EXIT_CODE = 0
+# characters kept when a name is sanitised; every other character is dropped
+VALID_CHARS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+
+def printLog( logfilepath, message, exitcode=OK_EXIT_CODE, exit=False ):
+    """Echo `message` on stdout and append it, as one line, to `logfilepath`.
+
+    logfilepath -- path of the log file (opened in append mode)
+    message     -- text to report
+    exitcode    -- status handed to sys.exit when `exit` is true
+    exit        -- when true the process terminates after the message is logged
+    """
+    print( message )
+    logfile = open( logfilepath, 'a+' )
+    try:
+        logfile.write( message + '\n' )
+    finally:
+        logfile.close()
+    if not exit:
+        return
+    sys.exit( exitcode )
+
+def downloadAccessions(formats, filepaths, outlogfile, outdirpath):
+    """Download as FASTA every accession listed in the 'accessions' datasets.
+
+    formats    -- list with the format of each input dataset; only the entries
+                  equal to 'accessions' are processed here
+    filepaths  -- list, parallel to `formats`, with the path of each dataset;
+                  an 'accessions' dataset is a text file whose first
+                  tab-separated column holds one accession per line
+    outlogfile -- path of the log file written through printLog
+    outdirpath -- directory passed to fastq-dump as --outdir
+
+    Returns a dictionary that maps every successfully downloaded accession
+    (upper case) to a dictionary with the keys 'format', 'filepath' and
+    'filename', where 'filename' is the accession restricted to VALID_CHARS.
+    Accessions for which fastq-dump fails are logged and left out.
+    """
+    # imported here because the module-level imports do not include it
+    import subprocess
+
+    downloaded_files = { }
+    for dataset_format, dataset_path in zip( formats, filepaths ):
+        if dataset_format != 'accessions':
+            continue
+        with open( dataset_path ) as accessions:
+            for line in accessions:
+                accession = line.split( '\t' )[0].strip().upper()
+                if not accession:
+                    continue
+                printLog( outlogfile, 'Downloading "' + accession + '" with the fastq-dump tool (part of the sra-tools utility)' )
+                # the accession comes from a user supplied file: it is passed as a
+                # single argument, without a shell, so it cannot be read as shell syntax
+                try:
+                    fastq_dump_exitcode = subprocess.call( [ 'fastq-dump', '--outdir', outdirpath, '--fasta', accession ] )
+                except OSError:
+                    # fastq-dump is missing or cannot be executed
+                    fastq_dump_exitcode = 127
+                if fastq_dump_exitcode != 0:
+                    printLog( outlogfile, '> FASTA file: FAILED ( "' + accession + '" will be excluded )' )
+                    continue
+                printLog( outlogfile, '> FASTA file: "' + accession + '.fasta"' )
+                downloaded_files[ accession ] = {
+                    'format': '.fasta',
+                    'filepath': os.path.join( outdirpath, accession + '.fasta' ),
+                    'filename': ''.join( c for c in accession if c in VALID_CHARS )
+                }
+    return downloaded_files
+
+def _runMakeBF( tooldirpath, filepath, filename, fileformat, compressed_ext, outdirpath, klen, minabundance, bfsize, rm_compressed, rm_source ):
+    """Run makebf.sh on a single input file and return the os.system status.
+
+    The arguments after `tooldirpath` are forwarded to the script in the
+    order of its positional parameters.
+    """
+    makebf_args = [ filepath, filename, fileformat, compressed_ext, outdirpath,
+                    klen, minabundance, bfsize, rm_compressed, rm_source ]
+    return os.system( 'sh ' + tooldirpath + 'makebf.sh ' + ' '.join( str( arg ) for arg in makebf_args ) )
+
+def _estimateBloomFilterSize( data_paths, klen, outlogfile, outdirpath ):
+    """Run ntcard on `data_paths` and return F0 - f1 as the Bloom Filter size.
+
+    ntcard is expected to write 'freq_k<klen>.hist' in `outdirpath` (the
+    current working directory of the caller); the file is renamed to
+    'ntcard<klen>.txt' before being parsed. Every failure is reported through
+    printLog with exit=True, which terminates the process.
+    """
+    printLog( outlogfile, 'Estimating the Bloom Filter size with ntcard' )
+    if not data_paths:
+        printLog( outlogfile, '> unable to estimate the Bloom Filter size', exitcode=ERR_EXIT_CODE, exit=True )
+    ntcard_command = 'ntcard --kmer=' + str( klen ) + ' ' + ' '.join( data_paths )
+    print( ntcard_command )
+    ntcard_exitcode = os.system( ntcard_command )
+    if ntcard_exitcode > 0:
+        printLog( outlogfile, '> [exitcode: ' + str(ntcard_exitcode) + '] an error with ntcard has occurred', exitcode=ERR_EXIT_CODE, exit=True )
+    hist_filepath = os.path.join( outdirpath, 'freq_k' + str( klen ) + '.hist' )
+    if not os.path.exists( hist_filepath ):
+        printLog( outlogfile, '> an error with ntcard has occurred', exitcode=ERR_EXIT_CODE, exit=True )
+    ntcard_res_filepath = os.path.join( outdirpath, 'ntcard' + str( klen ) + '.txt' )
+    os.rename( hist_filepath, ntcard_res_filepath )
+    var_F0 = None
+    var_f1 = None
+    with open( ntcard_res_filepath ) as ntcard_res:
+        for line in ntcard_res:
+            line_split = line.strip().split( '\t' )
+            if len( line_split ) == 2:
+                if line_split[0] == 'F0':
+                    var_F0 = int( line_split[1] )
+                elif line_split[0] == 'f1':
+                    var_f1 = int( line_split[1] )
+            if var_F0 is not None and var_f1 is not None:
+                break
+    if var_F0 is None or var_f1 is None:
+        printLog( outlogfile, '> an error has occurred while estimating the Bloom Filter size', exitcode=ERR_EXIT_CODE, exit=True )
+    bfsize = var_F0 - var_f1
+    printLog( outlogfile, '> estimated Bloom Filter size: ' + str(bfsize) )
+    return bfsize
+
+def createSBT( options, args ):
+    """Create a Sequence Bloom Tree from the input datasets.
+
+    options -- parsed command line options; the attributes read here are
+               outfile, outdir, tooldir, formats, filepaths, filenames,
+               compressed, minabundances, qualitythresholds, klen and bfsize
+    args    -- positional command line arguments (unused)
+
+    The list options are '|' separated with one entry per dataset; inside a
+    dataset, file paths and file names are ',' separated. A dataset whose
+    format is 'accessions' is a file of SRA accessions that are downloaded
+    first, any other format is used as the file extension of the inputs.
+    The quality thresholds are parsed and counted only: this version skips
+    the quality control procedure.
+
+    Steps: download the accessions, estimate the Bloom Filter size with
+    ntcard when bfsize is negative, run makebf.sh on every input, then
+    cluster.sh and build.sh. The function always terminates the process
+    through printLog( ..., exit=True ), with OK_EXIT_CODE when 'howde.txt'
+    has been built and ERR_EXIT_CODE otherwise.
+    """
+    outlogfile = str( options.outfile )
+    # resolve the tool directory BEFORE changing the working directory,
+    # otherwise a relative path would be resolved against the output directory
+    tooldirpath = os.path.abspath( str( options.tooldir ) )
+    if not tooldirpath.endswith( '/' ):
+        tooldirpath += '/'
+    outdirpath = os.path.abspath( str( options.outdir ) )
+    if not os.path.exists( outdirpath ):
+        os.mkdir( outdirpath )
+    # the external tools write their results in the current working directory
+    os.chdir( outdirpath )
+
+    formats = [ fo for fo in str( options.formats ).split( '|' ) if fo.strip() ]
+    filepaths = [ filepath for filepath in str( options.filepaths ).split( '|' ) if filepath.strip() ]
+    filenames = [ filename for filename in str( options.filenames ).split( '|' ) if filename.strip() ]
+    compressed = [ int(c) == 1 for c in str( options.compressed ).split( '|' ) if c.strip() ]
+    minabundances = [ int(minab) for minab in str( options.minabundances ).split( '|' ) if minab.strip() ]
+    qualitythresholds = [ float(qthres) for qthres in str( options.qualitythresholds ).split( '|' ) if qthres.strip() ]
+
+    klen = int( options.klen )
+    bfsize = int( options.bfsize )
+
+    # NOTE: printLog( ..., exit=True ) terminates the process, so the code
+    # after each of the guards below runs only when the guard did not fire
+    if not ( len(formats) == len(filepaths) == len(filenames) == len(compressed) == len(minabundances) == len(qualitythresholds) ):
+        printLog( outlogfile, 'Something went wrong with the input parameters', exitcode=ERR_EXIT_CODE, exit=True )
+
+    printLog( outlogfile, 'Retrieving experiments' )
+    accessions = downloadAccessions( formats, filepaths, outlogfile, outdirpath )
+    printLog( outlogfile, '> ' + str( len( accessions ) ) + ' experiments retrieved from the Sequence Read Archive' )
+
+    if bfsize < 0: # estimate bloom filter size
+        # one list entry per file, so that paths from different datasets are
+        # never glued together when the command line is built
+        data_paths = [ accessions[ accession ][ 'filepath' ] for accession in accessions if 'filepath' in accessions[ accession ] ]
+        for dataset_idx, dataset_format in enumerate( formats ):
+            if dataset_format != 'accessions':
+                data_paths.extend( path for path in filepaths[ dataset_idx ].split( ',' ) if path.strip() )
+        bfsize = _estimateBloomFilterSize( data_paths, klen, outlogfile, outdirpath )
+
+    if bfsize <= 0:
+        printLog( outlogfile, '> ERROR: the Bloom Filter size is ' + str( bfsize ), exitcode=ERR_EXIT_CODE, exit=True )
+
+    for dataset_idx, dataset_format in enumerate( formats ):
+        minabundance = minabundances[ dataset_idx ]
+        if dataset_format == 'accessions':
+            with open( filepaths[ dataset_idx ] ) as accessions_file:
+                for line in accessions_file:
+                    accession = line.split( '\t' )[0].strip().upper()
+                    if accession not in accessions:
+                        continue
+                    curr_format = accessions[ accession ][ 'format' ]
+                    curr_filepath = accessions[ accession ][ 'filepath' ]
+                    curr_filename = accessions[ accession ][ 'filename' ]
+                    printLog( outlogfile, 'Processing "' + accession + '" ( format="' + curr_format +
+                                          '", compressed="' + str(False) + '", fixed_name="' + curr_filename + '" )' )
+                    # trailing flags "1 1": makebf.sh deletes the downloaded FASTA once it has been processed
+                    makebf_exitcode = _runMakeBF( tooldirpath, curr_filepath, curr_filename, curr_format, 'uncompress',
+                                                  outdirpath, klen, minabundance, bfsize, 1, 1 )
+                    if makebf_exitcode > 0:
+                        printLog( outlogfile, '> [exitcode: ' + str(makebf_exitcode) + '] Bloom Filter file: FAILED ( "' + accession + '" will be excluded )' )
+                    else:
+                        printLog( outlogfile, '> Bloom Filter file: "' + curr_filename + '.bf"' )
+        else:
+            curr_format = '.' + dataset_format.lower()
+            is_compressed = compressed[ dataset_idx ]
+            curr_compressed = '.gz' if is_compressed else 'uncompress'
+            # trailing flags: the input is never deleted; for a compressed
+            # input only the temporary uncompressed copy is ("0 1" vs "0 0")
+            rm_source = 1 if is_compressed else 0
+            curr_filepaths = filepaths[ dataset_idx ].split( ',' )
+            curr_filenames = filenames[ dataset_idx ].split( ',' )
+            for curr_filepath, curr_filename in zip( curr_filepaths, curr_filenames ):
+                curr_filename_fixed = ''.join( c for c in curr_filename if c in VALID_CHARS )
+                printLog( outlogfile, 'Processing "' + curr_filename + '" ( format="' + curr_format +
+                                      '", compressed="' + str(is_compressed) + '", fixed_name="' + curr_filename_fixed + '" )' )
+                makebf_exitcode = _runMakeBF( tooldirpath, curr_filepath, curr_filename_fixed, curr_format, curr_compressed,
+                                              outdirpath, klen, minabundance, bfsize, 0, rm_source )
+                if makebf_exitcode > 0:
+                    printLog( outlogfile, '> [exitcode: ' + str(makebf_exitcode) + '] Bloom Filter file: FAILED ( "' + curr_filename + '" will be excluded )' )
+                else:
+                    printLog( outlogfile, '> Bloom Filter file: "' + curr_filename_fixed + '.bf"' )
+
+    # Create a tree topology
+    printLog( outlogfile, 'Creating a tree topology file' )
+    bf_counter = len( glob.glob( os.path.join( outdirpath, '*.bf' ) ) )
+    if bf_counter == 0:
+        printLog( outlogfile, '> no Bloom Filter files found', exitcode=ERR_EXIT_CODE, exit=True )
+    cluster_exitcode = os.system( 'sh ' + tooldirpath + 'cluster.sh ' + outdirpath + ' ' + str( bfsize ) )
+    if cluster_exitcode > 0:
+        printLog( outlogfile, '> [exitcode: ' + str(cluster_exitcode) + '] an error has occurred during the creation of the topology file', exitcode=ERR_EXIT_CODE, exit=True )
+    if not os.path.exists( os.path.join( outdirpath, 'leafnames.txt' ) ):
+        printLog( outlogfile, '> an error has occurred during the creation of the topology file', exitcode=ERR_EXIT_CODE, exit=True )
+
+    # Build the HowDeSBT nodes
+    printLog( outlogfile, 'Building the Bloom Filter files for the tree' )
+    build_exitcode = os.system( 'sh ' + tooldirpath + 'build.sh ' + outdirpath )
+    if build_exitcode > 0:
+        printLog( outlogfile, '> [exitcode: ' + str(build_exitcode) + '] an error has occurred during the creation of the Bloom Filter files for the tree', exitcode=ERR_EXIT_CODE, exit=True )
+    # NOTE: a post-processing step that rewrites the node names of howde.txt
+    # into Galaxy file paths was sketched here but never implemented
+    printLog( outlogfile, '> the tree has been successfully built: "howde.txt"', exitcode=OK_EXIT_CODE, exit=True )
+
+def __main__():
+    """Parse the command line and either print the version or create the tree.
+
+    The list options (--formats, --filepaths, --filenames, --compressed,
+    --minabundances, --qualitythresholds) hold one '|' separated entry per
+    input dataset, which is how createSBT splits them.
+    """
+    # Parse the command line options
+    usage = ("Usage: create.py --formats file_formats --filepaths file_paths --filenames file_names "
+             "--compressed file_compressed --minabundances min_abundances --qualitythresholds quality_thresholds "
+             "--klen kmer_len --bfsize bloom_filter_size --outfile out_log_file_path --outdir out_dir_path "
+             "--tooldir tool_dir_path")
+    parser = optparse.OptionParser(usage = usage)
+    parser.add_option("-v", "--version", action="store_true", dest="version",
+                      default=False, help="display version and exit")
+    parser.add_option("-f", "--formats", type="string",
+                      action="store", dest="formats", help="list of file formats separated by a pipe character ('|')")
+    parser.add_option("-p", "--filepaths", type="string",
+                      action="store", dest="filepaths", help="list of input file paths separated by a pipe character ('|'); paths of the same dataset are comma separated")
+    parser.add_option("-n", "--filenames", type="string",
+                      action="store", dest="filenames", help="list of input file names separated by a pipe character ('|'); names of the same dataset are comma separated")
+    parser.add_option("-c", "--compressed", type="string",
+                      action="store", dest="compressed", help="list of compressed flags (0 or 1) related to the input files separated by a pipe character ('|')")
+    parser.add_option("-m", "--minabundances", type="string",
+                      action="store", dest="minabundances", help="list of bloom filter minimum abundances related to the input files separated by a pipe character ('|')")
+    parser.add_option("-q", "--qualitythresholds", type="string",
+                      action="store", dest="qualitythresholds", help="list of quality thresholds related to the input files separated by a pipe character ('|')")
+    parser.add_option("-k", "--klen", type="int", default=21,
+                      action="store", dest="klen", help="k-mer length")
+    # a negative size asks createSBT to estimate it with ntcard
+    parser.add_option("-b", "--bfsize", type="int", default=-1,
+                      action="store", dest="bfsize", help="bloom filter size (estimated with ntcard when negative)")
+    parser.add_option("-o", "--outfile", type="string", default="sbtres.txt",
+                      action="store", dest="outfile", help="output log file path")
+    # NOTE(review): the default is the same as --outfile, which looks like a
+    # copy/paste leftover; left unchanged so existing invocations keep working
+    parser.add_option("-d", "--outdir", type="string", default="sbtres.txt",
+                      action="store", dest="outdir", help="output directory path")
+    parser.add_option("-t", "--tooldir", type="string", default="./",
+                      action="store", dest="tooldir", help="tool directory path")
+
+    (options, args) = parser.parse_args()
+    if options.version:
+        print( __version__ )
+    else:
+        createSBT( options, args )
+
+if __name__ == "__main__": __main__()
diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/create.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/btman-1.0.0/create.xml Mon Mar 04 08:30:03 2019 -0500
@@ -0,0 +1,173 @@
+
+
+ a Sequence Bloom Tree
+
+ macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/dataset.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/btman-1.0.0/dataset.tsv Mon Mar 04 08:30:03 2019 -0500
@@ -0,0 +1,11 @@
+SRR833714
+SRR833713
+SRR833715
+SRR567161
+SRR567146
+SRR191393
+SRR191449
+SRR191448
+SRR191447
+SRR191446
+SRR191445
diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/btman-1.0.0/macros.xml Mon Mar 04 08:30:03 2019 -0500
@@ -0,0 +1,16 @@
+
+
+
+ python
+ sra-tools
+ ntcard
+ howdesbt
+
+
+
+
+
+ 10.1101/090464
+
+
+
\ No newline at end of file
diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/makebf.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/btman-1.0.0/makebf.sh Mon Mar 04 08:30:03 2019 -0500
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+# Build the Bloom Filter "<expName>.bf" of one experiment with "howdesbt makebf".
+# Usage: makebf.sh expPath expName expFormat expCompress outExpDir klen minab bfsize rmCompressed rmSource
+#   expPath       input file
+#   expName       name used for the output Bloom Filter (and the uncompressed copy)
+#   expFormat     file extension, with the leading dot, of the uncompressed copy
+#   expCompress   ".gz" when the input is gzip compressed, anything else otherwise
+#   outExpDir     working/output directory
+#   klen minab bfsize   values of K, --min and --bits for howdesbt
+#   rmCompressed  1 to delete the compressed input afterwards
+#   rmSource      1 to delete the uncompressed input (or copy) afterwards
+# The exit status is the one of "howdesbt makebf".
+
+expPath=$1
+expName=$2
+expFormat=$3
+expCompress=$4
+
+outExpDir=$5
+
+klen=$6
+minab=$7
+bfsize=$8
+
+rmCompressed=$9
+rmSource=${10}
+
+cd "${outExpDir}" || exit 1
+
+# "=" instead of "==": create.py starts this script with "sh", which is not
+# guaranteed to be bash, and "==" is not a POSIX test operator
+if [ "${expCompress}" = ".gz" ]; then
+    if ! gzip -dc "${expPath}" > "${expName}${expFormat}"; then
+        rm -f "${expName}${expFormat}"
+        exit 1
+    fi
+    howdesbt makebf K="${klen}" --min="${minab}" --bits="${bfsize}" "${expName}${expFormat}" --out="${expName}.bf"
+    status=$?
+    if [ "${rmCompressed}" -eq "1" ]; then
+        rm -f "${expPath}"
+    fi
+    if [ "${rmSource}" -eq "1" ]; then
+        rm -f "${expName}${expFormat}"
+    fi
+else
+    howdesbt makebf K="${klen}" --min="${minab}" --bits="${bfsize}" "${expPath}" --out="${expName}.bf"
+    status=$?
+    if [ "${rmSource}" -eq "1" ]; then
+        rm -f "${expPath}"
+    fi
+fi
+
+# report the howdesbt status, not the status of the clean-up above
+exit ${status}
diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/query.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/btman-1.0.0/query.py Mon Mar 04 08:30:03 2019 -0500
@@ -0,0 +1,148 @@
+#!/usr/bin/env python
+
+import sys, os, optparse, shutil
+
+# version of the tool, printed by the --version command line option
+__version__ = "1.0.0"
+# characters kept when a file name or sequence id is sanitised; every other character is dropped
+VALID_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
+# in the case of collections, exitcodes equal to 0 and 1 are not considered errors
+ERR_EXIT_CODE = 2
+# exit code used when the process terminates without errors
+OK_EXIT_CODE = 0
+
+def printLog( logfilepath, message, exitcode=OK_EXIT_CODE, exit=False ):
+    """Report `message` on stdout and as a new line of the log file.
+
+    logfilepath -- path of the log file (opened in append mode)
+    message     -- text to report
+    exitcode    -- status handed to sys.exit when `exit` is true
+    exit        -- when true the process terminates after the message is logged
+    """
+    print( message )
+    logfile = open( logfilepath, 'a+' )
+    try:
+        logfile.write( message + '\n' )
+    finally:
+        logfile.close()
+    if not exit:
+        return
+    sys.exit( exitcode )
+
+def querySBT( options, args ):
+    """Query the Sequence Bloom Tree of a collection with a batch of sequences.
+
+    options -- parsed command line options; the attributes read here are
+               treep, treen, files, names, threshold, sort, outputdir, outfile
+    args    -- positional command line arguments (unused)
+
+    options.treep / options.treen are comma separated, parallel lists with
+    the paths and the names of the files of the collection; the entries named
+    'howde' and 'leafnames' are required, the entries whose name ends with
+    'detbrief.rrr' are copied to the working directory with a '.bf' suffix.
+    options.files / options.names are comma separated, parallel lists of
+    tabular query files with two columns: sequence id and sequence.
+
+    All the sequences are written to 'queries.fa' and searched with a single
+    'howdesbt query' run. For every query reported in 'answer.txt' a file
+    '<query>_txt' is created in options.outputdir; each of its lines holds a
+    result line followed by '<matches>/<leaves>' and the corresponding ratio.
+    The process is terminated with ERR_EXIT_CODE when the collection has no
+    valid tree or the answer file is missing.
+    """
+    output_dir_path = options.outputdir
+    outlogfile = options.outfile
+
+    tree_file_paths = options.treep.split( ',' )
+    tree_file_names = options.treen.split( ',' )
+    tree_def_filepath = None
+    leafnames_filepath = None
+    for tree_file_name, tree_file_path in zip( tree_file_names, tree_file_paths ):
+        if tree_file_name == 'howde':
+            tree_def_filepath = tree_file_path
+        elif tree_file_name == 'leafnames':
+            leafnames_filepath = tree_file_path
+        if tree_def_filepath is not None and leafnames_filepath is not None:
+            break
+
+    # NOTE: printLog( ..., exit=True ) terminates the process, so the code
+    # after each of the guards below runs only when the guard did not fire
+    if tree_def_filepath is None or leafnames_filepath is None:
+        printLog( outlogfile, 'The selected collection does not contain a valid tree', exitcode=ERR_EXIT_CODE, exit=True )
+
+    with open( leafnames_filepath ) as leafnames_file:
+        leafnames_counter = sum( 1 for line in leafnames_file if line.strip() )
+    if leafnames_counter == 0:
+        printLog( outlogfile, 'The selected collection does not contain a valid tree', exitcode=ERR_EXIT_CODE, exit=True )
+
+    printLog( outlogfile, 'The selected collection contains a valid tree' )
+    shutil.copyfile( tree_def_filepath, 'howde.txt' )
+    tree_def_filepath = 'howde.txt'
+    for tree_file_name, tree_file_path in zip( tree_file_names, tree_file_paths ):
+        if tree_file_name.endswith( 'detbrief.rrr' ):
+            shutil.copyfile( tree_file_path, tree_file_name + '.bf' )
+
+    printLog( outlogfile, 'Creating batch of queries' )
+    # create tmp batch file
+    batch_file_name = 'queries.fa'
+    with open( batch_file_name, 'w' ) as batch_file:
+        comma_sep_file_paths = options.files
+        # check if options.files contains at least one file path
+        if comma_sep_file_paths is not None:
+            file_paths = comma_sep_file_paths.split( "," )
+            file_names = options.names.split( "," )
+            for file_path, file_name in zip( file_paths, file_names ):
+                fixed_file_name = ''.join( c for c in file_name if c in VALID_CHARS )
+                printLog( outlogfile, '> processing file ' + file_name + ' ( fixed_name="' + fixed_file_name + '" ) ' )
+                with open( file_path, 'r' ) as content_file:
+                    for line in content_file:
+                        line_split = line.strip().split( "\t" ) # split on tab
+                        if len( line_split ) != 2: # 0:id , 1:seq , otherwise skip line
+                            continue
+                        original_seq_id = line_split[0]
+                        # fix seq_id using valid chars only
+                        seq_id = ''.join( c for c in original_seq_id if c in VALID_CHARS )
+                        printLog( outlogfile, '> sequence ' + original_seq_id + ' ( fixed_name="' + seq_id + '" )' )
+                        seq_text = line_split[1]
+                        # write on batch file
+                        batch_file.write( '> ' + fixed_file_name + '_' + seq_id + '\n' + seq_text + '\n' )
+
+    # query the tree
+    printLog( outlogfile, 'Querying the tree' )
+    query_res_file_path = os.path.abspath( 'answer.txt' )
+    sort_param = '' if options.sort == 0 else '--sort'
+    # the sort flag is part of the command line: appending it to the value
+    # returned by os.system would add a string to an integer
+    query_command = ( 'howdesbt query --tree=' + os.path.abspath( tree_def_filepath ) + ' ' +
+                      os.path.abspath( batch_file_name ) + '=' + str(options.threshold) +
+                      ' --out=' + query_res_file_path + ' ' + sort_param )
+    query_exitcode = os.system( query_command )
+    if query_exitcode > 0:
+        # the whole batch is searched at once, so the failure cannot be
+        # attributed to a single sequence or input file
+        printLog( outlogfile, '> ERROR: [exitcode: ' + str(query_exitcode) + '] an error has occurred while querying the tree' )
+        return
+
+    if not os.path.exists( query_res_file_path ):
+        printLog( outlogfile, 'An error has occurred while querying the tree', exitcode=ERR_EXIT_CODE, exit=True )
+
+    with open( query_res_file_path ) as query_res_file:
+        file_path = ''
+        fraction = ''
+        score = ''
+        for line in query_res_file:
+            line = line.strip()
+            if not line:
+                continue
+            if line.startswith( '*' ):
+                # header of a query: '*<name> <number of matches>'
+                line_split = line.split( ' ' )
+                theta_matches = int( line_split[ 1 ] )
+                file_name = line_split[ 0 ].replace( '*', '' )
+                file_path = os.path.join( output_dir_path, file_name + '_txt' )
+                # make sure the result file exists even when there are no matches
+                open( file_path, 'a' ).close()
+                fraction = str( theta_matches ) + '/' + str( leafnames_counter )
+                score = format( round( float( theta_matches ) / float( leafnames_counter ) , 6 ), '6f' )
+            elif file_path:
+                # result line of the current query; lines found before the
+                # first header have no result file and are skipped
+                with open( file_path, 'a+' ) as res_file:
+                    res_file.write( line + '\t' + fraction + '\t' + score + '\n' )
+
+def __main__():
+    """Parse the command line and either print the version or query the tree.
+
+    The output directory (collection) is created when missing before
+    querySBT is called.
+    """
+    # Parse the command line options
+    usage = ("Usage: query.py --files comma_sep_file_paths --names comma_sep_file_names "
+             "--treep comma_sep_tree_file_paths --treen comma_sep_tree_file_names "
+             "--threshold search_threshold --sort sort_flag "
+             "--outputdir output_dir_path --outfile out_log_file_path")
+    parser = optparse.OptionParser(usage = usage)
+    parser.add_option("-v", "--version", action="store_true", dest="version",
+                      default=False, help="display version and exit")
+    parser.add_option("-f", "--files", type="string",
+                      action="store", dest="files", help="comma separated files path")
+    parser.add_option("-n", "--names", type="string",
+                      action="store", dest="names", help="comma separated names associated to the files specified in --files")
+    parser.add_option("-k", "--treep", type="string",
+                      action="store", dest="treep", help="comma separated paths of files in collection")
+    parser.add_option("-m", "--treen", type="string",
+                      action="store", dest="treen", help="comma separated names of files in collection")
+    parser.add_option("-t", "--threshold", type="float", default=0.7,
+                      action="store", dest="threshold", help="search threshold")
+    parser.add_option("-s", "--sort", type="int", default=1,
+                      action="store", dest="sort", help="sort results (0 to disable)")
+    parser.add_option("-o", "--outputdir", type="string", default="output",
+                      action="store", dest="outputdir", help="output directory (collection) path")
+    parser.add_option("-r", "--outfile", type="string", default="query.txt",
+                      action="store", dest="outfile", help="output log file path")
+
+    (options, args) = parser.parse_args()
+    if options.version:
+        print( __version__ )
+    else:
+        # create output dir (collection)
+        output_dir_path = options.outputdir
+        if not os.path.exists(output_dir_path):
+            os.makedirs(output_dir_path)
+
+        querySBT( options, args )
+
+if __name__ == "__main__": __main__()
diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/query.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/btman-1.0.0/query.tsv Mon Mar 04 08:30:03 2019 -0500
@@ -0,0 +1,3 @@
+0 CCAACCAAAGGGAAAACTTTTTTCCGACTTTGGCCTAAAGGGTTTAACGGCCAAGTCAGAAGGGAAAAAGTTGCGCCAAAAATGGCGTTAAAATGTGTAATCAGAGAAGCGACACGAAAAGGGGATCAGCTCTTGGCTGGCAATTGGTAGGTCAGAGGTGGATTGGGAAAAGGCAAGTCAGCAACTGTCGATGACGGCGACTGACTGTTAATGAAAATTGTTTTGGCTGTGTGGAAAAAAATACGCGGGAATCCGTGAATTTTCCGAGGAGCTGGTGGAGCGAAGAAAACGGGGTGCTGCTGTTGTAAATGATTGGTGAAAGTCACACGCCCGCAGCCTTGCCAAACTAATTAACGCCAAATGGAGCTAAGGCCTTTGAATGATGGCTGCAGGCTAGCTTATGAAAAGGGGTTGAAGAGAAGTGGAAAAATTGGTAGAAAGGGATTTGCTCAAGATGCC
+1 TTAATGACAGGGCCACATGATGTGAAAAAAAATCAGAAACCGAGTCAACGTGAGAAGATAGTACGTACTACCGCAAATGAATGGCCATTTCATTTGCATGTTGGGAGCAACAGAAATGAGAGAGCATCCGAAGCTAACCACAAAAATGGACTTTGCTTCATTATGCACAAACACGCCAATAAATGTAACGAGAAAGATAGTAGGAGCGAAAGACGAGACGAGACAAACAGGAAGAAGACGAGTGGACGAGTGTTTTTTGTAACGAAACTCTTAATCGCTCCTTTGCAGGCTTAAGCTGATAGTTGCTACGTTTATGCCATGAATTTCAAGATCTCTCAAATGCGTGAAAATCCAGTTTATGCGACAGACAAATTCATGTATTTGAAAAATCTTAGCTGATAGAAATCAAAGGTGATT
+2 CAATTAATGATAAATATTTTATAAGGTGCGGAAATAAAGTGAGGAATATCTTTTAAATTCAAGTTCAATTCTGAAAGC
\ No newline at end of file
diff -r f02c2c58a6f9 -r be864d79c9c7 btman-1.0.0/query.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/btman-1.0.0/query.xml Mon Mar 04 08:30:03 2019 -0500
@@ -0,0 +1,83 @@
+
+
+ a Sequence Bloom Tree
+
+ macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r f02c2c58a6f9 -r be864d79c9c7 create.py
--- a/create.py Fri Jan 18 10:12:40 2019 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,132 +0,0 @@
-#!/usr/bin/env python
-
-# https://github.com/ross/requests-futures
-# http://docs.python-requests.org/en/master/user/quickstart/#more-complicated-post-requests
-
-import sys, os, uuid, optparse, requests, json, time
-#from requests_futures.sessions import FuturesSession
-
-#### NN14 ####
-SERVICE_URL = "http://nn14.galaxyproject.org:8080/";
-#service_url = "http://127.0.0.1:8082/";
-CREATE_URL = SERVICE_URL+"tree/create";
-STATUS_URL = SERVICE_URL+"status/";
-##############
-# query delay in seconds
-QUERY_DELAY = 30;
-##############
-
-__version__ = "1.0.0";
-ERR_EXIT_CODE = 1;
-OK_EXIT_CODE = 0;
-
-def raiseException( exitcode, message, errorfilepath ):
- with open(errorfilepath, 'w') as out:
- out.write(message);
- sys.exit(exitcode);
-
-def create_request( options, args, data ):
- outfilepath = options.outfile;
- cluster_id_2_query_id = { };
-
- for cluster_id in data:
- payload = { };
- payload["accessions"] = data[cluster_id];
- # add additional parameters to the payload
- payload["qualitycontrol"] = int(options.qualitycontrol);
- payload["qualitythreshold"] = float(options.qualitythreshold);
- payload["klen"] = int(options.klen);
- payload["minabundance"] = int(options.minabundance);
- # set the content type to application/json
- headers = {'Content-type': 'application/json'};
- # create a session
- session = requests.Session();
- # make a synchronous post request to the create route
- req = session.post(CREATE_URL, headers=headers, json=payload);
- resp_code = req.status_code;
- #print(str(req.content)+"\n\n");
- if resp_code == requests.codes.ok:
- resp_content = str(req.content);
- # convert out to json
- json_content = json.loads(resp_content);
- # retrieve query id
- query_id = json_content['query_id'];
- cluster_id_2_query_id[cluster_id] = query_id;
- else:
- with open(outfilepath, 'a+') as outfile:
- outfile.write( "An error has occurred while submitting data to the /tree/create endpoint for the cluster " + cluster_id + "\n\n" );
-
- build_flags = [ ]
- while len(build_flags) < len(cluster_id_2_query_id):
- for idx, cluster_id in enumerate( cluster_id_2_query_id ):
- if cluster_id not in build_flags:
- query_id = cluster_id_2_query_id[ cluster_id ];
- # create a new session
- session = requests.Session();
- # make a synchronous get request to the status route
- status_query_url = STATUS_URL.replace("", query_id);
- status_req = session.get(status_query_url);
- status_resp_content = str(status_req.content);
- #print(status_resp_content+"\n\n");
- # convert out to json
- json_status_content = json.loads(status_resp_content);
- # take a look at the state
- # state attribute is always available
- if json_status_content['state'] == 'SUCCESS':
- build_flags.append( cluster_id );
- built_tree_id = json_status_content['results']['tree_id'];
- with open(outfilepath, 'a+') as outfile:
- outfile.write( "Query ID: " + str(query_id) + "\n" + "Query status: " + str(json_status_content['state']) + "\n" + "Cluster ID: " + cluster_id + "\n" + "Sequence Bloom Tree ID: " + built_tree_id + "\n\n" );
- elif json_status_content['state'] in ['FAILURE', 'REVOKED']:
- build_flags.append( cluster_id );
- with open(outfilepath, 'a+') as outfile:
- outfile.write( "Query ID: " + str(query_id) + "\n" + "Query status: " + str(json_status_content['state']) + "\n" + "Cluster ID: " + cluster_id + "\n\n" );
- if len(build_flags) < len(cluster_id_2_query_id):
- time.sleep(QUERY_DELAY); # in seconds
- return sys.exit(OK_EXIT_CODE);
-
-def create( options, args ):
- multiple_data = {};
- experiment_list_file_path = options.explist;
- with open(experiment_list_file_path) as explist:
- for line in explist:
- if line.strip() != "":
- line_split = line.strip().split("\t"); # split on tab
- if len(line_split) == 2: # 0:accession , 1:cluster_id , otherwise skip line
- accession = line_split[0];
- cluster_id = line_split[1];
- if cluster_id in multiple_data:
- multiple_data[cluster_id].append( accession );
- else:
- multiple_data[cluster_id] = [ accession ];
- if len(multiple_data) > 0:
- return create_request( options, args, multiple_data );
- else:
- return raiseException( ERR_EXIT_CODE, "An error has occurred. Please be sure that your input file is valid.", options.outfile );
-
-def __main__():
- # Parse the command line options
- usage = "Usage: create.py --explist experiment_list --qualitycontrol quality_control --qualitythreshold quality_threshold --klen kmer_len --minabundance min_abundance --outfile output_file_path";
- parser = optparse.OptionParser(usage = usage);
- parser.add_option("-v", "--version", action="store_true", dest="version",
- default=False, help="display version and exit")
- parser.add_option("-l", "--explist", type="string",
- action="store", dest="explist", help="tabular file with a list of SRA accessions and their cluster label");
- parser.add_option("-q", "--qualitycontrol", type="int", default=0
- action="store", dest="qualitycontrol", help="flag to enable or disable the experiment quality control");
- parser.add_option("-t", "--qualitythreshold", type="float", default=0.0
- action="store", dest="qualitythreshold", help="quality threshold, if quality control is enabled only");
- parser.add_option("-k", "--klen", type="int", default=21,
- action="store", dest="klen", help="k-mer length");
- parser.add_option("-m", "--minabundance", type="int", default=2,
- action="store", dest="minabundance", help="minimum abundance");
- parser.add_option("-o", "--outfile", type="string", default="outfile_txt",
- action="store", dest="outfile", help="output file path");
-
- (options, args) = parser.parse_args();
- if options.version:
- print __version__;
- else:
- return create( options, args );
-
-if __name__ == "__main__": __main__()
diff -r f02c2c58a6f9 -r be864d79c9c7 create.xml
--- a/create.xml Fri Jan 18 10:12:40 2019 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,104 +0,0 @@
-
-
- a Sequence Bloom Tree
-
- macros.xml
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff -r f02c2c58a6f9 -r be864d79c9c7 macros.xml
--- a/macros.xml Fri Jan 18 10:12:40 2019 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-
-
-
- python
- requests
-
-
-
-
-
- 10.1101/090464
-
-
-
\ No newline at end of file
diff -r f02c2c58a6f9 -r be864d79c9c7 query.py
--- a/query.py Fri Jan 18 10:12:40 2019 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,221 +0,0 @@
-#!/usr/bin/env python
-
-# https://github.com/ross/requests-futures
-# http://docs.python-requests.org/en/master/user/quickstart/#more-complicated-post-requests
-
-import sys, os, uuid, optparse, requests, json, time
-#from requests_futures.sessions import FuturesSession
-
-#### NN14 ####
-SERVICE_URL = "http://nn14.galaxyproject.org:8080/";
-#service_url = "http://127.0.0.1:8082/";
-QUERY_URL = SERVICE_URL+"tree//query";
-STATUS_URL = SERVICE_URL+"status/";
-##############
-# query delay in seconds
-QUERY_DELAY = 30;
-##############
-
-__version__ = "1.0.0";
-VALID_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ '
-# in the case of collections, exitcodes equal to 0 and 1 are not considered errors
-ERR_EXIT_CODE = 2;
-OK_EXIT_CODE = 0;
-
-def raiseException( exitcode, message, output_dir_path, errorfilename ):
- errorfilepath = os.path.join(output_dir_path, errorfilename+"_txt");
- with open(errorfilepath, 'w') as out:
- out.write(message);
- sys.exit(exitcode);
-
-def query_request( options, args, payload ):
- output_dir_path = options.outputdir;
- # add additional parameters to the payload
- #payload["tree_id"] = str(options.treeid);
- payload["search_mode"] = str(options.search);
- payload["exact_algorithm"] = int(options.exact);
- payload["search_threshold"] = float(options.sthreshold);
- payload["sort"] = int(options.sortcontrol);
- # set the content type to application/json
- headers = {'Content-type': 'application/json'};
-
- # create a session
- session = requests.Session();
- # make a synchronous post request to the query route
- req = session.post(QUERY_URL.replace("", str(options.treeid)), headers=headers, json=payload);
- resp_code = req.status_code;
- #print(str(req.content)+"\n\n");
- if resp_code == requests.codes.ok:
- resp_content = str(req.content);
- # convert out to json
- json_content = json.loads(resp_content);
- # retrieve query id
- query_id = json_content['query_id'];
- query_processed = False;
- # results json content
- json_status_content = None;
- while query_processed is False:
- # create a new session
- session = requests.Session();
- # make a synchronous get request to the status route
- status_query_url = STATUS_URL.replace("", query_id);
- status_req = session.get(status_query_url);
- status_resp_content = str(status_req.content);
- #print(status_resp_content+"\n\n");
- # convert out to json
- json_status_content = json.loads(status_resp_content);
- # take a look at the state
- # state attribute is always available
- if json_status_content['state'] == 'SUCCESS':
- query_processed = True;
- break;
- elif json_status_content['state'] in ['FAILURE', 'REVOKED']:
- return raiseException( ERR_EXIT_CODE, "Query ID: "+str(query_id)+"\nQuery status: "+str(json_status_content['state']), output_dir_path, str(options.errorfile) );
- else:
- time.sleep(QUERY_DELAY); # in seconds
-
- out_file_format = "tabular";
- for block in json_status_content['results']:
- seq_id = block['sequence_id'];
- # put response block in the output collection
- output_file_path = os.path.join(output_dir_path, seq_id + "_" + out_file_format);
- accessions_list = "";
- hits_block = block['hits'];
- accessions_dict = { };
- is_sabutan = False;
- for hit in hits_block:
- if type(hit) is dict: # sabutan
- #accessions_list = accessions_list + str(hit['accession_number']) + "\t" + str(hit['score']) + "\n";
- accession_number = hit['accession_number'];
- #------------
- #score = hit['score'];
- #score_split = score.split("/");
- #accessions_dict[accession_number] = "{0:.6f}".format(float(score_split[0])/float(score_split[1]));
- #------------
- fraction = hit['fraction'];
- score = hit['score'];
- accession_scores = {
- "fraction": str(fraction),
- "score": float(score)
- }
- accessions_dict[accession_number] = accession_scores;
- is_sabutan = True;
- else: # all-some
- accessions_list = accessions_list + str(hit) + "\n";
- if is_sabutan:
- sorted_accessions = sorted(accessions_dict, key=lambda i: float(accessions_dict[i]["score"]), reverse=True);
- for acc in sorted_accessions:
- accessions_list = accessions_list + str(acc) + "\t" + str(accessions_dict[acc]["fraction"]) + "\t" + str(accessions_dict[acc]["score"]) + "\n";
- with open(output_file_path, 'w') as out:
- out.write(accessions_list.strip());
- return sys.exit(OK_EXIT_CODE);
- else:
- return raiseException( ERR_EXIT_CODE, "Unable to query the remote server. Please try again in a while.", output_dir_path, str(options.errorfile) );
-
-def query( options, args ):
- output_dir_path = options.outputdir;
- multiple_data = {};
- comma_sep_file_paths = options.files;
- #print("files: "+str(comma_sep_file_paths)+" - "+str(type(comma_sep_file_paths)));
- # check if options.files contains at least one file path
- if comma_sep_file_paths is not None:
- # split file paths
- file_paths = comma_sep_file_paths.split(",");
- # split file names
- comma_sep_file_names = str(options.names);
- #print("names: "+str(comma_sep_file_names));
- file_names = comma_sep_file_names.split(",");
- for idx, file_path in enumerate(file_paths):
- #file_name = file_names[idx];
- with open(file_path, 'r') as content_file:
- for line in content_file:
- if line.strip() != "":
- line_split = line.strip().split("\t"); # split on tab
- if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line
- seq_id = line_split[0];
- # fix seq_id using valid chars only
- seq_id = ''.join(e for e in seq_id if e in VALID_CHARS)
- seq_text = line_split[1];
- if seq_id in multiple_data:
- return raiseException( ERR_EXIT_CODE, "Error: the id '"+seq_id+"' is duplicated", output_dir_path, str(options.errorfile) );
- multiple_data[seq_id] = seq_text;
- if len(multiple_data) > 0:
- return query_request( options, args, multiple_data );
- #return echo( options, args );
- else:
- return raiseException( ERR_EXIT_CODE, "An error has occurred. Please be sure that your input files are valid.", output_dir_path, str(options.errorfile) );
- else:
- # try with the sequence in --sequence
- text_content = options.sequences;
- #print("sequences: "+text_content);
- # check if options.sequences contains a list of sequences (one for each row)
- if text_content is not None:
- text_content = str(text_content);
- if text_content.strip():
- # populate a dictionary with the files containing the sequences to query
- text_content = text_content.strip().split("__cn__"); # split on new line
- for line in text_content:
- if line.strip() != "":
- line_split = line.strip().split("__tc__"); # split on tab
- if len(line_split) == 2: # 0:id , 1:seq , otherwise skip line
- seq_id = line_split[0];
- # fix seq_id using valid chars only
- seq_id = ''.join(e for e in seq_id if e in VALID_CHARS)
- seq_text = line_split[1];
- if seq_id in multiple_data:
- return raiseException( ERR_EXIT_CODE, "Error: the id '"+seq_id+"' is duplicated", output_dir_path, str(options.errorfile) );
- multiple_data[seq_id] = seq_text;
- if len(multiple_data) > 0:
- return query_request( options, args, multiple_data );
- #return echo( options, args );
- else:
- return raiseException( ERR_EXIT_CODE, "An error has occurred. Please be sure that your input files are valid.", output_dir_path, str(options.errorfile) );
- else:
- return raiseException( ERR_EXIT_CODE, "You have to insert at least one row formatted as a tab delimited (ID, SEQUENCE) couple", output_dir_path, str(options.errorfile) );
- return ERR_EXIT_CODE;
-
-def __main__():
- # Parse the command line options
- usage = "Usage: query.py --files comma_sep_file_paths --names comma_seq_file_names --sequences sequences_text --search search_mode --exact exact_alg --sthreshold threshold --outputdir output_dir_path";
- parser = optparse.OptionParser(usage = usage);
- parser.add_option("-v", "--version", action="store_true", dest="version",
- default=False, help="display version and exit")
- parser.add_option("-f", "--files", type="string",
- action="store", dest="files", help="comma separated files path");
- parser.add_option("-n", "--names", type="string",
- action="store", dest="names", help="comma separated names associated to the files specified in --files");
- parser.add_option("-s", "--sequences", type="string",
- action="store", dest="sequences", help="contains a list of sequences (one for each row)");
- parser.add_option("-a", "--fasta", type="string",
- action="store", dest="fasta", help="contains the content of a fasta file");
- parser.add_option("-x", "--search", type="string", default="rrr",
- action="store", dest="search", help="search mode");
- parser.add_option("-e", "--exact", type="int", default=0,
- action="store", dest="exact", help="exact algorithm (required if search is 1 only)");
- parser.add_option("-k", "--tree", type="string", default=0,
- action="store", dest="treeid", help="the id of the tree that will be queried");
- parser.add_option("-t", "--sthreshold", type="float",
- action="store", dest="sthreshold", help="threshold applied to the search algrithm");
- parser.add_option("-z", "--sort", type="int", default=1,
- action="store", dest="sortcontrol", help="boolean required to sort the result");
- parser.add_option("-o", "--outputdir", type="string", default="output",
- action="store", dest="outputdir", help="output directory (collection) path");
- parser.add_option("-r", "--errorfile", type="string", default="error_txt",
- action="store", dest="errorfile", help="error file name containing error messages");
-
- # TEST
- #sequences = 'NM_001169378.2__tc__atttcggatgctttggagggaggaactctagtgctgcattgattggggcgtgtgttaatgatattcccagttcgcatggcgagcatcgattcctggtacgtatgtgggccccttgactcccacttatcgcacttgtcgttcgcaatttgcatgaattccgcttcgtctgaaacgcacttgcgccagacttctccggctggtctgatctggtctgtgatccggtctggtggggcgccagttgcgtttcgagctcatcaccagtcactccgcagtcgcattctgccagaggtctccgatcaagagcgcttctccattcgagattcaaacgcagcgcggtctgacgccgccacatcgagtgaaatccatatcgatggccacattcacacaggacgagatcgacttcctgcgcagccatggcaacgagctgtgtgccaagacctggctgggattgtgggatccgaagcgggctgtgcaccagcaggagcagcgcgaactgatgatggacaagtatgagcggaagcgatactacctggagccggccagtcctcttaagtcgctggccaatgcggtcaacctgaagtcgtctgctccggcgacgaaccacactcagaatggccaccaaaatgggtatgccagcatccatttgacgcctcctgctgcccagcggacctcggccaatggattgcagaaggtggccaactcgtcgagtaactcttctggaaagacctcatcctcgatcagtaggccacactataatcaccagaacaacagccaaaacaacaatcacgatgcctttggcctgggtggcggattgagcagcctgaacagcgccggttccacatccactggagctctttccgacaccagcagttgtgctagcaatggcttcggtgcggactgcgactttgtggctgactttggctcggccaacattttcgacgccacatcggcgcgttccacaggatcgccggcggtgtcgtccgtgtcctcagtgggttccagcaatggctacgccaaggtgcagcccatccgggcagctcatctccagcagcaacagcagttgcagcagcagctgcatcagcagcagctcctcaatggcaatggtcatcagggcactgagaactttgccgacttcgatcacgctcccatctacaatgcagtggctccaccgacttttaacgattggatcagcgactggagcaggcggggcttccacgatcccttcgacgattgcgatgactcgccaccaggtgcccgccctccagcacctgcgccagctcctgctcaagttcccgcagtatcatcaccattgccaaccgtccgagaagaaccagagcttgcgtggaatttttgggaggacgagatgcgaatagaggcgcaggaaaaggagtcccaaactaaacagccggagttgggctactccttttcgattagtactactacgcccctttccccttcgaatcccttcctgccctaccttgtcagtgaggagcagcatcgaaatcatccagagaagccctccttttcgtattcgttgttcagctccatatcaaatagttcgcaagaagatcaggcggatgatcatgagatgaatgttttaaatgccaatttccatgatttctttacgtggagtgctcccttgcagaacggccatacgaccagtccgcccaagggcggaaatgcagcgatggcgcccagtgaggatcgatatgccgctcttaaggatctcgacgagcagctgcgagaactgaaggccagcgaaagcgccacagagacgcccacgcccaccagtggcaatgttcaggccacagatgcctttggtggagccctcaacaacaatccaaatcccttcaagggccagcaacagcagcagctcagcagccatgtggtgaatccattccagcagcagcaacagcagcagcaccagcagaatctctatggccagttgacgctcataccaaatg
cctacggcagcagttcccagcagcagatggggcaccatctcctccagcagcagcagcagcaacagcagagcttcttcaacttcaacaacaacgggttcgccatctcgcagggtctgcccaacggctgcggcttcggcagcatgcaacccgctcctgtgatggccaacaatccctttgcagccagcggcgccatgaacaccaacaatccattcttatgagactcaacccgggagaatccgcctcgcgccacctggcagaggcgctgagccagcgaacaaagagcagacgcggaggaaccgaaccgaaattagtccattttactaacaatagcgttaatctatgtatacataatgcacgccggagagcactctttgtgtacatagcccaaatatgtacacccgaaaggctccacgctgacgctagtcctcgcggatggcggaggcggactggggcgttgatatattcttttacatggtaactctactctaacgtttacggatacggatatttgtatttgccgtttgccctagaactctatacttgtactaagcgcccatgaacacttcatccactaacatagctactaatcctcatcctagtggaggatgcagttggtccagacactctgttatttgttttatccatcctcgtacttgtctttgtcccatttagcactttcgttgcggataagaactttgtcagttattgattgtgtggccttaataagattataaaactaaatattataacgtacgactatacatatacggatacagatacagattcagacacagttagtacagatacagatatacatatacgcttttgtacctaatgaattgcttcttgtttccattgctaatcatctgcttttcgtgtgctaattttatacactagtacgtgcgatatcggccgtgcagatagattgctcagctcgcgagtcaagcctcttttggttgcacccacggcagacatttgtacatatactgtctgattgtaagcctcgtgtaatacctccattaacaccactcccccaccacccatccatcgaaccccgaatccatgactcaattcactgctcacatgtccatgcccatgccttaacgtgtcaaacattatcgaagccttaaagttatttaaaactacgaaatttcaataaaaacaaataagaacgctatc';
- #(options, args) = parser.parse_args(['-x', 'rrr', '-t', 0.5, '-s', sequences, '-o', 'collection_content']);
-
- (options, args) = parser.parse_args();
- if options.version:
- print __version__;
- else:
- # create output dir (collection)
- output_dir_path = options.outputdir;
- if not os.path.exists(output_dir_path):
- os.makedirs(output_dir_path);
-
- return query( options, args );
-
-if __name__ == "__main__": __main__()
diff -r f02c2c58a6f9 -r be864d79c9c7 query.xml
--- a/query.xml Fri Jan 18 10:12:40 2019 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,94 +0,0 @@
-
-
- a Sequence Bloom Tree
-
- macros.xml
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-