annotate create.py @ 19:7f712cc0d3d5 draft

Uploaded 20190304.2
author fabio
date Mon, 04 Mar 2019 08:31:28 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
19
7f712cc0d3d5 Uploaded 20190304.2
fabio
parents:
diff changeset
1 #!/usr/bin/env python
7f712cc0d3d5 Uploaded 20190304.2
fabio
parents:
diff changeset
2
7f712cc0d3d5 Uploaded 20190304.2
fabio
parents:
diff changeset
3 import sys, os, optparse, shutil, glob
7f712cc0d3d5 Uploaded 20190304.2
fabio
parents:
diff changeset
4
7f712cc0d3d5 Uploaded 20190304.2
fabio
parents:
diff changeset
__version__ = "1.0.0"

# Characters kept when a dataset or accession name is turned into a file name;
# every other character is dropped.
VALID_CHARS = '0123456789' 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

# In the case of collections, exit codes equal to 0 and 1 are not considered
# errors, hence the error status is 2.
OK_EXIT_CODE = 0
ERR_EXIT_CODE = 2
7f712cc0d3d5 Uploaded 20190304.2
fabio
parents:
diff changeset
10
7f712cc0d3d5 Uploaded 20190304.2
fabio
parents:
diff changeset
def printLog(logfilepath, message, exitcode=OK_EXIT_CODE, exit=False):
    """Echo *message* to stdout and append it to the log file.

    logfilepath -- path of the log file; it is opened in append mode, so it
                   is created when missing and never truncated.
    message     -- text to report; a newline is appended in the log file.
    exitcode    -- status handed to sys.exit() when *exit* is true.
    exit        -- when true, terminate the process after logging.
    """
    # The parenthesised single-argument form behaves the same as a Python 2
    # print statement and as the Python 3 print function.
    print(message)
    with open(logfilepath, 'a+') as out:
        out.write(message + '\n')
    if exit:
        sys.exit(exitcode)
7f712cc0d3d5 Uploaded 20190304.2
fabio
parents:
diff changeset
17
7f712cc0d3d5 Uploaded 20190304.2
fabio
parents:
diff changeset
def downloadAccessions(formats, filepaths, outlogfile, outdirpath):
    """Download as FASTA every accession listed in the 'accessions' datasets.

    formats    -- one format name per dataset; only entries equal to
                  'accessions' are handled here.
    filepaths  -- one path per dataset, parallel to *formats*. For an
                  'accessions' dataset the file holds one accession per line
                  in its first tab-separated column.
    outlogfile -- log file path handed to printLog().
    outdirpath -- directory passed to fastq-dump as --outdir.

    Returns a dict keyed by the upper-cased accession. Each value holds the
    'format' ('.fasta'), the expected 'filepath' of the downloaded file and a
    'filename' restricted to VALID_CHARS. Accessions whose download fails are
    logged and left out of the result.
    """
    # Local import so this function stays usable without touching the
    # file-level import line.
    import subprocess

    downloaded_files = {}
    for dataset_format, dataset_path in zip(formats, filepaths):
        if dataset_format != 'accessions':
            continue
        print(dataset_path)
        with open(dataset_path) as accessions:
            for line in accessions:
                print(line)
                accession = line.split('\t')[0].strip().upper()
                if not accession:
                    continue
                printLog(outlogfile, 'Downloading "' + accession + '" with the fastq-dump tool (part of the sra-tools utility)')
                # The accession comes straight from a user-supplied file, so
                # it is passed as a separate argument and never through a
                # shell command line.
                try:
                    fastq_dump_exitcode = subprocess.call(
                        ['fastq-dump', '--outdir', outdirpath, '--fasta', accession])
                except OSError:
                    # fastq-dump could not be started at all; report it like
                    # any other failed download (127 = "command not found").
                    fastq_dump_exitcode = 127
                # A negative value means the tool was killed by a signal,
                # which is a failure as well.
                if fastq_dump_exitcode != 0:
                    printLog(outlogfile, '> FASTA file: FAILED ( "' + accession + '" will be excluded )')
                    continue
                printLog(outlogfile, '> FASTA file: "' + accession + '.fasta"')
                downloaded_files[accession] = {
                    'format': '.fasta',
                    'filepath': os.path.join(outdirpath, accession + '.fasta'),
                    'filename': ''.join(c for c in accession if c in VALID_CHARS),
                }
    return downloaded_files
7f712cc0d3d5 Uploaded 20190304.2
fabio
parents:
diff changeset
42
7f712cc0d3d5 Uploaded 20190304.2
fabio
parents:
diff changeset
def _runCommand(argv):
    """Run *argv* without a shell and return its exit status.

    127 is returned when the executable cannot be started at all, so callers
    can treat every non-zero value as a failure.
    """
    # Local import so this helper does not depend on the file-level imports.
    import subprocess
    try:
        return subprocess.call(argv)
    except OSError:
        return 127


def _abort(outlogfile, message):
    """Log *message* and end the process with ERR_EXIT_CODE (never returns)."""
    printLog(outlogfile, message, exitcode=ERR_EXIT_CODE, exit=True)


def _splitOption(value):
    """Split a '|'-separated option value, dropping blank items."""
    return [item for item in str(value).split('|') if item.strip()]


def _parseNtcardHistogram(hist_filepath):
    """Return the (F0, f1) integers read from an ntcard result file.

    Only two-column, tab-separated rows are considered; an element is None
    when its row is missing.
    """
    var_F0 = None
    var_f1 = None
    with open(hist_filepath) as ntcard_res:
        for line in ntcard_res:
            line_split = line.strip().split('\t')
            if len(line_split) == 2:
                if line_split[0] == 'F0':
                    var_F0 = int(line_split[1])
                elif line_split[0] == 'f1':
                    var_f1 = int(line_split[1])
            if var_F0 is not None and var_f1 is not None:
                break
    return var_F0, var_f1


def _estimateBloomFilterSize(accessions, formats, filepaths, klen, outdirpath, outlogfile):
    """Run ntcard over every input file and return F0 - f1 as the filter size."""
    data_paths = [accessions[accession]['filepath'] for accession in accessions
                  if 'filepath' in accessions[accession]]
    print(' '.join(data_paths))
    # Collect the paths in a list: gluing the per-dataset strings together
    # would fuse the last path of one dataset with the first of the next.
    for dataset_format, dataset_paths in zip(formats, filepaths):
        if dataset_format != 'accessions':
            data_paths.extend(path for path in dataset_paths.split(',') if path)

    printLog(outlogfile, 'Estimating the Bloom Filter size with ntcard')
    if not data_paths:
        _abort(outlogfile, '> unable to estimate the Bloom Filter size')

    ntcard_command = ['ntcard', '--kmer=' + str(klen)] + data_paths
    print(' '.join(ntcard_command))
    ntcard_exitcode = _runCommand(ntcard_command)
    if ntcard_exitcode != 0:
        _abort(outlogfile, '> [exitcode: ' + str(ntcard_exitcode) + '] an error with ntcard has occurred')

    # The result is looked up in the output directory, which is also the
    # current working directory at this point.
    ntcard_res_filepath = os.path.join(outdirpath, 'freq_k' + str(klen) + '.hist')
    if not os.path.exists(ntcard_res_filepath):
        _abort(outlogfile, '> an error with ntcard has occurred')
    renamed_filepath = os.path.join(outdirpath, 'ntcard' + str(klen) + '.txt')
    os.rename(ntcard_res_filepath, renamed_filepath)

    var_F0, var_f1 = _parseNtcardHistogram(renamed_filepath)
    if var_F0 is None or var_f1 is None:
        _abort(outlogfile, '> an error has occurred while estimating the Bloom Filter size')
    bfsize = var_F0 - var_f1
    printLog(outlogfile, '> estimated Bloom Filter size: ' + str(bfsize))
    return bfsize


def _makeBloomFilter(tooldirpath, filepath, filename, fileformat, compressed_flag,
                     outdirpath, klen, minabundance, bfsize, flags):
    """Invoke makebf.sh for one input file and return its exit status.

    *flags* are the two trailing positional arguments expected by the script;
    their meaning is defined by makebf.sh, not by this file.
    """
    command = ['sh', os.path.join(tooldirpath, 'makebf.sh'), filepath, filename,
               fileformat, compressed_flag, outdirpath, str(klen),
               str(minabundance), str(bfsize)] + list(flags)
    print(' '.join(command))
    return _runCommand(command)


def _buildTree(tooldirpath, outdirpath, bfsize, outlogfile):
    """Cluster the *.bf files and build the tree; always ends the process."""
    printLog(outlogfile, 'Creating a tree topology file')
    # Same selection as the private glob.glob1( outdirpath, '*.bf' ) helper:
    # visible entries of the output directory ending in '.bf'.
    bf_files = [name for name in os.listdir(outdirpath)
                if name.endswith('.bf') and not name.startswith('.')]
    if not bf_files:
        _abort(outlogfile, '> no Bloom Filter files found')

    cluster_exitcode = _runCommand(
        ['sh', os.path.join(tooldirpath, 'cluster.sh'), outdirpath, str(bfsize)])
    if cluster_exitcode != 0:
        _abort(outlogfile, '> [exitcode: ' + str(cluster_exitcode) + '] an error has occurred during the creation of the topology file')
    if not os.path.exists(os.path.join(outdirpath, 'leafnames.txt')):
        _abort(outlogfile, '> an error has occurred during the creation of the topology file')

    # Build the HowDeSBT nodes
    printLog(outlogfile, 'Building the Bloom Filter files for the tree')
    build_exitcode = _runCommand(['sh', os.path.join(tooldirpath, 'build.sh'), outdirpath])
    if build_exitcode != 0:
        _abort(outlogfile, '> [exitcode: ' + str(build_exitcode) + '] an error has occurred during the creation of the Bloom Filter files for the tree')
    # TODO: a rewrite of howde.txt into a Galaxy-specific howde_galaxy.txt was
    # sketched here as dead code and never implemented.
    printLog(outlogfile, '> the tree has been successfully built: "howde.txt"', exitcode=OK_EXIT_CODE, exit=True)


def createSBT(options, args):
    """Build a Sequence Bloom Tree from the datasets described by *options*.

    Dataset formats are fasta, fastq or accessions; this version skips the
    quality control procedure (quality thresholds are parsed but only their
    count is checked).

    options -- parsed command line. The attributes read are outfile, outdir,
               tooldir, klen, bfsize and the '|'-separated lists formats,
               filepaths, filenames, compressed, minabundances and
               qualitythresholds, which must all have the same length. For
               non-accession datasets a filepaths/filenames item may itself
               be a ','-separated list.
    args    -- positional arguments; unused.

    The process changes directory to the output directory before any input
    is opened, so relative input and log paths are resolved against it. A
    negative bfsize asks for an estimate computed with ntcard. Every path
    through this function ends in sys.exit() via printLog().
    """
    outlogfile = str(options.outfile)
    # Resolve the tool directory BEFORE changing directory; otherwise a
    # relative value (such as the default './') would point into the output
    # directory, where the helper scripts are not located.
    tooldirpath = os.path.abspath(str(options.tooldir))
    outdirpath = os.path.abspath(str(options.outdir))
    if not os.path.exists(outdirpath):
        os.makedirs(outdirpath)
    os.chdir(outdirpath)

    formats = _splitOption(options.formats)
    filepaths = _splitOption(options.filepaths)
    filenames = _splitOption(options.filenames)
    compressed = [int(c) == 1 for c in _splitOption(options.compressed)]
    minabundances = [int(minab) for minab in _splitOption(options.minabundances)]
    qualitythresholds = [float(qthres) for qthres in _splitOption(options.qualitythresholds)]

    klen = int(options.klen)
    bfsize = int(options.bfsize)

    if not (len(formats) == len(filepaths) == len(filenames) == len(compressed)
            == len(minabundances) == len(qualitythresholds)):
        _abort(outlogfile, 'Something went wrong with the input parameters')

    printLog(outlogfile, 'Retrieving experiments')
    accessions = downloadAccessions(formats, filepaths, outlogfile, outdirpath)
    printLog(outlogfile, '> ' + str(len(accessions)) + ' experiments retrieved from the Sequence Read Archive')
    print(str(list(accessions)))

    if bfsize < 0:  # estimate bloom filter size
        bfsize = _estimateBloomFilterSize(accessions, formats, filepaths, klen, outdirpath, outlogfile)
    if bfsize <= 0:
        _abort(outlogfile, '> ERROR: the Bloom Filter size is ' + str(bfsize))

    for dataset_idx, dataset_format in enumerate(formats):
        if dataset_format == 'accessions':
            with open(filepaths[dataset_idx]) as accessions_file:
                for line in accessions_file:
                    accession = line.split('\t')[0].strip().upper()
                    if accession not in accessions:
                        # Blank line or failed download: nothing to process.
                        continue
                    curr_format = accessions[accession]['format']
                    curr_filepath = accessions[accession]['filepath']
                    curr_filename = accessions[accession]['filename']
                    printLog(outlogfile, 'Processing "' + accession + '" ( format="' + curr_format +
                             '", compressed="' + str(False) + '", fixed_name="' + curr_filename + '" )')
                    makebf_exitcode = _makeBloomFilter(
                        tooldirpath, curr_filepath, curr_filename, curr_format, 'uncompress',
                        outdirpath, klen, minabundances[dataset_idx], bfsize, ('1', '1'))
                    if makebf_exitcode != 0:
                        printLog(outlogfile, '> [exitcode: ' + str(makebf_exitcode) + '] Bloom Filter file: FAILED ( "' + accession + '" will be excluded )')
                    else:
                        printLog(outlogfile, '> Bloom Filter file: "' + curr_filename + '.bf"')
        else:
            curr_format = '.' + dataset_format.lower()
            is_compressed = compressed[dataset_idx]
            curr_compressed = '.gz' if is_compressed else 'uncompress'
            curr_filepaths = filepaths[dataset_idx].split(',')
            curr_filenames = filenames[dataset_idx].split(',')
            if len(curr_filepaths) != len(curr_filenames):
                _abort(outlogfile, 'Something went wrong with the input parameters')
            # Walk the files of THIS dataset: each path is paired with the
            # name found at the same position.
            for curr_filepath, curr_filename in zip(curr_filepaths, curr_filenames):
                curr_filename_fixed = ''.join(c for c in curr_filename if c in VALID_CHARS)
                printLog(outlogfile, 'Processing "' + curr_filename + '" ( format="' + curr_format +
                         '", compressed="' + str(is_compressed) + '", fixed_name="' + curr_filename_fixed + '" )')
                makebf_exitcode = _makeBloomFilter(
                    tooldirpath, curr_filepath, curr_filename_fixed, curr_format, curr_compressed,
                    outdirpath, klen, minabundances[dataset_idx], bfsize,
                    ('0', '1') if is_compressed else ('0', '0'))
                if makebf_exitcode != 0:
                    printLog(outlogfile, '> [exitcode: ' + str(makebf_exitcode) + '] Bloom Filter file: FAILED ( "' + curr_filename + '" will be excluded )')
                else:
                    printLog(outlogfile, '> Bloom Filter file: "' + curr_filename_fixed + '.bf"')

    # Create a tree topology and build the tree
    _buildTree(tooldirpath, outdirpath, bfsize, outlogfile)
7f712cc0d3d5 Uploaded 20190304.2
fabio
parents:
diff changeset
195
7f712cc0d3d5 Uploaded 20190304.2
fabio
parents:
diff changeset
def __main__():
    """Parse the command line, then print the version or build the tree.

    List-valued options (formats, filepaths, filenames, compressed,
    minabundances, qualitythresholds) are single strings whose items are
    separated by '|', which is how createSBT() splits them.
    """
    # Parse the command line options
    usage = ("Usage: create.py --formats file_formats --filepaths file_paths --filenames file_names "
             "--compressed file_compressed --minabundances min_abundances --qualitythresholds quality_thresholds "
             "--klen kmer_len --bfsize bloom_filter_size --outfile out_log_file_path --outdir out_dir_path "
             "--tooldir tool_dir_path")
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-v", "--version", action="store_true", dest="version",
                      default=False, help="display version and exit")
    parser.add_option("-f", "--formats", type="string",
                      action="store", dest="formats",
                      help="list of file formats separated by a pipe character (|)")
    parser.add_option("-p", "--filepaths", type="string",
                      action="store", dest="filepaths",
                      help="list of input file paths separated by a pipe character (|)")
    parser.add_option("-n", "--filenames", type="string",
                      action="store", dest="filenames",
                      help="list of input file names separated by a pipe character (|)")
    parser.add_option("-c", "--compressed", type="string",
                      action="store", dest="compressed",
                      help="list of compressed flags related to the input files separated by a pipe character (|)")
    parser.add_option("-m", "--minabundances", type="string",
                      action="store", dest="minabundances",
                      help="list of bloom filter minimum abundances related to the input files separated by a pipe character (|)")
    parser.add_option("-q", "--qualitythresholds", type="string",
                      action="store", dest="qualitythresholds",
                      help="list of quality thresholds related to the input files separated by a pipe character (|)")
    parser.add_option("-k", "--klen", type="int", default=21,
                      action="store", dest="klen", help="k-mer length")
    parser.add_option("-b", "--bfsize", type="int", default=-1,
                      action="store", dest="bfsize", help="bloom filter size")
    parser.add_option("-o", "--outfile", type="string", default="sbtres.txt",
                      action="store", dest="outfile", help="output log file path")
    # NOTE(review): this default is identical to the --outfile default, which
    # looks like a copy-paste; it is kept unchanged so existing invocations
    # behave the same -- confirm the intended directory name.
    parser.add_option("-d", "--outdir", type="string", default="sbtres.txt",
                      action="store", dest="outdir", help="output directory path")
    parser.add_option("-t", "--tooldir", type="string", default="./",
                      action="store", dest="tooldir", help="tool directory path")

    (options, args) = parser.parse_args()
    if options.version:
        # Single-argument call form: valid on both Python 2 and Python 3.
        print(__version__)
    else:
        createSBT(options, args)


if __name__ == "__main__":
    __main__()