Mercurial > repos > cmonjeau > stacks
comparison STACKS_refmap.py @ 0:d6ba40f6c824
first commit
| author | cmonjeau |
|---|---|
| date | Mon, 24 Aug 2015 09:29:12 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d6ba40f6c824 |
|---|---|
| 1 #!/usr/bin/python | |
| 2 # -*- coding: utf-8 -*- | |
| 3 | |
| 4 import sys | |
| 5 import re | |
| 6 import os | |
| 7 import tempfile | |
| 8 import shutil | |
| 9 import subprocess | |
| 10 import glob | |
| 11 import optparse | |
| 12 from os.path import basename | |
| 13 import zipfile | |
| 14 import tarfile | |
| 15 import gzip | |
| 16 from galaxy.datatypes.checkers import * | |
| 17 from stacks import * | |
| 18 | |
| 19 | |
def _stage_input_files(config_path):
    """Prepare the input files listed in one Galaxy config file.

    Applies the same four steps used for every input category: parse the
    config file, extract compressed entries, check/add the ``.sam``
    extension and create symlinks, all against the ``inputs`` directory.
    The helper functions come from the module's star imports (presumably
    the project ``stacks`` module -- confirm).

    :param config_path: path of the Galaxy config file to parse.
    :returns: the table built by ``galaxy_config_to_tabfiles_for_STACKS``;
        iterating it yields names relative to ``inputs``.
    """
    tab_files = galaxy_config_to_tabfiles_for_STACKS(config_path)
    extract_compress_files_from_tabfiles(tab_files, 'inputs')
    # ref_map.pl needs its inputs to carry a .sam extension.
    check_sam_extension_and_add(tab_files, 'inputs')
    create_symlinks_from_tabfiles(tab_files, 'inputs')
    return tab_files


def _archive_kind(file_name, kinds):
    """Return which per-category archive ``file_name`` belongs to.

    :param file_name: name of a file produced in ``job_outputs``.
    :param kinds: iterable of category names (e.g. ``'tags'``).
    :returns: the first ``kind`` such that the name ends with
        ``kind + '.tsv'``, or ``None`` when the name contains ``batch`` or
        matches no category.
    """
    if 'batch' in file_name:
        return None
    for kind in kinds:
        if file_name.endswith(kind + '.tsv'):
            return kind
    return None


def __main__():
    """Run ``ref_map.pl`` for Galaxy and dispatch its output files.

    Steps: parse the command-line options, create the ``inputs``,
    ``job_outputs`` and ``galaxy_outputs`` working directories, stage the
    input files, build and run the ``ref_map.pl`` command line, then move,
    copy or zip the produced files according to ``--compress_output``
    (``total``, ``categories`` or anything else for no compression).
    """
    # Option parsing: every option takes a plain string value.
    parser = optparse.OptionParser()
    for flag in ('-p', '-r', '-s', '-O', '-n', '-m',
                 '--bound_low', '--bound_high', '--alpha',
                 '--logfile', '--compress_output',
                 '--catalogsnps', '--catalogalleles', '--catalogtags',
                 # additional outputs
                 '--total_output', '--tags_output', '--snps_output',
                 '--alleles_output', '--matches_output'):
        parser.add_option(flag)
    (options, args) = parser.parse_args()

    # Create working directories.
    for directory in ('inputs', 'job_outputs', 'galaxy_outputs'):
        os.mkdir(directory)

    cmd_line = ['ref_map.pl']

    # Genetic map: parent files.
    if options.p:
        for key in _stage_input_files(options.p):
            path = 'inputs/' + key
            # Only pass files (skip directories created by a decompression).
            if os.path.isfile(path):
                cmd_line.extend(['-p', os.path.normpath(path)])

    # Genetic map: progeny files.
    # NOTE(review): the nesting of this branch was reconstructed from a
    # listing without indentation; it is treated as independent of
    # ``options.p`` -- confirm against the original file.
    if options.r:
        for key in _stage_input_files(options.r):
            path = 'inputs/' + key
            # Only pass files (skip directories created by a decompression).
            if os.path.isfile(path):
                cmd_line.extend(['-r', path])

    # Individual files (population study).
    # NOTE(review): unlike -p/-r, every entry is passed without an
    # ``isfile`` check; kept as in the original.
    if options.s:
        for key in _stage_input_files(options.s):
            cmd_line.extend(['-s', 'inputs/' + key])

    # Fixed ref_map.pl flags plus the -n/-m values supplied by the caller
    # (see the Stacks manual for the meaning of each flag).
    cmd_line.extend([
        '-S',
        '-b', '1',
        '-T', '4',
        '-o', 'job_outputs',
        '-n', options.n,
        '-m', options.m,
    ])

    # Optional flags are forwarded only when a value was given.
    for flag, value in (('-O', options.O),
                        ('--bound_low', options.bound_low),
                        ('--bound_high', options.bound_high),
                        ('--alpha', options.alpha)):
        if value:
            cmd_line.extend([flag, value])

    # Execute the job. The single-argument parenthesised print form gives
    # the same output on Python 2 and Python 3.
    print('[COMMAND LINE]' + ' '.join(cmd_line))

    process = subprocess.Popen(cmd_line, stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    (stdoutput, stderror) = process.communicate()

    # Both streams are echoed on stdout, as the error message below
    # directs the user there.
    print(stdoutput)
    print(stderror)

    # Post-processing: a missing log file means ref_map.pl did not run
    # properly. This stays best-effort (report and continue), but only
    # file-system errors and a missing --logfile value (TypeError on None)
    # are caught instead of every exception.
    try:
        shutil.move('job_outputs/ref_map.log', options.logfile)
    except (EnvironmentError, TypeError):
        sys.stderr.write('Error in ref_map execution; Please read the additional output (stdout)\n')

    # Go inside the outputs dir; all paths below are relative to it.
    os.chdir('job_outputs')

    # Copy the catalog files to their dedicated Galaxy outputs. A catalog
    # whose destination option was not supplied is skipped.
    catalog_targets = (
        ('catalog.snps.tsv', options.catalogsnps),
        ('catalog.alleles.tsv', options.catalogalleles),
        ('catalog.tags.tsv', options.catalogtags),
    )
    for name in glob.glob('*'):
        for suffix, destination in catalog_targets:
            if destination and name.endswith(suffix):
                shutil.copy(name, destination)

    # Snapshot of the produced files, taken before any archive is created
    # so the temporary zip files are never added to themselves.
    list_files = glob.glob('*')

    if options.compress_output == 'total':

        # One archive holding every produced file.
        archive = zipfile.ZipFile('total.zip.temp', 'w', allowZip64=True)
        try:
            for name in list_files:
                archive.write(os.path.basename(name))
        finally:
            # The zip central directory is only written on close(), so the
            # archive must be closed before it is moved.
            archive.close()

        shutil.move('total.zip.temp', options.total_output)

    elif options.compress_output == 'categories':

        # One archive per file category; everything else goes to
        # galaxy_outputs uncompressed.
        kinds = ('tags', 'snps', 'alleles', 'matches')
        archives = {}
        try:
            for kind in kinds:
                archives[kind] = zipfile.ZipFile(kind + '.zip.temp', 'w',
                                                 allowZip64=True)
            for name in list_files:
                kind = _archive_kind(name, kinds)
                if kind is None:
                    shutil.move(os.path.basename(name), '../galaxy_outputs')
                else:
                    archives[kind].write(os.path.basename(name))
                    os.remove(name)
        finally:
            # Close every archive before moving it (see above).
            for archive in archives.values():
                archive.close()

        for kind in kinds:
            shutil.move(kind + '.zip.temp', getattr(options, kind + '_output'))

    else:

        # No compression: hand every file over as is.
        for name in list_files:
            shutil.move(os.path.basename(name), '../galaxy_outputs')
| 253 | |
| 254 | |
# Script entry point: run the wrapper only when executed directly.
if __name__ == "__main__":
    __main__()
| 257 | |
| 258 |
