Mercurial > repos > cmonjeau > stacks
comparison STACKS_denovomap.py @ 0:d6ba40f6c824
first commit
| author | cmonjeau |
|---|---|
| date | Mon, 24 Aug 2015 09:29:12 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d6ba40f6c824 |
|---|---|
| 1 #!/usr/bin/python | |
| 2 # -*- coding: utf-8 -*- | |
| 3 | |
| 4 import sys | |
| 5 import re | |
| 6 import os | |
| 7 import tempfile | |
| 8 import shutil | |
| 9 import subprocess | |
| 10 import glob | |
| 11 import argparse | |
| 12 from os.path import basename | |
| 13 import zipfile | |
| 14 import tarfile | |
| 15 import gzip | |
| 16 from galaxy.datatypes.checkers import * | |
| 17 from stacks import * | |
| 18 | |
| 19 | |
def _parse_arguments():
    """Parse the wrapper's command-line options.

    Every option takes a single string value and is ``None`` when it is
    not supplied on the command line.
    """
    option_names = (
        # inputs and denovo_map.pl settings
        '-p', '-b', '-r', '-s', '-O', '-m', '-P', '-M', '-N', '-n', '-t',
        '-H', '--bound_low', '--bound_high', '--alpha', '--logfile',
        '--compress_output', '--catalogsnps', '--catalogalleles',
        '--catalogtags',
        # additional outputs
        '--total_output', '--tags_output', '--snps_output',
        '--alleles_output', '--matches_output',
    )
    parser = argparse.ArgumentParser()
    for option_name in option_names:
        parser.add_argument(option_name)
    return parser.parse_args()


def _stage_inputs(config_path):
    """Stage the files described by a Galaxy config file into ``inputs``.

    The four helpers called here come from the file's star imports; the
    step descriptions below restate the original inline comments and are
    not verified against the helpers' implementations.

    Returns the file table produced by the config parser.
    """
    # parse the config file
    tab_files = galaxy_config_to_tabfiles_for_STACKS(config_path)
    # extract compressed entries and update the table accordingly
    extract_compress_files_from_tabfiles(tab_files, 'inputs')
    # make sure files carry a .fq / .fasta style extension
    check_fastq_extension_and_add(tab_files, 'inputs')
    # symlink the files into the working directory
    create_symlinks_from_tabfiles(tab_files, 'inputs')
    return tab_files


def _parent_file_args(paired_end):
    """Return the ``-p <file>`` arguments for the staged parent files.

    All regular files found in ``inputs`` are used, in sorted order
    (directories, e.g. those left by a decompression, are skipped).
    When ``paired_end`` is true only every second file is passed,
    starting with the first one -- presumably the forward reads of each
    pair; TODO confirm against the naming scheme of the inputs.
    """
    fastq_files = sorted(
        path for path in glob.glob('inputs/*') if os.path.isfile(path)
    )
    if paired_end:
        fastq_files = fastq_files[::2]
    args = []
    for fastq_file in fastq_files:
        args.extend(['-p', fastq_file])
    return args


def _table_file_args(flag, tab_files):
    """Return ``flag <file>`` arguments for each table key staged as a file."""
    args = []
    for key in tab_files:
        path = 'inputs/' + key
        # skip directories created after a decompression
        if os.path.isfile(path):
            args.extend([flag, path])
    return args


def _option_args(options):
    """Translate the parsed options into denovo_map.pl arguments."""
    # fixed settings: batch id 1, 4 threads, results in job_outputs/
    args = ['-S', '-b', '1', '-T', '4', '-o', 'job_outputs/']

    if options.O:
        args.extend(['-O', options.O])

    # value options are skipped when unset or equal to the '-1' placeholder
    for name in ('m', 'P', 'M', 'N', 'n'):
        value = getattr(options, name)
        if value and value != '-1':
            args.extend(['-' + name, value])

    # switches are emitted only for the literal string 'true'
    for name in ('t', 'H'):
        if getattr(options, name) == 'true':
            args.append('-' + name)

    # SNP model: each bound is forwarded on its own, so a missing
    # --bound_high can no longer inject None into the command line
    for name in ('bound_low', 'bound_high', 'alpha'):
        value = getattr(options, name)
        if value:
            args.extend(['--' + name, value])

    return args


def _copy_catalog_files(options):
    """Copy the catalog files of the current directory to their outputs.

    A catalog file is only copied when the matching output option was
    supplied.
    """
    destinations = (
        ('catalog.snps.tsv', options.catalogsnps),
        ('catalog.alleles.tsv', options.catalogalleles),
        ('catalog.tags.tsv', options.catalogtags),
    )
    for file_name in glob.glob('*'):
        for suffix, destination in destinations:
            if destination and file_name.endswith(suffix):
                shutil.copy(file_name, destination)


def _archive_total(file_names, destination):
    """Zip every listed file into one archive and move it to ``destination``."""
    # The archive must be closed before it is moved: closing is what
    # writes the zip central directory.
    with zipfile.ZipFile('total.zip.temp', 'w', allowZip64=True) as archive:
        for file_name in file_names:
            archive.write(os.path.basename(file_name))
    shutil.move('total.zip.temp', destination)


def _archive_by_category(file_names, options):
    """Zip tags/snps/alleles/matches files into one archive per category.

    Files whose name ends with a category suffix and does not contain
    ``batch`` are added to that category's archive and then deleted; all
    other files are moved to ``../galaxy_outputs``.
    """
    categories = (
        ('tags.tsv', 'tags.zip.temp', options.tags_output),
        ('snps.tsv', 'snps.zip.temp', options.snps_output),
        ('alleles.tsv', 'alleles.zip.temp', options.alleles_output),
        ('matches.tsv', 'matches.zip.temp', options.matches_output),
    )
    archives = [
        (suffix, zipfile.ZipFile(temp_name, 'w', allowZip64=True))
        for suffix, temp_name, _ in categories
    ]
    try:
        for file_name in file_names:
            for suffix, archive in archives:
                if file_name.endswith(suffix) and 'batch' not in file_name:
                    archive.write(os.path.basename(file_name))
                    os.remove(file_name)
                    break
            else:
                shutil.move(os.path.basename(file_name), '../galaxy_outputs')
    finally:
        # close before moving so every archive gets its central directory
        for _, archive in archives:
            archive.close()

    for _, temp_name, destination in categories:
        shutil.move(temp_name, destination)


def __main__():
    """Run denovo_map.pl for Galaxy and dispatch its result files.

    Steps: parse the command-line options, create the ``inputs``,
    ``job_outputs`` and ``galaxy_outputs`` working directories, stage the
    parent (``-p``), progeny (``-r``) and sample (``-s``) files, run
    ``denovo_map.pl``, then move the log, copy the catalog files and
    either zip the results (``--compress_output`` set to ``total`` or
    ``categories``) or move them to ``galaxy_outputs``.

    Exits with status 1 when the denovo_map log cannot be moved to
    ``--logfile`` after the run.
    """
    options = _parse_arguments()

    # create working directories
    for directory in ('inputs', 'job_outputs', 'galaxy_outputs'):
        os.mkdir(directory)

    cmd_line = ['denovo_map.pl']

    # genetic map: parent files
    if options.p:
        _stage_inputs(options.p)
        cmd_line.extend(_parent_file_args(options.b == 'true'))

    # genetic map: progeny files
    if options.r:
        cmd_line.extend(_table_file_args('-r', _stage_inputs(options.r)))

    # population: individual sample files
    if options.s:
        cmd_line.extend(_table_file_args('-s', _stage_inputs(options.s)))

    cmd_line.extend(_option_args(options))

    # launch the command line; flush first so the echoed command appears
    # before the output of the child process
    print("[CMD_LINE] : " + ' '.join(cmd_line))
    sys.stdout.flush()
    return_code = subprocess.call(cmd_line)

    # a log file that cannot be moved is treated as a failed run
    try:
        shutil.move('job_outputs/denovo_map.log', options.logfile)
    except EnvironmentError:
        sys.stderr.write('Error in denovo_map execution; Please read the additional output (stdout)\n')
        sys.stderr.write('denovo_map.pl exit status: %s\n' % return_code)
        sys.exit(1)

    # go inside the outputs dir
    os.chdir('job_outputs')

    _copy_catalog_files(options)

    # listed before any archive is created, so the temporary zip files
    # are never part of the list
    list_files = glob.glob('*')

    if options.compress_output == 'total':
        _archive_total(list_files, options.total_output)
    elif options.compress_output == 'categories':
        _archive_by_category(list_files, options)
    else:
        # no compression
        for file_name in list_files:
            shutil.move(os.path.basename(file_name), '../galaxy_outputs')
| 260 | |
| 261 | |
# Script entry point: run the wrapper only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    __main__()
| 264 | |
| 265 |
