| 
0
 | 
     1 #!/usr/bin/python
 | 
| 
 | 
     2 # -*- coding: utf-8 -*-
 | 
| 
 | 
     3 
 | 
| 
 | 
     4 import sys
 | 
| 
 | 
     5 import re
 | 
| 
 | 
     6 import os
 | 
| 
 | 
     7 import tempfile
 | 
| 
 | 
     8 import shutil
 | 
| 
 | 
     9 import subprocess
 | 
| 
 | 
    10 import glob
 | 
| 
 | 
    11 import argparse
 | 
| 
 | 
    12 from os.path import basename
 | 
| 
 | 
    13 import zipfile
 | 
| 
 | 
    14 import tarfile
 | 
| 
 | 
    15 import gzip
 | 
| 
 | 
    16 from galaxy.datatypes.checkers import *
 | 
| 
 | 
    17 from stacks import *
 | 
| 
 | 
    18 
 | 
| 
 | 
    19 
 | 
| 
 | 
    20 def __main__():
 | 
| 
 | 
    21 
 | 
| 
 | 
    22     # arguments recuperation
 | 
| 
 | 
    23 
 | 
| 
 | 
    24     parser = argparse.ArgumentParser()
 | 
| 
 | 
    25     parser.add_argument('-P')
 | 
| 
 | 
    26     parser.add_argument('-M')
 | 
| 
 | 
    27     parser.add_argument('-b')
 | 
| 
 | 
    28     parser.add_argument('--vcf', action='store_true')
 | 
| 
 | 
    29     parser.add_argument('--genepop', action='store_true')
 | 
| 
 | 
    30     parser.add_argument('--structure', action='store_true')
 | 
| 
 | 
    31     parser.add_argument('-e')
 | 
| 
 | 
    32     parser.add_argument('--genomic', action='store_true')
 | 
| 
 | 
    33     parser.add_argument('--fasta', action='store_true')
 | 
| 
 | 
    34     parser.add_argument('--phase', action='store_true')
 | 
| 
 | 
    35     parser.add_argument('--beagle', action='store_true')
 | 
| 
 | 
    36     parser.add_argument('--plink', action='store_true')
 | 
| 
 | 
    37     parser.add_argument('--phylip', action='store_true')
 | 
| 
 | 
    38     parser.add_argument('--phylip_var', action='store_true')
 | 
| 
 | 
    39     parser.add_argument('--write_single_snp', action='store_true')
 | 
| 
 | 
    40     parser.add_argument('-k', action='store_true')
 | 
| 
 | 
    41 
 | 
| 
 | 
    42     # advanced options
 | 
| 
 | 
    43     parser.add_argument('--advanced_options_activate')
 | 
| 
 | 
    44     parser.add_argument('-B')
 | 
| 
 | 
    45     parser.add_argument('-W')
 | 
| 
 | 
    46     parser.add_argument('-r')
 | 
| 
 | 
    47     parser.add_argument('-p')
 | 
| 
 | 
    48     parser.add_argument('-m')
 | 
| 
 | 
    49     parser.add_argument('-a')
 | 
| 
 | 
    50     parser.add_argument('-f')
 | 
| 
 | 
    51     parser.add_argument('--p_value_cutoff')
 | 
| 
 | 
    52     parser.add_argument('--window_size')
 | 
| 
 | 
    53     parser.add_argument('--bootstrap')
 | 
| 
 | 
    54     parser.add_argument('--bootstrap_reps')
 | 
| 
 | 
    55 
 | 
| 
 | 
    56     # multifile management
 | 
| 
 | 
    57     parser.add_argument('--logfile')
 | 
| 
 | 
    58 
 | 
| 
 | 
    59     # outputs
 | 
| 
 | 
    60     parser.add_argument('--ss')
 | 
| 
 | 
    61     parser.add_argument('--s')
 | 
| 
 | 
    62 
 | 
| 
 | 
    63     # optional outputs
 | 
| 
 | 
    64     parser.add_argument('--ov')
 | 
| 
 | 
    65     parser.add_argument('--op')
 | 
| 
 | 
    66     parser.add_argument('--ol')
 | 
| 
 | 
    67     parser.add_argument('--of')
 | 
| 
 | 
    68     parser.add_argument('--os')
 | 
| 
 | 
    69     parser.add_argument('--oe')
 | 
| 
 | 
    70     parser.add_argument('--om')
 | 
| 
 | 
    71     parser.add_argument('--og') 
 | 
| 
 | 
    72 
 | 
| 
 | 
    73     parser.add_argument('--unphased_output')
 | 
| 
 | 
    74     parser.add_argument('--markers_output')
 | 
| 
 | 
    75     parser.add_argument('--phase_output')
 | 
| 
 | 
    76     parser.add_argument('--fst_output')
 | 
| 
 | 
    77 
 | 
| 
 | 
    78     options = parser.parse_args()
 | 
| 
 | 
    79 
 | 
| 
 | 
    80     # create the working dir
 | 
| 
 | 
    81     os.mkdir('job_outputs')
 | 
| 
 | 
    82     os.mkdir('galaxy_outputs')
 | 
| 
 | 
    83 
 | 
| 
 | 
    84     os.chdir('job_outputs')
 | 
| 
 | 
    85 
 | 
| 
 | 
    86     # STACKS_archive
 | 
| 
 | 
    87     # check if zipped files are into the tab
 | 
| 
 | 
    88     extract_compress_files(options.P, os.getcwd())
 | 
| 
 | 
    89 
 | 
| 
 | 
    90     # create the populations command input line
 | 
| 
 | 
    91     cmd_line=['populations']
 | 
| 
 | 
    92     cmd_line.extend(['-b', options.b, '-P', os.getcwd(), '-M', options.M])
 | 
| 
 | 
    93 
 | 
| 
 | 
    94     if options.e:
 | 
| 
 | 
    95         cmd_line.extend(['-e', options.e, options.genomic])
 | 
| 
 | 
    96 
 | 
| 
 | 
    97     # output options
 | 
| 
 | 
    98     if options.vcf:
 | 
| 
 | 
    99         cmd_line.append('--vcf')
 | 
| 
 | 
   100     if options.genepop:
 | 
| 
 | 
   101         cmd_line.append('--genepop')
 | 
| 
 | 
   102     if options.structure:
 | 
| 
 | 
   103         cmd_line.append('--structure')
 | 
| 
 | 
   104     if options.fasta:
 | 
| 
 | 
   105         cmd_line.append('--fasta')
 | 
| 
 | 
   106     if options.phase:
 | 
| 
 | 
   107         cmd_line.append('--phase')
 | 
| 
 | 
   108     if options.beagle:
 | 
| 
 | 
   109         cmd_line.append('--beagle')
 | 
| 
 | 
   110     if options.plink:
 | 
| 
 | 
   111         cmd_line.append('--plink')
 | 
| 
 | 
   112     if options.phylip:
 | 
| 
 | 
   113         cmd_line.append('--phylip')
 | 
| 
 | 
   114     if options.phylip_var and options.phylip:
 | 
| 
 | 
   115         cmd_line.append('--phylip_var')
 | 
| 
 | 
   116     if options.write_single_snp and (options.genepop or options.structure):
 | 
| 
 | 
   117         cmd_line.append('--write_single_snp')
 | 
| 
 | 
   118 
 | 
| 
 | 
   119     if options.k:
 | 
| 
 | 
   120         cmd_line.extend(['-k', '--window_size', options.window_size])
 | 
| 
 | 
   121    
 | 
| 
 | 
   122     if options.advanced_options_activate == 'true':
 | 
| 
 | 
   123         if options.B:
 | 
| 
 | 
   124             cmd_line.extend(['-B', options.B])
 | 
| 
 | 
   125         if options.W:
 | 
| 
 | 
   126             cmd_line.extend(['-W', options.W])
 | 
| 
 | 
   127 
 | 
| 
 | 
   128         cmd_line.extend(['-r', options.r])
 | 
| 
 | 
   129         cmd_line.extend(['-p', options.p])
 | 
| 
 | 
   130         cmd_line.extend(['-m', options.m])
 | 
| 
 | 
   131         cmd_line.extend(['-a', options.a])
 | 
| 
 | 
   132 
 | 
| 
 | 
   133     if options.f:
 | 
| 
 | 
   134         cmd_line.extend(['-f', options.f, '--p_value_cutoff', options.p_value_cutoff])
 | 
| 
 | 
   135     if options.bootstrap:
 | 
| 
 | 
   136         cmd_line.extend(['--bootstrap', options.bootstrap, '--bootstrap_reps', options.bootstrap_reps])
 | 
| 
 | 
   137 
 | 
| 
 | 
   138     print "[CMD]:"+' '.join(cmd_line)
 | 
| 
 | 
   139     subprocess.call(cmd_line)
 | 
| 
 | 
   140 
 | 
| 
 | 
   141     # postprocesses
 | 
| 
 | 
   142     try:
 | 
| 
 | 
   143         shutil.copy('batch_1.populations.log', options.logfile)
 | 
| 
 | 
   144     except:
 | 
| 
 | 
   145         sys.stderr.write('Error in population execution; Please read the additional output (stdout)\n')
 | 
| 
 | 
   146         sys.exit(1)
 | 
| 
 | 
   147 
 | 
| 
 | 
   148     try:
 | 
| 
 | 
   149         shutil.move(glob.glob('*.sumstats_summary.tsv')[0], options.ss)
 | 
| 
 | 
   150     except:
 | 
| 
 | 
   151         print "No sumstats summary file"
 | 
| 
 | 
   152 
 | 
| 
 | 
   153     try:
 | 
| 
 | 
   154         shutil.move(glob.glob('*.sumstats.tsv')[0], options.s)
 | 
| 
 | 
   155     except:
 | 
| 
 | 
   156         print "No sumstats file"
 | 
| 
 | 
   157 
 | 
| 
 | 
   158     # move additionnal output files
 | 
| 
 | 
   159     if options.vcf:
 | 
| 
 | 
   160         try:
 | 
| 
 | 
   161             shutil.move(glob.glob('*.vcf')[0], options.ov)
 | 
| 
 | 
   162         except:
 | 
| 
 | 
   163             print "No VCF files"
 | 
| 
 | 
   164 
 | 
| 
 | 
   165     if options.phylip:
 | 
| 
 | 
   166         try:
 | 
| 
 | 
   167             shutil.move(glob.glob('*.phylip')[0], options.op)
 | 
| 
 | 
   168             shutil.move(glob.glob('*.phylip.log')[0], options.ol)
 | 
| 
 | 
   169         except:
 | 
| 
 | 
   170             print "No phylip file"
 | 
| 
 | 
   171 
 | 
| 
 | 
   172     if options.fasta:
 | 
| 
 | 
   173         try:
 | 
| 
 | 
   174             shutil.move(glob.glob('*.fa')[0], options.of)
 | 
| 
 | 
   175         except:
 | 
| 
 | 
   176             print "No fasta files"
 | 
| 
 | 
   177 
 | 
| 
 | 
   178     if options.structure:
 | 
| 
 | 
   179         try:
 | 
| 
 | 
   180             shutil.move(glob.glob('*.structure.tsv')[0], options.os)
 | 
| 
 | 
   181         except:
 | 
| 
 | 
   182             print "No structure file"
 | 
| 
 | 
   183 
 | 
| 
 | 
   184     if options.plink :
 | 
| 
 | 
   185         try:
 | 
| 
 | 
   186             shutil.move(glob.glob('*.ped')[0], options.oe)
 | 
| 
 | 
   187             shutil.move(glob.glob('*.map')[0], options.om)
 | 
| 
 | 
   188         except:
 | 
| 
 | 
   189             print "No ped and map file"
 | 
| 
 | 
   190 
 | 
| 
 | 
   191     if options.genepop :
 | 
| 
 | 
   192         try:
 | 
| 
 | 
   193             shutil.move(glob.glob('*.genepop')[0], options.og)
 | 
| 
 | 
   194         except:
 | 
| 
 | 
   195             print "No genepop file"
 | 
| 
 | 
   196 
 | 
| 
 | 
   197     # copy all files inside tmp_dir into workdir or into an archive....
 | 
| 
 | 
   198     list_files = glob.glob('*')
 | 
| 
 | 
   199 
 | 
| 
 | 
   200     markerszip = zipfile.ZipFile('markers.zip.temp', 'w',
 | 
| 
 | 
   201                                  allowZip64=True)
 | 
| 
 | 
   202     phasezip = zipfile.ZipFile('phase.zip.temp', 'w', allowZip64=True)
 | 
| 
 | 
   203     unphasedzip = zipfile.ZipFile('unphased.zip.temp', 'w',
 | 
| 
 | 
   204                                   allowZip64=True)
 | 
| 
 | 
   205     fstzip = zipfile.ZipFile('fst.zip.temp', 'w', allowZip64=True)
 | 
| 
 | 
   206 
 | 
| 
 | 
   207     for i in list_files:
 | 
| 
 | 
   208         # for each type of files
 | 
| 
 | 
   209         if re.search("\.markers$", i):
 | 
| 
 | 
   210             markerszip.write(i)
 | 
| 
 | 
   211         elif re.search("phase\.inp$", i):
 | 
| 
 | 
   212             phasezip.write(i)
 | 
| 
 | 
   213         elif re.search("unphased\.bgl$", i):
 | 
| 
 | 
   214             unphasedzip.write(i)
 | 
| 
 | 
   215         elif re.search('fst', i):
 | 
| 
 | 
   216             fstzip.write(i)
 | 
| 
 | 
   217         else:
 | 
| 
 | 
   218         # else return original files
 | 
| 
 | 
   219             if re.search('^batch', os.path.basename(i)) \
 | 
| 
 | 
   220                 and not re.search("\.tsv$", os.path.basename(i)) \
 | 
| 
 | 
   221                 or re.search(".*_[0-9]*\.tsv$", os.path.basename(i)):
 | 
| 
 | 
   222                 shutil.move(i, '../galaxy_outputs')
 | 
| 
 | 
   223 
 | 
| 
 | 
   224     # close zip files
 | 
| 
 | 
   225     markerszip.close()
 | 
| 
 | 
   226     phasezip.close()
 | 
| 
 | 
   227     unphasedzip.close()
 | 
| 
 | 
   228     fstzip.close()
 | 
| 
 | 
   229 
 | 
| 
 | 
   230     # return archives
 | 
| 
 | 
   231     shutil.move('fst.zip.temp', options.fst_output)
 | 
| 
 | 
   232     if options.beagle:
 | 
| 
 | 
   233         shutil.move('markers.zip.temp', options.markers_output)
 | 
| 
 | 
   234         shutil.move('unphased.zip.temp', options.unphased_output)
 | 
| 
 | 
   235     if options.phase:
 | 
| 
 | 
   236         shutil.move('phase.zip.temp', options.phase_output)
 | 
| 
 | 
   237 
 | 
| 
 | 
   238 
 | 
| 
 | 
   239 if __name__ == '__main__':
 | 
| 
 | 
   240     __main__()
 | 
| 
 | 
   241 
 | 
| 
 | 
   242 
 | 
| 
 | 
   243 			
 |