0
|
1 import sys, tempfile, subprocess, glob
|
|
2 import os, re, shutil, stat
|
|
3 import optparse
|
|
4 from os.path import basename
|
|
5
|
|
6 """
|
|
7
|
|
8 Created by Cyril Monjeaud
|
|
9 Cyril.Monjeaud@irisa.fr
|
|
10 Modified by Fabrice Legeai
|
|
11 fabrice.legeai@rennes.inra.fr
|
|
12
|
|
13 Last modifications : 04/21/2015
|
|
14
|
|
15 WARNING :
|
|
16
|
|
17 discoSNP++.py needs:
|
|
18
|
|
19 - run_discoSnp++.sh
|
|
20 - discoSNP++_to_genotypes.py
|
|
21 - the build repository next to the scripts
|
|
22
|
|
23 All these files are available after compiling the sources of discoSNP :
|
|
24
|
|
25 https://colibread.inria.fr/files/2013/10/DiscoSNPpp-2.0.6-Source.zip
|
|
26
|
|
27 or with the package_discoSnp_plus_plus package in the toolshed
|
|
28
|
|
29 """
|
|
30
|
|
31
|
|
32 def __main__():
|
|
33
|
|
34 # store inputs in an array
|
|
35 parser = optparse.OptionParser()
|
|
36 parser.add_option("-r", dest="data_files")
|
|
37 parser.add_option("-b", dest="branching_bubbles")
|
|
38 parser.add_option("-D", dest="deletions")
|
|
39 parser.add_option("-P", dest="min_snps")
|
|
40 parser.add_option("-l", action="store_true", dest="low_complexity")
|
|
41 parser.add_option("-k", dest="kmer")
|
|
42 parser.add_option("-t", action="store_true", dest="left_right_unitigs")
|
|
43 parser.add_option("-T", action="store_true", dest="left_right_contigs")
|
|
44 parser.add_option("-c", dest="coverage")
|
|
45 parser.add_option("-C", dest="maxcoverage")
|
|
46 parser.add_option("-d", dest="error_threshold")
|
|
47 parser.add_option("-n", action="store_true", dest="genotypes")
|
|
48 parser.add_option("-G", dest="reference")
|
|
49 parser.add_option("-M", dest="mapping_error")
|
|
50
|
|
51 (options, args) = parser.parse_args()
|
|
52
|
|
53 # create the working dir inside job_working_dir
|
|
54 output_dir = os.mkdir("job_outputs")
|
|
55
|
|
56 cmd_line=[]
|
|
57 cmd_line.append("/bin/bash")
|
|
58 #cmd_line.append("/home/genouest/inrarennes/flegeai/local/DiscoSNP/DiscoSNP++-2.1.4-Source/run_discoSnp++.sh")
|
|
59 cmd_line.append("run_discoSnp++.sh")
|
|
60 #cmd_line.append("-B /local/bwa/bwa-0.7.10/")
|
|
61
|
|
62 # transform .dat into .fasta or .fastq for kissreads2
|
|
63 link_files=[]
|
|
64 f = open(options.data_files, 'r')
|
|
65 files = f.readlines()
|
|
66 for file in files:
|
|
67 file=file.strip()
|
|
68 if re.search("^$",file): continue
|
|
69 tagfile=[]
|
|
70 tagfile=re.split('::', file)
|
|
71 number = int(tagfile[0])+1
|
|
72 if re.search("^>.*", open(tagfile[1]).readline()):
|
|
73 link_file = 'input'+str(number)+'.fasta'
|
|
74 else:
|
|
75 link_file = 'input'+str(number)+'.fastq'
|
|
76
|
|
77 os.symlink(tagfile[1], link_file)
|
|
78 link_files.append(link_file)
|
|
79
|
|
80
|
|
81 # edit the command line
|
|
82 cmd_line.extend(["-r",' '.join(link_files),"-b",options.branching_bubbles,"-D",options.deletions,"-P",options.min_snps,"-k",options.kmer,"-c",options.coverage,"-C",options.maxcoverage,"-d",options.error_threshold])
|
|
83 if options.low_complexity:
|
|
84 cmd_line.append("-l")
|
|
85 if options.left_right_unitigs:
|
|
86 cmd_line.append("-t")
|
|
87 if options.left_right_contigs:
|
|
88 cmd_line.append("-T")
|
|
89 if options.genotypes:
|
|
90 cmd_line.append("-n")
|
|
91
|
|
92 # genotype part
|
|
93 if options.reference:
|
|
94 cmd_line.extend(["-G", options.reference])
|
|
95 cmd_line.extend(["-M", options.mapping_error])
|
|
96
|
|
97 cmd_line.extend(["-p","job_outputs/galaxy"])
|
|
98
|
|
99 # execute job
|
|
100 p=subprocess.Popen(cmd_line,
|
|
101 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
|
102
|
|
103 stdoutput, stderror = p.communicate()
|
|
104
|
|
105 # report file
|
|
106 logfile=open("report.txt", "w")
|
|
107 logfile.write("[COMMAND LINE]"+' '.join(cmd_line)+"\n\n")
|
|
108 logfile.write(stdoutput)
|
|
109
|
|
110 # print stderror because it's informations
|
|
111 logfile.write(stderror)
|
|
112
|
|
113 # close logfile
|
|
114 logfile.close()
|
|
115
|
|
116 # change .fa extension to .fasta for a correct print inside Galaxy
|
|
117 fafiles = glob.glob("job_outputs/*_coherent.fa")
|
|
118 for fafile in fafiles:
|
|
119 shutil.move(fafile, "coherent.fasta")
|
|
120 vcffiles = glob.glob("job_outputs/*_coherent.vcf")
|
|
121 for vcffile in vcffiles:
|
|
122 shutil.move(vcffile, "coherent.vcf")
|
|
123
|
|
124
|
|
125 if __name__ == "__main__": __main__()
|
|
126
|