annotate bbric_disco.py @ 3:081e0d67a33e default tip

add archive tools and datatypes
author cmonjeau
date Mon, 28 Sep 2015 13:17:40 +0000
parents 1beb3ed9e1e3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
1 import sys, tempfile, subprocess, glob
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
2 import os, re, shutil, stat
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
3 import optparse
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
4 from os.path import basename
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
5
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
6 """
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
7
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
8 Created by Cyril Monjeaud
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
9 Cyril.Monjeaud@irisa.fr
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
10 Modified by Fabrice Legeai
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
11 fabrice.legeai@rennes.inra.fr
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
12
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
13 Last modifications : 04/21/2015
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
14
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
15 WARNING :
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
16
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
17 discoSNP++.py needs:
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
18
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
19 - run_discoSnp++.sh
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
20 - discoSNP++_to_genotypes.py
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
21 - the build repository next to the scripts
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
22
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
23 All these files are available after compiling the sources of discoSNP :
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
24
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
25 https://colibread.inria.fr/files/2013/10/DiscoSNPpp-2.0.6-Source.zip
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
26
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
27 or with the package_discoSnp_plus_plus package in the toolshed
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
28
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
29 """
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
30
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
31
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
# (flag, optparse dest) pairs for options that carry a value and are handed to
# run_discoSnp++.sh under the same flag, in this order.
_FORWARDED_VALUES = (
    ("-b", "branching_bubbles"),
    ("-D", "deletions"),
    ("-P", "min_snps"),
    ("-k", "kmer"),
    ("-c", "coverage"),
    ("-C", "maxcoverage"),
    ("-d", "error_threshold"),
)

# (flag, optparse dest) pairs for boolean switches forwarded when set.
_FORWARDED_SWITCHES = (
    ("-l", "low_complexity"),
    ("-t", "left_right_unitigs"),
    ("-T", "left_right_contigs"),
    ("-n", "genotypes"),
)


def _build_parser():
    """Return the optparse parser describing this wrapper's options."""
    parser = optparse.OptionParser()
    parser.add_option("-r", dest="data_files")
    for flag, dest in _FORWARDED_VALUES:
        parser.add_option(flag, dest=dest)
    for flag, dest in _FORWARDED_SWITCHES:
        parser.add_option(flag, action="store_true", dest=dest)
    parser.add_option("-G", dest="reference")
    parser.add_option("-M", dest="mapping_error")
    return parser


def _link_inputs(list_path):
    """Symlink every dataset named in *list_path* under a typed file name.

    Each non-empty line of the list file has the form ``<index>::<path>``.
    The dataset is linked in the current directory as
    ``input<index + 1>.fasta`` when its first line starts with ``>`` and as
    ``input<index + 1>.fastq`` otherwise, so the .dat datasets get a
    .fasta/.fastq extension for kissreads2.

    Returns the list of link names, in list-file order.
    Raises ValueError/IndexError on a malformed line and OSError when a
    dataset cannot be read or a link cannot be created.
    """
    link_names = []
    with open(list_path, "r") as listing:
        for line in listing:
            line = line.strip()
            if not line:
                continue
            fields = line.split("::")
            number = int(fields[0]) + 1
            dataset = fields[1]
            # Probe in binary mode: only the first byte matters, and this
            # avoids decoding errors on datasets that are not valid text.
            with open(dataset, "rb") as handle:
                is_fasta = handle.readline().startswith(b">")
            link_name = "input%d.%s" % (number, "fasta" if is_fasta else "fastq")
            os.symlink(dataset, link_name)
            link_names.append(link_name)
    return link_names


def _build_command(options, link_files):
    """Return the argv list that runs run_discoSnp++.sh for *options*.

    Valued options that were not supplied on the command line are left out
    instead of being passed as None (which subprocess would reject).
    """
    cmd_line = ["/bin/bash", "run_discoSnp++.sh"]
    # The read files are handed over as ONE space-separated argument.
    cmd_line.extend(["-r", " ".join(link_files)])
    for flag, dest in _FORWARDED_VALUES:
        value = getattr(options, dest)
        if value is not None:
            cmd_line.extend([flag, value])
    for flag, dest in _FORWARDED_SWITCHES:
        if getattr(options, dest):
            cmd_line.append(flag)

    # genotype part: -M is only forwarded together with a reference
    if options.reference:
        cmd_line.extend(["-G", options.reference])
        if options.mapping_error is not None:
            cmd_line.extend(["-M", options.mapping_error])

    cmd_line.extend(["-p", "job_outputs/galaxy"])
    return cmd_line


def _write_report(cmd_line, stdoutput, stderror):
    """Write the command line and both captured streams to report.txt."""
    with open("report.txt", "w") as logfile:
        logfile.write("[COMMAND LINE]" + " ".join(cmd_line) + "\n\n")
        logfile.write(stdoutput)
        # stderr is written too because it carries informational messages
        logfile.write(stderror)


def _collect_outputs():
    """Move the produced *_coherent files to the fixed names Galaxy expects.

    The .fa extension becomes .fasta for a correct display inside Galaxy.
    """
    renames = (
        ("job_outputs/*_coherent.fa", "coherent.fasta"),
        ("job_outputs/*_coherent.vcf", "coherent.vcf"),
    )
    for pattern, target in renames:
        for produced in glob.glob(pattern):
            shutil.move(produced, target)


def __main__():
    """Run run_discoSnp++.sh on the datasets listed in the ``-r`` file.

    Steps, all relative to the current working directory:
      1. create the ``job_outputs`` directory (OSError if it already exists);
      2. symlink the listed datasets as ``inputN.fasta`` / ``inputN.fastq``;
      3. run ``/bin/bash run_discoSnp++.sh`` with the forwarded options and
         the output prefix ``job_outputs/galaxy``;
      4. write the command line, stdout and stderr to ``report.txt``;
      5. move the ``*_coherent.fa`` / ``*_coherent.vcf`` results to
         ``coherent.fasta`` / ``coherent.vcf``.

    Exits through ``parser.error`` (status 2) when ``-r`` is missing.
    """
    parser = _build_parser()
    options, _args = parser.parse_args()
    if options.data_files is None:
        parser.error("option -r (file listing the input datasets) is required")

    # create the working dir inside job_working_dir
    os.mkdir("job_outputs")

    link_files = _link_inputs(options.data_files)
    cmd_line = _build_command(options, link_files)

    # execute job; universal_newlines=True makes both streams text so they can
    # be written to the text-mode report on Python 3 as well as Python 2
    p = subprocess.Popen(cmd_line,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         universal_newlines=True)
    stdoutput, stderror = p.communicate()
    # NOTE(review): the exit status of the script is not checked; a failure is
    # only visible through the content of report.txt.

    _write_report(cmd_line, stdoutput, stderror)
    _collect_outputs()


if __name__ == "__main__":
    __main__()
1beb3ed9e1e3 Imported from capsule None
cmonjeau
parents:
diff changeset
126