0
|
1 from bs_utils.utils import *
|
|
2
|
|
3
|
|
4 def wg_build(fasta_file, build_command, ref_path, aligner):
|
|
5
|
1
|
6 # ref_path is a string that contains the directory where the reference genomes are stored with
|
|
7 # the input Fasta filename appended
|
0
|
8 ref_path = os.path.join(ref_path,
|
|
9 os.path.split(fasta_file)[1] + '_'+aligner)
|
|
10
|
|
11 clear_dir(ref_path)
|
|
12 #---------------------------------------------------------------
|
|
13 # 1. First get the complementary genome (also do the reverse)
|
|
14 # 2. Then do CT and GA conversions
|
|
15 #---------------------------------------------------------------
|
|
16
|
|
17 open_log(os.path.join(ref_path, 'log'))
|
|
18 refd = {}
|
|
19 w_c2t = open(os.path.join(ref_path, 'W_C2T.fa'),'w')
|
|
20 c_c2t = open(os.path.join(ref_path, 'C_C2T.fa'),'w')
|
|
21
|
|
22 w_g2a = open(os.path.join(ref_path, 'W_G2A.fa'),'w')
|
|
23 c_g2a = open(os.path.join(ref_path, 'C_G2A.fa'),'w')
|
|
24 for chrom_id, chrom_seq in read_fasta(fasta_file):
|
|
25 serialize(chrom_seq, os.path.join(ref_path, chrom_id))
|
|
26 refd[chrom_id] = len(chrom_seq)
|
|
27
|
|
28 w_c2t.write('>%s\n%s\n' % (chrom_id, chrom_seq.replace("C","T")))
|
|
29 w_g2a.write('>%s\n%s\n' % (chrom_id, chrom_seq.replace("G","A")))
|
|
30
|
|
31 chrom_seq = reverse_compl_seq(chrom_seq)
|
|
32
|
|
33 c_c2t.write('>%s\n%s\n' % (chrom_id, chrom_seq.replace("C","T")))
|
|
34 c_g2a.write('>%s\n%s\n' % (chrom_id, chrom_seq.replace("G","A")))
|
|
35
|
|
36 elapsed('Preprocessing '+chrom_id)
|
|
37
|
|
38 for outf in [w_c2t, c_c2t, w_g2a, c_g2a]:
|
|
39 outf.close()
|
|
40
|
|
41 serialize(refd, os.path.join(ref_path,"refname"))
|
|
42 elapsed('Genome preprocessing')
|
|
43 # append ref_path to all elements of to_bowtie
|
|
44 to_bowtie = map(lambda f: os.path.join(ref_path, f), ['W_C2T', 'W_G2A', 'C_C2T', 'C_G2A'])
|
|
45
|
|
46 # start bowtie-build for all converted genomes and wait for the processes to finish
|
|
47
|
|
48 run_in_parallel([(build_command % { 'fname' : fname }, fname+'.log') for fname in to_bowtie])
|
|
49
|
|
50 # delete fasta files of converted genomes
|
|
51 if aligner != "rmap" :
|
|
52 delete_files(f+'.fa' for f in to_bowtie)
|
|
53
|
|
54 elapsed('Done')
|
|
55
|