Mercurial > repos > weilong-guo > bs_seeker2
comparison BSseeker2/bs_index/wg_build.py @ 0:e6df770c0e58 draft
Initial upload
author | weilong-guo |
---|---|
date | Fri, 12 Jul 2013 18:47:28 -0400 |
parents | |
children | 8b26adf64adc |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e6df770c0e58 |
---|---|
1 from bs_utils.utils import * | |
2 | |
3 | |
4 def wg_build(fasta_file, build_command, ref_path, aligner): | |
5 | |
6 # ref_path is a string that containts the directory where the reference genomes are stored with | |
7 # the input fasta filename appended | |
8 ref_path = os.path.join(ref_path, | |
9 os.path.split(fasta_file)[1] + '_'+aligner) | |
10 | |
11 clear_dir(ref_path) | |
12 #--------------------------------------------------------------- | |
13 # 1. First get the complementary genome (also do the reverse) | |
14 # 2. Then do CT and GA conversions | |
15 #--------------------------------------------------------------- | |
16 | |
17 open_log(os.path.join(ref_path, 'log')) | |
18 refd = {} | |
19 w_c2t = open(os.path.join(ref_path, 'W_C2T.fa'),'w') | |
20 c_c2t = open(os.path.join(ref_path, 'C_C2T.fa'),'w') | |
21 | |
22 w_g2a = open(os.path.join(ref_path, 'W_G2A.fa'),'w') | |
23 c_g2a = open(os.path.join(ref_path, 'C_G2A.fa'),'w') | |
24 for chrom_id, chrom_seq in read_fasta(fasta_file): | |
25 serialize(chrom_seq, os.path.join(ref_path, chrom_id)) | |
26 refd[chrom_id] = len(chrom_seq) | |
27 | |
28 w_c2t.write('>%s\n%s\n' % (chrom_id, chrom_seq.replace("C","T"))) | |
29 w_g2a.write('>%s\n%s\n' % (chrom_id, chrom_seq.replace("G","A"))) | |
30 | |
31 chrom_seq = reverse_compl_seq(chrom_seq) | |
32 | |
33 c_c2t.write('>%s\n%s\n' % (chrom_id, chrom_seq.replace("C","T"))) | |
34 c_g2a.write('>%s\n%s\n' % (chrom_id, chrom_seq.replace("G","A"))) | |
35 | |
36 elapsed('Preprocessing '+chrom_id) | |
37 | |
38 for outf in [w_c2t, c_c2t, w_g2a, c_g2a]: | |
39 outf.close() | |
40 | |
41 serialize(refd, os.path.join(ref_path,"refname")) | |
42 elapsed('Genome preprocessing') | |
43 # append ref_path to all elements of to_bowtie | |
44 to_bowtie = map(lambda f: os.path.join(ref_path, f), ['W_C2T', 'W_G2A', 'C_C2T', 'C_G2A']) | |
45 | |
46 # start bowtie-build for all converted genomes and wait for the processes to finish | |
47 | |
48 run_in_parallel([(build_command % { 'fname' : fname }, fname+'.log') for fname in to_bowtie]) | |
49 | |
50 # delete fasta files of converted genomes | |
51 if aligner != "rmap" : | |
52 delete_files(f+'.fa' for f in to_bowtie) | |
53 | |
54 elapsed('Done') | |
55 |