Mercurial > repos > tyty > structurefold
comparison get_reads/get_read.py @ 57:31d688722e7a draft
Uploaded
author | tyty |
---|---|
date | Tue, 18 Nov 2014 01:01:21 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
56:9d26c2e4953e | 57:31d688722e7a |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 import sys | |
5 #from galaxy.tools.read_file import * | |
6 from Bio import SeqIO | |
7 import os | |
8 from read_file import * | |
9 import random | |
10 import string | |
11 | |
12 fasta_file = sys.argv[1] | |
13 map_file = sys.argv[2] | |
14 result_file = sys.argv[3] | |
15 | |
16 ospath = os.path.realpath(sys.argv[0]) | |
17 ost = ospath.split('/') | |
18 syspath = "" | |
19 for i in range(len(ost)-1): | |
20 syspath = syspath+ost[i].strip() | |
21 syspath = syspath+'/' | |
22 | |
23 rs = ''.join(random.sample(string.ascii_letters + string.digits, 8)) | |
24 os.system("mkdir "+syspath+"output_"+rs) | |
25 | |
26 syspathrs = syspath+"output_"+rs+'/' | |
27 | |
28 os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > "+syspathrs+"map_info.txt") | |
29 | |
30 fasta_sequences = SeqIO.parse(open(fasta_file),'fasta'); | |
31 length_seq = {}; | |
32 for seq in fasta_sequences: | |
33 nuc = seq.id; | |
34 length_seq[nuc] = len(seq.seq.tostring()); | |
35 | |
36 | |
37 | |
38 mapping = {} | |
39 transcripts = [] | |
40 | |
41 f = open(syspathrs+"map_info.txt"); | |
42 for aline in f.readlines(): | |
43 tline = aline.strip(); | |
44 tl = tline.split('\t'); | |
45 if tl[0].strip() not in transcripts: | |
46 transcripts.append(tl[0].strip()); | |
47 mapping[tl[0].strip()] = []; | |
48 | |
49 mapping[tl[0].strip()].append(tl[1].strip()); | |
50 | |
51 distribution = {}; | |
52 coverage = {}; | |
53 for transcript in length_seq: | |
54 distribution[transcript] = []; | |
55 for i in range(0, length_seq[transcript]): | |
56 distribution[transcript].append(0); | |
57 sum_count = float(0); | |
58 if transcript in mapping: | |
59 for j in range(0, len(mapping[transcript])): | |
60 index = mapping[transcript][j]; | |
61 #count = reads[mapping[transcript][j][0]]; | |
62 sum_count = sum_count + 1; | |
63 distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1; | |
64 coverage[transcript] = float(sum_count)/float(length_seq[transcript]); | |
65 else: | |
66 coverage[transcript] = 0 | |
67 | |
68 | |
69 | |
70 | |
71 | |
72 h = file(result_file, 'w') | |
73 for transcript in length_seq: | |
74 h.write(transcript); | |
75 h.write('\n') | |
76 for i in range(0, length_seq[transcript]): | |
77 h.write(str(distribution[transcript][i])) | |
78 h.write('\t') | |
79 h.write('\n') | |
80 h.write('\n') | |
81 | |
82 os.system("rm -r "+syspathrs) | |
83 | |
84 | |
85 | |
86 f.close(); | |
87 h.close() | |
88 | |
89 | |
90 | |
91 |