comparison get_reads/get_read.py @ 57:31d688722e7a draft

Uploaded
author tyty
date Tue, 18 Nov 2014 01:01:21 -0500
parents
children
comparison
equal deleted inserted replaced
56:9d26c2e4953e 57:31d688722e7a
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 import sys
5 #from galaxy.tools.read_file import *
6 from Bio import SeqIO
7 import os
8 from read_file import *
9 import random
10 import string
11
12 fasta_file = sys.argv[1]
13 map_file = sys.argv[2]
14 result_file = sys.argv[3]
15
16 ospath = os.path.realpath(sys.argv[0])
17 ost = ospath.split('/')
18 syspath = ""
19 for i in range(len(ost)-1):
20 syspath = syspath+ost[i].strip()
21 syspath = syspath+'/'
22
23 rs = ''.join(random.sample(string.ascii_letters + string.digits, 8))
24 os.system("mkdir "+syspath+"output_"+rs)
25
26 syspathrs = syspath+"output_"+rs+'/'
27
28 os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > "+syspathrs+"map_info.txt")
29
30 fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
31 length_seq = {};
32 for seq in fasta_sequences:
33 nuc = seq.id;
34 length_seq[nuc] = len(seq.seq.tostring());
35
36
37
38 mapping = {}
39 transcripts = []
40
41 f = open(syspathrs+"map_info.txt");
42 for aline in f.readlines():
43 tline = aline.strip();
44 tl = tline.split('\t');
45 if tl[0].strip() not in transcripts:
46 transcripts.append(tl[0].strip());
47 mapping[tl[0].strip()] = [];
48
49 mapping[tl[0].strip()].append(tl[1].strip());
50
51 distribution = {};
52 coverage = {};
53 for transcript in length_seq:
54 distribution[transcript] = [];
55 for i in range(0, length_seq[transcript]):
56 distribution[transcript].append(0);
57 sum_count = float(0);
58 if transcript in mapping:
59 for j in range(0, len(mapping[transcript])):
60 index = mapping[transcript][j];
61 #count = reads[mapping[transcript][j][0]];
62 sum_count = sum_count + 1;
63 distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1;
64 coverage[transcript] = float(sum_count)/float(length_seq[transcript]);
65 else:
66 coverage[transcript] = 0
67
68
69
70
71
72 h = file(result_file, 'w')
73 for transcript in length_seq:
74 h.write(transcript);
75 h.write('\n')
76 for i in range(0, length_seq[transcript]):
77 h.write(str(distribution[transcript][i]))
78 h.write('\t')
79 h.write('\n')
80 h.write('\n')
81
82 os.system("rm -r "+syspathrs)
83
84
85
86 f.close();
87 h.close()
88
89
90
91