| 62 | 1 #!/usr/bin/env python | 
|  | 2 # -*- coding: utf-8 -*- | 
|  | 3 | 
|  | 4 import sys | 
|  | 5 from Bio import SeqIO | 
|  | 6 import os | 
|  | 7 from read_file import * | 
|  | 8 import random | 
|  | 9 import string | 
|  | 10 | 
|  | 11 fasta_file = sys.argv[1] | 
|  | 12 map_file = sys.argv[2] | 
|  | 13 result_file = sys.argv[3] | 
|  | 14 | 
|  | 15 syspathrs = os.getcwd() | 
|  | 16 | 
|  | 17 os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > "+syspathrs+"map_info.txt") | 
|  | 18 | 
|  | 19 fasta_sequences = SeqIO.parse(open(fasta_file),'fasta'); | 
|  | 20 length_seq = {}; | 
|  | 21 for seq in fasta_sequences: | 
|  | 22         nuc = seq.id; | 
|  | 23         length_seq[nuc] = len(seq.seq.tostring()); | 
|  | 24 | 
|  | 25 | 
|  | 26 | 
|  | 27 mapping = {} | 
|  | 28 transcripts = [] | 
|  | 29 | 
|  | 30 f = open(syspathrs+"map_info.txt"); | 
|  | 31 for aline in f.readlines(): | 
|  | 32     tline = aline.strip(); | 
|  | 33     tl = tline.split('\t'); | 
|  | 34     if tl[0].strip() not in transcripts: | 
|  | 35         transcripts.append(tl[0].strip()); | 
|  | 36         mapping[tl[0].strip()] = []; | 
|  | 37 | 
|  | 38     mapping[tl[0].strip()].append(tl[1].strip()); | 
|  | 39 | 
|  | 40 distribution = {}; | 
|  | 41 coverage = {}; | 
|  | 42 for transcript in length_seq: | 
|  | 43     distribution[transcript] = []; | 
|  | 44     for i in range(0, length_seq[transcript]): | 
|  | 45         distribution[transcript].append(0); | 
|  | 46     sum_count = float(0); | 
|  | 47     if transcript in mapping: | 
|  | 48         for j in range(0, len(mapping[transcript])): | 
|  | 49             index = mapping[transcript][j]; | 
|  | 50             #count = reads[mapping[transcript][j][0]]; | 
|  | 51             sum_count = sum_count + 1; | 
|  | 52             distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1; | 
|  | 53             coverage[transcript] = float(sum_count)/float(length_seq[transcript]); | 
|  | 54     else: | 
|  | 55         coverage[transcript] = 0 | 
|  | 56 | 
|  | 57 | 
|  | 58 | 
|  | 59 | 
|  | 60 | 
|  | 61 h = file(result_file, 'w') | 
|  | 62 for transcript in length_seq: | 
|  | 63     h.write(transcript); | 
|  | 64     h.write('\n') | 
|  | 65     for i in range(0, length_seq[transcript]): | 
|  | 66         h.write(str(distribution[transcript][i])) | 
|  | 67         h.write('\t') | 
|  | 68     h.write('\n') | 
|  | 69     h.write('\n') | 
|  | 70 | 
|  | 71 #os.system("rm -r "+syspathrs) | 
|  | 72 | 
|  | 73 | 
|  | 74 | 
|  | 75 f.close(); | 
|  | 76 h.close() | 
|  | 77 | 
|  | 78 | 
|  | 79 | 
|  | 80 |