| 2 | 1 #!/usr/bin/env python | 
|  | 2 # -*- coding: utf-8 -*- | 
|  | 3 | 
|  | 4 import sys | 
|  | 5 #from galaxy.tools.read_file import * | 
|  | 6 from Bio import SeqIO | 
|  | 7 import os | 
|  | 8 from read_file import * | 
|  | 9 | 
|  | 10 fasta_file = sys.argv[1] | 
|  | 11 map_file = sys.argv[2] | 
|  | 12 result_file = sys.argv[3] | 
|  | 13 | 
|  | 14 os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > map_info.txt") | 
|  | 15 | 
|  | 16 fasta_sequences = SeqIO.parse(open(fasta_file),'fasta'); | 
|  | 17 length_seq = {}; | 
|  | 18 for seq in fasta_sequences: | 
|  | 19         nuc = seq.id; | 
|  | 20         length_seq[nuc] = len(seq.seq.tostring()); | 
|  | 21 | 
|  | 22 | 
|  | 23 | 
|  | 24 mapping = {} | 
|  | 25 transcripts = [] | 
|  | 26 | 
|  | 27 f = open("map_info.txt"); | 
|  | 28 for aline in f.readlines(): | 
|  | 29     tline = aline.strip(); | 
|  | 30     tl = tline.split('\t'); | 
|  | 31     if tl[0].strip() not in transcripts: | 
|  | 32         transcripts.append(tl[0].strip()); | 
|  | 33         mapping[tl[0].strip()] = []; | 
|  | 34 | 
|  | 35     mapping[tl[0].strip()].append(tl[1].strip()); | 
|  | 36 | 
|  | 37 distribution = {}; | 
|  | 38 coverage = {}; | 
|  | 39 for transcript in length_seq: | 
|  | 40     distribution[transcript] = []; | 
|  | 41     for i in range(0, length_seq[transcript]): | 
|  | 42         distribution[transcript].append(0); | 
|  | 43     sum_count = float(0); | 
|  | 44     if transcript in mapping: | 
|  | 45         for j in range(0, len(mapping[transcript])): | 
|  | 46             index = mapping[transcript][j]; | 
|  | 47             #count = reads[mapping[transcript][j][0]]; | 
|  | 48             sum_count = sum_count + 1; | 
|  | 49             distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1; | 
|  | 50             coverage[transcript] = float(sum_count)/float(length_seq[transcript]); | 
|  | 51     else: | 
|  | 52         coverage[transcript] = 0 | 
|  | 53 | 
|  | 54 | 
|  | 55 | 
|  | 56 | 
|  | 57 | 
|  | 58 h = file(result_file, 'w') | 
|  | 59 for transcript in length_seq: | 
|  | 60     h.write(transcript); | 
|  | 61     h.write('\n') | 
|  | 62     for i in range(0, length_seq[transcript]): | 
|  | 63         h.write(str(distribution[transcript][i])) | 
|  | 64         h.write('\t') | 
|  | 65     h.write('\n') | 
|  | 66     h.write('\n') | 
|  | 67 | 
|  | 68 | 
|  | 69 | 
|  | 70 | 
|  | 71 | 
|  | 72 f.close(); | 
|  | 73 h.close() | 
|  | 74 | 
|  | 75 | 
|  | 76 | 
|  | 77 |