| 
62
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 # -*- coding: utf-8 -*-
 | 
| 
 | 
     3 
 | 
| 
 | 
     4 import sys
 | 
| 
 | 
     5 from Bio import SeqIO
 | 
| 
 | 
     6 import os
 | 
| 
 | 
     7 from read_file import *
 | 
| 
 | 
     8 import random
 | 
| 
 | 
     9 import string
 | 
| 
 | 
    10 
 | 
| 
 | 
    11 fasta_file = sys.argv[1]
 | 
| 
 | 
    12 map_file = sys.argv[2]
 | 
| 
 | 
    13 result_file = sys.argv[3]
 | 
| 
 | 
    14 
 | 
| 
 | 
    15 syspathrs = os.getcwd()
 | 
| 
 | 
    16 
 | 
| 
 | 
    17 os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > "+syspathrs+"map_info.txt") 
 | 
| 
 | 
    18 
 | 
| 
 | 
    19 fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
 | 
| 
 | 
    20 length_seq = {};
 | 
| 
 | 
    21 for seq in fasta_sequences:
 | 
| 
 | 
    22         nuc = seq.id;
 | 
| 
 | 
    23         length_seq[nuc] = len(seq.seq.tostring());
 | 
| 
 | 
    24 
 | 
| 
 | 
    25 
 | 
| 
 | 
    26 
 | 
| 
 | 
    27 mapping = {}
 | 
| 
 | 
    28 transcripts = []
 | 
| 
 | 
    29 
 | 
| 
 | 
    30 f = open(syspathrs+"map_info.txt");
 | 
| 
 | 
    31 for aline in f.readlines():
 | 
| 
 | 
    32     tline = aline.strip();
 | 
| 
 | 
    33     tl = tline.split('\t');
 | 
| 
 | 
    34     if tl[0].strip() not in transcripts:
 | 
| 
 | 
    35         transcripts.append(tl[0].strip());
 | 
| 
 | 
    36         mapping[tl[0].strip()] = [];
 | 
| 
 | 
    37 
 | 
| 
 | 
    38     mapping[tl[0].strip()].append(tl[1].strip());
 | 
| 
 | 
    39 
 | 
| 
 | 
    40 distribution = {};
 | 
| 
 | 
    41 coverage = {};
 | 
| 
 | 
    42 for transcript in length_seq:
 | 
| 
 | 
    43     distribution[transcript] = [];
 | 
| 
 | 
    44     for i in range(0, length_seq[transcript]):
 | 
| 
 | 
    45         distribution[transcript].append(0);
 | 
| 
 | 
    46     sum_count = float(0);
 | 
| 
 | 
    47     if transcript in mapping:
 | 
| 
 | 
    48         for j in range(0, len(mapping[transcript])):
 | 
| 
 | 
    49             index = mapping[transcript][j];
 | 
| 
 | 
    50             #count = reads[mapping[transcript][j][0]];
 | 
| 
 | 
    51             sum_count = sum_count + 1;
 | 
| 
 | 
    52             distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1;
 | 
| 
 | 
    53             coverage[transcript] = float(sum_count)/float(length_seq[transcript]);
 | 
| 
 | 
    54     else:
 | 
| 
 | 
    55         coverage[transcript] = 0
 | 
| 
 | 
    56 
 | 
| 
 | 
    57         
 | 
| 
 | 
    58         
 | 
| 
 | 
    59     
 | 
| 
 | 
    60 
 | 
| 
 | 
    61 h = file(result_file, 'w')
 | 
| 
 | 
    62 for transcript in length_seq:
 | 
| 
 | 
    63     h.write(transcript);
 | 
| 
 | 
    64     h.write('\n')
 | 
| 
 | 
    65     for i in range(0, length_seq[transcript]):
 | 
| 
 | 
    66         h.write(str(distribution[transcript][i]))
 | 
| 
 | 
    67         h.write('\t')
 | 
| 
 | 
    68     h.write('\n')
 | 
| 
 | 
    69     h.write('\n')
 | 
| 
 | 
    70 
 | 
| 
 | 
    71 #os.system("rm -r "+syspathrs)
 | 
| 
 | 
    72 
 | 
| 
 | 
    73     
 | 
| 
 | 
    74 
 | 
| 
 | 
    75 f.close();
 | 
| 
 | 
    76 h.close()
 | 
| 
 | 
    77 
 | 
| 
 | 
    78 
 | 
| 
 | 
    79 
 | 
| 
 | 
    80 
 |