| 
2
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 # -*- coding: utf-8 -*-
 | 
| 
 | 
     3 
 | 
| 
 | 
     4 import sys
 | 
| 
 | 
     5 #from galaxy.tools.read_file import *
 | 
| 
 | 
     6 from Bio import SeqIO
 | 
| 
 | 
     7 import os
 | 
| 
 | 
     8 from read_file import *
 | 
| 
 | 
     9 
 | 
| 
 | 
    10 fasta_file = sys.argv[1]
 | 
| 
 | 
    11 map_file = sys.argv[2]
 | 
| 
 | 
    12 result_file = sys.argv[3]
 | 
| 
 | 
    13 
 | 
| 
 | 
    14 os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > map_info.txt") 
 | 
| 
 | 
    15 
 | 
| 
 | 
    16 fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
 | 
| 
 | 
    17 length_seq = {};
 | 
| 
 | 
    18 for seq in fasta_sequences:
 | 
| 
 | 
    19         nuc = seq.id;
 | 
| 
 | 
    20         length_seq[nuc] = len(seq.seq.tostring());
 | 
| 
 | 
    21 
 | 
| 
 | 
    22 
 | 
| 
 | 
    23 
 | 
| 
 | 
    24 mapping = {}
 | 
| 
 | 
    25 transcripts = []
 | 
| 
 | 
    26 
 | 
| 
 | 
    27 f = open("map_info.txt");
 | 
| 
 | 
    28 for aline in f.readlines():
 | 
| 
 | 
    29     tline = aline.strip();
 | 
| 
 | 
    30     tl = tline.split('\t');
 | 
| 
 | 
    31     if tl[0].strip() not in transcripts:
 | 
| 
 | 
    32         transcripts.append(tl[0].strip());
 | 
| 
 | 
    33         mapping[tl[0].strip()] = [];
 | 
| 
 | 
    34 
 | 
| 
 | 
    35     mapping[tl[0].strip()].append(tl[1].strip());
 | 
| 
 | 
    36 
 | 
| 
 | 
    37 distribution = {};
 | 
| 
 | 
    38 coverage = {};
 | 
| 
 | 
    39 for transcript in length_seq:
 | 
| 
 | 
    40     distribution[transcript] = [];
 | 
| 
 | 
    41     for i in range(0, length_seq[transcript]):
 | 
| 
 | 
    42         distribution[transcript].append(0);
 | 
| 
 | 
    43     sum_count = float(0);
 | 
| 
 | 
    44     if transcript in mapping:
 | 
| 
 | 
    45         for j in range(0, len(mapping[transcript])):
 | 
| 
 | 
    46             index = mapping[transcript][j];
 | 
| 
 | 
    47             #count = reads[mapping[transcript][j][0]];
 | 
| 
 | 
    48             sum_count = sum_count + 1;
 | 
| 
 | 
    49             distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1;
 | 
| 
 | 
    50             coverage[transcript] = float(sum_count)/float(length_seq[transcript]);
 | 
| 
 | 
    51     else:
 | 
| 
 | 
    52         coverage[transcript] = 0
 | 
| 
 | 
    53 
 | 
| 
 | 
    54         
 | 
| 
 | 
    55         
 | 
| 
 | 
    56     
 | 
| 
 | 
    57 
 | 
| 
 | 
    58 h = file(result_file, 'w')
 | 
| 
 | 
    59 for transcript in length_seq:
 | 
| 
 | 
    60     h.write(transcript);
 | 
| 
 | 
    61     h.write('\n')
 | 
| 
 | 
    62     for i in range(0, length_seq[transcript]):
 | 
| 
 | 
    63         h.write(str(distribution[transcript][i]))
 | 
| 
 | 
    64         h.write('\t')
 | 
| 
 | 
    65     h.write('\n')
 | 
| 
 | 
    66     h.write('\n')
 | 
| 
 | 
    67 
 | 
| 
 | 
    68 
 | 
| 
 | 
    69 
 | 
| 
 | 
    70     
 | 
| 
 | 
    71 
 | 
| 
 | 
    72 f.close();
 | 
| 
 | 
    73 h.close()
 | 
| 
 | 
    74 
 | 
| 
 | 
    75 
 | 
| 
 | 
    76 
 | 
| 
 | 
    77 
 |