Mercurial > repos > tyty > structurefold
comparison get_reads/get_read.py @ 62:58df1060fea1 draft
Uploaded
| author | tyty |
|---|---|
| date | Tue, 18 Nov 2014 15:53:42 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 61:563b7b27cb52 | 62:58df1060fea1 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # -*- coding: utf-8 -*- | |
| 3 | |
| 4 import sys | |
| 5 from Bio import SeqIO | |
| 6 import os | |
| 7 from read_file import * | |
| 8 import random | |
| 9 import string | |
| 10 | |
| 11 fasta_file = sys.argv[1] | |
| 12 map_file = sys.argv[2] | |
| 13 result_file = sys.argv[3] | |
| 14 | |
| 15 syspathrs = os.getcwd() | |
| 16 | |
| 17 os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > "+syspathrs+"map_info.txt") | |
| 18 | |
| 19 fasta_sequences = SeqIO.parse(open(fasta_file),'fasta'); | |
| 20 length_seq = {}; | |
| 21 for seq in fasta_sequences: | |
| 22 nuc = seq.id; | |
| 23 length_seq[nuc] = len(seq.seq.tostring()); | |
| 24 | |
| 25 | |
| 26 | |
| 27 mapping = {} | |
| 28 transcripts = [] | |
| 29 | |
| 30 f = open(syspathrs+"map_info.txt"); | |
| 31 for aline in f.readlines(): | |
| 32 tline = aline.strip(); | |
| 33 tl = tline.split('\t'); | |
| 34 if tl[0].strip() not in transcripts: | |
| 35 transcripts.append(tl[0].strip()); | |
| 36 mapping[tl[0].strip()] = []; | |
| 37 | |
| 38 mapping[tl[0].strip()].append(tl[1].strip()); | |
| 39 | |
| 40 distribution = {}; | |
| 41 coverage = {}; | |
| 42 for transcript in length_seq: | |
| 43 distribution[transcript] = []; | |
| 44 for i in range(0, length_seq[transcript]): | |
| 45 distribution[transcript].append(0); | |
| 46 sum_count = float(0); | |
| 47 if transcript in mapping: | |
| 48 for j in range(0, len(mapping[transcript])): | |
| 49 index = mapping[transcript][j]; | |
| 50 #count = reads[mapping[transcript][j][0]]; | |
| 51 sum_count = sum_count + 1; | |
| 52 distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1; | |
| 53 coverage[transcript] = float(sum_count)/float(length_seq[transcript]); | |
| 54 else: | |
| 55 coverage[transcript] = 0 | |
| 56 | |
| 57 | |
| 58 | |
| 59 | |
| 60 | |
| 61 h = file(result_file, 'w') | |
| 62 for transcript in length_seq: | |
| 63 h.write(transcript); | |
| 64 h.write('\n') | |
| 65 for i in range(0, length_seq[transcript]): | |
| 66 h.write(str(distribution[transcript][i])) | |
| 67 h.write('\t') | |
| 68 h.write('\n') | |
| 69 h.write('\n') | |
| 70 | |
| 71 #os.system("rm -r "+syspathrs) | |
| 72 | |
| 73 | |
| 74 | |
| 75 f.close(); | |
| 76 h.close() | |
| 77 | |
| 78 | |
| 79 | |
| 80 |
