Mercurial > repos > tyty > structurefold
comparison get_reads/get_read.py @ 25:90bc1ead3abd draft
Uploaded
| author | tyty |
|---|---|
| date | Mon, 20 Oct 2014 14:41:41 -0400 |
| parents | 297cdb01d656 |
| children |
comparison
equal
deleted
inserted
replaced
| 24:fcc92680e802 | 25:90bc1ead3abd |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 # -*- coding: utf-8 -*- | |
| 3 | |
| 4 import sys | |
| 5 #from galaxy.tools.read_file import * | |
| 6 from Bio import SeqIO | |
| 7 import os | |
| 8 from read_file import * | |
| 9 | |
| 10 fasta_file = sys.argv[1] | |
| 11 map_file = sys.argv[2] | |
| 12 result_file = sys.argv[3] | |
| 13 | |
| 14 os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > map_info.txt") | |
| 15 | |
| 16 fasta_sequences = SeqIO.parse(open(fasta_file),'fasta'); | |
| 17 length_seq = {}; | |
| 18 for seq in fasta_sequences: | |
| 19 nuc = seq.id; | |
| 20 length_seq[nuc] = len(seq.seq.tostring()); | |
| 21 | |
| 22 | |
| 23 | |
| 24 mapping = {} | |
| 25 transcripts = [] | |
| 26 | |
| 27 f = open("map_info.txt"); | |
| 28 for aline in f.readlines(): | |
| 29 tline = aline.strip(); | |
| 30 tl = tline.split('\t'); | |
| 31 if tl[0].strip() not in transcripts: | |
| 32 transcripts.append(tl[0].strip()); | |
| 33 mapping[tl[0].strip()] = []; | |
| 34 | |
| 35 mapping[tl[0].strip()].append(tl[1].strip()); | |
| 36 | |
| 37 distribution = {}; | |
| 38 coverage = {}; | |
| 39 for transcript in length_seq: | |
| 40 distribution[transcript] = []; | |
| 41 for i in range(0, length_seq[transcript]): | |
| 42 distribution[transcript].append(0); | |
| 43 sum_count = float(0); | |
| 44 if transcript in mapping: | |
| 45 for j in range(0, len(mapping[transcript])): | |
| 46 index = mapping[transcript][j]; | |
| 47 #count = reads[mapping[transcript][j][0]]; | |
| 48 sum_count = sum_count + 1; | |
| 49 distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1; | |
| 50 coverage[transcript] = float(sum_count)/float(length_seq[transcript]); | |
| 51 else: | |
| 52 coverage[transcript] = 0 | |
| 53 | |
| 54 | |
| 55 | |
| 56 | |
| 57 | |
| 58 h = file(result_file, 'w') | |
| 59 for transcript in length_seq: | |
| 60 h.write(transcript); | |
| 61 h.write('\n') | |
| 62 for i in range(0, length_seq[transcript]): | |
| 63 h.write(str(distribution[transcript][i])) | |
| 64 h.write('\t') | |
| 65 h.write('\n') | |
| 66 h.write('\n') | |
| 67 | |
| 68 | |
| 69 | |
| 70 | |
| 71 | |
| 72 f.close(); | |
| 73 h.close() | |
| 74 | |
| 75 | |
| 76 | |
| 77 |
