Mercurial > repos > nedias > orf_tools
comparison orf_tool.py @ 3:0095bf758b19 draft
Uploaded
author | nedias |
---|---|
date | Wed, 12 Oct 2016 00:03:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:c56b8a6bd02e | 3:0095bf758b19 |
---|---|
1 """ | |
2 Core functions of the open reading frame (ORF) searching tool. | |
3 Serves as a bridge between the utility modules and the entry point. | |
4 | |
5 Author Nedias Sept, 2016 | |
6 """ | |
7 from Bio import SeqIO | |
8 from Bio.SeqRecord import SeqRecord | |
9 import ORFFinder | |
10 import os | |
11 import GTranslator | |
12 | |
13 | |
def exec_tool(options):
    """Dispatch to the handler that matches ``options.format``.

    Only "fasta" is actually processed, by handing ``options.input``,
    ``options.outputa``, ``options.outputd`` and ``options.length`` to
    exec_fasta. "fastq", "sam" and "bam" only print a placeholder message,
    and any other (or empty) format does nothing.
    """
    file_format = options.format

    if file_format == "fasta":
        exec_fasta(options.input, options.outputa, options.outputd, options.length)
        return

    # TODO: these formats are recognised but not implemented yet
    placeholders = (
        ("fastq", "Process Fastq File(TODO:Not Implemented)"),
        ("sam", "Process Sam File(TODO:Not Implemented)"),
        ("bam", "Process Bam File(TODO:Not Implemented)"),
    )
    for name, message in placeholders:
        if file_format == name:
            print(message)
            break
28 | |
29 | |
def _translate_orfs(record_id, sequence, pairs, reverse):
    """Cut each start-end pair out of ``sequence`` and translate it.

    :param record_id: id of the source record, used as prefix of each new id
    :param sequence: sequence the pair positions index into
    :param pairs: iterable of start-end pairs (pair[0] = start, pair[1] = end)
    :param reverse: flag forwarded to GTranslator.nucleotide_to_polypeptide;
                    when set, the positions written into the id are counted
                    from the other end of the sequence (len - position)
    :return: list of SeqRecord, one per pair, with id "<record_id>|<start>-<end>"
    """
    total = len(sequence)
    frags = []
    for pair in pairs:
        if reverse:
            # Report -strand positions in the coordinates of the original sequence
            span = str(total - pair[0]) + "-" + str(total - pair[1])
        else:
            span = str(pair[0]) + "-" + str(pair[1])
        peptide = GTranslator.nucleotide_to_polypeptide(sequence[pair[0]:pair[1]], reverse)
        frags.append(SeqRecord(peptide, record_id + "|" + span, '', ''))
    return frags


def exec_fasta(in_file, output_all, output_dest, length):
    """Read a fasta file, find all open reading frames (ORFs) and save them.

    For every record both the sequence and its reversed copy are scanned with
    ORFFinder; each ORF is translated with GTranslator and written in fasta
    format.

    :param in_file: path of the input file in fasta format
    :param output_all: path of the file receiving all ORFs found (overwritten)
    :param output_dest: path of the file receiving the ORFs selected by
                        ORFFinder.get_desi_pairs for the designated length
                        (overwritten)
    :param length: percentage (int or numeric string) of the longest ORF of a
                   record, used as the designated length for that record
    :return: execute status code, always 0
    """
    # TODO: Seq and Rev_seq need to be process in the same time

    # The "U" mode flag ("rU") was removed in Python 3.11 and raises
    # ValueError there; plain "r" is accepted by every Python version.
    # The with-statement closes all three files even if processing fails.
    with open(in_file, "r") as input_file, \
            open(output_all, "w+") as all_mth_file, \
            open(output_dest, "w+") as desi_file:

        # Scan through all sequenced data in the input file
        for record in SeqIO.parse(input_file, "fasta"):
            seq = record.seq
            # Get all start and end positions, then the start-end pairs, in +strand
            pairs = ORFFinder.find_all_orf(ORFFinder.get_all_orf(str(seq), False))

            # Reverse the sequenced data and do the same for the -strand
            rev_seq = seq[::-1]
            rev_pairs = ORFFinder.find_all_orf(ORFFinder.get_all_orf(str(rev_seq), True))

            # Get longest start-end pair of both strands
            longest_match = ORFFinder.get_longest_pair(pairs, rev_pairs)

            # Calculate the designated length
            match_length = int(longest_match * int(length) / 100)

            # All ORFs of both strands.
            # NOTE(review): the last entry of each pair list is skipped here but
            # not when selecting designated pairs - presumably it is not a real
            # start-end pair; confirm against ORFFinder.find_all_orf.
            all_frags = (_translate_orfs(record.id, seq, pairs[:-1], False)
                         + _translate_orfs(record.id, rev_seq, rev_pairs[:-1], True))

            # All designated ORFs of both strands
            desi_pairs = ORFFinder.get_desi_pairs(pairs, match_length)
            rev_desi_pairs = ORFFinder.get_desi_pairs(rev_pairs, match_length)
            desi_frags = (_translate_orfs(record.id, seq, desi_pairs, False)
                          + _translate_orfs(record.id, rev_seq, rev_desi_pairs, True))

            # Write the result of this record to the output files
            SeqIO.write(all_frags, all_mth_file, "fasta")
            SeqIO.write(desi_frags, desi_file, "fasta")

    return 0