Mercurial > repos > nedias > orf_tools
view orf_tool.py @ 7:0eda2c588bcd draft
Deleted selected files
author | nedias |
---|---|
date | Wed, 12 Oct 2016 18:13:02 -0400 |
parents | 0095bf758b19 |
children |
line wrap: on
line source
""" Actual function class of open reading frame searching tool Served as a bridge between util class and entry. Author Nedias Sept, 2016 """ from Bio import SeqIO from Bio.SeqRecord import SeqRecord import ORFFinder import os import GTranslator # Get command and parameter from entry and call corresponding function def exec_tool(options): # If format is fasta if options.format and options.format == "fasta": exec_fasta(options.input, options.outputa, options.outputd, options.length) # TODO: If format is fastq elif options.format and options.format == "fastq": print("Process Fastq File(TODO:Not Implemented)") # TODO: If format is sam elif options.format and options.format == "sam": print("Process Sam File(TODO:Not Implemented)") # TODO: If format is bam elif options.format and options.format == "bam": print("Process Bam File(TODO:Not Implemented)") # Read the input fasta file find all open reading frames(ORFs) # input: 1.in_file: input file in fasta format # 2.outputa: all ORFs found are saved in this file # 3.outputd: all ORFs longer than designated length are saved in this file # 4.length: filter all ORFs if less than percentage of the length of the longest ORF found # return: execute status code # TODO: Seq and Rev_seq need to be process in the same time def exec_fasta(in_file, output_all, output_dest, length): # Open input file(read only) input_file = open(in_file, "rU") # Open all match file(create or override) all_mth_file = open(output_all, "w+") # Open all match file(create or override) desi_file = open(output_dest, "w+") # Scan through all Sequenced data in input file for record in SeqIO.parse(input_file, "fasta"): # for each sequence, use function in ORFFinder to abstract all ORFs seq = record.seq # Get all start and end positions in +strand result = ORFFinder.get_all_orf(str(seq), False) # Get all start-end pairs in +strand pairs = ORFFinder.find_all_orf(result) # Reverse the sequenced data rev_seq = seq[::-1] # Get all start and end positions in -strand rev_result = ORFFinder.get_all_orf(str(rev_seq), True) # Get all start-end pairs in -strand rev_pairs = ORFFinder.find_all_orf(rev_result) # Get longest start-end pair of both strands longest_match = ORFFinder.get_longest_pair(pairs, rev_pairs) # Calculate the designated length match_length = int(longest_match * int(length) / 100) # All ORFs all_frags = [] # All designated ORFs desi_frags = [] # TODO: considering make the result in dictionary and make this four for-loop into 2 or 1 loop # For each pair in the +strand for pair in pairs[:-1]: # Intercept ORF from the original sequence using the start-end pair, and than translate the sequence # into polypeptide sequence frag = SeqRecord(GTranslator.nucleotide_to_polypeptide(record.seq[pair[0]:pair[1]], False), record.id + "|" + str(pair[0]) + "-" + str(pair[1]), '', '') all_frags.append(frag) # For each pair in the -strand for pair2 in rev_pairs[:-1]: # Intercept ORF from the original sequence using the start-end pair, and than translate the sequence # into polypeptide sequence frag = SeqRecord(GTranslator.nucleotide_to_polypeptide(rev_seq[pair2[0]:pair2[1]], True), record.id + "|" + str(len(rev_seq) - pair2[0]) + "-" + str(len(rev_seq) - pair2[1]), '', '') all_frags.append(frag) desi_pairs = ORFFinder.get_desi_pairs(pairs, match_length) rev_desi_pairs = ORFFinder.get_desi_pairs(rev_pairs, match_length) # For each designated pair in the +strand for pair in desi_pairs: # Intercept ORF from the original sequence using the start-end pair, and than translate the sequence # into polypeptide sequence frag = SeqRecord(GTranslator.nucleotide_to_polypeptide(seq[pair[0]:pair[1]], False), record.id + "|" + str(pair[0]) + "-" + str(pair[1]), '', '') desi_frags.append(frag) # For each designated pair in the strand for pair in rev_desi_pairs: # Intercept ORF from the original sequence using the start-end pair, and than translate the sequence # into polypeptide sequence frag = SeqRecord(GTranslator.nucleotide_to_polypeptide(rev_seq[pair[0]:pair[1]], True), record.id + "|" + str(len(rev_seq) - pair[0]) + "-" + str(len(rev_seq) - pair[1]), '', '') desi_frags.append(frag) # Write the result to output file SeqIO.write(all_frags, all_mth_file, "fasta") SeqIO.write(desi_frags, desi_file, "fasta") # Close file entry input_file.close() all_mth_file.close() desi_file.close() return 0