orf_tools: orf_tool.py comparison

comparison orf_tool.py @ 3:0095bf758b19 draft

Uploaded

author	nedias
date	Wed, 12 Oct 2016 00:03:53 -0400
parents
children

comparison

equal deleted inserted replaced

-:c56b8a6bd02e
+:0095bf758b19
+"""
+Actual function class of open reading frame searching tool
+Served as a bridge between util class and entry.
+Author Nedias Sept, 2016
+"""
+from Bio import SeqIO
+from Bio.SeqRecord import SeqRecord
+import ORFFinder
+import os
+import GTranslator
+# Get command and parameter from entry and call corresponding function
+def exec_tool(options):
+# If format is fasta
+if options.format and options.format == "fasta":
+exec_fasta(options.input, options.outputa, options.outputd, options.length)
+# TODO: If format is fastq
+elif options.format and options.format == "fastq":
+print("Process Fastq File(TODO:Not Implemented)")
+# TODO: If format is sam
+elif options.format and options.format == "sam":
+print("Process Sam File(TODO:Not Implemented)")
+# TODO: If format is bam
+elif options.format and options.format == "bam":
+print("Process Bam File(TODO:Not Implemented)")
+# Read the input fasta file find all open reading frames(ORFs)
+# input: 1.in_file: input file in fasta format
+#        2.outputa: all ORFs found are saved in this file
+#        3.outputd: all ORFs longer than designated length are saved in this file
+#        4.length: filter all ORFs if less than percentage of the length of the longest ORF found
+# return: execute status code
+# TODO: Seq and Rev_seq need to be process in the same time
+def exec_fasta(in_file, output_all, output_dest, length):
+# Open input file(read only)
+input_file = open(in_file, "rU")
+# Open all match file(create or override)
+all_mth_file = open(output_all, "w+")
+# Open all match file(create or override)
+desi_file = open(output_dest, "w+")
+# Scan through all Sequenced data in input file
+for record in SeqIO.parse(input_file, "fasta"):
+# for each sequence, use function in ORFFinder to abstract all ORFs
+seq = record.seq
+# Get all start and end positions in +strand
+result = ORFFinder.get_all_orf(str(seq), False)
+# Get all start-end pairs in +strand
+pairs = ORFFinder.find_all_orf(result)
+# Reverse the sequenced data
+rev_seq = seq[::-1]
+# Get all start and end positions in -strand
+rev_result = ORFFinder.get_all_orf(str(rev_seq), True)
+# Get all start-end pairs in -strand
+rev_pairs = ORFFinder.find_all_orf(rev_result)
+# Get longest start-end pair of both strands
+longest_match = ORFFinder.get_longest_pair(pairs, rev_pairs)
+# Calculate the designated length
+match_length = int(longest_match * int(length) / 100)
+# All ORFs
+all_frags = []
+# All designated ORFs
+desi_frags = []
+# TODO: considering make the result in dictionary and make this four for-loop into 2 or 1 loop
+# For each pair in the +strand
+for pair in pairs[:-1]:
+# Intercept ORF from the original sequence using the start-end pair, and than translate the sequence
+# into polypeptide sequence
+frag = SeqRecord(GTranslator.nucleotide_to_polypeptide(record.seq[pair[0]:pair[1]], False), record.id + "|"
++ str(pair[0]) + "-" + str(pair[1]),
+'', '')
+all_frags.append(frag)
+# For each pair in the -strand
+for pair2 in rev_pairs[:-1]:
+# Intercept ORF from the original sequence using the start-end pair, and than translate the sequence
+# into polypeptide sequence
+frag = SeqRecord(GTranslator.nucleotide_to_polypeptide(rev_seq[pair2[0]:pair2[1]], True),
+record.id + "|" + str(len(rev_seq) - pair2[0]) + "-" + str(len(rev_seq) - pair2[1]),
+'', '')
+all_frags.append(frag)
+desi_pairs = ORFFinder.get_desi_pairs(pairs, match_length)
+rev_desi_pairs = ORFFinder.get_desi_pairs(rev_pairs, match_length)
+# For each designated pair in the +strand
+for pair in desi_pairs:
+# Intercept ORF from the original sequence using the start-end pair, and than translate the sequence
+# into polypeptide sequence
+frag = SeqRecord(GTranslator.nucleotide_to_polypeptide(seq[pair[0]:pair[1]], False),
+record.id + "|" + str(pair[0]) + "-" + str(pair[1]), '', '')
+desi_frags.append(frag)
+# For each designated pair in the strand
+for pair in rev_desi_pairs:
+# Intercept ORF from the original sequence using the start-end pair, and than translate the sequence
+# into polypeptide sequence
+frag = SeqRecord(GTranslator.nucleotide_to_polypeptide(rev_seq[pair[0]:pair[1]], True),
+record.id + "|" + str(len(rev_seq) - pair[0]) + "-" + str(len(rev_seq) - pair[1]),
+'', '')
+desi_frags.append(frag)
+# Write the result to output file
+SeqIO.write(all_frags, all_mth_file, "fasta")
+SeqIO.write(desi_frags, desi_file, "fasta")
+# Close file entry
+input_file.close()
+all_mth_file.close()
+desi_file.close()
+return 0

Mercurial > repos > nedias > orf_tools

comparison orf_tool.py @ 3:0095bf758b19 draft