diff biopython_parsing.py @ 1:112751823323 draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:52:57 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/biopython_parsing.py	Mon Jun 05 02:52:57 2023 +0000
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+# Biopython parsing module. Used in conjunction with the sar_finder script, and potential future scripts down the line.
+
+from Bio import SeqIO
+
+
+class FASTA_parser:
+    """Parses multi fasta file, and zips together header with sequence"""
+
+    def __init__(self, fa):
+        self.fa = fa
+
+    def multifasta_dict(self):
+        """parses the input multi fasta, and puts results into dictionary"""
+
+        return SeqIO.to_dict(SeqIO.parse(self.fa, "fasta"))
+
+
+if __name__ == "__main__":
+    fa_file = "test-data/mu-proteins.fa"
+    d = FASTA_parser(fa_file).multifasta_dict()
+    print(d)
+    for k, v in d.items():
+        print(v.description)