diff Disruptin_hydrophobicity_helicity_table_package.py @ 1:a99be535e99d draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:41:05 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Disruptin_hydrophobicity_helicity_table_package.py	Mon Jun 05 02:41:05 2023 +0000
@@ -0,0 +1,122 @@
+"""
+This program is intended to create the output table for the disruptin finder workflow
+"""
+from Bio import SeqIO
+from Bio.SeqUtils.ProtParam import ProteinAnalysis
+from Bio.SeqUtils import ProtParamData
+import csv
+import argparse
+import sys
+
+
+def disruptin_table(garnier_file, fasta_file):
+    # Iterable variables
+    position = 1
+    net_charge = 0
+    charge_res = 0
+    record_number = 0
+
+    # loop structures
+    names = []
+    sec_struct = []
+
+    # reading the lines from the garnier csv file
+    #    with open(garnier_file,'r') as csvfile:
+    #        garnierreader = csv.reader(csvfile)
+    for row in garnier_file:
+        if row[0] == "Sequence: ":
+            names += [row[1]]
+        elif row[0] in "HETC":
+            row = row.split("\t")
+            sec_struct += ["".join(row)]
+
+    record = []
+    p = []
+    r = []
+    c = []
+    h = []
+    s = []
+
+    # Parse the .fasta file and get the sequence
+    for rec in SeqIO.parse(fasta_file, "fasta"):
+        sequence = str(rec.seq)
+
+        # Set up the information vectors: for position #, residue, hydrophobic/charge/polar/nonpolar, and secondary
+        # structure
+        record += [rec.id]
+        position_vec = []
+        residue_vec = []
+        charge_sym_vec = []
+        sec_struct_vec = []
+
+        for aa in sequence:
+            position_vec += [str(position)]
+            residue_vec += [str(aa)]
+            sec_struct_vec += [str(sec_struct[record_number][position - 1])]
+
+            # For R and K residues a positive charge is given
+            if aa in "RK":
+                symbol = "+"
+            # For D and E residues a negative charge is given
+            elif aa in "DE":
+                symbol = "-"
+            elif aa in "AVMILPWFG":
+                symbol = "N"
+            elif aa in "HSYTCQN":
+                symbol = "P"
+            charge_sym_vec += symbol
+            position += 1
+
+            # Calculating hyrophobicity based on Kyte and Doolittle scale. Using binning value of 9. Since the binning
+            # is 9, the first 4 residues and last 4 residues as set blank so as to center the values to their
+            # approximate position on the sequence.
+            prot_ana_seq = ProteinAnalysis(sequence)
+            hydro = [0] * 4 + prot_ana_seq.protein_scale(ProtParamData.kd, 9) + [0] * 4
+
+        record_number += 1
+        position = 1
+
+        p += [position_vec]
+        r += [residue_vec]
+        c += [charge_sym_vec]
+        h += [hydro]
+        s += [sec_struct_vec]
+
+    # returns values for name of the sequence
+    return record, p, r, c, h, s
+
+
+if __name__ == "__main__":
+    # Grab all of the filters from our plugin loader
+    parser = argparse.ArgumentParser(description="Disruptin Table Output")
+    parser.add_argument(
+        "garnier_file", type=argparse.FileType("r"), help="csv file from garnier reader"
+    )
+    parser.add_argument(
+        "fasta_file",
+        type=argparse.FileType("r"),
+        help="fasta file of disruptin candidates",
+    )
+    args = parser.parse_args()
+
+    # Set up output location
+    #    f = open(sys.stdout, 'w', newline='')
+    #    writer1 = csv.writer(f)
+
+    iden, position, residue, charge, hydro, struct = disruptin_table(**vars(args))
+
+    for i in range(len(iden)):
+        #        writer1.writerow(['Protein ID']+[iden[i]])
+        #        writer1.writerow(['Position'] + [format(x, 's') for x in position[i]])
+        #        writer1.writerow(['Residue'] + [format(x, 's') for x in residue[i]])
+        #        writer1.writerow(['Charge'] + [format(x, 's') for x in charge[i]])
+        #        writer1.writerow(['Hydrophobicity'] + [format(x, '.3f') for x in hydro[i]])
+        #        writer1.writerow(['Secondary Structure'] + [format(x, 's') for x in struct[i]])
+        #        writer1.writerow([''])
+
+        print(str(iden[i]))
+        print("Position \t " + "\t".join(position[i]))
+        print("Residue \t" + "\t".join(residue[i]))
+        print("Charge \t" + "\t".join(charge[i]))
+        print("Hydrophobicity \t" + "\t".join(format(x, ".3f") for x in hydro[i]))
+        print("Secondary Structure \t" + "\t".join(struct[i]))