comparison Disruptin_hydrophobicity_helicity_table_package.py @ 1:a99be535e99d draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:41:05 +0000
parents
children
comparison
equal deleted inserted replaced
0:f3fc78cc4c43 1:a99be535e99d
1 """
2 This program is intended to create the output table for the disruptin finder workflow
3 """
4 from Bio import SeqIO
5 from Bio.SeqUtils.ProtParam import ProteinAnalysis
6 from Bio.SeqUtils import ProtParamData
7 import csv
8 import argparse
9 import sys
10
11
def disruptin_table(garnier_file, fasta_file):
    """Build the per-residue rows of the disruptin output table.

    Args:
        garnier_file: Iterable of text lines (e.g. an open file). Lines whose
            first character is one of ``H``, ``E``, ``T`` or ``C`` are taken
            as secondary-structure rows; tabs inside them are removed. The
            n-th such row is paired with the n-th FASTA record.
        fasta_file: Handle or path passed to ``SeqIO.parse(..., "fasta")``.

    Returns:
        A 6-tuple ``(record, p, r, c, h, s)`` of parallel lists holding one
        entry per FASTA record:

        * ``record`` - record ids
        * ``p`` - 1-based positions, as strings
        * ``r`` - residue letters
        * ``c`` - residue class symbols: ``+`` (R, K), ``-`` (D, E),
          ``N`` (A V M I L P W F G), ``P`` (H S Y T C Q N) and ``?`` for any
          other character
        * ``h`` - Kyte-Doolittle hydrophobicity with a window of 9, padded
          with zeros at both ends
        * ``s`` - secondary-structure code for each residue

    Raises:
        IndexError: If a record has no secondary-structure row, or its row is
            shorter than the sequence.
    """
    window = 9
    pad = window // 2

    # Lookup table: residue letter -> class symbol.
    residue_class = {}
    for letters, code in (
        ("RK", "+"),
        ("DE", "-"),
        ("AVMILPWFG", "N"),
        ("HSYTCQN", "P"),
    ):
        residue_class.update(dict.fromkeys(letters, code))

    # Collect the secondary-structure strings in file order. The rows are raw
    # text lines, so the line ending is stripped before the tabs are removed;
    # otherwise the newline would count as one more structure code.
    sec_struct = []
    for row in garnier_file:
        if row and row[0] in "HETC":
            sec_struct.append("".join(row.rstrip("\r\n").split("\t")))

    record = []
    p = []
    r = []
    c = []
    h = []
    s = []

    # Parse the .fasta file and build one set of rows per sequence.
    for record_number, rec in enumerate(SeqIO.parse(fasta_file, "fasta")):
        sequence = str(rec.seq)

        structure = sec_struct[record_number] if record_number < len(sec_struct) else ""
        if len(structure) < len(sequence):
            raise IndexError(
                "secondary structure for record %r covers %d residues but the "
                "sequence has %d" % (rec.id, len(structure), len(sequence))
            )

        position_vec = []
        residue_vec = []
        charge_sym_vec = []
        sec_struct_vec = []
        for position, aa in enumerate(sequence, start=1):
            position_vec.append(str(position))
            residue_vec.append(str(aa))
            sec_struct_vec.append(str(structure[position - 1]))
            # Classification is case-insensitive; anything outside the four
            # classes gets an explicit "?" so the row stays aligned instead of
            # reusing the previous residue's symbol or failing.
            charge_sym_vec.append(residue_class.get(aa.upper(), "?"))

        # Hydrophobicity on the Kyte and Doolittle scale with a window of 9.
        # The first and last 4 positions have no full window, so they are
        # filled with 0 to keep each value centred on its residue. The slice
        # keeps the row the same length as the sequence when it is shorter
        # than the window.
        prot_ana_seq = ProteinAnalysis(sequence)
        scale = prot_ana_seq.protein_scale(ProtParamData.kd, window)
        hydro = ([0] * pad + scale + [0] * pad)[: len(sequence)]

        record.append(rec.id)
        p.append(position_vec)
        r.append(residue_vec)
        c.append(charge_sym_vec)
        h.append(hydro)
        s.append(sec_struct_vec)

    return record, p, r, c, h, s
87
88
if __name__ == "__main__":
    # Command-line interface: two positional, readable input files.
    cli = argparse.ArgumentParser(description="Disruptin Table Output")
    cli.add_argument(
        "garnier_file",
        type=argparse.FileType("r"),
        help="csv file from garnier reader",
    )
    cli.add_argument(
        "fasta_file",
        type=argparse.FileType("r"),
        help="fasta file of disruptin candidates",
    )
    options = cli.parse_args()

    # The parsed option names match the function's parameter names.
    tables = disruptin_table(**vars(options))

    # The six returned lists are parallel (one entry per record), so walk
    # them in lockstep and print one tab-separated table per record to stdout.
    for name, pos_row, res_row, chg_row, hyd_row, ss_row in zip(*tables):
        hyd_cells = [format(value, ".3f") for value in hyd_row]
        print(str(name))
        print("Position \t " + "\t".join(pos_row))
        print("Residue \t" + "\t".join(res_row))
        print("Charge \t" + "\t".join(chg_row))
        print("Hydrophobicity \t" + "\t".join(hyd_cells))
        print("Secondary Structure \t" + "\t".join(ss_row))