annotate Disruptin_hydrophobicity_helicity_table_package.py @ 1:a99be535e99d draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:41:05 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
1 """
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
2 This program is intended to create the output table for the disruptin finder workflow
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
3 """
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
4 from Bio import SeqIO
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
5 from Bio.SeqUtils.ProtParam import ProteinAnalysis
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
6 from Bio.SeqUtils import ProtParamData
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
7 import csv
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
8 import argparse
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
9 import sys
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
10
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
11
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
12 def disruptin_table(garnier_file, fasta_file):
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
13 # Iterable variables
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
14 position = 1
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
15 net_charge = 0
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
16 charge_res = 0
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
17 record_number = 0
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
18
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
19 # loop structures
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
20 names = []
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
21 sec_struct = []
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
22
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
23 # reading the lines from the garnier csv file
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
24 # with open(garnier_file,'r') as csvfile:
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
25 # garnierreader = csv.reader(csvfile)
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
26 for row in garnier_file:
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
27 if row[0] == "Sequence: ":
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
28 names += [row[1]]
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
29 elif row[0] in "HETC":
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
30 row = row.split("\t")
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
31 sec_struct += ["".join(row)]
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
32
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
33 record = []
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
34 p = []
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
35 r = []
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
36 c = []
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
37 h = []
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
38 s = []
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
39
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
40 # Parse the .fasta file and get the sequence
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
41 for rec in SeqIO.parse(fasta_file, "fasta"):
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
42 sequence = str(rec.seq)
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
43
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
44 # Set up the information vectors: for position #, residue, hydrophobic/charge/polar/nonpolar, and secondary
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
45 # structure
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
46 record += [rec.id]
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
47 position_vec = []
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
48 residue_vec = []
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
49 charge_sym_vec = []
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
50 sec_struct_vec = []
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
51
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
52 for aa in sequence:
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
53 position_vec += [str(position)]
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
54 residue_vec += [str(aa)]
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
55 sec_struct_vec += [str(sec_struct[record_number][position - 1])]
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
56
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
57 # For R and K residues a positive charge is given
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
58 if aa in "RK":
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
59 symbol = "+"
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
60 # For D and E residues a negative charge is given
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
61 elif aa in "DE":
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
62 symbol = "-"
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
63 elif aa in "AVMILPWFG":
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
64 symbol = "N"
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
65 elif aa in "HSYTCQN":
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
66 symbol = "P"
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
67 charge_sym_vec += symbol
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
68 position += 1
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
69
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
70 # Calculating hyrophobicity based on Kyte and Doolittle scale. Using binning value of 9. Since the binning
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
71 # is 9, the first 4 residues and last 4 residues as set blank so as to center the values to their
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
72 # approximate position on the sequence.
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
73 prot_ana_seq = ProteinAnalysis(sequence)
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
74 hydro = [0] * 4 + prot_ana_seq.protein_scale(ProtParamData.kd, 9) + [0] * 4
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
75
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
76 record_number += 1
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
77 position = 1
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
78
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
79 p += [position_vec]
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
80 r += [residue_vec]
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
81 c += [charge_sym_vec]
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
82 h += [hydro]
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
83 s += [sec_struct_vec]
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
84
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
85 # returns values for name of the sequence
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
86 return record, p, r, c, h, s
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
87
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
88
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
89 if __name__ == "__main__":
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
90 # Grab all of the filters from our plugin loader
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
91 parser = argparse.ArgumentParser(description="Disruptin Table Output")
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
92 parser.add_argument(
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
93 "garnier_file", type=argparse.FileType("r"), help="csv file from garnier reader"
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
94 )
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
95 parser.add_argument(
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
96 "fasta_file",
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
97 type=argparse.FileType("r"),
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
98 help="fasta file of disruptin candidates",
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
99 )
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
100 args = parser.parse_args()
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
101
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
102 # Set up output location
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
103 # f = open(sys.stdout, 'w', newline='')
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
104 # writer1 = csv.writer(f)
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
105
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
106 iden, position, residue, charge, hydro, struct = disruptin_table(**vars(args))
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
107
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
108 for i in range(len(iden)):
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
109 # writer1.writerow(['Protein ID']+[iden[i]])
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
110 # writer1.writerow(['Position'] + [format(x, 's') for x in position[i]])
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
111 # writer1.writerow(['Residue'] + [format(x, 's') for x in residue[i]])
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
112 # writer1.writerow(['Charge'] + [format(x, 's') for x in charge[i]])
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
113 # writer1.writerow(['Hydrophobicity'] + [format(x, '.3f') for x in hydro[i]])
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
114 # writer1.writerow(['Secondary Structure'] + [format(x, 's') for x in struct[i]])
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
115 # writer1.writerow([''])
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
116
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
117 print(str(iden[i]))
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
118 print("Position \t " + "\t".join(position[i]))
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
119 print("Residue \t" + "\t".join(residue[i]))
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
120 print("Charge \t" + "\t".join(charge[i]))
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
121 print("Hydrophobicity \t" + "\t".join(format(x, ".3f") for x in hydro[i]))
a99be535e99d planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
122 print("Secondary Structure \t" + "\t".join(struct[i]))