annotate DPOGALAXY.py @ 32:5a0afb1578ea draft

Uploaded
author jose_duarte
date Wed, 15 Feb 2023 09:57:01 +0000
parents ce0de724097a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
32
5a0afb1578ea Uploaded
jose_duarte
parents: 25
diff changeset
1 import pickle
5a0afb1578ea Uploaded
jose_duarte
parents: 25
diff changeset
2 from Bio import SeqIO
5a0afb1578ea Uploaded
jose_duarte
parents: 25
diff changeset
3 import os
5a0afb1578ea Uploaded
jose_duarte
parents: 25
diff changeset
4 import pandas as pd
5a0afb1578ea Uploaded
jose_duarte
parents: 25
diff changeset
5 import numpy as np
5a0afb1578ea Uploaded
jose_duarte
parents: 25
diff changeset
6 from local_ctd import CalculateCTD
5a0afb1578ea Uploaded
jose_duarte
parents: 25
diff changeset
7 from local_AAComposition import CalculateDipeptideComposition
5a0afb1578ea Uploaded
jose_duarte
parents: 25
diff changeset
8 import sys
5a0afb1578ea Uploaded
jose_duarte
parents: 25
diff changeset
9 from Bio.SeqUtils.ProtParam import ProteinAnalysis
5a0afb1578ea Uploaded
jose_duarte
parents: 25
diff changeset
10
25
ce0de724097a Uploaded
jose_duarte
parents:
diff changeset
11
ce0de724097a Uploaded
jose_duarte
parents:
diff changeset
class PDPOPrediction:
    """
    Predict, for every CDS in a FASTA file, the probability of being a depolymerase (DPO).

    Typical use: build the object (loads model, scaler and sequences), call
    :meth:`Datastructure` to compute the feature table, then :meth:`Prediction`
    to score the sequences and write ``output.html``.
    """

    # Scaler pickle that accompanies each supported model pickle.
    _SCALER_FILES = {'SVM4311': 'd4311_SCALER', 'ANN7185': 'd7185_SCALER'}

    # The 20 standard amino acids, in the column order used by the feature table.
    _AMINO_ACIDS = ('G', 'A', 'L', 'V', 'I', 'P', 'F', 'S', 'T', 'C',
                    'Y', 'N', 'Q', 'D', 'E', 'R', 'K', 'H', 'W', 'M')

    # Feature columns each model consumes (every other column is dropped before predicting).
    _FEATURES = {
        "SVM4311": [
            "DNA-A", "DNA-T", "DNA-G", "DNA-GC", "AA_Len", "G", "A", "S", "T", "N", "Turn", "Sheet",
            "_PolarizabilityC1", "_PolarizabilityC3", "_SolventAccessibilityC1", "_SecondaryStrC1",
            "_SecondaryStrC2", "_SecondaryStrC3", "_ChargeC2", "_ChargeC3", "_PolarityC1", "_NormalizedVDWVC1",
            "_NormalizedVDWVC3", "_HydrophobicityC2", "_HydrophobicityC3", "_SecondaryStrT23",
            "_NormalizedVDWVT13", "_PolarizabilityD1001", "_SolventAccessibilityD1001", "_SolventAccessibilityD2001",
            "_SolventAccessibilityD3001", "_SecondaryStrD1025", "_ChargeD1075", "_ChargeD2001", "_ChargeD2025",
            "_ChargeD3025", "_ChargeD3050", "_PolarityD1075", "_PolarityD3025", "_NormalizedVDWVD1001",
            "_NormalizedVDWVD3050", "_HydrophobicityD2001", "DG", "DT", "GD",
        ],
        "ANN7185": [
            "DNA-GC", "AA_Len", "Aromaticity", "IsoelectricPoint", "G", "A", "L", "V", "I", "P", "F",
            "S", "T", "C", "Y", "N", "Q", "D", "E", "R", "K", "H", "W", "M", "Turn", "Sheet", "_PolarizabilityC1",
            "_PolarizabilityC2", "_PolarizabilityC3", "_SolventAccessibilityC1", "_SolventAccessibilityC2",
            "_SecondaryStrC1", "_SecondaryStrC3", "_ChargeC1", "_ChargeC2", "_ChargeC3", "_PolarityC2",
            "_NormalizedVDWVC2", "_NormalizedVDWVC3", "_HydrophobicityC1", "_HydrophobicityC2", "_SecondaryStrT13",
            "_SecondaryStrT23", "_ChargeT12", "_ChargeT13", "_HydrophobicityT12", "_PolarizabilityD1001",
            "_PolarizabilityD1025", "_PolarizabilityD1050", "_PolarizabilityD2001", "_PolarizabilityD3025",
            "_PolarizabilityD3050", "_PolarizabilityD3075", "_SolventAccessibilityD1050", "_SolventAccessibilityD2001",
            "_SolventAccessibilityD2025", "_SolventAccessibilityD2050", "_SolventAccessibilityD3025",
            "_SolventAccessibilityD3050", "_SolventAccessibilityD3100", "_SecondaryStrD1025", "_SecondaryStrD1050",
            "_SecondaryStrD1075", "_SecondaryStrD2001", "_SecondaryStrD2050", "_SecondaryStrD2075", "_ChargeD1050",
            "_ChargeD1075", "_ChargeD1100", "_ChargeD2025", "_ChargeD3025", "_ChargeD3050", "_PolarityD2050",
            "_PolarityD3050", "_NormalizedVDWVD1001", "_NormalizedVDWVD1050", "_NormalizedVDWVD2001", "_NormalizedVDWVD2025",
            "_HydrophobicityD3001", "_HydrophobicityD3075", "AD", "AW", "AY", "RC", "RT", "NA", "NE",
            "NG", "NP", "DE", "DQ", "DG", "DT", "DY", "CG", "CL", "CY", "CV", "EN", "QA", "QR", "QE",
            "QI", "GA", "GR", "GD", "GQ", "GG", "GH", "GL", "GF", "GP", "GT", "GY", "HA", "HC", "HI",
            "HK", "HP", "IC", "IG", "IS", "IT", "IW", "LA", "LR", "LH", "LI", "LK", "LP", "KQ", "KH",
            "KS", "KT", "MQ", "MG", "MI", "FA", "FR", "FS", "FY", "PC", "PE", "PG", "PH", "PM", "PF",
            "PT", "SA", "SD", "SC", "SQ", "SW", "TA", "TC", "TM", "WL", "WV", "YE", "YG", "YH", "YI",
            "YL", "YK", "YM", "YS",
        ],
    }

    def __init__(self, folder='location', mdl='', seq_file='fasta_file.fasta', ttable=11):
        """
        Initialize PhageDPO prediction.

        Loads the pickled model (and, for the two known model names, its scaler)
        from ``folder`` and reads every record of ``seq_file`` into ``self.df``
        with the columns ``ID``, ``DNAseq`` and ``AAseq``.

        :param folder: data path; resolved against the current working directory
        :param mdl: ml model file name, in this case 'ANN7185' or 'SVM4311'
        :param seq_file: fasta file (nucleotide sequences), located in ``folder``
        :param ttable: Translational table. By default, The Bacterial, Archaeal and Plant Plastid Code Table 11
        """
        self.records = []
        self.data = {}
        self.df_output = None
        self.seqfile = seq_file
        self.__location__ = os.path.realpath(os.path.join(os.getcwd(), folder))

        # NOTE(security): pickle.load can execute arbitrary code; only point this
        # at model/scaler files that come from a trusted source.
        with open(os.path.join(self.__location__, mdl), 'rb') as model_handle:
            self.model = pickle.load(model_handle)

        scaler_file = self._SCALER_FILES.get(mdl)
        if scaler_file is not None:
            # Resolve the scaler next to the model via the instance path, so the
            # class also works when imported (no module-level global required).
            with open(os.path.join(self.__location__, scaler_file), 'rb') as scaler_handle:
                self.scaler = pickle.load(scaler_handle)
            self.name = mdl

        fasta_path = os.path.join(self.__location__, self.seqfile)
        for seq in SeqIO.parse(fasta_path, 'fasta'):
            # str() is the public way to get the letters of a Seq.
            dna_seq = str(seq.seq)
            aa_seq = str(seq.seq.translate(table=ttable))
            self.data[seq.description.replace(' ', '')] = [dna_seq, aa_seq]
            self.records.append([seq.description, dna_seq, aa_seq])

        self.df = pd.DataFrame(self.records, columns=['ID', 'DNAseq', 'AAseq'])

    @staticmethod
    def _count_orf(orf_seq):
        """
        Count the nucleotides of a sequence and compute its GC percentage.

        Only upper-case A, C, T and G are counted; any other symbol is ignored.

        :param orf_seq: nucleotide sequence to analyze
        :return: dictionary with the keys 'DNA-A', 'DNA-C', 'DNA-T', 'DNA-G' (counts)
                 and 'DNA-GC' (percentage of C+G among the counted bases; 0.0 when
                 no A/C/T/G is present)
        """
        counts = {'A': 0, 'C': 0, 'T': 0, 'G': 0}
        for base in orf_seq:
            if base in counts:
                counts[base] += 1
        total = sum(counts.values())
        dic = {'DNA-' + base: number for base, number in counts.items()}
        # Guard against empty / fully ambiguous sequences instead of dividing by zero.
        dic['DNA-GC'] = ((counts['C'] + counts['G']) / total) * 100 if total else 0.0
        return dic

    @classmethod
    def _count_aa(cls, aa_seq):
        """
        Count the standard amino acids of a protein sequence.

        :param aa_seq: amino acid sequence to analyze
        :return: dictionary mapping each of the 20 standard one-letter codes to its
                 number of occurrences (other symbols, e.g. '*', are ignored)
        """
        dic = {letter: 0 for letter in cls._AMINO_ACIDS}
        for residue in aa_seq:
            if residue in dic:
                dic[residue] += 1
        return dic

    def Datastructure(self):
        """
        Create dataset with all features.

        Replaces ``self.df`` by a numeric table (one row per sequence) holding the
        nucleotide counts and GC %, protein length, aromaticity, isoelectric point,
        amino acid counts, secondary structure fractions, CTD descriptors and
        dipeptide composition. Also sets ``self.feat`` (features used per model),
        ``self.df_output`` and ``self.df1`` (both holding only the ``ID`` column).

        :return: None
        """
        self.feat = {model_name: list(columns) for model_name, columns in self._FEATURES.items()}

        self.df_output = self.df.copy()
        self.df_output.drop(['DNAseq', 'AAseq'], axis=1, inplace=True)
        self.df1 = self.df[['ID']].copy()

        # One dict per sequence; key insertion order defines the column order
        # (DNA features, length, aromaticity, pI, amino acids, structure, CTD, dipeptides).
        rows = []
        for dna_seq, aa_seq in zip(self.df['DNAseq'], self.df['AAseq']):
            analysis = ProteinAnalysis(aa_seq)
            fractions = analysis.secondary_structure_fraction()
            row = self._count_orf(dna_seq)
            row['AA_Len'] = len(aa_seq)
            row['Aromaticity'] = analysis.aromaticity()
            row['IsoelectricPoint'] = analysis.isoelectric_point()
            row.update(self._count_aa(aa_seq))
            row.update({'Helix': fractions[0], 'Turn': fractions[1], 'Sheet': fractions[2]})
            row.update(CalculateCTD(aa_seq))
            row.update(CalculateDipeptideComposition(aa_seq))
            rows.append(row)

        # Build the table in one go (no per-cell enlargement) and keep it purely float.
        self.df = pd.DataFrame(rows, index=self.df.index).astype(float)

    def Prediction(self):
        """
        Predicts the percentage of each CDS being depolymerase.

        Scales the full feature table, keeps only the columns the selected model
        uses, and stores the rounded positive-class probability (in %) in
        ``self.df_output`` together with a 1-based ``CDS`` number. The table is
        printed and written to ``output.html`` in the current working directory.
        Requires :meth:`Datastructure` to have been called first.

        :return: None
        """
        scaled = pd.DataFrame(self.scaler.transform(self.df), index=self.df.index, columns=self.df.columns)
        wanted = set(self.feat[self.name])
        ft_scaler = scaled.drop(columns=[col for col in self.df if col not in wanted])
        scores = self.model.predict_proba(ft_scaler)
        # Column 1 of predict_proba is taken as the positive (DPO) class probability.
        pos_scores = np.array([round(row[1] * 100) for row in scores], dtype=float)

        self.df_output.reset_index(inplace=True)
        print(self.df_output.columns)
        self.df_output.rename(columns={'index': 'CDS'}, inplace=True)
        self.df_output['CDS'] += 1  # report CDS numbers starting at 1
        self.df_output['{} DPO Prediction (%)'.format(self.name)] = pos_scores
        print(self.df_output)
        self.df_output.to_html('output.html', index=False, justify='center')
ce0de724097a Uploaded
jose_duarte
parents:
diff changeset
182
32
5a0afb1578ea Uploaded
jose_duarte
parents: 25
diff changeset
183
25
ce0de724097a Uploaded
jose_duarte
parents:
diff changeset
if __name__ == '__main__':
    # Command line: <script> <model name> <fasta file>
    # Model, scaler and FASTA file are all looked up in the script's own directory.
    # `__location__` is kept as a module-level name for any code that reads it as a global.
    __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))

    if len(sys.argv) < 3:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit('usage: {} <model: SVM4311|ANN7185> <fasta_file>'.format(sys.argv[0]))

    model = sys.argv[1]
    fasta_file = sys.argv[2]

    PDPO = PDPOPrediction(__location__, model, fasta_file)
    PDPO.Datastructure()
    PDPO.Prediction()
ce0de724097a Uploaded
jose_duarte
parents:
diff changeset
193