comparison PDAUG_Peptide_Data_Access/PDAUG_Peptide_Data_Access.py @ 3:e873a5224d1e draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 3c91f421d26c8f42cf2671e47db735d2cf69dde8"
author jay
date Tue, 29 Dec 2020 03:57:13 +0000
parents c3f0b3a6339e
children
comparison
Legend: equal | deleted | inserted | replaced
2:728e1fb64e91 3:e873a5224d1e
8 import os 8 import os
9 import pandas as pd 9 import pandas as pd
10 10
11 def DataGen(DataBaseType, OutFile, IDs): 11 def DataGen(DataBaseType, OutFile, IDs):
12 12
13 if DataBaseType == 'AMPvsTM': 13 if DataBaseType == 'AMPvsTMP':
14 data = load_AMPvsTM() 14 data = load_AMPvsTM()
15 15
16 elif DataBaseType == 'AMPvsUniProt': 16 elif DataBaseType == 'AMPvsUniProt':
17 data = load_AMPvsUniProt() 17 data = load_AMPvsUniProt()
18 18
19 elif DataBaseType == 'ACPvsTM': 19 elif DataBaseType == 'ACPvsTMP':
20 data = load_ACPvsTM() 20 data = load_ACPvsTM()
21 21
22 elif DataBaseType == 'ACPvsRandom': 22 elif DataBaseType == 'ACPvsRandom':
23 data = load_ACPvsRandom() 23 data = load_ACPvsRandom()
24 24
37 37
38 else: 38 else:
39 print ("Enter Correct Values") 39 print ("Enter Correct Values")
40 exit() 40 exit()
41 41
42 Target = data.target.tolist() 42 peptide_data = data.sequences
43 Target_list = set(Target) 43 class_label = int(len(peptide_data)/2)*[data.target_names[0]]+int(len(peptide_data)/2)*[data.target_names[1]]
44 df = data.sequences 44 peptide_data = pd.DataFrame(peptide_data, columns=['name'])
45 45 class_label = pd.DataFrame(class_label, columns=['class_label'])
46 46 df = pd.concat([peptide_data,class_label], axis=1)
47 Target = pd.DataFrame(Target, columns=['Target'])
48 df = pd.DataFrame(df, columns=['Peptide'])
49
50 df = pd.DataFrame(df)
51 df = pd.concat([df, Target], axis=1)
52 47
53 df.to_csv(OutFile, index=False, sep='\t') 48 df.to_csv(OutFile, index=False, sep='\t')
54 49
55 50
56 if __name__=="__main__": 51 if __name__=="__main__":
67 required=False, 62 required=False,
68 default='Out.tsv', 63 default='Out.tsv',
69 help="Out put file name for str descriptors") 64 help="Out put file name for str descriptors")
70 65
71 parser.add_argument("-L", "--List", 66 parser.add_argument("-L", "--List",
72 required=False, 67 required=False,
73 default=None, 68 default=None,
74 help="List of integer as ID") 69 help="List of integer as ID")
75 70
76 args = parser.parse_args() 71 args = parser.parse_args()
77 DataGen(args.DataBaseType, args.OutFile, args.List) 72 DataGen(args.DataBaseType, args.OutFile, args.List)