Mercurial > repos > jay > pdaug_peptide_ngrams
comparison PDAUG_Peptide_Data_Access/PDAUG_Peptide_Data_Access.py @ 0:7557b48b2872 draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
author | jay |
---|---|
date | Wed, 28 Oct 2020 02:10:12 +0000 |
parents | |
children | 9b5e990a0ebb |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7557b48b2872 |
---|---|
1 import modlamp | |
2 from modlamp.datasets import load_AMPvsTM | |
3 from modlamp.datasets import load_AMPvsUniProt | |
4 from modlamp.datasets import load_ACPvsTM | |
5 from modlamp.datasets import load_ACPvsRandom | |
6 from modlamp.database import query_apd | |
7 from modlamp.database import query_camp | |
8 import os | |
9 import pandas as pd | |
10 | |
def DataGen(DataBaseType, OutFile, IDs):
    """Fetch a modlamp dataset or database query and write it as a TSV file.

    Parameters
    ----------
    DataBaseType : str
        'AMPvsTM', 'AMPvsUniProt', 'ACPvsTM' or 'ACPvsRandom' to load the
        corresponding ``modlamp.datasets`` data set, or 'query_apd' /
        'query_camp' to run the corresponding ``modlamp.database`` query.
    OutFile : str
        Path of the tab-separated file to write.
    IDs : str or None
        Comma separated integer IDs. Only read for the two query types.

    Output
    ------
    Query types write a single 'Peptides' column. Data set types write a
    'Peptide' column (``data.sequences``) next to a 'Target' column
    (``data.target``).

    Raises
    ------
    SystemExit
        With an error message (non-zero exit status when it reaches the
        interpreter) if ``DataBaseType`` is unknown, or if a query type is
        requested without a valid comma separated list of integer IDs.
    """
    if DataBaseType in ('query_apd', 'query_camp'):
        # Validate the IDs up front so a missing/garbled -L option produces
        # a clear message instead of an AttributeError/ValueError traceback.
        if not IDs:
            raise SystemExit(
                "Enter Correct Values: {} needs a comma separated list of "
                "integer IDs".format(DataBaseType))
        try:
            id_list = [int(i) for i in IDs.split(',')]
        except ValueError:
            raise SystemExit(
                "Enter Correct Values: IDs must be comma separated "
                "integers, got {!r}".format(IDs))

        query = query_apd if DataBaseType == 'query_apd' else query_camp
        peptides = pd.DataFrame(query(id_list), columns=['Peptides'])
        peptides.to_csv(OutFile, index=False, sep='\t')
        # Return instead of calling exit(): the process still ends with
        # status 0 when run as a script, but importers are not terminated.
        return

    if DataBaseType == 'AMPvsTM':
        data = load_AMPvsTM()
    elif DataBaseType == 'AMPvsUniProt':
        data = load_AMPvsUniProt()
    elif DataBaseType == 'ACPvsTM':
        data = load_ACPvsTM()
    elif DataBaseType == 'ACPvsRandom':
        data = load_ACPvsRandom()
    else:
        # A bad data set name is a failure: report it on stderr with a
        # non-zero status rather than printing and exiting "successfully".
        raise SystemExit(
            "Enter Correct Values: unknown DataBaseType {!r}".format(
                DataBaseType))

    targets = pd.DataFrame(data.target.tolist(), columns=['Target'])
    sequences = pd.DataFrame(data.sequences, columns=['Peptide'])

    # Side-by-side join on the default integer index: one row per peptide.
    table = pd.concat([sequences, targets], axis=1)
    table.to_csv(OutFile, index=False, sep='\t')
54 | |
55 | |
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to DataGen.
    import argparse

    parser = argparse.ArgumentParser()

    # required=True already forces a value, so no default is given here.
    parser.add_argument("-d", "--DataBaseType",
                        required=True,
                        help="Name of the dataset: AMPvsTM, AMPvsUniProt, "
                             "ACPvsTM, ACPvsRandom, query_apd or query_camp")

    parser.add_argument("-o", "--OutFile",
                        required=False,
                        default='Out.tsv',
                        help="Output file name for the tab separated "
                             "peptide table")

    # Passed through as a raw string; DataGen splits it on ',' and converts
    # each item with int().
    parser.add_argument("-L", "--List",
                        required=False,
                        default=None,
                        help="Comma separated list of integer IDs "
                             "(used by query_apd and query_camp)")

    args = parser.parse_args()
    DataGen(args.DataBaseType, args.OutFile, args.List)