Mercurial > repos > jay > pdaug_peptide_core_descriptors
comparison PDAUG_Peptide_Core_Descriptors/PDAUG_Peptide_Core_Descriptors.py @ 0:0fc091fb7e8f draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
author | jay |
---|---|
date | Wed, 28 Oct 2020 02:12:57 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0fc091fb7e8f |
---|---|
1 from modlamp.core import BaseDescriptor | |
2 from modlamp.descriptors import PeptideDescriptor | |
3 import pandas as pd | |
4 import argparse, os | |
5 | |
def build_parser():
    """Create the command-line parser for this script.

    Returns:
        An ``argparse.ArgumentParser`` accepting ``-I/--InFile``,
        ``-O/--OutFile`` and ``-N/--Ngrams`` (all required).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-I", "--InFile", required=True, help="Input file")
    parser.add_argument("-O", "--OutFile", required=True, help="Output file")
    # type=int makes argparse reject a non-numeric value with a usage error
    # instead of a traceback later, when the value is first used.
    parser.add_argument("-N", "--Ngrams", required=True, type=int, help="ngrams")
    return parser


def read_sequences(path):
    """Return the sequence lines of a FASTA-style text file.

    Every line containing ``>`` is treated as a header and skipped; every
    other line is taken as one sequence with its line ending removed.
    Blank lines are ignored so that, for example, a trailing newline at the
    end of the file is not reported as an empty peptide.

    NOTE(review): a sequence wrapped over several lines is still read as
    several independent sequences, as before - confirm that inputs are
    always one sequence per line.

    Args:
        path: Path of the file to read.

    Returns:
        A list of sequence strings in file order.
    """
    sequences = []
    # The context manager closes the handle even if reading fails.
    with open(path) as handle:
        for line in handle:
            if ">" in line:
                continue
            sequence = line.strip("\r\n")
            if sequence.strip():
                sequences.append(sequence)
    return sequences


def ngram_table(sequences, ngram_size):
    """Build a table with one row of n-gram counts per sequence.

    Each sequence is passed to ``PeptideDescriptor`` and ``count_ngrams`` is
    called with ``[ngram_size]``; the resulting ``descriptor`` attribute
    becomes one row labelled ``sequence<i>``. Presumably that attribute maps
    each n-gram to its count - confirm against the modlamp documentation.

    Args:
        sequences: Iterable of peptide sequence strings.
        ngram_size: Length of the n-grams to count.

    Returns:
        A ``pandas.DataFrame`` in which values missing for a row are 0;
        an empty frame when ``sequences`` is empty.
    """
    rows = []
    for position, sequence in enumerate(sequences):
        descriptor = PeptideDescriptor(sequence)
        descriptor.count_ngrams([ngram_size])
        rows.append(
            pd.DataFrame(descriptor.descriptor, index=["sequence" + str(position)])
        )

    # pd.concat raises on an empty list, so keep the "no sequences" case
    # producing an empty frame.
    if not rows:
        return pd.DataFrame()

    # Concatenate once: growing a frame inside the loop re-copies all earlier
    # rows on every iteration.
    return pd.concat(rows, axis=0).fillna(0)


def main(argv=None):
    """Read sequences, count their n-grams and write a tab-separated table.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)
    table = ngram_table(read_sequences(args.InFile), args.Ngrams)
    table.to_csv(args.OutFile, sep="\t", index=False)


if __name__ == "__main__":
    main()
41 | |
42 | |
43 |