Mercurial > repos > jay > pdaug_ml_models
comparison PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py @ 4:0b17bc2ddcdd draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
author | jay |
---|---|
date | Tue, 12 Jan 2021 19:50:39 +0000 |
parents | bda0527365da |
children |
comparison
legend: equal, deleted, inserted, replaced
3:bda0527365da | 4:0b17bc2ddcdd |
---|---|
1 import os | 1 |
2 import argparse | 2 import pandas as pd |
3 | 3 |
4 | 4 |
5 def TSVtoFASTA(InFile, Method, Positive, Negative, OutFile): | 5 def TSVtoFASTA(infile, method, firstdatafile, seconddatafile, outfile, clmpepid, slcclasslabel, peps): |
6 | 6 |
7 if Method == 'WithClassLabel': | |
8 | 7 |
9 f = open(InFile) | 8 fn = [firstdatafile, seconddatafile] |
10 lines = f.readlines() | |
11 | 9 |
12 of1 = open(Positive,'w') | |
13 of2 = open(Negative,'w') | |
14 | 10 |
15 n = 0 | 11 df = pd.read_csv(infile, sep="\t") |
16 m = 0 | 12 if clmpepid == None: |
13 pass | |
14 else: | |
15 names = df[clmpepid].tolist() | |
16 | |
17 peps = df[peps].tolist() | |
18 | |
19 if method == "withoutlabel": | |
20 f = open(outfile,'w') | |
21 if clmpepid is not None: | |
22 for i,n in enumerate(peps): | |
23 f.write(">"+names[i]+'\n') | |
24 f.write(n+'\n') | |
25 f.close() | |
26 else: | |
27 for i,n in enumerate(peps): | |
28 f.write(">"+str(i)+'\n') | |
29 f.write(n+'\n') | |
30 f.close() | |
31 | |
32 elif method == "withlabel": | |
33 labels = df[slcclasslabel].tolist() | |
34 | |
35 label = list(set(labels)) | |
17 | 36 |
18 l = [] | 37 if clmpepid is None: |
19 | 38 for i, l in enumerate(label): |
20 for line in lines[1:]: | 39 f = open(fn[i],'w') |
21 l.append(line.split('\t')[1].strip('\n').strip('\r')) | 40 print('ok1') |
22 l = list(set(l)) | 41 for i, L in enumerate(labels): |
23 | 42 if l == L: |
24 print(l) | 43 f.write(">"+str(i)+"_"+str(l)+'\n') |
25 | 44 f.write(peps[i]+'\n') |
26 for line in lines: | 45 f.close() |
27 | 46 else: |
28 if l[1] in line.split('\t')[1].strip('\n').strip('\r'): | 47 for i, l in enumerate(label): |
29 n= n+1 | 48 f = open(fn[i],'w') |
30 of1.write('>peptide_'+str(n)+'_'+str(l[1])+'\n') | 49 for i, L in enumerate(labels): |
31 of1.write(line.split('\t')[0]+'\n') | 50 if l == L: |
32 | 51 f.write(">"+names[i]+"_"+l+'\n') |
33 if l[0] in line.split('\t')[1].strip('\n').strip('\r'): | 52 f.write(peps[i]+'\n') |
34 m= m+1 | 53 f.close() |
35 of2.write('>peptide_'+str(m)+'_'+str(l[0])+'\n') | |
36 of2.write(line.split('\t')[0]+'\n') | |
37 | |
38 elif Method == 'NoClassLabel': | |
39 | |
40 f = open(InFile) | |
41 lines = f.readlines() | |
42 of1 = open(OutFile,'w') | |
43 | |
44 for i, line in enumerate(lines[1:]): | |
45 of1.write('>peptide_'+str(i)+'\n') | |
46 of1.write(line.split('\t')[0]+'\n') | |
47 | |
48 else: | |
49 pass | |
50 | 54 |
51 if __name__=="__main__": | 55 if __name__=="__main__": |
52 | 56 |
53 import argparse | 57 import argparse |
54 | |
55 parser = argparse.ArgumentParser() | 58 parser = argparse.ArgumentParser() |
56 | |
57 parser.add_argument("-I", "--InFile", required=True, default=None, help=".fasta or .tsv") | 59 parser.add_argument("-I", "--InFile", required=True, default=None, help=".fasta or .tsv") |
58 parser.add_argument("-P", "--Postvs", required=False, default='FirstDataFile.fasta', help="Path to target tsv file") | 60 parser.add_argument("-F", "--FirstDataFile", required=False, default='FirstDataFile.fasta', help="Path to target tsv file") |
59 parser.add_argument("-N", "--Negtvs", required=False, default='SecondDataFile.fasta', help="Path to target tsv file") | 61 parser.add_argument("-S", "--SecondDataFile", required=False, default='SecondDataFile.fasta', help="Path to target tsv file") |
60 parser.add_argument("-O", "--OutFile", required=False, default='OutFile.fasta', help="Path to target tsv file") | 62 parser.add_argument("-O", "--OutFile", required=False, default='OutFile.fasta', help="Path to target tsv file") |
61 parser.add_argument("-M", "--Method", required=True, default=None, help="Path to target tsv file") | 63 parser.add_argument("-M", "--Method", required=True, default=None, help="Path to target tsv file") |
64 parser.add_argument("-C", "--ClmPepID", required=False, default=None, help="Peptide Column Name") | |
65 parser.add_argument("-L", "--SlcClassLabel", required=False, default="Class_label", help="Class Label Column Name") | |
66 parser.add_argument("-P", "--PeptideColumn", required=True, default=None, help="Class Label Column Name") | |
62 args = parser.parse_args() | 67 args = parser.parse_args() |
63 | 68 |
64 TSVtoFASTA(args.InFile, args.Method, args.Postvs, args.Negtvs, args.OutFile) | 69 TSVtoFASTA(args.InFile, args.Method, args.FirstDataFile, args.SecondDataFile, args.OutFile, args.ClmPepID, args.SlcClassLabel, args.PeptideColumn) |
70 | |
71 | |
72 |