comparison PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py @ 4:a6322865e23a draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
author jay
date Tue, 12 Jan 2021 19:07:03 +0000
parents 2b05b50ca410
children
comparison
equal deleted inserted replaced
3:2b05b50ca410 4:a6322865e23a
1 import os 1
2 import argparse 2 import pandas as pd
3 3
4 4
5 def TSVtoFASTA(InFile, Method, Positive, Negative, OutFile): 5 def TSVtoFASTA(infile, method, firstdatafile, seconddatafile, outfile, clmpepid, slcclasslabel, peps):
6 6
7 if Method == 'WithClassLabel':
8 7
9 f = open(InFile) 8 fn = [firstdatafile, seconddatafile]
10 lines = f.readlines()
11 9
12 of1 = open(Positive,'w')
13 of2 = open(Negative,'w')
14 10
15 n = 0 11 df = pd.read_csv(infile, sep="\t")
16 m = 0 12 if clmpepid == None:
13 pass
14 else:
15 names = df[clmpepid].tolist()
16
17 peps = df[peps].tolist()
18
19 if method == "withoutlabel":
20 f = open(outfile,'w')
21 if clmpepid is not None:
22 for i,n in enumerate(peps):
23 f.write(">"+names[i]+'\n')
24 f.write(n+'\n')
25 f.close()
26 else:
27 for i,n in enumerate(peps):
28 f.write(">"+str(i)+'\n')
29 f.write(n+'\n')
30 f.close()
31
32 elif method == "withlabel":
33 labels = df[slcclasslabel].tolist()
34
35 label = list(set(labels))
17 36
18 l = [] 37 if clmpepid is None:
19 38 for i, l in enumerate(label):
20 for line in lines[1:]: 39 f = open(fn[i],'w')
21 l.append(line.split('\t')[1].strip('\n').strip('\r')) 40 print('ok1')
22 l = list(set(l)) 41 for i, L in enumerate(labels):
23 42 if l == L:
24 print(l) 43 f.write(">"+str(i)+"_"+str(l)+'\n')
25 44 f.write(peps[i]+'\n')
26 for line in lines: 45 f.close()
27 46 else:
28 if l[1] in line.split('\t')[1].strip('\n').strip('\r'): 47 for i, l in enumerate(label):
29 n= n+1 48 f = open(fn[i],'w')
30 of1.write('>peptide_'+str(n)+'_'+str(l[1])+'\n') 49 for i, L in enumerate(labels):
31 of1.write(line.split('\t')[0]+'\n') 50 if l == L:
32 51 f.write(">"+names[i]+"_"+l+'\n')
33 if l[0] in line.split('\t')[1].strip('\n').strip('\r'): 52 f.write(peps[i]+'\n')
34 m= m+1 53 f.close()
35 of2.write('>peptide_'+str(m)+'_'+str(l[0])+'\n')
36 of2.write(line.split('\t')[0]+'\n')
37
38 elif Method == 'NoClassLabel':
39
40 f = open(InFile)
41 lines = f.readlines()
42 of1 = open(OutFile,'w')
43
44 for i, line in enumerate(lines[1:]):
45 of1.write('>peptide_'+str(i)+'\n')
46 of1.write(line.split('\t')[0]+'\n')
47
48 else:
49 pass
50 54
51 if __name__=="__main__": 55 if __name__=="__main__":
52 56
53 import argparse 57 import argparse
54
55 parser = argparse.ArgumentParser() 58 parser = argparse.ArgumentParser()
56
57 parser.add_argument("-I", "--InFile", required=True, default=None, help=".fasta or .tsv") 59 parser.add_argument("-I", "--InFile", required=True, default=None, help=".fasta or .tsv")
58 parser.add_argument("-P", "--Postvs", required=False, default='FirstDataFile.fasta', help="Path to target tsv file") 60 parser.add_argument("-F", "--FirstDataFile", required=False, default='FirstDataFile.fasta', help="Path to target tsv file")
59 parser.add_argument("-N", "--Negtvs", required=False, default='SecondDataFile.fasta', help="Path to target tsv file") 61 parser.add_argument("-S", "--SecondDataFile", required=False, default='SecondDataFile.fasta', help="Path to target tsv file")
60 parser.add_argument("-O", "--OutFile", required=False, default='OutFile.fasta', help="Path to target tsv file") 62 parser.add_argument("-O", "--OutFile", required=False, default='OutFile.fasta', help="Path to target tsv file")
61 parser.add_argument("-M", "--Method", required=True, default=None, help="Path to target tsv file") 63 parser.add_argument("-M", "--Method", required=True, default=None, help="Path to target tsv file")
64 parser.add_argument("-C", "--ClmPepID", required=False, default=None, help="Peptide Column Name")
65 parser.add_argument("-L", "--SlcClassLabel", required=False, default="Class_label", help="Class Label Column Name")
66 parser.add_argument("-P", "--PeptideColumn", required=True, default=None, help="Class Label Column Name")
62 args = parser.parse_args() 67 args = parser.parse_args()
63 68
64 TSVtoFASTA(args.InFile, args.Method, args.Postvs, args.Negtvs, args.OutFile) 69 TSVtoFASTA(args.InFile, args.Method, args.FirstDataFile, args.SecondDataFile, args.OutFile, args.ClmPepID, args.SlcClassLabel, args.PeptideColumn)
70
71
72