Mercurial > repos > jay > pdaug_merge_dataframes
annotate PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py @ 9:2dbfb42d34bc draft default tip
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit d396d7ff89705cc0dd626ed32c45a9f4029b1b05"
author | jay |
---|---|
date | Wed, 12 Jan 2022 19:50:39 +0000 |
parents | b5a9f541c6f8 |
children |
rev | line source |
---|---|
4
b5a9f541c6f8
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents:
3
diff
changeset
|
1 |
b5a9f541c6f8
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents:
3
diff
changeset
|
2 import pandas as pd |
b5a9f541c6f8
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents:
3
diff
changeset
|
3 |
b5a9f541c6f8
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents:
3
diff
changeset
|
4 |
b5a9f541c6f8
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents:
3
diff
changeset
|
def _write_fasta(path, records):
    """Write ``(header, sequence)`` pairs to *path* as FASTA records."""
    # The context manager closes the handle even if a write fails part-way.
    with open(path, "w") as handle:
        for header, sequence in records:
            handle.write(">" + header + "\n")
            # The sequence is concatenated as-is so that a non-string value
            # still raises TypeError instead of being written as text.
            handle.write(sequence + "\n")


def TSVtoFASTA(infile, method, firstdatafile, seconddatafile, outfile, clmpepid, slcclasslabel, peps):
    """Convert a tab-separated peptide table into FASTA file(s).

    Parameters
    ----------
    infile : path of the tab-separated input table (read with pandas).
    method : ``"withoutlabel"`` writes every row to ``outfile``;
        ``"withlabel"`` splits the rows by class label into
        ``firstdatafile`` and ``seconddatafile``.
    firstdatafile, seconddatafile : output paths used by ``"withlabel"``.
        Distinct labels are sorted and the first one goes to
        ``firstdatafile``; if the labels cannot be sorted against each
        other, first-appearance order is used instead. A file is only
        written for a label that actually occurs in the table.
    outfile : output path used by ``"withoutlabel"``.
    clmpepid : name of the column holding record identifiers, or ``None``
        to use the zero-based row position as the identifier.
    slcclasslabel : name of the class-label column (``"withlabel"`` only).
    peps : name of the column holding the peptide sequences.

    Headers are ``>ID`` for ``"withoutlabel"`` and ``>ID_label`` for
    ``"withlabel"``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the two supported values, or if the
        label column holds more distinct values than there are output files.
    KeyError
        If a requested column is not present in the table.
    """
    if method not in ("withoutlabel", "withlabel"):
        raise ValueError(
            "Unknown method {!r}; expected 'withoutlabel' or 'withlabel'".format(method)
        )

    df = pd.read_csv(infile, sep="\t")

    # Identifiers are normalised to str so numeric ID columns can be
    # concatenated into a header line.
    if clmpepid is None:
        ids = [str(position) for position in range(len(df))]
    else:
        ids = [str(name) for name in df[clmpepid].tolist()]

    sequences = df[peps].tolist()

    if method == "withoutlabel":
        _write_fasta(outfile, zip(ids, sequences))
        return

    labels = df[slcclasslabel].tolist()

    # Deterministic class order: a plain set() iterates string labels in a
    # hash-dependent order, which made the label-to-file mapping vary
    # between runs.
    classes = list(dict.fromkeys(labels))
    try:
        classes = sorted(classes)
    except TypeError:
        # Labels of mixed, non-comparable types: keep first-appearance order.
        pass

    targets = [firstdatafile, seconddatafile]
    if len(classes) > len(targets):
        raise ValueError(
            "Column {!r} holds {} distinct class labels but only {} output "
            "files are available".format(slcclasslabel, len(classes), len(targets))
        )

    for path, cls in zip(targets, classes):
        records = (
            ("{}_{}".format(ident, cls), seq)
            for ident, lab, seq in zip(ids, labels, sequences)
            if lab == cls
        )
        _write_fasta(path, records)
0
5bb52d4bf172
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
54 |
5bb52d4bf172
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
if __name__ == "__main__":

    import argparse

    # Command-line entry point: every option maps one-to-one onto a
    # TSVtoFASTA() argument. Flags, defaults and required-ness are unchanged;
    # only the help texts were corrected.
    parser = argparse.ArgumentParser()
    parser.add_argument("-I", "--InFile", required=True, default=None, help="Path to the input tab-separated (.tsv) file")
    parser.add_argument("-F", "--FirstDataFile", required=False, default='FirstDataFile.fasta', help="Output FASTA path for the first class (Method 'withlabel')")
    parser.add_argument("-S", "--SecondDataFile", required=False, default='SecondDataFile.fasta', help="Output FASTA path for the second class (Method 'withlabel')")
    parser.add_argument("-O", "--OutFile", required=False, default='OutFile.fasta', help="Output FASTA path (Method 'withoutlabel')")
    parser.add_argument("-M", "--Method", required=True, default=None, help="Conversion method: 'withoutlabel' or 'withlabel'")
    parser.add_argument("-C", "--ClmPepID", required=False, default=None, help="Peptide ID column name (row position is used when omitted)")
    parser.add_argument("-L", "--SlcClassLabel", required=False, default="Class_label", help="Class label column name")
    parser.add_argument("-P", "--PeptideColumn", required=True, default=None, help="Peptide sequence column name")
    args = parser.parse_args()

    TSVtoFASTA(args.InFile, args.Method, args.FirstDataFile, args.SecondDataFile, args.OutFile, args.ClmPepID, args.SlcClassLabel, args.PeptideColumn)
b5a9f541c6f8
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents:
3
diff
changeset
|
70 |
b5a9f541c6f8
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents:
3
diff
changeset
|
71 |
b5a9f541c6f8
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents:
3
diff
changeset
|
72 |