Mercurial > repos > jay > pdaug_uversky_plot
annotate PDAUG_Merge_Dataframes/PDAUG_Merge_Dataframes.py @ 9:5c08dad483d9 draft default tip
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit d396d7ff89705cc0dd626ed32c45a9f4029b1b05"
author | jay |
---|---|
date | Wed, 12 Jan 2022 19:53:09 +0000 |
parents | f46f1d9b3899 |
children |
rev | line source |
---|---|
0
070fc1da6df7
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
1 import glob |
070fc1da6df7
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
2 import pandas as pd |
070fc1da6df7
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
3 import sys |
070fc1da6df7
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
4 |
4
f46f1d9b3899
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents:
0
diff
changeset
|
5 |
f46f1d9b3899
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents:
0
diff
changeset
|
def MergeData(infiles, add_class_label, class_label, OutPut):
    """Merge several tab-separated files into one tab-separated file.

    The input tables are stacked row-wise in the order given.  Columns that
    are missing from some of the inputs are filled with 0 in the result.

    Args:
        infiles: Comma-separated string of paths to tab-separated files.
            Whitespace around each path and empty entries (for example a
            trailing comma) are ignored.
        add_class_label: When true, a column named ``class_label`` is added
            whose value is the zero-based position of the source file in
            ``infiles``.  Accepts a bool or the strings 'True'/'true'
            (compared case-insensitively); anything else disables labelling.
        class_label: Name of the label column; only used when
            ``add_class_label`` is true.
        OutPut: Path of the tab-separated file to write (no index column).

    Raises:
        ValueError: If ``infiles`` does not name any file.
    """
    paths = [name.strip() for name in infiles.split(',') if name.strip()]
    if not paths:
        raise ValueError("infiles must name at least one tab-separated file")

    # The command line delivers this flag as text while its argparse default
    # is the bool False, so normalise both forms to a single boolean.
    label_rows = str(add_class_label).strip().lower() == 'true'

    frames = []
    for i, path in enumerate(paths):
        frame = pd.read_csv(path, sep='\t')
        if label_rows:
            # Every row of the i-th file gets the label i.
            frame[class_label] = i
        frames.append(frame)

    # Concatenate once and fill once: repeated concat inside the loop is
    # quadratic, and there is no need to re-fill the partial result per file.
    final_DF = pd.concat(frames).fillna(0)
    final_DF.to_csv(OutPut, sep="\t", index=False)
0
070fc1da6df7
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
26 |
070fc1da6df7
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
jay
parents:
diff
changeset
|
27 |
4
f46f1d9b3899
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
jay
parents:
0
diff
changeset
|
if __name__ == "__main__":

    import argparse

    # Command-line entry point: parse the options and hand them to MergeData.
    parser = argparse.ArgumentParser()
    parser.add_argument("-I", "--infiles", required=True,
                        help="Comma-separated list of input .tsv files")
    # The flag arrives as text (e.g. 'true'); MergeData interprets the value.
    parser.add_argument("-L", "--add_class_label", required=False, default=False,
                        help="Pass 'True' or 'true' to add a class label column "
                             "numbering the input files")
    parser.add_argument("-C", "--class_label", required=False, default='class_label',
                        help="Name of the class label column")
    parser.add_argument("-O", "--OutPut", required=False, default='Out.tsv',
                        help="Path to target tsv file")

    args = parser.parse_args()

    MergeData(args.infiles, args.add_class_label, args.class_label, args.OutPut)