diff PDAUG_Merge_Dataframes/PDAUG_Merge_Dataframes.py @ 6:391e7e836fe9 draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 45ebf32dcaa1eed91670d3a2491f9cf3dfb535ef"
author jay
date Tue, 12 Jan 2021 18:40:09 +0000
parents c3f0b3a6339e
children
line wrap: on
line diff
--- a/PDAUG_Merge_Dataframes/PDAUG_Merge_Dataframes.py	Wed Dec 30 02:42:16 2020 +0000
+++ b/PDAUG_Merge_Dataframes/PDAUG_Merge_Dataframes.py	Tue Jan 12 18:40:09 2021 +0000
@@ -2,27 +2,38 @@
 import pandas as pd 
 import sys
 
-files = sys.argv[1]
-out_file = sys.argv[2]
+
+def MergeData(infiles, add_class_label, class_label, OutPut):
+    """Stack the comma-separated TSV paths in infiles row-wise and write one TSV to OutPut, optionally adding a class_label column."""
+    frames = []
 
+    # The command line passes the flag as text; a real boolean True is accepted as well.
+    add_label = str(add_class_label) in ('True', 'true')
+    for i, file in enumerate(infiles.split(',')):
+        df1 = pd.read_csv(file, sep='\t')
+        if add_label:
+            # Scalar assignment labels every row with the file's 0-based position, whatever the index.
+            df1[class_label] = i
 
-data_frame = pd.read_csv(files.split(',')[0],sep='\t')
+        frames.append(df1)
+
+    # Concatenate once instead of growing a DataFrame inside the loop.
+    data_frame = pd.concat(frames)
+    # Cells for columns that a file lacks (and any other NaN) become 0.
+    final_DF = data_frame.fillna(0)
+
+    final_DF.to_csv(OutPut, sep="\t", index=False)
 
 
-for file in files.split(',')[1:]: 
-
-    df1 = pd.read_csv(file,sep='\t')
-    data_frame =  pd.concat([data_frame,df1])
-
-final_DF = data_frame.fillna(0)
-
-final_DF.to_csv(out_file,sep="\t", index=False)
+if __name__=="__main__":  # command-line entry point
 
-
-
-
-
+    import argparse  # only needed when run as a script
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-I", "--infiles", required=True, default=None, help="Comma-separated paths of the input .tsv files to merge")
+    parser.add_argument("-L", "--add_class_label", required=False, default=False, help="Pass 'True' or 'true' to append a per-file class label column (0 for the first file, 1 for the second, ...)")
+    parser.add_argument("-C", "--class_label", required=False, default='class_label', help="Name of the class label column")
+    parser.add_argument("-O", "--OutPut", required=False, default='Out.tsv', help="Path of the merged output .tsv file")
 
-
+    args = parser.parse_args()
 
-
-
+    MergeData(args.infiles, args.add_class_label, args.class_label, args.OutPut)