diff aplcms_to_ramclustr_converter.py @ 0:c6c0f6027e34 draft default tip

"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
author recetox
date Tue, 22 Mar 2022 16:06:28 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aplcms_to_ramclustr_converter.py	Tue Mar 22 16:06:28 2022 +0000
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+
+import argparse
+import sys
+
+import pandas as pd
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--dataframe", help="Parquet dataframe")
+parser.add_argument('output')
+args = parser.parse_args()
+
+
+def main():
+    featureTable = pd.read_parquet(args.dataframe)
+
+    # Concatenate "mz" and "rt" columns; select relevant columns; pivot the table
+    featureTable["mz_rt"] = featureTable["mz"].astype(str) + "_" + featureTable["rt"].astype(str)
+    featureTable = featureTable[["sample", "mz_rt", "sample_intensity"]]
+    featureTable = pd.pivot_table(featureTable, columns="mz_rt", index="sample", values="sample_intensity")
+
+    try:
+        featureTable.to_csv(args.output, sep=',')
+        msg = f"Dataset of {len(featureTable)} samples is converted to a feature-by-sample table"
+        print(msg, file=sys.stdout)
+        return 0
+    except Exception:
+        print("Could not write the data", file=sys.stdout)
+        return 1
+
+
+if __name__ == "__main__":
+    main()