Mercurial > repos > recetox > aplcms_to_ramclustr_converter
diff aplcms_to_ramclustr_converter.py @ 0:c6c0f6027e34 draft default tip
"planemo upload for repository https://github.com/RECETOX/galaxytools/tools/aplcms_to_ramclustr_converter/ commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
author | recetox |
---|---|
date | Tue, 22 Mar 2022 16:06:28 +0000 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
"""Convert a long-format feature table stored as Parquet into a wide CSV.

The input dataframe must provide the columns ``sample``, ``mz``, ``rt`` and
``sample_intensity``. The output has one row per sample and one column per
``<mz>_<rt>`` feature label. Judging by the tool name, the input comes from
apLCMS and the output is meant for RAMClustR -- confirm against the Galaxy
tool wrapper.
"""

import argparse
import sys

import pandas as pd


parser = argparse.ArgumentParser()
# Without an input path there is nothing to convert, so let argparse reject
# the call with a usage message instead of failing later inside pandas.
parser.add_argument("--dataframe", required=True, help="Parquet dataframe")
parser.add_argument('output')


def _pivot_features(feature_table):
    """Return the sample-by-feature intensity table for *feature_table*.

    Each feature is labelled ``"<mz>_<rt>"`` using the string form of the
    ``mz`` and ``rt`` values. Rows of the result are samples, columns are
    feature labels and cells hold ``sample_intensity``. When a
    (sample, feature) pair occurs more than once, ``pd.pivot_table`` applies
    its default aggregation (the mean).

    The input dataframe is not modified.
    """
    # ``assign`` builds a new frame, so the caller's dataframe stays untouched.
    labelled = feature_table.assign(
        mz_rt=feature_table["mz"].astype(str) + "_" + feature_table["rt"].astype(str)
    )
    return pd.pivot_table(
        labelled[["sample", "mz_rt", "sample_intensity"]],
        columns="mz_rt",
        index="sample",
        values="sample_intensity",
    )


def main(argv=None):
    """Run the conversion and return a process exit status.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Returns:
        0 when the CSV was written, 1 when writing it failed.

    Errors while reading or reshaping the input are not caught here and
    propagate to the caller.
    """
    # Parse inside main() so importing this module has no side effects.
    args = parser.parse_args(argv)
    feature_table = _pivot_features(pd.read_parquet(args.dataframe))

    try:
        feature_table.to_csv(args.output, sep=',')
    except Exception as exc:
        # Top-level boundary: report the cause on stderr and signal failure
        # through the exit status rather than dumping a traceback.
        print(f"Could not write the data: {exc}", file=sys.stderr)
        return 1

    msg = f"Dataset of {len(feature_table)} samples is converted to a feature-by-sample table"
    print(msg, file=sys.stdout)
    return 0


if __name__ == "__main__":
    # Hand main()'s status to the shell so callers can detect failures.
    sys.exit(main())