diff tsv_to_loompy.py @ 0:c8e4d0b9ae8c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit dc9d19d1f902f3ed54009cd0e68c8518c284b856"
author iuc
date Mon, 06 Jan 2020 13:43:38 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tsv_to_loompy.py	Mon Jan 06 13:43:38 2020 -0500
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+"""This module converts a tsv file into a binary loom file"""
+
+import argparse
+import os
+
+import loompy
+import numpy as np
+
+parser = argparse.ArgumentParser(description="Loompy file converter flags")
+parser.add_argument('--VERSION', action='version', version='%(prog)s 0.1.0',
+                    help="Displays tool version")
+parser.add_argument('--rowfile', '-r', help="File of row attributes & values")
+parser.add_argument('--colfile', '-c',
+                    help="File of column attributes and values")
+parser.add_argument('--output', '-o', help="Output file name")
+parser.add_argument('--files', '-f', nargs='*',
+                    help="Input tsv files. First file becomes main layer.")
+args = parser.parse_args()
+
+colsfile = args.colfile
+rowsfile = args.rowfile
+if args.output:
+    filename = args.output
+else:
+    filename = "converted.loom"
+alldata = args.files
+alayers = []
+layernames = []
+rowdict = {}
+coldict = {}
+
+#  Creates dictionary based on row file
+#  For each attribute:
+#  Attribute: [attribute values]
+with open(rowsfile, "r") as rows:
+    count = 0
+    for line in rows:
+        line = line.strip().split("\t")
+        if count == 0:  # First time through
+            row_attributes = line
+            for x in row_attributes:
+                rowdict[x] = []
+            count += 1
+        else:
+            for x in range(0, len(line)):
+                rowdict[row_attributes[x]].append(line[x])
+#  Same as above, but for columns
+with open(colsfile, "r") as cols:
+    count = 0
+    for line in cols:
+        line = line.replace('\"', "")
+        line = line.replace(' ', "")
+        line = line.strip().split("\t")
+        if count == 0:  # First time through
+            col_attributes = line
+            for x in col_attributes:
+                coldict[x] = []
+            count += 1
+        else:
+            for x in range(0, len(line)):
+                coldict[col_attributes[x]].append(line[x])
+#  Finding dimensions for the loom layers
+rowshape = len(rowdict[list(rowdict.keys())[0]])
+colshape = len(coldict[list(coldict.keys())[0]])
+
+#  Creates a list with each element being entire matrix of
+#  each layer file as floats
+for file in range(0, len(alldata)):
+    layer = alldata[file][:-4]
+    layer = layer.split("/")[-1]
+    if layer == "":
+        raise Exception("Please only use named files")
+    layernames.append(layer)
+    cfile = alldata[file]
+    with open(cfile, "r") as tsv:
+        cmatrix = []
+        for line in tsv:
+            line = line.strip().split("\t")
+            line = [float(i) for i in line]
+            cmatrix += line
+        alayers.append(cmatrix)
+
+#  Loompy cannot overwright existing files. If somehow it finds
+#  a second file with the same name, it must be deleted
+if os.path.isfile(filename):
+    os.remove(filename)
+#  To create the file properly, the first row and column attributes must be
+#  added separately in the form of individual dictionaries
+row_attrs = {row_attributes[0]: np.asarray(rowdict[row_attributes[0]])}
+col_attrs = {col_attributes[0]: np.asarray(coldict[col_attributes[0]])}
+matrix = np.asarray(alayers[0])
+matrix = matrix.astype(float)
+matrix = matrix.reshape(rowshape, colshape)
+#  Creation of initial loom file
+if "loom" not in filename[-5:]:
+    filename = filename + ".loom"
+loompy.create(filename, matrix, row_attrs, col_attrs)
+#  Adding all row and column attributes, then all layers
+with loompy.connect(filename) as loomfile:
+    for x in row_attributes:
+        loomfile.ra[x] = rowdict[x]
+    for y in col_attributes:
+        loomfile.ca[y] = coldict[y]
+    for z in range(1, len(alayers)):
+        matrix = np.asarray(alayers[z])
+        matrix = matrix.astype(float)
+        matrix = matrix.reshape(rowshape, colshape)
+        loomfile[layernames[z]] = matrix