Mercurial > repos > iuc > anndata_import
diff tsv_to_loompy.py @ 3:b5c7ba11401d draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit dc9d19d1f902f3ed54009cd0e68c8518c284b856"
author | iuc |
---|---|
date | Mon, 06 Jan 2020 13:45:13 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tsv_to_loompy.py Mon Jan 06 13:45:13 2020 -0500 @@ -0,0 +1,109 @@ +#!/usr/bin/env python +"""This module converts a tsv file into a binary loom file""" + +import argparse +import os + +import loompy +import numpy as np + +parser = argparse.ArgumentParser(description="Loompy file converter flags") +parser.add_argument('--VERSION', action='version', version='%(prog)s 0.1.0', + help="Displays tool version") +parser.add_argument('--rowfile', '-r', help="File of row attributes & values") +parser.add_argument('--colfile', '-c', + help="File of column attributes and values") +parser.add_argument('--output', '-o', help="Output file name") +parser.add_argument('--files', '-f', nargs='*', + help="Input tsv files. First file becomes main layer.") +args = parser.parse_args() + +colsfile = args.colfile +rowsfile = args.rowfile +if args.output: + filename = args.output +else: + filename = "converted.loom" +alldata = args.files +alayers = [] +layernames = [] +rowdict = {} +coldict = {} + +# Creates dictionary based on row file +# For each attribute: +# Attribute: [attribute values] +with open(rowsfile, "r") as rows: + count = 0 + for line in rows: + line = line.strip().split("\t") + if count == 0: # First time through + row_attributes = line + for x in row_attributes: + rowdict[x] = [] + count += 1 + else: + for x in range(0, len(line)): + rowdict[row_attributes[x]].append(line[x]) +# Same as above, but for columns +with open(colsfile, "r") as cols: + count = 0 + for line in cols: + line = line.replace('\"', "") + line = line.replace(' ', "") + line = line.strip().split("\t") + if count == 0: # First time through + col_attributes = line + for x in col_attributes: + coldict[x] = [] + count += 1 + else: + for x in range(0, len(line)): + coldict[col_attributes[x]].append(line[x]) +# Finding dimensions for the loom layers +rowshape = len(rowdict[list(rowdict.keys())[0]]) +colshape = len(coldict[list(coldict.keys())[0]]) + +# Creates a list with each element being entire matrix of +# each layer file as floats +for file in range(0, len(alldata)): + layer = alldata[file][:-4] + layer = layer.split("/")[-1] + if layer == "": + raise Exception("Please only use named files") + layernames.append(layer) + cfile = alldata[file] + with open(cfile, "r") as tsv: + cmatrix = [] + for line in tsv: + line = line.strip().split("\t") + line = [float(i) for i in line] + cmatrix += line + alayers.append(cmatrix) + +# Loompy cannot overwright existing files. If somehow it finds +# a second file with the same name, it must be deleted +if os.path.isfile(filename): + os.remove(filename) +# To create the file properly, the first row and column attributes must be +# added separately in the form of individual dictionaries +row_attrs = {row_attributes[0]: np.asarray(rowdict[row_attributes[0]])} +col_attrs = {col_attributes[0]: np.asarray(coldict[col_attributes[0]])} +matrix = np.asarray(alayers[0]) +matrix = matrix.astype(float) +matrix = matrix.reshape(rowshape, colshape) +# Creation of initial loom file +if "loom" not in filename[-5:]: + filename = filename + ".loom" +loompy.create(filename, matrix, row_attrs, col_attrs) +# Adding all row and column attributes, then all layers +with loompy.connect(filename) as loomfile: + for x in row_attributes: + loomfile.ra[x] = rowdict[x] + for y in col_attributes: + loomfile.ca[y] = coldict[y] + for z in range(1, len(alayers)): + matrix = np.asarray(alayers[z]) + matrix = matrix.astype(float) + matrix = matrix.reshape(rowshape, colshape) + loomfile[layernames[z]] = matrix