Mercurial > repos > iuc > anndata_export
comparison tsv_to_loompy.py @ 3:8623710d083c draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit dc9d19d1f902f3ed54009cd0e68c8518c284b856"
| author | iuc | 
|---|---|
| date | Mon, 06 Jan 2020 13:44:46 -0500 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 2:e0395cca2c57 | 3:8623710d083c | 
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """This module converts a tsv file into a binary loom file""" | |
| 3 | |
| 4 import argparse | |
| 5 import os | |
| 6 | |
| 7 import loompy | |
| 8 import numpy as np | |
| 9 | |
# Command-line interface.
# The row file, the column file and at least one layer file are mandatory:
# without them the script cannot proceed (open(None) / len(None) / an empty
# layer list), so argparse is asked to reject such calls up front with a
# usage message instead of letting the script crash later with a traceback.
parser = argparse.ArgumentParser(description="Loompy file converter flags")
parser.add_argument('--VERSION', action='version', version='%(prog)s 0.1.0',
                    help="Displays tool version")
parser.add_argument('--rowfile', '-r', required=True,
                    help="File of row attributes & values")
parser.add_argument('--colfile', '-c', required=True,
                    help="File of column attributes and values")
parser.add_argument('--output', '-o', help="Output file name")
parser.add_argument('--files', '-f', nargs='+', required=True,
                    help="Input tsv files. First file becomes main layer.")
args = parser.parse_args()

colsfile = args.colfile
rowsfile = args.rowfile
# A missing or empty --output falls back to the default file name.
filename = args.output or "converted.loom"
alldata = args.files
alayers = []     # one flat list of floats per input file, in input order
layernames = []  # layer name derived from each input file, same order
rowdict = {}     # row attribute name -> list of values
coldict = {}     # column attribute name -> list of values
| 32 | |
# Creates dictionary based on row file
# For each attribute:
#     Attribute: [attribute values]
# The first non-blank line is the header naming the attributes; every later
# line contributes one value per attribute, matched by position.
with open(rowsfile, "r") as rows:
    row_attributes = None  # filled in from the header line
    for line in rows:
        # Strip only spaces and line endings. A bare strip() would also eat
        # leading/trailing tabs, silently dropping empty first/last fields
        # and shifting the remaining values under the wrong attributes.
        line = line.strip(" \r\n")
        if not line:
            # Blank lines hold no record; counting them would add a phantom
            # value to the first attribute and break the matrix dimensions.
            continue
        fields = line.split("\t")
        if row_attributes is None:  # First time through
            row_attributes = fields
            for name in row_attributes:
                rowdict[name] = []
        else:
            # Indexing (rather than zip) keeps the IndexError for rows that
            # have more fields than the header.
            for index, value in enumerate(fields):
                rowdict[row_attributes[index]].append(value)
# Creates dictionary based on column file
# For each attribute:
#     Attribute: [attribute values]
# The first non-blank line is the header naming the attributes; every later
# line contributes one value per attribute, matched by position.
with open(colsfile, "r") as cols:
    col_attributes = None  # filled in from the header line
    for line in cols:
        # Double quotes and spaces are discarded everywhere in this file.
        line = line.replace('"', "").replace(' ', "")
        # Strip only line endings. A bare strip() would also eat
        # leading/trailing tabs, silently dropping empty first/last fields
        # and shifting the remaining values under the wrong attributes.
        line = line.strip("\r\n")
        if not line:
            # Blank lines hold no record; counting them would add a phantom
            # value to the first attribute and break the matrix dimensions.
            continue
        fields = line.split("\t")
        if col_attributes is None:  # First time through
            col_attributes = fields
            for name in col_attributes:
                coldict[name] = []
        else:
            # Indexing (rather than zip) keeps the IndexError for rows that
            # have more fields than the header.
            for index, value in enumerate(fields):
                coldict[col_attributes[index]].append(value)
# Finding dimensions for the loom layers: the number of values collected
# under the first entry of each attribute dictionary.
first_row_values = list(rowdict.values())[0]
rowshape = len(first_row_values)
first_col_values = list(coldict.values())[0]
colshape = len(first_col_values)
| 66 | |
# Creates a list with each element being entire matrix of
# each layer file as floats (flattened row by row; reshaped later).
for cfile in alldata:
    # Layer name = file name without its directory and without its final
    # extension. Splitting on the last dot (instead of blindly chopping four
    # characters) keeps names intact for extensions that are not exactly
    # three letters long, while a file named only by its extension (e.g.
    # ".tsv") still yields an empty name and is rejected below.
    basename = os.path.basename(cfile)
    stem, dot, _extension = basename.rpartition(".")
    layer = stem if dot else basename
    if layer == "":
        raise Exception("Please only use named files")
    layernames.append(layer)
    with open(cfile, "r") as tsv:
        cmatrix = []
        for line in tsv:
            line = line.strip()
            if not line:
                # Skip blank lines (e.g. a trailing empty line) instead of
                # failing on float("").
                continue
            cmatrix.extend(float(value) for value in line.split("\t"))
    alayers.append(cmatrix)
| 83 | |
# Settle the final output name first, so that the existence check below
# inspects the file that will actually be created. (Checking before the
# suffix was appended could delete an unrelated file and leave a stale
# "<name>.loom" in place.)
# The suffix is added unless "loom" occurs in the last five characters.
if "loom" not in filename[-5:]:
    filename = filename + ".loom"
# Loompy cannot overwrite existing files. If somehow it finds
# a second file with the same name, it must be deleted
if os.path.isfile(filename):
    os.remove(filename)
# To create the file properly, the first row and column attributes must be
# added separately in the form of individual dictionaries
row_attrs = {row_attributes[0]: np.asarray(rowdict[row_attributes[0]])}
col_attrs = {col_attributes[0]: np.asarray(coldict[col_attributes[0]])}
# The first input file becomes the main matrix of the loom file.
matrix = np.asarray(alayers[0], dtype=float).reshape(rowshape, colshape)
# Creation of initial loom file
loompy.create(filename, matrix, row_attrs, col_attrs)
# Adding all row and column attributes, then all layers
with loompy.connect(filename) as loomfile:
    for x in row_attributes:
        loomfile.ra[x] = rowdict[x]
    for y in col_attributes:
        loomfile.ca[y] = coldict[y]
    # Every input file after the first is stored as an additional layer
    # named after its file.
    for name, values in zip(layernames[1:], alayers[1:]):
        matrix = np.asarray(values, dtype=float).reshape(rowshape, colshape)
        loomfile[name] = matrix
