Mercurial > repos > iuc > anndata_manipulate
comparison tsv_to_loompy.py @ 3:6db1b06e6bbb draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit dc9d19d1f902f3ed54009cd0e68c8518c284b856"
author | iuc |
---|---|
date | Mon, 06 Jan 2020 13:44:18 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:a56baceb1900 | 3:6db1b06e6bbb |
---|---|
1 #!/usr/bin/env python | |
2 """This module converts a tsv file into a binary loom file""" | |
3 | |
4 import argparse | |
5 import os | |
6 | |
7 import loompy | |
8 import numpy as np | |
9 | |
def parse_args(argv=None):
    """Parse the command line and return the argparse namespace.

    argv -- list of argument strings; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Exits with a usage error (argparse behaviour, exit status 2) when the
    row file, the column file or the list of layer files is missing, since
    the conversion cannot run without any of them.
    """
    parser = argparse.ArgumentParser(description="Loompy file converter flags")
    parser.add_argument('--VERSION', action='version', version='%(prog)s 0.1.0',
                        help="Displays tool version")
    parser.add_argument('--rowfile', '-r', help="File of row attributes & values")
    parser.add_argument('--colfile', '-c',
                        help="File of column attributes and values")
    parser.add_argument('--output', '-o', help="Output file name")
    parser.add_argument('--files', '-f', nargs='*',
                        help="Input tsv files. First file becomes main layer.")
    args = parser.parse_args(argv)

    # Report missing inputs up front instead of failing later with an
    # unrelated TypeError/IndexError.
    required = (("--rowfile", args.rowfile),
                ("--colfile", args.colfile),
                ("--files", args.files))
    missing = [flag for flag, value in required if not value]
    if missing:
        parser.error("missing required argument(s): " + ", ".join(missing))
    return args


def read_attributes(path, strip_quotes_and_spaces=False):
    """Read a tab-separated attribute file.

    The first non-blank line lists the attribute names; every following
    non-blank line supplies one value per attribute, in the same order.

    path -- file to read.
    strip_quotes_and_spaces -- when true, every double quote and every
        space is removed from each line before it is split (the treatment
        the column attribute file receives).

    Returns ``(names, values)`` where ``names`` is the list of attribute
    names in file order and ``values`` maps each name to its list of
    string values.

    Raises ValueError if the file holds no header line or if a line has
    more fields than the header.
    """
    names = None
    values = {}
    with open(path, "r") as handle:
        for line_number, line in enumerate(handle, start=1):
            if strip_quotes_and_spaces:
                line = line.replace('\"', "").replace(' ', "")
            line = line.strip()
            if not line:
                # Blank lines (typically a trailing newline) carry no
                # values and would otherwise distort the attribute counts.
                continue
            fields = line.split("\t")
            if names is None:  # First non-blank line is the header
                names = fields
                for name in names:
                    values[name] = []
                continue
            if len(fields) > len(names):
                raise ValueError(
                    "{}: line {} has {} fields but the header names only {}"
                    .format(path, line_number, len(fields), len(names)))
            for name, value in zip(names, fields):
                values[name].append(value)
    if names is None:
        raise ValueError("{}: attribute file has no header line".format(path))
    return names, values


def read_matrix_values(path):
    """Return every value of a tab-separated numeric file as a flat list.

    Values are converted to float and concatenated row after row; blank
    lines are skipped.  Raises ValueError (from ``float``) on a field that
    is not a number.
    """
    values = []
    with open(path, "r") as handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue
            values.extend(float(field) for field in line.split("\t"))
    return values


def layer_name(path):
    """Derive a layer name from a file path: base name minus its extension.

    Only the last extension is removed, whatever its length, and a name
    without any extension is kept whole.

    Raises ValueError (an Exception subclass) when nothing is left, e.g.
    for a path that ends in a separator or a file called ``.tsv``.
    """
    base = os.path.basename(path)
    stem, dot, _extension = base.rpartition(".")
    name = stem if dot else base
    if name == "":
        raise ValueError("Please only use named files")
    return name


def loom_filename(output):
    """Return the output file name, defaulting and adding ``.loom`` as needed.

    output -- the name requested on the command line, or a false value to
              use ``converted.loom``.
    """
    filename = output if output else "converted.loom"
    if not filename.endswith(".loom"):
        filename = filename + ".loom"
    return filename


def main(argv=None):
    """Convert the tsv inputs named on the command line into a loom file.

    The row and column attribute files define the matrix dimensions (number
    of values of their first attribute).  The first tsv file becomes the
    main matrix; every further tsv file is stored as an extra layer named
    after the file.
    """
    args = parse_args(argv)

    row_attributes, rowdict = read_attributes(args.rowfile)
    col_attributes, coldict = read_attributes(args.colfile,
                                              strip_quotes_and_spaces=True)

    # Dimensions of every layer: number of values of the first attribute.
    rowshape = len(rowdict[row_attributes[0]])
    colshape = len(coldict[col_attributes[0]])

    # Load and shape every layer before touching the output file, so a
    # malformed input is reported without leaving a partial loom file.
    layernames = []
    alayers = []
    for path in args.files:
        layernames.append(layer_name(path))
        matrix = np.asarray(read_matrix_values(path), dtype=float)
        alayers.append(matrix.reshape(rowshape, colshape))

    # Loompy cannot overwrite existing files, so a stale file must be
    # deleted.  The check uses the final name (with the .loom suffix),
    # i.e. the file that is actually about to be created.
    filename = loom_filename(args.output)
    if os.path.isfile(filename):
        os.remove(filename)

    # To create the file properly, the first row and column attributes must
    # be passed separately in the form of individual dictionaries.
    row_attrs = {row_attributes[0]: np.asarray(rowdict[row_attributes[0]])}
    col_attrs = {col_attributes[0]: np.asarray(coldict[col_attributes[0]])}
    loompy.create(filename, alayers[0], row_attrs, col_attrs)

    # Adding all row and column attributes, then all remaining layers.
    with loompy.connect(filename) as loomfile:
        for name in row_attributes:
            loomfile.ra[name] = rowdict[name]
        for name in col_attributes:
            loomfile.ca[name] = coldict[name]
        for name, matrix in zip(layernames[1:], alayers[1:]):
            loomfile[name] = matrix


if __name__ == "__main__":
    main()