annotate utils/batch_loader.py @ 0:b856d3d95413 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
author iuc
date Mon, 09 Jan 2023 13:27:09 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b856d3d95413 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff changeset
1 #!/usr/bin/env python
b856d3d95413 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff changeset
2 # -*- coding: utf-8 -*-
b856d3d95413 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff changeset
3 # Credits: Grigorii Sukhorukov, Macha Nikolski
b856d3d95413 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff changeset
4 import numpy as np
b856d3d95413 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff changeset
5 from sklearn.utils import shuffle
b856d3d95413 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff changeset
6 from tensorflow import keras
b856d3d95413 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff changeset
7
b856d3d95413 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff changeset
8
b856d3d95413 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff changeset
class BatchLoader(keras.utils.Sequence):
    """Keras ``Sequence`` that serves pre-defined batches as NumPy arrays.

    Every element of ``batches`` is a collection of row indices.  Item
    ``idx`` gathers those rows from the input arrays, shuffles them with a
    fixed seed and returns them as an ``(inputs, labels)`` pair.

    Args:
        input_seqs: Array-like supporting ``input_seqs[indices, ...]``.
        input_seqs_rc: Array-like indexed the same way as ``input_seqs``
            (presumably the reverse-complement encoding -- TODO confirm).
            Only read when ``rc`` is true.
        input_labs: Array-like of labels, indexed the same way.
        batches: Sequence of index collections, one per batch.
        rc: When true, each item is ``((seqs, seqs_rc), labs)``; otherwise
            it is ``(seqs, labs)``.
        random_seed: Passed as ``random_state`` to ``sklearn.utils.shuffle``.
            The same seed is reused for every batch.
    """

    def __init__(
        self,
        input_seqs,
        input_seqs_rc,
        input_labs,
        batches,
        rc=True,
        random_seed=1
    ):
        # Newer Keras releases expect Sequence subclasses to run the base
        # initializer; on older tf.keras this is a harmless no-op.
        super().__init__()
        self.input_seqs = input_seqs
        self.input_seqs_rc = input_seqs_rc
        self.input_labs = input_labs
        self.batches = batches
        self.rc = rc
        self.random_seed = random_seed

    def __len__(self):
        """Return the number of batches."""
        return len(self.batches)

    def __getitem__(self, idx):
        """Return batch number ``idx`` with its rows shuffled.

        The row order produced by ``shuffle`` depends only on the seed and
        the number of rows, so every array of the batch is permuted the
        same way whether or not the ``input_seqs_rc`` rows are included.
        """
        # Indices are sorted before the fancy-indexed read (presumably
        # because the backing store needs increasing indices -- TODO
        # confirm); the shuffle below then re-randomises the row order.
        batch = sorted(self.batches[idx])
        batch_seqs = np.array(self.input_seqs[batch, ...])
        batch_labs = np.array(self.input_labs[batch, ...])
        if self.rc:
            batch_seqs_rc = np.array(self.input_seqs_rc[batch, ...])
            batch_seqs, batch_seqs_rc, batch_labs = shuffle(
                batch_seqs,
                batch_seqs_rc,
                batch_labs,
                random_state=self.random_seed
            )
            return (batch_seqs, batch_seqs_rc), batch_labs
        # Single-input mode: input_seqs_rc is never touched, so callers may
        # pass None for it and no unused rows are loaded.
        batch_seqs, batch_labs = shuffle(
            batch_seqs,
            batch_labs,
            random_state=self.random_seed
        )
        return batch_seqs, batch_labs
b856d3d95413 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff changeset
46
b856d3d95413 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff changeset
47
b856d3d95413 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
iuc
parents:
diff changeset
class BatchGenerator:
    """Callable producing a generator over pre-defined batches.

    Calling an instance walks ``batches`` in order and yields
    ``((seqs, seqs_rc), labs)`` for each one, as NumPy arrays whose rows
    have been shuffled with ``random_seed``.

    Args:
        input_seqs: Array-like supporting ``input_seqs[indices, ...]``.
        input_seqs_rc: Array-like indexed the same way as ``input_seqs``
            (presumably the reverse-complement encoding -- TODO confirm).
        input_labs: Array-like of labels, indexed the same way.
        batches: Iterable of index collections, one per batch.
        random_seed: Passed as ``random_state`` to ``sklearn.utils.shuffle``.
            The same seed is reused for every batch.
    """

    def __init__(
        self,
        input_seqs,
        input_seqs_rc,
        input_labs,
        batches,
        random_seed=1
    ):
        self.input_seqs = input_seqs
        self.input_seqs_rc = input_seqs_rc
        self.input_labs = input_labs
        self.batches = batches
        self.random_seed = random_seed

    def __call__(self):
        """Yield ``((seqs, seqs_rc), labs)`` for every entry of ``batches``."""
        for indices in self.batches:
            # Rows are gathered in increasing index order first; the
            # shuffle afterwards decides the order actually yielded.
            ordered = sorted(indices)
            forward = np.array(self.input_seqs[ordered, ...])
            reverse = np.array(self.input_seqs_rc[ordered, ...])
            labels = np.array(self.input_labs[ordered, ...])
            forward, reverse, labels = shuffle(
                forward,
                reverse,
                labels,
                random_state=self.random_seed
            )
            yield (forward, reverse), labels