Mercurial > repos > iuc > decontaminator
annotate utils/batch_loader.py @ 0:b856d3d95413 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
| author | iuc | 
|---|---|
| date | Mon, 09 Jan 2023 13:27:09 +0000 | 
| parents | |
| children | 
| rev | line source | 
|---|---|
| 
0
 
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
 
iuc 
parents:  
diff
changeset
 | 
1 #!/usr/bin/env python | 
| 
 
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
 
iuc 
parents:  
diff
changeset
 | 
2 # -*- coding: utf-8 -*- | 
| 
 
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
 
iuc 
parents:  
diff
changeset
 | 
3 # Credits: Grigorii Sukhorukov, Macha Nikolski | 
| 
 
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
 
iuc 
parents:  
diff
changeset
 | 
4 import numpy as np | 
| 
 
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
 
iuc 
parents:  
diff
changeset
 | 
5 from sklearn.utils import shuffle | 
| 
 
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
 
iuc 
parents:  
diff
changeset
 | 
6 from tensorflow import keras | 
| 
 
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
 
iuc 
parents:  
diff
changeset
 | 
7 | 
| 
 
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
 
iuc 
parents:  
diff
changeset
 | 
8 | 
| 
 
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
 
iuc 
parents:  
diff
changeset
 | 
class BatchLoader(keras.utils.Sequence):
    """Keras ``Sequence`` serving pre-defined index batches as NumPy arrays.

    Item ``idx`` is produced by indexing the stored containers with
    ``batches[idx]`` and shuffling the selected rows in unison.
    """

    def __init__(
        self,
        input_seqs,
        input_seqs_rc,
        input_labs,
        batches,
        rc=True,
        random_seed=1
    ):
        """Store the data sources and the batch definitions.

        Args:
            input_seqs: Container supporting ``input_seqs[indices, ...]``.
            input_seqs_rc: Container indexed the same way as ``input_seqs``
                (presumably the reverse-complement sequences -- confirm
                with the caller). Only read when ``rc`` is true.
            input_labs: Container with the labels, indexed the same way.
            batches: Sequence whose elements are iterables of row indices;
                its length is the number of batches.
            rc: When true, items are ``((seqs, seqs_rc), labs)``; otherwise
                ``(seqs, labs)``.
            random_seed: Passed as ``random_state`` to
                ``sklearn.utils.shuffle`` for every batch.
        """
        # Newer Keras releases warn when a Sequence subclass skips the base
        # initialiser; on older releases this call is a harmless no-op.
        super().__init__()
        self.input_seqs = input_seqs
        self.input_seqs_rc = input_seqs_rc
        self.input_labs = input_labs
        self.batches = batches
        self.rc = rc
        self.random_seed = random_seed

    def __len__(self):
        """Return the number of batches."""
        return len(self.batches)

    def __getitem__(self, idx):
        """Return batch ``idx`` as shuffled NumPy arrays.

        Returns:
            ``((batch_seqs, batch_seqs_rc), batch_labs)`` when ``self.rc`` is
            true, otherwise ``(batch_seqs, batch_labs)``.
        """
        # Indices are sorted before fancy indexing (presumably because the
        # backing store, e.g. an HDF5 dataset, needs increasing indices --
        # TODO confirm).
        batch = sorted(self.batches[idx])

        arrays = [np.array(self.input_seqs[batch, ...])]
        if self.rc:
            # Only touch the second container when its data is returned.
            arrays.append(np.array(self.input_seqs_rc[batch, ...]))
        arrays.append(np.array(self.input_labs[batch, ...]))

        # One call keeps all arrays aligned under the same permutation.
        shuffled = shuffle(*arrays, random_state=self.random_seed)

        if self.rc:
            batch_seqs, batch_seqs_rc, batch_labs = shuffled
            return (batch_seqs, batch_seqs_rc), batch_labs
        batch_seqs, batch_labs = shuffled
        return batch_seqs, batch_labs
| 
 
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
 
iuc 
parents:  
diff
changeset
 | 
46 | 
| 
 
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
 
iuc 
parents:  
diff
changeset
 | 
47 | 
| 
 
b856d3d95413
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/decontaminator commit 3f8e87001f3dfe7d005d0765aeaa930225c93b72
 
iuc 
parents:  
diff
changeset
 | 
class BatchGenerator:
    """Callable that yields every pre-defined batch as NumPy arrays."""

    def __init__(self, input_seqs, input_seqs_rc, input_labs, batches,
                 random_seed=1):
        """Keep references to the data containers and batch definitions.

        Args:
            input_seqs: Container supporting ``input_seqs[indices, ...]``.
            input_seqs_rc: Container indexed the same way as ``input_seqs``.
            input_labs: Container with the labels, indexed the same way.
            batches: Iterable whose elements are iterables of row indices.
            random_seed: Passed as ``random_state`` to
                ``sklearn.utils.shuffle`` for every batch.
        """
        self.random_seed = random_seed
        self.batches = batches
        self.input_labs = input_labs
        self.input_seqs_rc = input_seqs_rc
        self.input_seqs = input_seqs

    def __call__(self):
        """Yield ``((seqs, seqs_rc), labs)`` for each entry of ``batches``."""
        for indices in self.batches:
            ordered = sorted(indices)

            # Materialise the selected rows, in the same order as before:
            # sequences, second sequence container, labels.
            seqs = np.array(self.input_seqs[ordered, ...])
            seqs_rc = np.array(self.input_seqs_rc[ordered, ...])
            labs = np.array(self.input_labs[ordered, ...])

            # A single shuffle call permutes the three arrays in unison.
            seqs, seqs_rc, labs = shuffle(
                seqs, seqs_rc, labs, random_state=self.random_seed
            )
            yield (seqs, seqs_rc), labs
