Mercurial > repos > bgruening > sklearn_train_test_eval
annotate model_prediction.py @ 10:a9e0b963b7bb draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 208a8d348e7c7a182cfbe1b6f17868146428a7e2"
author | bgruening |
---|---|
date | Tue, 13 Apr 2021 22:04:06 +0000 |
parents | ead7adad8d0e |
children | caf7d2b71a48 |
rev | line source |
---|---|
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
1 import argparse |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
2 import json |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
3 import warnings |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
4 |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
5 import numpy as np |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
6 import pandas as pd |
10
a9e0b963b7bb
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 208a8d348e7c7a182cfbe1b6f17868146428a7e2"
bgruening
parents:
9
diff
changeset
|
7 from galaxy_ml.utils import get_module, load_model, read_columns, try_get_attr |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
8 from scipy.io import mmread |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
9 from sklearn.pipeline import Pipeline |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
10 |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
11 N_JOBS = int(__import__("os").environ.get("GALAXY_SLOTS", 1)) |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
12 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
13 |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
14 def main( |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
15 inputs, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
16 infile_estimator, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
17 outfile_predict, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
18 infile_weights=None, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
19 infile1=None, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
20 fasta_path=None, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
21 ref_seq=None, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
22 vcf_path=None, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
23 ): |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
24 """ |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
25 Parameter |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
26 --------- |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
27 inputs : str |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
28 File path to galaxy tool parameter |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
29 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
30 infile_estimator : strgit |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
31 File path to trained estimator input |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
32 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
33 outfile_predict : str |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
34 File path to save the prediction results, tabular |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
35 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
36 infile_weights : str |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
37 File path to weights input |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
38 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
39 infile1 : str |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
40 File path to dataset containing features |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
41 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
42 fasta_path : str |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
43 File path to dataset containing fasta file |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
44 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
45 ref_seq : str |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
46 File path to dataset containing the reference genome sequence. |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
47 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
48 vcf_path : str |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
49 File path to dataset containing variants info. |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
50 """ |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
51 warnings.filterwarnings("ignore") |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
52 |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
53 with open(inputs, "r") as param_handler: |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
54 params = json.load(param_handler) |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
55 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
56 # load model |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
57 with open(infile_estimator, "rb") as est_handler: |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
58 estimator = load_model(est_handler) |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
59 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
60 main_est = estimator |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
61 if isinstance(estimator, Pipeline): |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
62 main_est = estimator.steps[-1][-1] |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
63 if hasattr(main_est, "config") and hasattr(main_est, "load_weights"): |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
64 if not infile_weights or infile_weights == "None": |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
65 raise ValueError( |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
66 "The selected model skeleton asks for weights, " "but dataset for weights wan not selected!" |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
67 ) |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
68 main_est.load_weights(infile_weights) |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
69 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
70 # handle data input |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
71 input_type = params["input_options"]["selected_input"] |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
72 # tabular input |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
73 if input_type == "tabular": |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
74 header = "infer" if params["input_options"]["header1"] else None |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
75 column_option = params["input_options"]["column_selector_options_1"]["selected_column_selector_option"] |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
76 if column_option in [ |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
77 "by_index_number", |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
78 "all_but_by_index_number", |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
79 "by_header_name", |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
80 "all_but_by_header_name", |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
81 ]: |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
82 c = params["input_options"]["column_selector_options_1"]["col1"] |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
83 else: |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
84 c = None |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
85 |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
86 df = pd.read_csv(infile1, sep="\t", header=header, parse_dates=True) |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
87 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
88 X = read_columns(df, c=c, c_option=column_option).astype(float) |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
89 |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
90 if params["method"] == "predict": |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
91 preds = estimator.predict(X) |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
92 else: |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
93 preds = estimator.predict_proba(X) |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
94 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
95 # sparse input |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
96 elif input_type == "sparse": |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
97 X = mmread(open(infile1, "r")) |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
98 if params["method"] == "predict": |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
99 preds = estimator.predict(X) |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
100 else: |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
101 preds = estimator.predict_proba(X) |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
102 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
103 # fasta input |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
104 elif input_type == "seq_fasta": |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
105 if not hasattr(estimator, "data_batch_generator"): |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
106 raise ValueError( |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
107 "To do prediction on sequences in fasta input, " |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
108 "the estimator must be a `KerasGBatchClassifier`" |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
109 "equipped with data_batch_generator!" |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
110 ) |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
111 pyfaidx = get_module("pyfaidx") |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
112 sequences = pyfaidx.Fasta(fasta_path) |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
113 n_seqs = len(sequences.keys()) |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
114 X = np.arange(n_seqs)[:, np.newaxis] |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
115 seq_length = estimator.data_batch_generator.seq_length |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
116 batch_size = getattr(estimator, "batch_size", 32) |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
117 steps = (n_seqs + batch_size - 1) // batch_size |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
118 |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
119 seq_type = params["input_options"]["seq_type"] |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
120 klass = try_get_attr("galaxy_ml.preprocessors", seq_type) |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
121 |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
122 pred_data_generator = klass(fasta_path, seq_length=seq_length) |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
123 |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
124 if params["method"] == "predict": |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
125 preds = estimator.predict(X, data_generator=pred_data_generator, steps=steps) |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
126 else: |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
127 preds = estimator.predict_proba(X, data_generator=pred_data_generator, steps=steps) |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
128 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
129 # vcf input |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
130 elif input_type == "variant_effect": |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
131 klass = try_get_attr("galaxy_ml.preprocessors", "GenomicVariantBatchGenerator") |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
132 |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
133 options = params["input_options"] |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
134 options.pop("selected_input") |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
135 if options["blacklist_regions"] == "none": |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
136 options["blacklist_regions"] = None |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
137 |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
138 pred_data_generator = klass(ref_genome_path=ref_seq, vcf_path=vcf_path, **options) |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
139 |
5
2b8406e74f9e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5b2ac730ec6d3b762faa9034eddd19ad1b347476"
bgruening
parents:
1
diff
changeset
|
140 pred_data_generator.set_processing_attrs() |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
141 |
1
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
142 variants = pred_data_generator.variants |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
143 |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
144 # predict 1600 sample at once then write to file |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
145 gen_flow = pred_data_generator.flow(batch_size=1600) |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
146 |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
147 file_writer = open(outfile_predict, "w") |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
148 header_row = "\t".join(["chrom", "pos", "name", "ref", "alt", "strand"]) |
1
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
149 file_writer.write(header_row) |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
150 header_done = False |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
151 |
1
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
152 steps_done = 0 |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
153 |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
154 # TODO: multiple threading |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
155 try: |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
156 while steps_done < len(gen_flow): |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
157 index_array = next(gen_flow.index_generator) |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
158 batch_X = gen_flow._get_batches_of_transformed_samples(index_array) |
1
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
159 |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
160 if params["method"] == "predict": |
1
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
161 batch_preds = estimator.predict( |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
162 batch_X, |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
163 # The presence of `pred_data_generator` below is to |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
164 # override model carrying data_generator if there |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
165 # is any. |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
166 data_generator=pred_data_generator, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
167 ) |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
168 else: |
1
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
169 batch_preds = estimator.predict_proba( |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
170 batch_X, |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
171 # The presence of `pred_data_generator` below is to |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
172 # override model carrying data_generator if there |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
173 # is any. |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
174 data_generator=pred_data_generator, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
175 ) |
1
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
176 |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
177 if batch_preds.ndim == 1: |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
178 batch_preds = batch_preds[:, np.newaxis] |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
179 |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
180 batch_meta = variants[index_array] |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
181 batch_out = np.column_stack([batch_meta, batch_preds]) |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
182 |
1
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
183 if not header_done: |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
184 heads = np.arange(batch_preds.shape[-1]).astype(str) |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
185 heads_str = "\t".join(heads) |
1
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
186 file_writer.write("\t%s\n" % heads_str) |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
187 header_done = True |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
188 |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
189 for row in batch_out: |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
190 row_str = "\t".join(row) |
1
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
191 file_writer.write("%s\n" % row_str) |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
192 |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
193 steps_done += 1 |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
194 |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
195 finally: |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
196 file_writer.close() |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
197 # TODO: make api `pred_data_generator.close()` |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
198 pred_data_generator.close() |
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
199 return 0 |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
200 # end input |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
201 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
202 # output |
1
cc49634df38f
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit ba6a47bdf76bbf4cb276206ac1a8cbf61332fd16"
bgruening
parents:
0
diff
changeset
|
203 if len(preds.shape) == 1: |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
204 rval = pd.DataFrame(preds, columns=["Predicted"]) |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
205 else: |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
206 rval = pd.DataFrame(preds) |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
207 |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
208 rval.to_csv(outfile_predict, sep="\t", header=True, index=False) |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
209 |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
210 |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
211 if __name__ == "__main__": |
0
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
212 aparser = argparse.ArgumentParser() |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
213 aparser.add_argument("-i", "--inputs", dest="inputs", required=True) |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
214 aparser.add_argument("-e", "--infile_estimator", dest="infile_estimator") |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
215 aparser.add_argument("-w", "--infile_weights", dest="infile_weights") |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
216 aparser.add_argument("-X", "--infile1", dest="infile1") |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
217 aparser.add_argument("-O", "--outfile_predict", dest="outfile_predict") |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
218 aparser.add_argument("-f", "--fasta_path", dest="fasta_path") |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
219 aparser.add_argument("-r", "--ref_seq", dest="ref_seq") |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
220 aparser.add_argument("-v", "--vcf_path", dest="vcf_path") |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
221 args = aparser.parse_args() |
68aaa903052a
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
bgruening
parents:
diff
changeset
|
222 |
9
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
223 main( |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
224 args.inputs, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
225 args.infile_estimator, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
226 args.outfile_predict, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
227 infile_weights=args.infile_weights, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
228 infile1=args.infile1, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
229 fasta_path=args.fasta_path, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
230 ref_seq=args.ref_seq, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
231 vcf_path=args.vcf_path, |
ead7adad8d0e
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit e2a5eade6d0e5ddf3a47630381a0ad90d80e8a04"
bgruening
parents:
5
diff
changeset
|
232 ) |