Mercurial > repos > imgteam > curve_fitting
comparison curve_fitting.py @ 0:8bf2c507af3a draft
"planemo upload for repository https://github.com/BMCV/galaxy-image-analysis/tree/master/tools/curve_fitting/ commit ef82d0882741042922349499cafa35d20d70ce70"
| author | imgteam |
|---|---|
| date | Thu, 22 Jul 2021 19:34:36 +0000 |
| parents | |
| children | e0af18405e37 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:8bf2c507af3a |
|---|---|
| 1 """ | |
| 2 Copyright 2021 Biomedical Computer Vision Group, Heidelberg University. | |
| 3 Author: Qi Gao (qi.gao@bioquant.uni-heidelberg.de) | |
| 4 | |
| 5 Distributed under the MIT license. | |
| 6 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | |
| 7 | |
| 8 """ | |
| 9 | |
| 10 import argparse | |
| 11 | |
| 12 import numpy as np | |
| 13 import pandas as pd | |
| 14 from scipy.optimize import least_squares | |
| 15 | |
| 16 | |
| 17 def compute_curve(x, par): | |
| 18 assert len(par) in [2, 3], 'The degree of curve must be 1 or 2!' | |
| 19 if len(par) == 3: | |
| 20 return par[0] * x + par[1] + par[2] * x ** 2 | |
| 21 elif len(par) == 2: | |
| 22 return par[0] * x + par[1] | |
| 23 | |
| 24 | |
| 25 def fitting_err(par, xx, seq, penalty): | |
| 26 assert penalty in ['abs', 'quadratic', 'student-t'], 'Unknown penalty function!' | |
| 27 curve = compute_curve(xx, par) | |
| 28 if penalty == 'abs': | |
| 29 err = np.sqrt(np.abs(curve - seq)) | |
| 30 elif penalty == 'quadratic': | |
| 31 err = (curve - seq) | |
| 32 elif penalty == 'student-t': | |
| 33 a = 1000 | |
| 34 b = 0.001 | |
| 35 err = np.sqrt(a * np.log(1 + (b * (curve - seq)) ** 2)) | |
| 36 return err | |
| 37 | |
| 38 | |
| 39 def curve_fitting(seq, degree=2, penalty='abs'): | |
| 40 assert len(seq) > 5, 'Data is too short for curve fitting!' | |
| 41 assert degree in [1, 2], 'The degree of curve must be 1 or 2!' | |
| 42 | |
| 43 par0 = [(seq[-1] - seq[0]) / len(seq), np.mean(seq[0:3])] | |
| 44 if degree == 2: | |
| 45 par0.append(-0.001) | |
| 46 | |
| 47 xx = np.array([i for i in range(len(seq))]) + 1 | |
| 48 x = np.array(par0, dtype='float64') | |
| 49 result = least_squares(fitting_err, x, args=(xx, seq, penalty)) | |
| 50 | |
| 51 return compute_curve(xx, result.x) | |
| 52 | |
| 53 | |
| 54 def curve_fitting_io(fn_in, fn_out, degree=2, penalty='abs', alpha=0.01): | |
| 55 # read all sheets | |
| 56 xl = pd.ExcelFile(fn_in) | |
| 57 nSpots = len(xl.sheet_names) | |
| 58 data_all = [] | |
| 59 for i in range(nSpots): | |
| 60 df = pd.read_excel(xl, xl.sheet_names[i]) | |
| 61 data_all.append(np.array(df)) | |
| 62 col_names = df.columns.tolist() | |
| 63 ncols_ori = len(col_names) | |
| 64 | |
| 65 # curve_fitting | |
| 66 diff = np.array([], dtype=('float64')) | |
| 67 for i in range(nSpots): | |
| 68 seq = data_all[i][:, -1] | |
| 69 seq_fit = seq.copy() | |
| 70 idx_valid = ~np.isnan(seq) | |
| 71 seq_fit[idx_valid] = curve_fitting(seq[idx_valid], degree=2, penalty='abs') | |
| 72 data_all[i] = np.concatenate((data_all[i], seq_fit.reshape(-1, 1)), axis=1) | |
| 73 if alpha > 0: | |
| 74 diff = np.concatenate((diff, seq_fit[idx_valid] - seq[idx_valid])) | |
| 75 | |
| 76 # add assistive curve | |
| 77 if alpha > 0: | |
| 78 sorted_diff = np.sort(diff) | |
| 79 fac = 1 - alpha / 2 | |
| 80 sig3 = sorted_diff[int(diff.size * fac)] | |
| 81 for i in range(nSpots): | |
| 82 seq_assist = data_all[i][:, -1] + sig3 | |
| 83 data_all[i] = np.concatenate((data_all[i], seq_assist.reshape(-1, 1)), axis=1) | |
| 84 | |
| 85 # write to file | |
| 86 with pd.ExcelWriter(fn_out) as writer: | |
| 87 for i in range(nSpots): | |
| 88 df = pd.DataFrame() | |
| 89 for c in range(ncols_ori): | |
| 90 df[col_names[c]] = data_all[i][:, c] | |
| 91 df['CURVE'] = data_all[i][:, ncols_ori] | |
| 92 if alpha > 0: | |
| 93 df['CURVE_A'] = data_all[i][:, ncols_ori + 1] | |
| 94 df.to_excel(writer, sheet_name=xl.sheet_names[i], index=False, float_format='%.2f') | |
| 95 writer.save() | |
| 96 | |
| 97 | |
| 98 if __name__ == "__main__": | |
| 99 parser = argparse.ArgumentParser(description="Fit (1st- or 2nd-degree) polynomial curves to data points") | |
| 100 parser.add_argument("fn_in", help="File name of input data points (xlsx)") | |
| 101 parser.add_argument("fn_out", help="File name of output fitted curves (xlsx)") | |
| 102 parser.add_argument("degree", type=int, help="Degree of the polynomial function") | |
| 103 parser.add_argument("penalty", help="Optimization objective for fitting") | |
| 104 parser.add_argument("alpha", type=float, help="Significance level for generating assistive curves") | |
| 105 args = parser.parse_args() | |
| 106 curve_fitting_io(args.fn_in, args.fn_out, args.degree, args.penalty, args.alpha) |
