# HG changeset patch
# User mbernt
# Date 1712649078 0
# Node ID ce46f200802454df5c892abafecfe8cc8fcd665f
# planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tools/tox_tools/baseline_calculator
# commit 008f820fb9b8ec547e00205f809f982b8f4b8318
# File: qsar1.py
"""Toxicity prediction tool.

Features:

calculate_baseline
    Estimate the EC50 for four different species from the first column of
    the input (logD). EC50 values are expressed in mol/L.

    Danio rerio             log(1/EC50) = 0.99 * logD + 0.78
    Caenorhabditis elegans  log(1/EC50) = 0.81 * logD + 1.15
    Daphnia magna           log(1/EC50) = 0.82 * logD + 1.48
    Generic human cells     EC50 = 0.026 / 10**logD
                                   * (1 + 10**(0.7 * logD + 0.34) * 3 * 0.001
                                      + 10**3 * 0.07 * 0.001)

apply_linear_functions
    Apply user-supplied linear functions (format 'ax+b' or 'ax-b') to the
    'logD' column; one 'result_<i>' column is written per function.

Input format: file with a column header and one value per row
(calculate_baseline truncates the values to integers).
Functions input: file with the column header 'function' and one linear
function per row, e.g. '2x+5', '1.87x-0.5', '2.36x+1.02'.

References (DOI):
    10.1021/acs.est.2c05217
    10.1016/j.aquatox.2018.12.003
    10.1016/S0048-9697(98)00157-0
"""
import argparse
import re

import pandas as pd

# One linear function: optional sign, optional decimal slope, the variable x,
# then an optional signed decimal intercept. Sign and digits are captured
# separately so that a bare sign ('-x') and decimal intercepts ('-0.5') are
# both handled.
LINEAR_FUNCTION = re.compile(
    r'\s*([+-]?)\s*(\d+\.?\d*|\.\d+)?\s*\*?\s*x'
    r'\s*(?:([+-])\s*(\d+\.?\d*|\.\d+))?\s*'
)


def parse_and_apply_equation(equation, x_values):
    """Evaluate a linear function given as text on x_values.

    Parameters:
        equation: text in the format 'ax+b' or 'ax-b', e.g. '1.87x-0.5'.
            A missing slope means 1 ('x+2', '-x'); a missing intercept
            means 0.
        x_values: a number or a pandas Series the function is applied to.

    Returns:
        a * x_values + b, with the same shape as x_values.

    Raises:
        ValueError: if the whole text is not a linear function in x.
    """
    # fullmatch instead of search: a partial match would silently drop the
    # part of the text that was not understood (e.g. the decimals of b).
    match = LINEAR_FUNCTION.fullmatch(str(equation))
    if match is None:
        raise ValueError(
            f"Cannot parse linear function {equation!r}, "
            "expected the format 'ax+b' or 'ax-b'"
        )
    a_sign, a_digits, b_sign, b_digits = match.groups()
    a = float(a_digits) if a_digits else 1.0
    if a_sign == '-':
        a = -a
    b = float(b_digits) if b_digits else 0.0
    if b_sign == '-':
        b = -b
    return a * x_values + b


def calculate_baseline(df):
    """Add the EC50 estimates [mol/L] of the four species to df and return it.

    The first column of df is used as logD and is overwritten with its
    values truncated to integers.
    """
    df.iloc[:, 0] = df.iloc[:, 0].astype(int)
    df['Caenorhabditis elegans [mol/L]'] = 10 ** (-(0.81 * df.iloc[:, 0] + 1.15))
    # NOTE: 'Daphia' (sic, Daphnia magna) is kept as is because the expected
    # test output of the tool uses this column name.
    df['Daphia magna [mol/L]'] = 10 ** (-(0.82 * df.iloc[:, 0] + 1.48))
    df['Danio rerio [mol/L]'] = 10 ** (-(0.99 * df.iloc[:, 0] + 0.78))
    # Float base: an integer base raised to a negative integer logD is
    # rejected by numpy.
    df['Generic Human Cells [mol/L]'] = (
        0.026 / (10.0 ** df.iloc[:, 0])
        * (1 + 10 ** (0.7 * df.iloc[:, 0] + 0.34) * 3 * 0.001 + 10 ** 3 * 0.07 * 0.001)
    )
    return df


def apply_linear_functions(df, functions_df):
    """Add one 'result_<i>' column per row of functions_df['function'] to df.

    Each function is applied to df['logD']; <i> is the row label of the
    function in functions_df.
    """
    for i, func in functions_df['function'].items():
        df[f'result_{i}'] = parse_and_apply_equation(func, df['logD'])
    return df


def main(argv=None):
    """Parse the command line, run the selected function and write the CSV."""
    parser = argparse.ArgumentParser(description='Calculate baseline toxicity for different aquatic species')
    parser.add_argument('--function', type=str, required=True,
                        choices=['calculate_baseline', 'apply_linear_functions'],
                        help='Function to execute')
    parser.add_argument('--csv_input', type=argparse.FileType('r'), required=True,
                        help='Path to the input csv file')
    parser.add_argument('--functions_csv', type=argparse.FileType('r'), default=None,
                        help='Path to the csv file containing functions (only for apply_linear_functions)')
    parser.add_argument('--output', type=argparse.FileType('w'), required=True,
                        help='Path for the output csv file')
    args = parser.parse_args(argv)

    df = pd.read_csv(args.csv_input)
    if args.function == 'calculate_baseline':
        df = calculate_baseline(df)
    else:
        if args.functions_csv is None:
            parser.error('--functions_csv is required for apply_linear_functions')
        df = apply_linear_functions(df, pd.read_csv(args.functions_csv))
    df.to_csv(args.output, index=False)


if __name__ == '__main__':
    main()