changeset 0:ce46f2008024 draft default tip

planemo upload for repository https://github.com/bernt-matthias/mb-galaxy-tools/tools/tox_tools/baseline_calculator commit 008f820fb9b8ec547e00205f809f982b8f4b8318
author mbernt
date Tue, 09 Apr 2024 07:51:18 +0000
parents
children
files qsar1.py qsar1.xml test-data/functions.tabular test-data/qsar.tabular test-data/qsar_result.tsv test-data/qsar_result_2.tsv test-data/result.tsv test-data/table.tsv
diffstat 8 files changed, 129 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qsar1.py	Tue Apr 09 07:51:18 2024 +0000
@@ -0,0 +1,39 @@
+import argparse
+import re
+
+import pandas as pd
+
+parser = argparse.ArgumentParser(description='Calculate baseline toxicity for different aquatic species')
+parser.add_argument('--function', type=str, choices=['calculate_baseline', 'apply_linear_functions'],
+                    help='Function to execute')
+parser.add_argument('--csv_input', type=argparse.FileType('r'), help='Path to the input csv file')
+parser.add_argument('--functions_csv', type=argparse.FileType('r'), default=None,
+                    help='Path to the csv file containing functions (only for apply_linear_functions)')
+parser.add_argument('--output', type=argparse.FileType('w'), help='Path for the output csv file')
+args = parser.parse_args()
+
+if args.function == 'calculate_baseline':  # pre-defined QSARs, evaluated on the first column of the input table
+    df = pd.read_csv(args.csv_input)
+    df.iloc[:, 0] = df.iloc[:, 0].astype(int)  # NOTE(review): drops the fractional part of logD (test data: 1.36 -> 1.0, 3.6 -> 3.0) - confirm this is intended
+    df['Caenorhabditis elegans [mol/L]'] = 10 ** (-(0.81 * df.iloc[:, 0] + 1.15))  # EC50 from log(1/EC50) = 0.81 * logD + 1.15
+    df['Daphia magna [mol/L]'] = 10 ** (-(0.82 * df.iloc[:, 0] + 1.48))  # NOTE(review): 'Daphia' is presumably a typo for 'Daphnia'; the header is pinned by test-data/qsar_result.tsv
+    df['Danio rerio [mol/L]'] = 10 ** (-(0.99 * df.iloc[:, 0] + 0.78))  # EC50 from log(1/EC50) = 0.99 * logD + 0.78
+    df['Generic Human Cells [mol/L]'] = 0.026 / (10 ** df.iloc[:, 0]) * (1 + 10 ** (0.7 * df.iloc[:, 0] + 0.34) * 3 * 0.001 + 10 ** 3 * 0.07 * 0.001)
+    df.to_csv(args.output, index=False)  # written comma-separated (to_csv default), without the index column
+
+elif args.function == 'apply_linear_functions':
+    df = pd.read_csv(args.csv_input)
+    functions_df = pd.read_csv(args.functions_csv)
+
+    def parse_and_apply_equation(equation, x_values):
+        # Extract 'a' and 'b' from the equation  (assuming the format 'ax+b' or 'ax-b')
+        pattern = re.compile(r'([+-]?\d*\.?\d*)x([+-]\d+)?')
+        match = pattern.search(equation)
+        a = float(match.group(1)) if match.group(1) not in ('', '+', '-') else 1.0
+        b = float(match.group(2)) if match.group(2) else 0
+        return a * x_values + b
+
+    for i, row in functions_df.iterrows():
+        func = row['function']
+        df[f'result_{i}'] = parse_and_apply_equation(func, df['logD'])
+    df.to_csv(args.output, index=False)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/qsar1.xml	Tue Apr 09 07:51:18 2024 +0000
@@ -0,0 +1,67 @@
+<tool id="tt_baseline" name="Baseline toxicity calculator" version="0.1.0+galaxy0">
+    <description>Toxicity prediction tool</description>
+    <creator>
+        <organization name="Helmholtz Centre for Environmental Research - UFZ, Department of Ecotoxicology" url ="https://www.ufz.de/index.php?en=34241"/>
+    </creator>
+    <requirements>
+        <requirement type="package" version="2.2.1">pandas</requirement>
+    </requirements>
+    <command detect_errors="aggressive"><![CDATA[
+    python '$__tool_directory__/qsar1.py'
+     --function '$function_cond.function'
+     --csv_input '$csv_input'
+     #if $function_cond.function == "apply_linear_functions"
+     --functions_csv '$function_cond.function_csv'
+     #end if
+     --output '$output'
+    ]]></command>
+ <inputs>
+    <conditional name="function_cond">
+        <param name="function" type="select" label="Process to execute" help="Select pre-set QSAR models or personal linear functions">
+            <option value="calculate_baseline">Pre-defined QSARs</option>
+            <option value="apply_linear_functions">Personal Linear Functions</option>
+        </param>
+        <when value="calculate_baseline">
+        </when>
+        <when value="apply_linear_functions">
+            <param name="function_csv" type="data" label="Personal linear functions input" format="tabular" help="Input your linear functions as tabular file with the column header 'function'"/>
+        </when>
+    </conditional>
+    <param name="csv_input" type="data" label="logD data input" format="tabular" help="Input your logD data as tabular file"/>
+</inputs>
+    <outputs>
+        <data name="output" format="tabular"/>
+    </outputs>
+        <tests>
+        <test>
+            <param name="function" value="calculate_baseline"/>
+            <param name="csv_input" value="qsar.tabular"/>
+            <output name="output" value="qsar_result.tsv" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="function" value="apply_linear_functions"/>
+            <param name="csv_input" value="qsar.tabular"/>
+            <param name="function_csv" value="functions.tabular"/>
+            <output name="output" value="qsar_result_2.tsv" ftype="tabular"/>
+        </test>
+    </tests>
+    <help>
+        Features:
+        *calculate_baseline*
+        *** Estimate the EC50 for four different species. EC50 are expressed in mol/L ***
+        + Danio rerio log(1/EC50)= 0.99 * logDlipw + 0.78
+        + Caenorhabditis elegans log(1/EC50)= 0.81 * logDlipw + 1.15
+        + Daphnia magna log(1/EC50)= 0.82 * logDlipw + 1.48
+        + Generic Human Cell EC50 = 0.026 / (10**logDlipw) * (1 + 10**(0.7*logDlipw+0.34) * 3 * 0.001 + 10**(3) * 0.07 * 0.001)
+        *apply_linear_functions*
+        *** Estimate the EC50 based on user-supplied linear functions (assuming the format 'ax+b' or 'ax-b') ***
+
+        Input format: Tabular file with a column header and integer values for rows (apply_linear_functions reads the column named logD)
+        Functions input: Tabular file with the single column header 'function' and one linear function in the format 'ax+b' or 'ax-b' per row
+    </help>
+    <citations>
+    <citation type="doi">10.1021/acs.est.2c05217</citation>
+    <citation type="doi">10.1016/j.aquatox.2018.12.003</citation>
+    <citation type="doi">10.1016/S0048-9697(98)00157-0</citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/functions.tabular	Tue Apr 09 07:51:18 2024 +0000
@@ -0,0 +1,4 @@
+function
+2x+5
+1.87x-0.5
+2.36x+1.02
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/qsar.tabular	Tue Apr 09 07:51:18 2024 +0000
@@ -0,0 +1,5 @@
+logD
+2
+1.36
+5
+3.6
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/qsar_result.tsv	Tue Apr 09 07:51:18 2024 +0000
@@ -0,0 +1,5 @@
+logD,Caenorhabditis elegans [mol/L],Daphia magna [mol/L],Danio rerio [mol/L],Generic Human Cells [mol/L]
+2.0,0.0016982436524617442,0.0007585775750291835,0.0017378008287493763,0.00032106418816089466
+1.0,0.01096478196143185,0.005011872336272725,0.016982436524617443,0.0028675252992991685
+5.0,6.309573444801917e-06,2.6302679918953817e-06,1.8620871366628656e-06,5.674481573167704e-06
+3.0,0.00026302679918953814,0.0001148153621496883,0.00017782794100389227,4.9302983886037675e-05
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/qsar_result_2.tsv	Tue Apr 09 07:51:18 2024 +0000
@@ -0,0 +1,5 @@
+logD,result_0,result_1,result_2
+2.0,9.0,3.74,5.72
+1.36,7.720000000000001,2.5432,4.2096
+5.0,15.0,9.350000000000001,12.799999999999999
+3.6,12.2,6.732,9.496
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/result.tsv	Tue Apr 09 07:51:18 2024 +0000
@@ -0,0 +1,2 @@
+Organism	log(1/EC)
+zebrafish	3.255
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/table.tsv	Tue Apr 09 07:51:18 2024 +0000
@@ -0,0 +1,2 @@
+Organism	log(1/EC)
+elegans	1.2309999999999999
\ No newline at end of file