Mercurial > repos > tduigou > save_to_db

diff seq_to_db.xml @ 0:7ff266aecf01 draft default tip
planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit 3401816c949b538bd9c67e61cbe92badff6a4007-dirty
author: tduigou
date: Wed, 11 Jun 2025 09:42:24 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/seq_to_db.xml	Wed Jun 11 09:42:24 2025 +0000
@@ -0,0 +1,148 @@
+<tool id="seq_to_db" name="Save Sequence Data In DB" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.09">
+    <description>Save fragment's sequence in an accessible database and import it from .gb files</description>
+    <macros>
+        <token name="@VERSION_SUFFIX@">0</token>
+        <token name="@TOOL_VERSION@">0.1.0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="2.2.3">pandas</requirement>
+        <requirement type="package" version="2.0.40">sqlalchemy</requirement>
+        <requirement type="package" version="2.9.9">psycopg2</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        #set genbank_file_paths = ','.join([str(f) for f in $genbank_files])
+        #set $file_name_mapping = ",".join(["%s:%s" % (file.file_name, file.name) for file in $genbank_files])
+        python '$__tool_directory__/save_to_db.py'
+            --input '$genbank_file_paths' 
+            --sequence_column '$sequence_column'
+            --annotation_column '$annotation_column'
+            --db_uri '$db_uri' 
+            --table '$table' 
+            --fragment_column '$fragment_column'
+            --output '$output'
+            --file_name_mapping '$file_name_mapping'
+            --json_conf '$json_conf'
+    ]]></command>
+    <inputs>
+        <param name="genbank_files" type="data_collection" collection_type="list" format="genbank" label="GenBank File(s)"/>
+        <param name="table" type="text" label="DB Table Name" optional="true" help="It can be extracted from JSON file -key:'JSON_table'-" />
+        <param name="sequence_column" type="text" label="DB Column Contains Sequence For ganbank File" optional="true" help="It can be extracted from JSON file -key:'JSON_sequence_column'-" />
+        <param name="annotation_column" type="text" label="DB Column Contains Annotation For Ganbank File" optional="true" help="It can be extracted from JSON file -key:'JSON_annotation_column'-" />
+        <param name="fragment_column" type="text" label="DB IDs Column Name" optional="true" help="It can be extracted from JSON file -key:'JSON_fragment_column'-" />
+        <param name="db_uri" type="text" label="DB Connection URI" help="postgresql://container_name:password@host:port/path/to/database (It can be extracted from JSON file -key:'JSON_db_uri'-)" optional="true" />
+        <section name='adv' title='Advance' expanded='false'>
+            <param name="json_conf" type="data" format='json' label="DB config as a json file" help="JSON file specifying the database URI, table name and the column names for annotation and sequence data" optional="true" />
+        </section>
+    </inputs>   
+    <outputs>
+        <data name="output" format="txt" label="saving report" />
+    </outputs>
+    <tests>
+    <!--Only 1 test can be execute because the fragment will be already saved for the second test and it will return error as the fragments are present in the DB (execut ../get_db_data/testMock.py to regenerate initial DB)-->
+        <!--test tool blocking from JSON. It is commented because it should faild as it is a test to validate that the tool can be blocked from json file -->
+        <test> 
+            <param name="genbank_files">
+                <collection type="list">
+                    <element name="p7_L7Ae-Weiss" value="p7_L7Ae-Weiss.gb" />
+                    <element name="p7_gfp_sequence" value="p6_Nt-IgKLsequence.gb" />
+                    <element name="p14_CMVp" value="p6_Kozak-ATG.gb" />
+                    <element name="p16_bGHpolyA" value="p4_Kt-L7Ae-Weiss.gb" />
+                    <element name="p18_CMVp" value="HC_Amp_ccdB.gb" />
+                </collection>
+            </param>
+            <param name="adv|json_conf" value="test-JSON_arg_block.json" />
+            <output name="output" file="test_raport.txt" ftype="txt" >
+                <assert_contents>
+                     <has_n_lines n="5" />
+                     <has_line_matching expression="p7_L7Ae-Weiss" />
+                     <has_line_matching expression="p6_Nt-IgKLsequence" />
+                     <has_line_matching expression="p6_Kozak-ATG" />
+                     <has_line_matching expression="p4_Kt-L7Ae-Weiss" />
+                     <has_line_matching expression="HC_Amp_ccdB" />
+                </assert_contents>
+            </output>
+        </test>
+        <!--test DB config in the tool -->
+        <test> 
+            <param name="genbank_files">
+                <collection type="list">
+                    <element name="p7_L7Ae-Weiss" value="p7_L7Ae-Weiss.gb" />
+                    <element name="p7_gfp_sequence" value="p6_Nt-IgKLsequence.gb" />
+                    <element name="p14_CMVp" value="p6_Kozak-ATG.gb" />
+                    <element name="p16_bGHpolyA" value="p4_Kt-L7Ae-Weiss.gb" />
+                    <element name="p18_CMVp" value="HC_Amp_ccdB.gb" />
+                </collection>
+            </param>
+            <param name="table" value="sample" />
+            <param name="sequence_column" value="sequence" />
+            <param name="annotation_column" value="annotation" />
+            <param name="fragment_column" value="fragment" />
+            <param name="db_uri" value="postgresql://postgres:RK17@localhost:5432/test_fragments_db" />
+            <output name="output" file="test_raport.txt" ftype="txt" >
+                <assert_contents>
+                     <has_n_lines n="5" />
+                     <has_line_matching expression="p7_L7Ae-Weiss" />
+                     <has_line_matching expression="p6_Nt-IgKLsequence" />
+                     <has_line_matching expression="p6_Kozak-ATG" />
+                     <has_line_matching expression="p4_Kt-L7Ae-Weiss" />
+                     <has_line_matching expression="HC_Amp_ccdB" />
+                </assert_contents>
+            </output>
+        </test>
+        <!--test DB config from JSON. It is commented because the save can be done only on time then the fragment willl be in the DB and it will return a failure. to run the test comment the test above (one saving test in the run)-->
+        <test> 
+            <param name="genbank_files">
+                <collection type="list">
+                    <element name="p7_L7Ae-Weiss" value="p7_L7Ae-Weiss.gb" />
+                    <element name="p7_gfp_sequence" value="p6_Nt-IgKLsequence.gb" />
+                    <element name="p14_CMVp" value="p6_Kozak-ATG.gb" />
+                    <element name="p16_bGHpolyA" value="p4_Kt-L7Ae-Weiss.gb" />
+                    <element name="p18_CMVp" value="HC_Amp_ccdB.gb" />
+                </collection>
+            </param>
+            <param name="adv|json_conf" value="test-JSON_arg.json" />
+            <output name="output" file="test_raport.txt" ftype="txt" >
+                <assert_contents>
+                     <has_n_lines n="5" />
+                     <has_line_matching expression="p7_L7Ae-Weiss" />
+                     <has_line_matching expression="p6_Nt-IgKLsequence" />
+                     <has_line_matching expression="p6_Kozak-ATG" />
+                     <has_line_matching expression="p4_Kt-L7Ae-Weiss" />
+                     <has_line_matching expression="HC_Amp_ccdB" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+Save Sequence Data In DB
+========================
+
+Implemented a system to save GenBank (.gb) files in an accessible DB, based on a connection via URI requests.
+
+**Parameters**:
+---------------
+* **GenBank File(s)**: List of GenBaks files.
+* **DB Table Name**: Name of the target table in the PostgreSQL database.
+* **DB Column Contains Sequence For ganbank File**: Column storing sequence data, expected to start with "ORIGIN". 
+* **DB Column Contains Annotation For Ganbank File**: Column containing annotation data, to save al part before "ORIGIN" in the .gb file.
+* **DB IDs Column Name**: Column holding the unique fragment IDs.
+* **DB Connection URI**: URI used to connect to the database (e.g., postgresql://postgres:pass@localhost:5432/test_fragments_db).
+* **DB config as a json file**: JSON file contains the DB configuration:
+    - "JSON_table": will be the key to the table name.
+    - "JSON_sequence_column":  will be the key to the sequence column.
+    - "JSON_annotation_column":  will be the key to the annotation column.
+    - "JSON_fragment_column": will be the key to the fragment column.
+    - "JSON_db_uri": will be the key to the URI.
+    - "execution": It is the key to execute or block the tool during a workflow ("True" or "False").
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+            @unpublished{seq_to_db
+                author = {Ramiz Khaled},
+                title = {{seq_to_db}},
+                url = {https://github.com/brsynth/},
+            }
+        </citation>
+    </citations>
+</tool>
\ No newline at end of file
author	tduigou
date	Wed, 11 Jun 2025 09:42:24 +0000
parents
children