view seq_to_db.xml @ 2:dad2c4c3450a draft default tip

planemo upload for repository https://github.com/brsynth/galaxytools/tree/main/tools commit db4ac861e1d03fcdfe94321d858839124e493930-dirty
author tduigou
date Wed, 23 Jul 2025 09:44:33 +0000
parents 3daf04425ea1
children
line wrap: on
line source

<tool id="seq_to_db" name="Save Sequence Data In DB" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.09">
    <description>Save fragment's sequence in an accessible database and import it from .gb files</description>
    <macros>
        <!-- First part of the tool's version attribute ("@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"). -->
        <token name="@TOOL_VERSION@">0.3.0</token>
        <!-- Suffix appended after "+galaxy" in the tool's version attribute. -->
        <token name="@VERSION_SUFFIX@">2</token>
    </macros>
    <!-- Pinned package dependencies declared for this tool. -->
    <requirements>
        <requirement version="2.2.3" type="package">pandas</requirement>
        <requirement version="2.0.40" type="package">sqlalchemy</requirement>
        <requirement version="2.9.9" type="package">psycopg2</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Comma-separated string built from every dataset of the input collection.
        #set genbank_file_paths = ','.join([str(f) for f in $genbank_files])
        ## Comma-separated "file_name:name" pairs, one per dataset of the collection.
        ## Dataset names are user-editable and end up inside a single-quoted shell
        ## argument below, so single quotes are replaced to keep a crafted name from
        ## terminating the quoting.
        #set file_name_mapping = ",".join(["%s:%s" % (gb.file_name, str(gb.name).replace("'", "_")) for gb in $genbank_files])
        python '$__tool_directory__/save_to_db.py'
            --input '$genbank_file_paths'
            --use_json_paramers '$json_use.use_json_paramers'
            ## Either pass the DB settings typed in the tool form, or the JSON file holding them.
            #if not $json_use.use_json_paramers:
                --sequence_column '$json_use.sequence_column'
                --annotation_column '$json_use.annotation_column'
                --db_uri '$json_use.db_uri'
                --table '$json_use.table'
                --fragment_column '$json_use.fragment_column'
                --execution_enable '$json_use.execution_enable'
            #else:
                --json_conf '$json_use.json_conf'
            #end if
            --output '$output'
            --file_name_mapping '$file_name_mapping'
            --json_generating '$json_generating'
            --json_output '$json_output'
    ]]></command>
        <inputs>
        <!-- List collection of GenBank datasets handed to the script. -->
        <param name="genbank_files"
               type="data_collection"
               collection_type="list"
               format="genbank"
               label="GenBank File(s)" />
        <!-- DB settings come either from the form fields (false) or from a JSON dataset (true). -->
        <conditional name="json_use">
            <param name="use_json_paramers"
                   type="boolean"
                   checked="false"
                   label="Use parameters from JSON"
                   help="extract parameters from json file" />
            <when value="false">
                <param name="execution_enable"
                       type="boolean"
                       checked="true"
                       label="Send Requenst to DB"
                       help="enable or desable the interaction with the DB (can be usefull in workflows)" />
                <param name="table"
                       type="text"
                       optional="true"
                       label="DB Table Name"
                       help="It can be extracted from JSON file -key:'table'-" />
                <param name="sequence_column"
                       type="text"
                       optional="true"
                       label="DB Column Contains Sequence For ganbank File"
                       help="It can be extracted from JSON file -key:'sequence_column'-" />
                <param name="annotation_column"
                       type="text"
                       optional="true"
                       label="DB Column Contains Annotation For Ganbank File"
                       help="It can be extracted from JSON file -key:'annotation_column'-" />
                <param name="fragment_column"
                       type="text"
                       optional="true"
                       label="DB IDs Column Name"
                       help="It can be extracted from JSON file -key:'fragment_column'-" />
                <!-- NOTE(review): Galaxy may rewrite some characters of free-text values before they reach the command line; confirm save_to_db.py copes with the URI it actually receives. -->
                <param name="db_uri"
                       type="text"
                       optional="true"
                       label="DB Connection URI"
                       help="postgresql://user_name:password@host:port/path/to/database (It can be extracted from JSON file -key:'db_uri'-)" />
            </when>
            <when value="true">
                <param name="json_conf"
                       type="data"
                       format="json"
                       optional="false"
                       label="DB config as a json file"
                       help="JSON file specifying the database URI, table name and the column names for annotation and sequence data" />
            </when>
        </conditional>
        <!-- Controls whether the json_output dataset is produced (see the output filter). -->
        <param name="json_generating"
               type="boolean"
               checked="true"
               label="Generat Parameters in JSON"
               help="generat parameters in json file" />
    </inputs>
    <outputs>
        <!-- Text report passed to the script as its output argument. -->
        <data name="output" label="saving report" format="txt" />
        <!-- JSON parameter file; only created when the json_generating boolean is checked. -->
        <data name="json_output" label="seq_to_db_param" format="json">
            <filter>json_generating</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test: DB interaction disabled from the tool form (execution_enable=false). The report is expected to be empty and the JSON parameter file is still produced. -->
        <!-- NOTE(review): this test asserts 0 lines against test_raport.txt while the DB-writing tests assert 5 lines against the same file; confirm the expected file content. -->
        <test expect_num_outputs="2">
            <param name="genbank_files">
                <collection type="list">
                    <element name="p7_L7Ae-Weiss" value="p7_L7Ae-Weiss.gb" />
                    <element name="p7_gfp_sequence" value="p6_Nt-IgKLsequence.gb" />
                    <element name="p14_CMVp" value="p6_Kozak-ATG.gb" />
                    <element name="p16_bGHpolyA" value="p4_Kt-L7Ae-Weiss.gb" />
                    <element name="p18_CMVp" value="HC_Amp_ccdB.gb" />
                </collection>
            </param>
            <conditional name="json_use">
                <param name="use_json_paramers" value="false" />
                <param name="table" value="sample" />
                <param name="sequence_column" value="sequence" />
                <param name="annotation_column" value="annotation" />
                <param name="fragment_column" value="fragment" />
                <param name="db_uri" value="postgresql://postgres:RK17@localhost:5432/test_fragments_db" />
                <param name="execution_enable" value="false" />
            </conditional>
            <output name="output" file="test_raport.txt" ftype="txt">
                <assert_contents>
                    <has_n_lines n="0" />
                </assert_contents>
            </output>
            <output name="json_output">
                <assert_contents>
                    <has_n_lines n="8" />
                </assert_contents>
            </output>
        </test>
        <!-- Test: DB settings read from test-JSON_arg_block.json, which is meant to block the DB interaction. The report is expected to be empty and no JSON parameter file is produced. -->
        <test expect_num_outputs="1">
            <param name="genbank_files">
                <collection type="list">
                    <element name="p7_L7Ae-Weiss" value="p7_L7Ae-Weiss.gb" />
                    <element name="p7_gfp_sequence" value="p6_Nt-IgKLsequence.gb" />
                    <element name="p14_CMVp" value="p6_Kozak-ATG.gb" />
                    <element name="p16_bGHpolyA" value="p4_Kt-L7Ae-Weiss.gb" />
                    <element name="p18_CMVp" value="HC_Amp_ccdB.gb" />
                </collection>
            </param>
            <conditional name="json_use">
                <param name="use_json_paramers" value="true" />
                <param name="json_conf" value="test-JSON_arg_block.json" />
            </conditional>
            <param name="json_generating" value="false" />
            <output name="output" file="test_raport.txt" ftype="txt">
                <assert_contents>
                    <has_n_lines n="0" />
                </assert_contents>
            </output>
        </test>
        <!-- Only one saving test can succeed per run: once the fragments are stored, a second save reports an error because they are already present in the DB (run ../get_db_data/testMock.py to regenerate the initial DB). -->
        <!-- Test: DB settings typed in the tool form with the DB interaction enabled. The report lists the five saved files and the JSON parameter file is produced. -->
        <!-- NOTE(review): this test asserts 5 lines against test_raport.txt while the blocking tests assert 0 lines against the same file; confirm the expected file content. -->
        <test expect_num_outputs="2">
            <param name="genbank_files">
                <collection type="list">
                    <element name="p7_L7Ae-Weiss" value="p7_L7Ae-Weiss.gb" />
                    <element name="p7_gfp_sequence" value="p6_Nt-IgKLsequence.gb" />
                    <element name="p14_CMVp" value="p6_Kozak-ATG.gb" />
                    <element name="p16_bGHpolyA" value="p4_Kt-L7Ae-Weiss.gb" />
                    <element name="p18_CMVp" value="HC_Amp_ccdB.gb" />
                </collection>
            </param>
            <conditional name="json_use">
                <param name="use_json_paramers" value="false" />
                <param name="table" value="sample" />
                <param name="sequence_column" value="sequence" />
                <param name="annotation_column" value="annotation" />
                <param name="fragment_column" value="fragment" />
                <param name="db_uri" value="postgresql://postgres:RK17@localhost:5432/test_fragments_db" />
                <param name="execution_enable" value="true" />
            </conditional>
            <output name="output" file="test_raport.txt" ftype="txt">
                <assert_contents>
                    <has_n_lines n="5" />
                    <has_line_matching expression="p7_L7Ae-Weiss" />
                    <has_line_matching expression="p6_Nt-IgKLsequence" />
                    <has_line_matching expression="p6_Kozak-ATG" />
                    <has_line_matching expression="p4_Kt-L7Ae-Weiss" />
                    <has_line_matching expression="HC_Amp_ccdB" />
                </assert_contents>
            </output>
            <output name="json_output">
                <assert_contents>
                    <has_n_lines n="8" />
                </assert_contents>
            </output>
        </test>
        <!-- Test: DB settings read from test-JSON_arg.json. The fragments can only be saved once; afterwards they are already in the DB and saving fails, so run a single saving test per run (comment out the other one). -->
        <test expect_num_outputs="1">
            <param name="json_generating" value="false" />
            <conditional name="json_use">
                <param name="use_json_paramers" value="true" />
                <param name="json_conf" value="test-JSON_arg.json" />
            </conditional>
            <param name="genbank_files">
                <collection type="list">
                    <element name="p7_L7Ae-Weiss" value="p7_L7Ae-Weiss.gb" />
                    <element name="p7_gfp_sequence" value="p6_Nt-IgKLsequence.gb" />
                    <element name="p14_CMVp" value="p6_Kozak-ATG.gb" />
                    <element name="p16_bGHpolyA" value="p4_Kt-L7Ae-Weiss.gb" />
                    <element name="p18_CMVp" value="HC_Amp_ccdB.gb" />
                </collection>
            </param>
            <!-- The report must hold exactly one line per GenBank file. -->
            <output name="output" ftype="txt" file="test_raport.txt">
                <assert_contents>
                    <has_n_lines n="5" />
                    <has_line_matching expression="p7_L7Ae-Weiss" />
                    <has_line_matching expression="p6_Nt-IgKLsequence" />
                    <has_line_matching expression="p6_Kozak-ATG" />
                    <has_line_matching expression="p4_Kt-L7Ae-Weiss" />
                    <has_line_matching expression="HC_Amp_ccdB" />
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
Save Sequence Data In DB
========================

Saves GenBank (.gb) files into an accessible database, which is reached through a connection URI.

**Parameters**:
---------------
* **GenBank File(s)**: List of GenBank files.
* **DB Table Name**: Name of the target table in the PostgreSQL database.
* **DB Column Contains Sequence For ganbank File**: Column storing sequence data, expected to start with "ORIGIN". 
* **DB Column Contains Annotation For Ganbank File**: Column containing annotation data; it stores everything that comes before "ORIGIN" in the .gb file.
* **DB IDs Column Name**: Column holding the unique fragment IDs.
* **DB Connection URI**: URI used to connect to the database (e.g., postgresql://postgres:pass@localhost:5432/test_fragments_db).
* **Send Requenst to DB**: Enable or disable the interaction with the DB (can be useful in workflows).
* **DB config as a json file**: JSON file containing the DB configuration:

  - "table": key holding the table name.
  - "sequence_column": key holding the sequence column name.
  - "annotation_column": key holding the annotation column name.
  - "fragment_column": key holding the fragment column name.
  - "db_uri": key holding the connection URI.
  - "execution": key used to execute or block the tool during a workflow ("True" or "False").
    ]]></help>
    <citations>
        <!-- BibTeX entry for the tool; the comma after the citation key is required for the entry to parse. -->
        <citation type="bibtex">
            @unpublished{seq_to_db,
                author = {Ramiz Khaled},
                title = {{seq_to_db}},
                url = {https://github.com/brsynth/},
            }
        </citation>
    </citations>
</tool>