view GeoNearestNeighbor.xml @ 5:e5fc90e44599 draft default tip

planemo upload for repository https://github.com/galaxyecology/tools-ecology/tree/master/tools/Ecoregionalization_workflow commit 9dee19b2d28b61a81f2a89d4c7d35678e31a9927
author ecology
date Wed, 23 Jul 2025 14:35:55 +0000
parents 1d225ca5fac0
children
line wrap: on
line source

<tool id="ecoregion_GeoNearestNeighbor" name="GeoNearestNeighbor" version="0.1.2+galaxy0" profile="23.2">
    <!-- Galaxy wrapper around the GeoNN.R script: takes two tabular datasets plus their latitude/longitude column positions. -->
    <description>Merge two data tables by finding the closest points based on latitude and longitude coordinates (WGS84 projection).</description>
    <requirements>
        <!-- R runtime plus the R packages installed alongside GeoNN.R; versions are pinned. -->
        <requirement type="package" version="4.3.2">r-base</requirement>
        <requirement type="package" version="1.3.1">r-tidyr</requirement>
        <requirement type="package" version="1.0_15">r-sf</requirement>
        <requirement type="package" version="1.1.4">r-dplyr</requirement>
    </requirements>
    <!--
        Runs GeoNN.R with six positional arguments:
          1. environment table    2. environment longitude column    3. environment latitude column
          4. occurrence table     5. occurrence latitude column      6. occurrence longitude column
        The coordinate order differs between the two tables (long/lat for the
        environment table, lat/long for the occurrence table).
        NOTE(review): presumably this is the order GeoNN.R reads its arguments in; confirm against the script.
        detect_errors="exit_code": the job outcome is decided from the exit code of the command.
    -->
    <command detect_errors="exit_code"><![CDATA[
        Rscript
         '$__tool_directory__/GeoNN.R'
         '$env_file.env'
         '$env_file.envgeolong'
         '$env_file.envgeolat'
         '$occ_file.occ'
         '$occ_file.occgeolat'
         '$occ_file.occgeolong'
    ]]></command>
    <inputs>
        <!-- Table 1: environmental data and the columns holding its coordinates. -->
        <section name="env_file" title="Your environment file (or table 1)">
            <param name="env" type="data" format="tabular" label="Input your environment data file (tabular format only)" help="See example below"/>
            <!-- multiple="false": exactly one column can be picked per coordinate, so the labels use the singular. -->
            <param name="envgeolat" type="data_column" data_ref="env" multiple="false" use_header_names="true" label="Choose the column containing the latitude in your environment data file"/>
            <param name="envgeolong" type="data_column" data_ref="env" multiple="false" use_header_names="true" label="Choose the column containing the longitude in your environment data file"/>
        </section>
        <!-- Table 2: occurrence data and the columns holding its coordinates. -->
        <section name="occ_file" title="Your occurrence file (or table 2)">
            <param name="occ" type="data" format="tabular" label="Input your occurrence data file (tabular format only)" help="See example below"/>
            <param name="occgeolat" type="data_column" data_ref="occ" multiple="false" use_header_names="true" label="Choose the column containing the latitude in your occurrence data file"/>
            <param name="occgeolong" type="data_column" data_ref="occ" multiple="false" use_header_names="true" label="Choose the column containing the longitude in your occurrence data file"/>
        </section>
    </inputs>
    <outputs>
        <!-- Both datasets are collected from fixed file names in the job working directory. -->
        <data name="occ_env_out"
              format="tabular"
              from_work_dir="occurrence_env.tsv"
              label="Merged table"/>
        <data name="info_out"
              format="tabular"
              from_work_dir="infos_file.tsv"
              label="Information file"/>
    </outputs>
    <tests>
        <!-- Single functional test: environment table (long in column 1, lat in column 2) against occurrence table (lat in column 1, long in column 2). -->
        <test>
            <section name="env_file">
                <param name="env" value="ceamarc_env.tsv"/>
                <param name="envgeolat" value="2"/>
                <param name="envgeolong" value="1"/>
            </section>
            <section name="occ_file">
                <param name="occ" value="fish_wide.tsv"/>
                <param name="occgeolat" value="1"/>
                <param name="occgeolong" value="2"/>
            </section>
            <!-- Merged table: only the approximate file size is checked. -->
            <output name="occ_env_out">
                <assert_contents>
                    <has_size value="2234" delta="50"/>
                </assert_contents>
            </output>
            <!-- Information file: three columns and the expected header names. -->
            <output name="info_out">
                <assert_contents>
                    <has_n_columns n="3"/>
                    <has_text text="occ_geometry"/>
                    <has_text text="env_geometry"/>
                    <has_text text="Distances (meters)"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[

==================    
**What it does ?**
==================

This Galaxy tool allows you to merge two data tables (tabular format only) according to their latitude and longitude coordinates (in **WGS84** projection), finding the closest points.
This tool can be used as part of the Ecoregionalization workflow data preparation that allows you to create ecoregions from occurrence and environmental data. 

===================         
**How to use it ?**
===================
        
This tool takes as input the environmental data and the species occurrence data (see the input examples below). Both files must be in tabular format. You also need to select the columns that contain the latitude and the longitude in each file.
For the most accurate matching, the latitude and longitude should have the same precision in both files.

This tool produces two output files:

 - An information file containing the coordinates of the occurrence data, the coordinates retained from the environmental data and the distance between the two. See example below.
 - An occurrence and environment merge file containing the occurrence data together with the corresponding environmental data. See example below.

#############################################################

**Example of occurence data input:**
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+----------+-----------+------------------------+-----------+
|   lat    |   long    |Acanthorhabdus_fragilis | Acarnidae |
+----------+-----------+------------------------+-----------+
|-67.22    |139.96     |           0            |     1     |
+----------+-----------+------------------------+-----------+
|-66.52    | 140       |           0            |     1     |
+----------+-----------+------------------------+-----------+
|   ...    |   ...     |          ...           |    ...    |
+----------+-----------+------------------------+-----------+ 

#############################################################

**Example of environmental data input:**
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+------+------+---------+------+--------------+-----+
| long | lat  |  Carbo  | Grav |  Maxbearing  | ... |
+------+------+---------+------+--------------+-----+
|140.13|-66.7 |   0.88  |28.59 |     3.67     | ... |
+------+------+---------+------+--------------+-----+
|140   |-66.52|   0.88  |28.61 |     3.64     | ... |
+------+------+---------+------+--------------+-----+
| ...  | ...  |   ...   | ...  |     ...      | ... |
+------+------+---------+------+--------------+-----+ 

#####################################################

**Example of information file output:**
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+-------------------+------------------+--------------------+
|   occ_geometry    |   env_geometry   | Distances (meters) |
+-------------------+------------------+--------------------+
| c(139.96, -67.22) | c(140.13, -66.7) |       58292.77     |
+-------------------+------------------+--------------------+
| c(140, -66.52)    | c(140, -66.52)   |          0         |
+-------------------+------------------+--------------------+
|   ...             |   ...            |         ...        |
+-------------------+------------------+--------------------+
 
#############################################################

**Example of Occurrence and environment merge file output:**
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+----------+-----------+------------------------+-----------+---------+------+--------------+-----+
|   lat    |   long    |Acanthorhabdus_fragilis | Acarnidae |  Carbo  | Grav |  Maxbearing  | ... |
+----------+-----------+------------------------+-----------+---------+------+--------------+-----+
|-67.22    |  139.96   |           0            |     1     |   0.88  |28.59 |     3.67     | ... |
+----------+-----------+------------------------+-----------+---------+------+--------------+-----+
|-66.52    |    140    |           0            |     1     |   0.88  |28.61 |     3.64     | ... |
+----------+-----------+------------------------+-----------+---------+------+--------------+-----+
|   ...    |   ...     |          ...           |    ...    |   ...   | ...  |     ...      | ... |
+----------+-----------+------------------------+-----------+---------+------+--------------+-----+
    ]]></help>
    <citations>
        <!-- NOTE(review): the first DOI looks like an R Journal article (RJ-2018 prefix), presumably the sf paper; the other two are CRAN package DOIs for dplyr and sf. Confirm. -->
        <citation type="doi">10.32614/RJ-2018-009</citation>
        <citation type="doi">10.32614/CRAN.package.dplyr</citation>
        <citation type="doi">10.32614/CRAN.package.sf</citation>
    </citations>
</tool>