view larch_athena.xml @ 5:27015eaf9a78 draft

planemo upload for repository https://github.com/MaterialsGalaxy/larch-tools/tree/main/larch_athena commit 47b59eb1f13fe9f4893acbf9e3e77341d303de73
author muon-spectroscopy-computational-project
date Mon, 20 May 2024 15:34:58 +0000
parents a0d3b0fe0fa3
children 30cdfd70f28d
line wrap: on
line source

<tool id="larch_athena" name="Larch Athena" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@" python_template_version="3.5" profile="22.05" license="MIT">
    <description>generate Athena projects from XAFS data</description>
    <!--
        Tool-local macros: version and citation tokens, plus reusable parameter
        fragments that are expanded in both the merged and non-merged input
        branches of this tool.
    -->
    <macros>
        <!-- version of underlying tool (PEP 440) -->
        <token name="@TOOL_VERSION@">0.9.75</token>
        <!-- version of this tool wrapper (integer) -->
        <token name="@WRAPPER_VERSION@">1</token>
        <!-- citation should be updated with every underlying tool version -->
        <!-- typical fields to update are version, month, year, and doi -->
        <token name="@TOOL_CITATION@">10.1088/1742-6596/430/1/012007</token>
        <!-- Selector between plain data files and existing Athena projects -->
        <xml name="format">
            <param name="format" type="select" display="radio" label="Input format" help="Whether data is in tabular or NeXus (h5) format, or has already been saved as an Athena project">
                <option value="plaintext" selected="true">NeXus/tabular</option>
                <option value="athena">Athena project</option>
            </param>
        </xml>
        <!-- Which group(s) to pull out of an Athena project: one, several named ones, or all -->
        <xml name="extract_group">
            <conditional name="extract_group">
                <param name="extract_group" type="select" label="Group extraction" help="Method of handling group extraction. Extracting all or multiple named groups will result in multiple outputs, unless merging groups is also true.">
                    <option value="single" selected="true">Extract single</option>
                    <option value="multiple">Extract multiple</option>
                    <option value="all">Extract all</option>
                </param>
                <when value="single">
                    <param name="group_name" type="text" optional="true" label="Group label" help="Which group to extract and process from the Athena project (will use first group in file if unset)"/>
                </when>
                <when value="multiple">
                    <repeat name="multiple" min="1" default="1" title="Groups">
                        <param name="group_name" type="text" label="Group label" help="Which group to extract and process from the Athena project (will use first group in file if unset)"/>
                    </repeat>
                </when>
                <when value="all"/>
            </conditional>
        </xml>
        <!-- Column selection for plain data; exactly one option per select is pre-selected ("auto") -->
        <xml name="columns">
            <conditional name="energy_column">
                <param name="energy_column" type="select" label="Energy column" help="If set, this column will be used as 'energy'. Otherwise, will identify the first column ending with 'energy' or labelled 'col1' or 'Ef'.">
                    <option value="auto" selected="true">Automatic</option>
                    <option value="qexafs_energy">qexafs_energy</option>
                    <option value="Ef">Ef</option>
                    <option value="other">Other</option>
                </param>
                <when value="auto"/>
                <when value="qexafs_energy"/>
                <when value="Ef"/>
                <when value="other">
                    <param name="energy_column_text" type="text" label="Energy column" help="The column to be used as 'energy'."/>
                </when>
            </conditional>
            <conditional name="mu_column">
                <param name="mu_column" type="select" label="μ column" help="If set, this column will be used as 'mu'. Otherwise, will identify the first column labelled as either 'col2', 'xmu', 'lni0it', 'FFI0' or 'FF/I1'.">
                    <option value="auto" selected="true">Automatic</option>
                    <option value="lnI0It">lnI0It</option>
                    <option value="FFI0">FFI0</option>
                    <option value="FF/I1">FF/I1</option>
                    <option value="other">Other</option>
                </param>
                <when value="auto"/>
                <when value="lnI0It"/>
                <when value="FFI0"/>
                <when value="FF/I1"/>
                <when value="other">
                    <param name="mu_column_text" type="text" optional="true" label="μ column" help="The column to be used as 'mu'."/>
                </when>
            </conditional>
        </xml>
        <!-- Radio toggle: the empty value means "not zipped", "true" means a zip archive is supplied -->
        <xml name="is_zipped">
            <param name="is_zipped" type="select" display="radio" label="Inputs Zipped" help="Whether input files are zipped together into one directory, or not.">
                <option value="" selected="true">No</option>
                <option value="true">Yes</option>
            </param>
        </xml>
        <import>macros.xml</import>
    </macros>
    <!-- Author metadata for the wrapper -->
    <creator>
        <person givenName="Patrick" familyName="Austin" url="https://github.com/patrick-austin" identifier="https://orcid.org/0000-0002-6279-7823"/>
    </creator>
    <!-- Packages needed at run time; xraylarch is pinned through the @TOOL_VERSION@ token -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">xraylarch</requirement>
        <requirement type="package" version="3.5.2">matplotlib</requirement>
        <!-- zip and unzip are invoked directly from the command template -->
        <requirement type="package" version="3.0">zip</requirement>
        <requirement type="package" version="6.0">unzip</requirement>
    </requirements>
    <!-- Scripts from the tool directory that the job needs; larch_athena.py is the entry point called by the command -->
    <required_files>
        <include type="literal" path="larch_athena.py"/>
        <include type="literal" path="common.py"/>
    </required_files>
    <!--
        Job command line (Cheetah template). Creates the output directories,
        optionally unpacks a zipped input, runs larch_athena.py with the data
        location and the JSON parameter file, and optionally zips the results.
    -->
    <command detect_errors="exit_code"><![CDATA[
        ## Regex support for sanitising the dataset label echoed below.
        #import re
        ## prj receives the Athena project output(s), plot receives any rendered graphs.
        mkdir prj plot
        #if $merge_inputs.format.format=="plaintext":
            #if $merge_inputs.format.is_zipped.is_zipped=="true":
                ## The dataset label is user-controlled, so reduce it to a safe
                ## character set before placing it inside shell quotes.
                #set $zip_label = re.sub('[^A-Za-z0-9_.-]', '_', str($merge_inputs.format.is_zipped.dat_file.element_identifier))
                && echo Unzipping '$zip_label'
                && unzip '$merge_inputs.format.is_zipped.dat_file' -d dat_files
                && python '${__tool_directory__}/larch_athena.py' dat_files '$inputs'
            #else
                && python '${__tool_directory__}/larch_athena.py' '$merge_inputs.format.is_zipped.dat_file' '$inputs'
            #end if
        #else
            && python '${__tool_directory__}/larch_athena.py' '$merge_inputs.format.dat_file' '$inputs'
        #end if
        #if $zip_outputs:
            ## Archive both output directories created above. An empty plot
            ## directory only produces a zip warning, not a failure.
            ## NOTE(review): the has_size expectation of the zipped-output test was
            ## presumably measured without plots; re-measure it after this change.
            && zip out_zip.zip prj/* plot/*
        #end if
    ]]></command>
    <!-- Exposes the tool parameters as a file whose path is available as $inputs; the command passes it to larch_athena.py -->
    <configfiles>
        <inputs name="inputs"/>
    </configfiles>
    <!--
        User-facing parameters. The top-level conditional chooses between
        processing each input separately and merging all inputs; both branches
        reuse the format, is_zipped, columns and extract_group macros and differ
        only in whether multiple datasets may be selected.
    -->
    <inputs>
        <conditional name="merge_inputs">
            <param name="merge_inputs" type="select" display="radio" label="Merge multiple inputs" help="Whether to merge all input data into one Athena project, by default each dataset or file in a zip will result in a separate output.">
                <option value="" selected="true">No</option>
                <option value="true">Yes</option>
            </param>
            <!-- Not merging: a single dataset (or a single zip) is accepted -->
            <when value="">
                <conditional name="format">
                    <expand macro="format"/>
                    <when value="plaintext">
                        <conditional name="is_zipped">
                            <expand macro="is_zipped"/>
                            <when value="">
                                <param name="dat_file" type="data" format="h5,tabular" label="XAFS data file" help="X-ray Absorption Fine Structure (XAFS) data, either in NeXus (h5) or tabular format."/>
                            </when>
                            <when value="true">
                                <param name="dat_file" type="data" format="zip" label="Zipped XAFS data files" help="Zipped X-ray Absorption Fine Structure (XAFS) data, either in NeXus (h5) or tabular format."/>
                            </when>
                        </conditional>
                        <expand macro="columns"/>
                    </when>
                    <when value="athena">
                        <param name="dat_file" type="data" format="prj" label="Athena project" help="X-ray Absorption Spectroscopy (XAS) data, in Athena project format"/>
                        <expand macro="extract_group"/>
                    </when>
                </conditional>
            </when>
            <!-- Merging: several datasets (or one zip) are combined into one project -->
            <when value="true">
                <conditional name="format">
                    <expand macro="format"/>
                    <when value="plaintext">
                        <conditional name="is_zipped">
                            <expand macro="is_zipped"/>
                            <when value="">
                                <param name="dat_file" type="data" format="h5,txt" multiple="true" label="XAFS data file" help="X-ray Absorption Fine Structure (XAFS) data, either in NeXus (h5) or tabular format, which will be merged. Accepts one or more individual files."/>
                            </when>
                            <when value="true">
                                <param name="dat_file" type="data" format="zip" label="Zipped XAFS data files" help="Zipped X-ray Absorption Fine Structure (XAFS) data, either in NeXus (h5) or tabular format. All files in the zip will be merged."/>
                            </when>
                        </conditional>
                        <expand macro="columns"/>
                    </when>
                    <when value="athena">
                        <param name="dat_file" type="data" format="prj" multiple="true" label="Athena project" help="X-ray Absorption Spectroscopy (XAS) data, which will be merged, in Athena project format. Note that when merging Athena groups, background parameters set in file will be lost, and automatic default used unless defined below."/>
                        <expand macro="extract_group"/>
                    </when>
                </conditional>
            </when>
        </conditional>
        <param name="annotation" type="text" label="Annotation" optional="true" help="If set, will annotate the output project(s) with this string. This will be used to generate legends when plotting data."/>
        <section name="processing" expanded="true" title="Processing Options" help="By default, the following processing steps will be performed either with default values, or those contained in the input Athena project (if used). If specified here, these values will be used instead, with the steps applied in sequence.">
            <!-- Optional energy calibration and cropping -->
            <conditional name="calibrate">
                <param name="calibrate" type="select" display="radio" label="Calibrate energy" help="If set, will shift the spectrum so that its (automatically determined) edge occurs at the specified value, and any values outside the range will be discarded.">
                    <option value="" selected="true">False</option>
                    <option value="true">True</option>
                </param>
                <when value=""/>
                <when value="true">
                    <param name="calibration_e0" type="float" label="Calibration energy (eV)" optional="true" help="If set, data will be calibrated so that the edge occurs at this energy (after merging, if relevant). Note that this is different from specifying E0 in the pre-edge normalization, as this will shift the x-axis of the data (so that the Nth x point will no longer align with the Nth y point)."/>
                    <param name="energy_min" type="float" label="Minimum energy (eV)" optional="true" help="If set, data will be cropped below this value in electron volts (after re-calibrating)."/>
                    <param name="energy_max" type="float" label="Maximum energy (eV)" optional="true" help="If set, data will be cropped above this value in electron volts (after re-calibrating)."/>
                </when>
            </conditional>
            <param name="rebin" type="boolean" label="Re-bin data" help="Whether to re-bin along the energy axis to automatically ensure appropriate levels of precision in the pre-edge, near-edge and extended region of the spectrum."/>
            <!-- Pre-edge normalization: defaults, settings derived from a reference channel, or manual values -->
            <conditional name="pre_edge">
                <param name="pre_edge" type="select" display="radio" label="Pre-edge normalization" help="If set, will (re)perform forward pre-edge normalization using provided values.">
                    <option value="" selected="true">Use default values</option>
                    <option value="ref">Use reference channel</option>
                    <option value="manual">Use manual values</option>
                </param>
                <when value=""/>
                <when value="ref">
                    <param name="ref_channel" type="text" label="Reference channel" help="The channel to be used as a reference to determine the pre-edge settings which are then used for the main signal. This can help when the main signal is affected by signal noise."/>
                </when>
                <when value="manual">
                    <param argument="e0" type="float" label="Edge energy (eV)" optional="true" help="If set, normalization will use this as the location of the edge rather than automatically determining it."/>
                    <param argument="pre1" type="float" max="0" label="Pre-edge fit lower energy (eV)" optional="true" help="The lower end of the region used for the pre-edge fitting, relative to the edge energy (and therefore negative)."/>
                    <param argument="pre2" type="float" max="0" label="Pre-edge fit upper energy (eV)" optional="true" help="The upper end of the region used for the pre-edge fitting, relative to the edge energy (and therefore negative)."/>
                    <param argument="norm1" type="float" min="0" label="Post-edge fit lower energy (eV)" optional="true" help="The lower end of the region used for the post-edge fitting, relative to the edge energy (and therefore positive)."/>
                    <param argument="norm2" type="float" min="0" label="Post-edge fit upper energy (eV)" optional="true" help="The upper end of the region used for the post-edge fitting, relative to the edge energy (and therefore positive)."/>
                    <param argument="nnorm" type="integer" min="0" max="5" label="Post-edge fit polynomial degree" optional="true" help="The degree of the polynomial used to fit in the post-edge region."/>
                    <param argument="step" type="float" min="0" label="Edge step" optional="true" help="Magnitude of the step in μ between the pre and post-edge regions at the edge energy."/>
                    <param argument="nvict" type="integer" label="Energy exponent" optional="true" help="Edge fitting will be performed against μ*E**n where n is defined here. This is 0 by default."/>
                </when>
            </conditional>
            <!-- Optional forward Fourier transform settings -->
            <conditional name="xftf">
                <param name="xftf" type="select" display="radio" label="XFTF" help="If set, will (re)perform forward Fourier Transform using provided values.">
                    <option value="" selected="true">False</option>
                    <option value="true">True</option>
                </param>
                <when value=""/>
                <when value="true">
                    <param argument="kmin" type="float" optional="true" min="0.0" help="Minimum k value."/>
                    <param argument="kmax" type="float" optional="true" min="0.0" help="Maximum k value."/>
                    <param argument="kweight" type="float" optional="true" help="Exponent for weighting spectra by raising k to this power."/>
                    <param argument="dk" type="float" optional="true" help="Tapering parameter for Fourier Transform window."/>
                    <param argument="window" type="select" optional="true" help="Fourier Transform window type.">
                        <option value="hanning">Hanning (cosine-squared taper)</option>
                        <option value="parzen">Parzen (linear taper)</option>
                        <option value="welch">Welch (quadratic taper)</option>
                        <option value="gaussian">Gaussian function window</option>
                        <option value="sine">Sine function window</option>
                        <option value="kaiser">Kaiser-Bessel function-derived window</option>
                    </param>
                </when>
            </conditional>
        </section>
        <!-- Any selection here enables the plot outputs -->
        <param name="plot_graph" type="select" multiple="true" display="checkboxes" label="Plot graphs">
            <option value="edge">Pre/post edge fitting</option>
            <option value="flat">Flattened xμ</option>
            <option value="dmude">Derivative of xμ</option>
        </param>
        <param name="zip_outputs" type="boolean" label="Zip outputs" help="Whether to zip all outputs into one dataset."/>
    </inputs>
    <!--
        Output selection. Exactly one family of outputs is active per run:
        the zip when zip_outputs is set, otherwise either the single datasets
        or the collections. The long filter expression shared below is true
        when inputs are NOT merged and the run can yield several results
        (a zipped plain-data input, or an Athena project with an extraction
        mode other than "single"); the single-dataset outputs negate it and
        the collections use it as-is.
    -->
    <outputs>
        <!-- Single output if zipping outputs -->
        <data name="out_zip" format="zip" from_work_dir="out_zip.zip" label="Zipped Athena project(s) ${annotation} ${on_string}">
            <filter>zip_outputs</filter>
        </data>
        <!-- Single outputs of different types if merging, or not using a zip -->
        <data name="athena_project_file" format="prj" from_work_dir="prj/out.prj" label="Athena project ${annotation} ${on_string}">
            <filter>not zip_outputs</filter>
            <filter>not (merge_inputs["merge_inputs"] == "" and ((merge_inputs["format"]["format"] == "plaintext" and merge_inputs["format"]["is_zipped"]["is_zipped"]) or (merge_inputs["format"]["format"] == "athena" and merge_inputs["format"]["extract_group"]["extract_group"] != "single")))</filter>
        </data>
        <!-- The plot is only produced when at least one graph type was ticked -->
        <data name="plot" format="png" from_work_dir="plot/out.png" label="Plot of ${annotation} ${on_string}">
            <filter>plot_graph</filter>
            <filter>not zip_outputs</filter>
            <filter>not (merge_inputs["merge_inputs"] == "" and ((merge_inputs["format"]["format"] == "plaintext" and merge_inputs["format"]["is_zipped"]["is_zipped"]) or (merge_inputs["format"]["format"] == "athena" and merge_inputs["format"]["extract_group"]["extract_group"] != "single")))</filter>
        </data>
        <!-- Directories of outputs if using single, non-merged zip as input or extracting multiple/all Athena groups -->
        <collection name="athena_project_file_collection" format="prj" type="list" label="Athena projects ${annotation} ${on_string}">
            <!-- Every file found in the prj working directory becomes a list element -->
            <discover_datasets pattern="__name_and_ext__" directory="prj"/>
            <filter>not zip_outputs</filter>
            <filter>merge_inputs["merge_inputs"] == "" and ((merge_inputs["format"]["format"] == "plaintext" and merge_inputs["format"]["is_zipped"]["is_zipped"]) or (merge_inputs["format"]["format"] == "athena" and merge_inputs["format"]["extract_group"]["extract_group"] != "single"))</filter>
        </collection>
        <collection name="plot_collection" format="png" type="list" label="Plots of ${annotation} ${on_string}">
            <!-- Every file found in the plot working directory becomes a list element -->
            <discover_datasets pattern="__name_and_ext__" directory="plot"/>
            <filter>plot_graph</filter>
            <filter>not zip_outputs</filter>
            <filter>merge_inputs["merge_inputs"] == "" and ((merge_inputs["format"]["format"] == "plaintext" and merge_inputs["format"]["is_zipped"]["is_zipped"]) or (merge_inputs["format"]["format"] == "athena" and merge_inputs["format"]["extract_group"]["extract_group"] != "single"))</filter>
        </collection>
    </outputs>
    <!--
        Functional tests. Most outputs are checked by approximate file size
        (has_size with a delta) rather than by content; collection outputs are
        checked by element count.
    -->
    <tests>
        <!-- 1: Default settings with a single .xmu data file -->
        <test expect_num_outputs="1">
            <param name="dat_file" value="test.xmu"/>
            <output name="athena_project_file">
                <assert_contents>
                    <has_size value="5400" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- 2: Default settings with the ffi0.tabular data file -->
        <test expect_num_outputs="1">
            <param name="dat_file" value="ffi0.tabular"/>
            <output name="athena_project_file">
                <assert_contents>
                    <has_size value="34400" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- 3: Default settings with the ffi1.tabular data file -->
        <test expect_num_outputs="1">
            <param name="dat_file" value="ffi1.tabular"/>
            <output name="athena_project_file">
                <assert_contents>
                    <has_size value="4400" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- 4: Single input with all three graph types plotted -->
        <test expect_num_outputs="2">
            <param name="dat_file" value="test.xmu"/>
            <param name="plot_graph" value="edge,flat,dmude"/>
            <output name="athena_project_file">
                <assert_contents>
                    <has_size value="5400" delta="100"/>
                </assert_contents>
            </output>
            <output name="plot">
                <assert_contents>
                    <has_size value="138100" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- 5: Zipped input without merging, producing project and plot collections -->
        <test expect_num_outputs="2">
            <param name="is_zipped" value="true"/>
            <param name="dat_file" value="test.zip"/>
            <param name="plot_graph" value="edge,flat,dmude"/>
            <output_collection name="athena_project_file_collection" type="list" count="2"/>
            <output_collection name="plot_collection" type="list" count="2"/>
        </test>
        <!-- 6: Zipped input with plots requested and all outputs zipped into one dataset -->
        <!-- NOTE(review): the expected size depends on which directories the command adds to the archive; re-measure if the zip contents change -->
        <test expect_num_outputs="1">
            <param name="is_zipped" value="true"/>
            <param name="dat_file" value="h5.zip"/>
            <param name="plot_graph" value="edge,flat,dmude"/>
            <param name="zip_outputs" value="true"/>
            <output name="out_zip">
                <assert_contents>
                    <has_size value="75500" delta="500"/>
                </assert_contents>
            </output>
        </test>
        <!-- 7: Calibration enabled with only a minimum energy crop -->
        <test expect_num_outputs="1">
            <param name="dat_file" value="test.xmu"/>
            <param name="calibrate" value="true"/>
            <param name="energy_min" value="7000"/>
            <output name="athena_project_file">
                <assert_contents>
                    <has_size value="5300" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- 8: Calibration enabled with minimum and maximum energy crop, plus plots -->
        <test expect_num_outputs="2">
            <param name="dat_file" value="test.xmu"/>
            <param name="calibrate" value="true"/>
            <param name="energy_min" value="7000"/>
            <param name="energy_max" value="7200"/>
            <param name="plot_graph" value="edge,flat,dmude"/>
            <output name="athena_project_file">
                <assert_contents>
                    <has_size value="3300" delta="50"/>
                </assert_contents>
            </output>
            <output name="plot">
                <assert_contents>
                    <has_size value="137700" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- 9: Calibration to an explicit edge energy with minimum and maximum energy crop -->
        <test expect_num_outputs="1">
            <param name="dat_file" value="test.xmu"/>
            <param name="calibrate" value="true"/>
            <param name="calibration_e0" value="7050"/>
            <param name="energy_min" value="7000"/>
            <param name="energy_max" value="7200"/>
            <output name="athena_project_file">
                <assert_contents>
                    <has_size value="3600" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- 10: Re-binning enabled -->
        <test expect_num_outputs="1">
            <param name="dat_file" value="test.xmu"/>
            <param name="rebin" value="true"/>
            <output name="athena_project_file">
                <assert_contents>
                    <has_size value="8413" delta="10"/>
                </assert_contents>
            </output>
        </test>
        <!-- 11: Merge two NeXus (.nxs) inputs into one project -->
        <test expect_num_outputs="1">
            <param name="merge_inputs" value="true"/>
            <param name="dat_file" value="262875_PtSn_OCO_Abu_1.nxs,262876_PtSn_OCO_Abu_2.nxs"/>
            <output name="athena_project_file">
                <assert_contents>
                    <has_size value="37550" delta="50"/>
                </assert_contents>
            </output>
        </test>
        <!-- 12: Merge the contents of a zipped input into one project -->
        <test expect_num_outputs="1">
            <param name="merge_inputs" value="true"/>
            <param name="is_zipped" value="true"/>
            <param name="dat_file" value="test.zip"/>
            <output name="athena_project_file">
                <assert_contents>
                    <has_size value="18000" delta="50"/>
                </assert_contents>
            </output>
        </test>
        <!-- 13: Test merging and plotting multiple prj inputs -->
        <test expect_num_outputs="2">
            <param name="merge_inputs" value="true"/>
            <param name="format" value="athena"/>
            <param name="dat_file" value="test.prj,test.prj"/>
            <param name="plot_graph" value="edge,flat,dmude"/>
            <output name="athena_project_file">
                <assert_contents>
                    <has_size value="4500" delta="100"/>
                </assert_contents>
            </output>
            <output name="plot">
                <assert_contents>
                    <has_size value="145700" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- 14: Single Athena project input with the default group extraction -->
        <test expect_num_outputs="1">
            <param name="format" value="athena"/>
            <param name="dat_file" value="test.prj"/>
            <output name="athena_project_file">
                <assert_contents>
                    <has_size value="5400" delta="200"/>
                </assert_contents>
            </output>
        </test>
        <!-- 15: Extract multiple groups from Athena .prj -->
        <!-- NOTE(review): the two group_name values target the "multiple" repeat; confirm each is bound to its own repeat instance -->
        <test expect_num_outputs="1">
            <param name="format" value="athena"/>
            <param name="extract_group" value="multiple"/>
            <param name="group_name" value="merge"/>
            <param name="group_name" value="d_Ref_PtSn_OC_MERGE_CALIBRATE"/>
            <param name="dat_file" value="multiple.prj"/>
            <output_collection name="athena_project_file_collection" type="list" count="2"/>
        </test>
        <!-- 16: Extract all groups from Athena .prj -->
        <test expect_num_outputs="1">
            <param name="format" value="athena"/>
            <param name="extract_group" value="all"/>
            <param name="dat_file" value="multiple.prj"/>
            <output_collection name="athena_project_file_collection" type="list" count="9"/>
        </test>
        <!-- 17: Extract and merge all groups from Athena .prj -->
        <test expect_num_outputs="1">
            <param name="merge_inputs" value="true"/>
            <param name="format" value="athena"/>
            <param name="extract_group" value="all"/>
            <param name="dat_file" value="multiple.prj"/>
            <output name="athena_project_file">
                <assert_contents>
                    <has_size value="26000" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- 18: Use ref channel -->
        <test expect_num_outputs="1">
            <param name="dat_file" value="ffi0.tabular"/>
            <param name="pre_edge" value="ref"/>
            <param name="ref_channel" value="lnitiref"/>
            <output name="athena_project_file">
                <assert_contents>
                    <has_size value="34400" delta="100"/>
                </assert_contents>
            </output>
        </test>
        <!-- 19: Zip file with nested directories -->
        <test expect_num_outputs="2">
            <param name="is_zipped" value="true"/>
            <param name="dat_file" value="nested.zip"/>
            <param name="plot_graph" value="edge,flat,dmude"/>
            <output_collection name="athena_project_file_collection" type="list" count="2"/>
            <output_collection name="plot_collection" type="list" count="2"/>
        </test>
    </tests>
    <help><![CDATA[
        Using Larch, create an Athena project file from the input X-ray Absorption Fine Structure (XAFS) data file.
        
        Accepts both tabular and NeXus/HDF5 formatted data or a zip file containing these formats.
        If column names are not present in tabular data, then the first column is treated as `energy` and the second as `mu`.
        Note that in order to ensure a consistent output, once unzipped all files will be sorted first by their parent directories (alphabetically).
        Within a given directory, if all filenames contain digits then the last block of digits will be used to sort the files numerically.
        In the output, all files (regardless of initial filepath) are written to a flat hierarchy, each named with the (zero-padded) position at which it was processed.
        Care should therefore be taken to ensure input data is consistent with this treatment.

        Optionally, plot the xμ data along with pre and post edge fitting lines for visual inspection. 
    ]]></help>
    <!-- References shown to users; the first DOI is supplied by the @TOOL_CITATION@ token defined in the macros -->
    <citations>
        <citation type="doi">@TOOL_CITATION@</citation>
        <citation type="doi">10.1107/S0909049505012719</citation>
    </citations>
</tool>