<!-- Mercurial > repos > iuc > snapatac2_plotting -->

<macros>
    <!-- Version tokens. @TOOL_VERSION@ is also used as the pinned snapatac2 package version in the "requirements" macro. -->
    <!-- NOTE(review): @VERSION_SUFFIX@ and @PROFILE@ are not referenced inside this file; presumably the tool wrappers consume them (confirm). -->
    <token name="@TOOL_VERSION@">2.8.0</token>
    <token name="@VERSION_SUFFIX@">0</token>
    <token name="@PROFILE@">24.0</token>
    <!-- Cross-reference block: a single bio.tools xref with the identifier "snapatac". -->
    <xml name="xrefs">
        <xrefs>
            <xref type="bio.tools">snapatac</xref>
        </xrefs>
    </xml>
    <!-- Package pins shared by the tools: snapatac2 at @TOOL_VERSION@ plus its helper libraries.
         Each package is listed exactly once; callers can append further entries through the yield.
         NOTE(review): this macro expands to bare requirement elements; presumably each tool wraps
         the expansion in its own requirements element (confirm against the tool wrappers). -->
    <xml name="requirements">
        <requirement type="package" version="@TOOL_VERSION@">snapatac2</requirement>
        <requirement type="package" version="0.8.37">hdbscan</requirement>
        <requirement type="package" version="0.10.2">leidenalg</requirement>
        <requirement type="package" version="0.5.7">umap-learn</requirement>
        <requirement type="package" version="3.0.4">xgboost</requirement>
        <requirement type="package" version="0.2.1">python-kaleido</requirement>
        <requirement type="package" version="1.31.0">polars</requirement>
        <requirement type="package" version="5.24.1">plotly</requirement>
        <requirement type="package" version="0.0.10">harmonypy</requirement>
        <requirement type="package" version="1.7.4">scanorama</requirement>
        <yield />
    </xml>

    <!-- command section -->
    <!-- Command fragment: copies the dataset bound to $method.adata to the relative path 'anndata.h5ad'.
         The fragment ends with a trailing shell AND operator, so another command must follow it. -->
    <token name="@CMD_PREP_ADATA@"><![CDATA[
        ## ln -s does not work here
        cp '$method.adata' 'anndata.h5ad' &&
    ]]></token>
    <!-- Command fragment: writes the script source to $hidden_output, runs the script with python while
         appending its stdout to the same file, creates 'anndata_info.txt' if the script did not write it,
         and finally prints that file through the @CMD_PRETTIFY_STDOUT@ pipeline. -->
    <token name="@CMD@"><![CDATA[
        cat '$script_file' > '$hidden_output' &&
        python '$script_file' >> '$hidden_output' &&
        touch 'anndata_info.txt' &&
        cat 'anndata_info.txt' @CMD_PRETTIFY_STDOUT@
    ]]></token>
    <!-- sed pipeline (starts with a pipe, so it must follow a command that writes to stdout).
         Step 1: on line 1 only, rewrites "AnnData object with X = Y" as "X: Y".
         Step 2: removes every single quote.
         Step 3: turns each "key: value" line into a "[key]" heading followed by a "-    value" line.
         Step 4: replaces every ", " separator with a newline plus the "-    " bullet prefix. -->
    <token name="@CMD_PRETTIFY_STDOUT@"><![CDATA[
        | sed -r '1 s|AnnData object with (.+) = (.*)\s*|\1: \2|g' | sed "s|'||g"  | sed -r 's|^\s*(.*):\s(.*)|[\1]\n-    \2|g' | sed 's|, |\n-    |g'
    ]]></token>
    <!-- Command fragment: symlinks the selected gene annotation to the relative path 'gff'.
         When gffSource is 'cached' the link target is the path field of the chosen data-table entry,
         otherwise it is the history dataset gff_history. Ends with a trailing shell AND operator. -->
    <token name="@CMD_GET_GFF@"><![CDATA[
        #if $method.gff_file_condi.gffSource == 'cached':
            ln -s '$method.gff_file_condi.gff_pre_installed.fields.path' gff &&
        #else:
            ln -s '$method.gff_file_condi.gff_history' gff &&
        #end if
    ]]></token>
    <!-- Command fragment: stages the selected genome FASTA as the relative path 'fasta.fa'.
         Built-in ('indexed') entries are streamed through "gzip -dcf", which decompresses gzip input and
         copies any other input unchanged, so both compressed and plain data-table files work; the name of
         the chosen entry is reported on stderr. History datasets are decompressed with zcat when their
         extension ends in '.gz' and symlinked otherwise. Ends with a trailing shell AND operator. -->
    <token name="@CMD_GET_FASTA@"><![CDATA[
        #if $method.fasta_file_condi.fastaSource == 'indexed':
            ## data table entries may be plain or gzipped, gzip -dcf accepts both
            gzip -dcf '$method.fasta_file_condi.fasta_pre_installed.fields.path' > fasta.fa &&
            echo "Using built-in FASTA: '$method.fasta_file_condi.fasta_pre_installed.fields.name'" >&2 &&
        #else:
            #if $method.fasta_file_condi.fasta_history.ext.endswith('.gz'):
                zcat '$method.fasta_file_condi.fasta_history' > fasta.fa &&
            #else:
                ln -s '$method.fasta_file_condi.fasta_history' fasta.fa &&
            #end if
        #end if
    ]]></token>

    <!-- Config section -->
    <!-- Python import lines for the generated script: snapatac2 (aliased as snap) and os. -->
    <token name="@CONF_IMPORTS@"><![CDATA[
import snapatac2 as snap
import os
    ]]></token>
    <!-- Python line that binds adata to the result of snap.read on 'anndata.h5ad', passing backed = None.
         The file name matches the copy made by @CMD_PREP_ADATA@. -->
    <token name="@CONF_READ_INPUTS@"><![CDATA[
adata = snap.read('anndata.h5ad', backed = None)
    ]]></token>
    <!-- Python lines that save adata with write_h5ad (compression='gzip') to 'anndata.h5ad.gz' and print
         the adata object into 'anndata_info.txt' (UTF-8), the file later displayed by @CMD@. -->
    <token name="@CONF_ANNDATA_WRITE_OUTPUTS@"><![CDATA[
adata.write_h5ad('anndata.h5ad.gz', compression='gzip')
with open('anndata_info.txt','w', encoding='utf-8') as ainfo:
    print(adata, file=ainfo)
    ]]></token>
    <!-- Keyword-argument lines meant to be spliced into a Python call: width and height come from the
         tool form, show and interactive are fixed to False, and out_file is 'plot.' followed by the
         value of the out_file select. Every line ends with a comma. -->
    <token name="@CONF_PARAMS_RENDER_PLOT@"><![CDATA[
    width = $method.width,
    height = $method.height,
    show = False,
    interactive = False,
    out_file = 'plot.$method.out_file',
    ]]></token>
    <!-- Keyword-argument lines meant to be spliced into a Python call. use_rep is always emitted;
         use_dims, groupby and key_added are emitted only when the matching form field is not the empty
         string. use_dims and groupby are split on commas and rendered as a list of whitespace-stripped items.
         NOTE(review): the use_dims items stay strings after the split; confirm the receiving function
         accepts string dimension indices. -->
    <token name="@CONF_PARAMS_DATA_INTEGRATION@"><![CDATA[
use_rep = '$method.use_rep',
#if $method.use_dims != ''
#set $dims = ([x.strip() for x in str($method.use_dims).split(',')])
use_dims=$dims,
#end if
#if $method.groupby != ''
#set $groupby = ([x.strip() for x in str($method.groupby).split(',')])
groupby=$groupby,
#end if
#if $method.key_added != ''
key_added = '$method.key_added',
#end if
    ]]></token>
    <!-- Python/Cheetah fragment that loads motifs from "input.meme" with read_motifs.
         First branch: sets each motif name to the part of its id before the first '+', then keeps one
         motif per name, preferring the one with the larger info_content().
         Branch after the else directive: sets the name to the part of the id before the first '_' and
         the family to the part after the last '+'; no de-duplication is done there.
         NOTE(review): the else directive has no matching if / end if inside this token; presumably the
         including template opens and closes the conditional around the token (confirm).
         NOTE(review): read_motifs is not imported by @CONF_IMPORTS@; presumably the including script
         imports it (confirm). -->
    <token name="@CONF_IMPORT_MEME@"><![CDATA[
motifs = read_motifs("input.meme")
for motif in motifs:
    motif.name = motif.id.split('+')[0]

unique_motifs = {}
for motif in motifs:
    name = motif.name
    if (
            name not in unique_motifs or
            unique_motifs[name].info_content() < motif.info_content()
        ):
        unique_motifs[name] = motif
motifs = list(unique_motifs.values())


#else:
motifs = read_motifs("input.meme")
for motif in motifs:
    motif.name = motif.id.split('_')[0]
    motif.family = motif.id.split('+')[-1]
    ]]></token>

    <!-- input section -->
    <!-- Sanitizer for free-text parameters: starts from the character set named by the validinitial
         token (default string.printable), removes the single quote, and lets the caller add further
         rules through the yield. -->
    <xml name="sanitize_query"
         token_validinitial="string.printable">
        <sanitizer>
            <valid initial="@VALIDINITIAL@">
                <remove value="&apos;"/>
                <yield/>
            </valid>
        </sanitizer>
    </xml>

    <!-- h5ad dataset input named "adata"; the label and the multiple flag are token-configurable
         (defaults: "Annotated data matrix" and false). -->
    <xml name="param_inputs_anndata"
         token_label="Annotated data matrix"
         token_multiple="false">
        <param name="adata"
               type="data"
               format="h5ad"
               multiple="@MULTIPLE@"
               label="@LABEL@"/>
    </xml>
    <!-- Text parameter "groupby", sanitized with the sanitize_query macro. -->
    <xml name="param_groupby">
        <param argument="groupby"
               type="text"
               label="The key of the observation grouping to consider">
            <expand macro="sanitize_query"/>
        </param>
    </xml>
    <!-- Collapsed "Advanced Options" section holding the show_log boolean (unchecked by default). -->
    <xml name="param_common_advanced">
        <section name="advanced_common"
                 title="Advanced Options"
                 expanded="false">
            <param name="show_log"
                   type="boolean"
                   checked="false"
                   label="Output Log?"/>
        </section>
    </xml>
    <!-- Plot rendering controls: integer width (600) and height (400) plus the output format select
         (png selected by default; svg, pdf and html also offered).
         NOTE(review): the select is optional while @CONF_PARAMS_RENDER_PLOT@ interpolates its value into
         the output file name; confirm an empty selection is handled by the tools. -->
    <xml name="param_render_plot">
        <param argument="width"
               type="integer"
               value="600"
               label="Width of the plot"/>
        <param argument="height"
               type="integer"
               value="400"
               label="Height of the plot"/>
        <param name="out_file"
               type="select"
               optional="true"
               label="Type of output plot">
            <option value="png" selected="true">PNG</option>
            <option value="svg">SVG</option>
            <option value="pdf">PDF</option>
            <option value="html">HTML</option>
        </param>
    </xml>
    <!-- Integer shift parameter whose argument name is supplied by the caller through the varname token;
         value and label are token-configurable (defaults: 0 and the left-end correction label). -->
    <xml name="param_shift"
         tokens="varname"
         token_value="0"
         token_label="Insertion site correction for the left end">
        <param argument="@VARNAME@"
               type="integer"
               value="@VALUE@"
               label="@LABEL@"
               help="Note this has no effect on single-end reads"/>
    </xml>
    <!-- Integer chunk_size parameter; the default value comes from the size token. -->
    <xml name="param_chunk_size"
         tokens="size">
        <param argument="chunk_size"
               type="integer"
               value="@SIZE@"
               label="chunk size"/>
    </xml>
    <!-- Optional integer bounds on fragment size; both are empty by default. -->
    <xml name="param_min_max_frag_size">
        <param argument="min_frag_size"
               type="integer"
               optional="true"
               value=""
               label="Minimum fragment size to include"/>
        <param argument="max_frag_size"
               type="integer"
               optional="true"
               value=""
               label="Maximum fragment size to include"/>
    </xml>
    <!-- Form fields read by @CONF_PARAMS_DATA_INTEGRATION@: use_rep (default X_spectral) and the optional
         use_dims, groupby and key_added text fields. use_dims and groupby go through sanitize_query. -->
    <xml name="param_data_integration">
        <param argument="use_rep"
               type="text"
               value="X_spectral"
               label="The key for the matrix"/>
        <param argument="use_dims"
               type="text"
               optional="true"
               value=""
               label="The dimensions used for computation">
            <expand macro="sanitize_query"/>
        </param>
        <param argument="groupby"
               type="text"
               optional="true"
               value=""
               label="The key of the observation grouping to consider">
            <expand macro="sanitize_query"/>
        </param>
        <param argument="key_added"
               type="text"
               optional="true"
               value=""
               label="If specified, add the result to adata.obsm with this key"/>
    </xml>
    <!-- Integer random_state parameter (default 0); label and help are token-configurable. -->
    <xml name="param_random_state"
         token_label="Seed of the random state generator"
         token_help="">
        <param argument="random_state"
               type="integer"
               value="0"
               label="@LABEL@"
               help="@HELP@"/>
    </xml>
    <!-- Text parameter key_added; the default value is supplied by the caller through the key_added token. -->
    <xml name="param_key_added" tokens="key_added">
        <param argument="key_added" type="text" value="@KEY_ADDED@" label="`adata.obs` key under which to add cluster labels"/>
    </xml>
    <!-- Text parameter use_rep (default X_spectral) with a token-configurable label. -->
    <xml name="param_use_rep"
         token_label="Use the indicated representation in `.obsm`">
        <param argument="use_rep"
               type="text"
               value="X_spectral"
               label="@LABEL@"/>
    </xml>
    <!-- Integer n_iterations parameter for Leiden clustering; defaults to -1. -->
    <xml name="param_n_iterations">
        <param argument="n_iterations"
               type="integer"
               value="-1"
               label="How many iterations of the Leiden clustering algorithm to perform"
               help="Positive values above 2 define the total number of iterations to perform, -1 has the algorithm run until it reaches its optimal clustering."/>
    </xml>
    <!-- Select for the feature counting strategy; "paired-insertion" is preselected. -->
    <xml name="param_counting_strategy">
        <param argument="counting_strategy"
               type="select"
               label="The strategy to compute feature counts">
            <option value="fragment">"fragment": based on the number of fragments that overlap with a region of interest</option>
            <option value="insertion">"insertion": based on the number of insertions that overlap with a region of interest</option>
            <option value="paired-insertion" selected="true">"paired-insertion": similar to "insertion", but it only counts the insertions once if the pair of insertions of a fragment are both within the same region of interest</option>
        </param>
    </xml>
    <!-- Tabular dataset input holding chromosome names and sizes. -->
    <xml name="param_chrom_sizes">
        <param argument="chrom_sizes"
               type="data"
               format="tabular"
               label="Chromosome sizes"
               help="First column the chromosome name and second column the size"/>
    </xml>
    <!-- Conditional read by @CMD_GET_FASTA@: pick a genome FASTA either from the all_fasta data table
         ("indexed", the default) or from the history (fasta or fasta.gz). -->
    <xml name="param_genome_fasta">
        <conditional name="fasta_file_condi">
            <param name="fastaSource"
                   type="select"
                   label="Select a built-in FASTA or one from your history"
                   help="Choose history if you don't see the correct FASTA.">
                <option value="indexed" selected="true">Use a built-in FASTA</option>
                <option value="history">Use a FASTA from history</option>
            </param>
            <when value="indexed">
                <param name="fasta_pre_installed"
                       type="select"
                       label="Select a FASTA file"
                       help="Select the FASTA file from a list of pre-installed genomes">
                    <options from_data_table="all_fasta">
                        <filter type="sort_by" column="2"/>
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="fasta_history"
                       type="data"
                       format="fasta,fasta.gz"
                       label="FASTA file"/>
            </when>
        </conditional>
    </xml>
    <!-- Conditional read by @CMD_GET_GFF@: pick a gene annotation either from the gene_sets data table
         ("cached", the default) or from the history (gff3.gz only). -->
    <xml name="param_gene_anno">
        <conditional name="gff_file_condi">
            <param name="gffSource"
                   type="select"
                   label="Select a built-in GFF file or one from your history"
                   help="Choose history if you don't see the correct GFF">
                <option value="cached" selected="true">Use a built-in GFF</option>
                <option value="history">Use a GFF from history</option>
            </param>
            <when value="cached">
                <param name="gff_pre_installed"
                       type="select"
                       label="Select a GFF file"
                       help="Select the GFF from a list of pre-installed files">
                    <options from_data_table="gene_sets">
                        <filter type="sort_by" column="1"/>
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="gff_history"
                       type="data"
                       format="gff3.gz"
                       label="Select a GFF file"
                       help="Make sure that the GFF corresponds to the same genome as the FASTA"/>
            </when>
        </conditional>
    </xml>
    <!-- Integer n_comps parameter; value, label and help are token-configurable (default value 30). -->
    <xml name="param_n_comps"
         token_value="30"
         token_label="Number of dimensions to keep"
         token_help="The result is insensitive to this parameter when `weighted_by_sd` is set, as long as it is large enough, e.g. 30.">
        <param argument="n_comps"
               type="integer"
               value="@VALUE@"
               label="@LABEL@"
               help="@HELP@"/>
    </xml>
    <!-- Select named "motifs" populated from the meme data table, sorted by column 2. -->
    <xml name="param_meme_table">
        <param name="motifs"
               type="select"
               label="Select list of transcription factor motifs">
            <options from_data_table="meme">
                <filter type="sort_by" column="2"/>
            </options>
        </param>
    </xml>


    <!-- test section -->
    <!-- Test inputs that set the plot size to 650 x 450, which differs from the 600 x 400 form defaults. -->
    <xml name="test_param_render_plot">
        <param name="width" value="650"/>
        <param name="height" value="450"/>
    </xml>
    <!-- Test assertions matching the "width = 650" and "height = 450" lines that
         @CONF_PARAMS_RENDER_PLOT@ renders for the values set by test_param_render_plot. -->
    <xml name="test_render_plot_matching_text">
        <has_text_matching expression="width = 650"/>
        <has_text_matching expression="height = 450"/>
    </xml>


    <!-- Citation block with a single DOI entry. -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.1038/s41592-023-02139-9</citation>
        </citations>
    </xml>
</macros>
<!--
author	iuc
date	Tue, 25 Nov 2025 16:41:12 +0000
parents	05bd4db20227
children
-->