<!--
view parse.xml @ 15:11149c89ee4f draft default tip

planemo upload for repository https://github.com/open2c/pairtools commit a7612b0f2c0575d6a78fb24dc87192d44817178a
author iuc
date Thu, 28 Aug 2025 18:00:12 +0000
parents bfb00f8fec97
children
line wrap: on
line source
-->

<tool id="pairtools_parse" name="Pairtools parse" version="@TOOL_VERSION@+galaxy@SUFFIX_VERSION@" profile="@PROFILE_VERSION@" license="MIT">
    <description>Find ligation pairs in alignments and create pairs.</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Without pipefail the exit status of "parse | sort" is that of sort only,
        ## so a failing parse step would go unnoticed by detect_errors="exit_code".
        set -o pipefail &&
        ## The output dataset is linked under a name that ends in the chosen suffix
        ## (".gz" when compression is requested, nothing otherwise); pairtools
        ## decides on output compression from the file name it is given.
        #set $output = "output" + str($compress_output)
        ln -s '$output_parsed_pairs' '$output' &&
        pairtools parse
            '$sam_path'
            -c '$chroms_path'
            #if str($assembly_name).strip():
                --assembly '$assembly_name'
            #end if
            --min-mapq '$min_mapq'
            --max-molecule-size '$max_molecule_size'
            $drop_readid
            $drop_seq
            $drop_sam
            ## --output-stats expects a path as its value. Flag and path are emitted
            ## together so that no other option can land between them and be
            ## consumed as the path.
            #if $output_stats:
                --output-stats '$parsed_pairs_stats'
            #end if
            ## Only pass --add-columns when at least one column was picked; an empty
            ## selection would otherwise be handed over as a bogus column name.
            #if $select_add_columns.add_columns_selection == "yes" and $select_add_columns.add_columns:
                --add-columns '$select_add_columns.add_columns'
            #end if
            --walks-policy '$walks_policy'
            --max-inter-align-gap '$max_inter_algn_gap'
            --nproc-in \${GALAXY_SLOTS:-4}
            --nproc-out \${GALAXY_SLOTS:-4}
            #if $sort_output:
                ## parse writes to stdout (no -o) and sort writes the final file
                |
            pairtools sort -o '$output'
                --nproc-in \${GALAXY_SLOTS:-4}
                --nproc-out \${GALAXY_SLOTS:-4}
            #else
                -o '$output'
            #end if

    ]]></command>
    <inputs>
        <!-- Alignments to parse and the chromosome order used when flipping mates -->
        <param name="sam_path" type="data" format="sam,qname_input_sorted.bam,qname_sorted.bam" label="Input SAM/BAM file" help="Input SAM or BAM (unsorted/name-sorted) file with paired-end sequence alignments of Hi-C molecules."/>
        <param name="chroms_path" argument="-c" type="data" format="tabular" label="Input a chromosome order file" help="Chromosome order used to flip interchromosomal mates. Any scaffolds not listed will be ordered lexicographically."/>
        <!-- Optional: the command only passes the assembly option when this is non-blank -->
        <param name="assembly_name" type="text" value="" label="Input a name of genome assembly" help="Name of genome assembly (e.g. hg19, mm10) to store in the pairs header"/>
        <param argument="--min-mapq" type="integer" value="40" min="0" label="Minimal MAPQ" help="Minimal MAPQ score to consider a read as uniquely mapped."/>
        <param argument="--max-molecule-size" type="integer" min="0" value="750" label="Input the maximal size of a Hi-C molecule" help="The maximal size of a Hi-C molecule; used to rescue single ligations(from molecules with three alignments) and to rescue complex ligations."/>
        <!-- Boolean switches: truevalue is the literal flag placed on the command line -->
        <param argument="--drop-readid" type="boolean" truevalue="--drop-readid" falsevalue="" checked="False" label="Do not add read ids to the output"/>
        <param argument="--drop-seq" type="boolean" truevalue="--drop-seq" falsevalue="" checked="False" label="remove sequences and PHREDs from the sam fields"/>
        <!-- Also controls whether the parsed_pairs_stats output dataset is created -->
        <param argument="--output-stats" type="boolean" truevalue="--output-stats" falsevalue="" checked="False" label="Generate various statistics of pairs file"/>
        <param argument="--drop-sam" type="boolean" truevalue="--drop-sam" falsevalue="" checked="False" label="Do not add sams to the output"/>
        <conditional name="select_add_columns">
            <param name="add_columns_selection" type="select" label="Add additional columns to the output" help="Select if additional columns needs to be added to the output pairs file.">
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="yes">
                <param argument="--add-columns" type="select" multiple="true" label="Select extra columns describing alignments" help="Multiple options can be selected.">
                    <option value="mapq">MAPQ</option>
                    <option value="pos5">POS5</option>
                    <option value="pos3">POS3</option>
                    <option value="cigar">CIGAR</option>
                    <option value="read_len">Read length</option>
                    <option value="matched_bp">Matched bp</option>
                    <option value="algn_ref_span">algn_ref_span</option>
                    <option value="dist_to_5">dist_to_5</option>
                    <option value="dist_to_3">dist_to_3</option>
                    <option value="seq">seq</option>
                    <option value="mismatches">mismatches</option>
                    <option value="read_side">read_side</option>
                    <option value="algn_idx">algn_idx</option>
                    <option value="same_side_algn_count">same_side_algn_count</option>
                </param>
            </when>
            <when value="no"/>
        </conditional>
        <param argument="--walks-policy" type="select" label="Walks Policy" help="The policy for reporting unrescuable walks.">
            <expand macro="walks_policy_options"/>
        </param>
        <!-- truevalue is the file-name suffix the command appends to the output name -->
        <param name="compress_output" type="boolean" truevalue=".gz" falsevalue="" checked="false" label="Compress output file"/>
        <param name="sort_output" type="boolean" checked="true" label="generate sorted output file"/>
        <!-- name is set explicitly so the established parameter id (max_inter_algn_gap) is kept,
             while argument carries the real command-line flag -->
        <param name="max_inter_algn_gap" argument="--max-inter-align-gap" type="integer" min="0" value="30" label="Max alignment gap" help="read segments that are not covered by any alignment and longer than the specified value are treated as null alignments."/>
    </inputs>
    <outputs>
        <!-- Parsed pairs; the datatype switches to the gzipped variant when compress_output is checked -->
        <data format="4dn_pairsam"
              name="output_parsed_pairs"
              label="${tool.name} on ${on_string}:  .pairs">
            <change_format>
                <when format="4dn_pairsam.gz" input="compress_output" value=".gz"/>
            </change_format>
        </data>
        <!-- Statistics table; only created when output_stats is checked -->
        <data format="tabular"
              name="parsed_pairs_stats"
              label="${tool.name} on ${on_string}: parsed.stats">
            <filter>output_stats</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 01: SAM input, unsorted output; min_mapq lowered to 1, walks_policy=mask, max_inter_algn_gap=20 -->
        <test expect_num_outputs="1">
            <param name="sam_path" value="test.sam"/>
            <param name="chroms_path" value="test.genome"/>
            <param name="min_mapq" value="1"/>
            <param name="walks_policy" value="mask"/>
            <param name="max_inter_algn_gap" value="20"/>
            <param name="sort_output" value="false"/>
            <output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_sam.pairs" lines_diff="10"/>
        </test>
        <!-- Test 02: same settings as Test 01, but with sort_output enabled -->
        <test expect_num_outputs="1">
            <param name="sam_path" value="test.sam"/>
            <param name="chroms_path" value="test.genome"/>
            <param name="min_mapq" value="1"/>
            <param name="walks_policy" value="mask"/>
            <param name="max_inter_algn_gap" value="20"/>
            <param name="sort_output" value="true"/>
            <output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_sam.sorted.pairs" lines_diff="10"/>
        </test>
        <!-- Test 03: BAM input, unsorted output; min_mapq lowered to 1, walks_policy=mask, max_inter_algn_gap=20 -->
        <test expect_num_outputs="1">
            <param name="sam_path" value="test.bam"/>
            <param name="chroms_path" value="test.reduced.chrom.sizes"/>
            <param name="min_mapq" value="1"/>
            <param name="walks_policy" value="mask"/>
            <param name="max_inter_algn_gap" value="20"/>
            <param name="sort_output" value="false"/>
            <output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_bam.pairs" lines_diff="10"/>
        </test>
        <!-- Test 04: BAM input with min_mapq=40 (the value the parameter declares as its default) -->
        <test expect_num_outputs="1">
            <param name="sam_path" value="test.bam"/>
            <param name="chroms_path" value="test.reduced.chrom.sizes"/>
            <param name="min_mapq" value="40"/>
            <param name="walks_policy" value="mask"/>
            <param name="max_inter_algn_gap" value="20"/>
            <param name="sort_output" value="false"/>
            <output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_bam_min_mapq_40.pairs" lines_diff="10"/>
        </test>
        <!-- Test 05: BAM input with walks_policy=5unique -->
        <test expect_num_outputs="1">
            <param name="sam_path" value="test.bam"/>
            <param name="chroms_path" value="test.reduced.chrom.sizes"/>
            <param name="min_mapq" value="40"/>
            <param name="walks_policy" value="5unique"/>
            <param name="max_inter_algn_gap" value="20"/>
            <param name="sort_output" value="false"/>
            <output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_bam_5unique.pairs" lines_diff="10"/>
        </test>
        <!-- Test 06: BAM input with drop_readid enabled -->
        <test expect_num_outputs="1">
            <param name="sam_path" value="test.bam"/>
            <param name="chroms_path" value="test.reduced.chrom.sizes"/>
            <param name="min_mapq" value="40"/>
            <param name="walks_policy" value="5unique"/>
            <param name="max_inter_algn_gap" value="20"/>
            <param name="drop_readid" value="true"></param>
            <param name="sort_output" value="false"/>
            <output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_bam_readid_dropped.pairs" lines_diff="10"/>
        </test>
        <!-- Test 07: SAM input with drop_seq enabled -->
        <test expect_num_outputs="1">
            <param name="sam_path" value="test.sam"/>
            <param name="chroms_path" value="test.genome"/>
            <param name="min_mapq" value="40"/>
            <param name="walks_policy" value="5unique"/>
            <param name="max_inter_algn_gap" value="20"/>
            <param name="drop_seq" value="true"></param>
            <param name="sort_output" value="false"/>
            <output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_bam_readid_dropped_seq.pairs" lines_diff="10"/>
        </test>
        <!-- Test 08: SAM input with output_stats enabled, so two outputs are expected; only the stats dataset is compared.
             sort_output is not set here, so its declared default (checked) applies. -->
        <test expect_num_outputs="2">
            <param name="sam_path" value="test.sam"/>
            <param name="chroms_path" value="test.genome"/>
            <param name="min_mapq" value="40"/>
            <param name="walks_policy" value="5unique"/>
            <param name="max_inter_algn_gap" value="20"/>
            <param name="output_stats" value="true"></param>
            <output name="parsed_pairs_stats" file="output_parsed_pairs.stats" lines_diff="10"/>
        </test>
        <!-- Test 09: SAM input with an assembly name set, unsorted output -->
        <test expect_num_outputs="1">
            <param name="sam_path" value="test.sam"/>
            <param name="chroms_path" value="test.genome"/>
            <param name="assembly_name" value="test_assembly"/>
            <param name="min_mapq" value="1"/>
            <param name="walks_policy" value="mask"/>
            <param name="max_inter_algn_gap" value="20"/>
            <param name="sort_output" value="false"/>
            <output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_sam_assemblyname.pairs" lines_diff="10"/>
        </test>
        <!-- Test 10: as Test 09 plus compress_output; the expected file is compared after decompression -->
        <test expect_num_outputs="1">
            <param name="sam_path" value="test.sam"/>
            <param name="chroms_path" value="test.genome"/>
            <param name="assembly_name" value="test_assembly"/>
            <param name="min_mapq" value="1"/>
            <param name="walks_policy" value="mask"/>
            <param name="max_inter_algn_gap" value="20"/>
            <param name="compress_output" value="true"/>
            <param name="sort_output" value="false"/>
            <output name="output_parsed_pairs" ftype="4dn_pairsam.gz" file="output_parsed_pairs_sam_assemblyname.pairs.gz" decompress="true" lines_diff="10"/>
        </test>
        <!-- Test 11: as Test 10, but sort_output is not set, so its declared default (checked) applies: sorted, compressed output -->
        <test expect_num_outputs="1">
            <param name="sam_path" value="test.sam"/>
            <param name="chroms_path" value="test.genome"/>
            <param name="assembly_name" value="test_assembly"/>
            <param name="min_mapq" value="1"/>
            <param name="walks_policy" value="mask"/>
            <param name="max_inter_algn_gap" value="20"/>
            <param name="compress_output" value="true"/>
            <output name="output_parsed_pairs" ftype="4dn_pairsam.gz" file="output_parsed_pairs_sam_assemblyname.sorted.pairs.gz" decompress="true" lines_diff="10"/>
        </test>
        <!-- Test 12: SAM input with the extra columns mapq and seq requested through add_columns, unsorted output -->
        <test expect_num_outputs="1">
            <param name="sam_path" value="test.sam"/>
            <param name="chroms_path" value="test.genome"/>
            <param name="min_mapq" value="1"/>
            <param name="walks_policy" value="mask"/>
            <param name="max_inter_algn_gap" value="20"/>
            <conditional name="select_add_columns">
                <param name="add_columns_selection" value="yes"/>
                <param name="add_columns" value="mapq,seq"/>
            </conditional>
            <param name="sort_output" value="false"/>
            <output name="output_parsed_pairs" ftype="4dn_pairsam" file="output_parsed_pairs_sam_mapq.pairs" lines_diff="10"/>
        </test>
    </tests>
    <help><![CDATA[
        **Pairtools parse**

        Detects ligation events in the aligned sequences of DNA molecules formed in Hi-C experiments and reports them in the .pairs/.pairsam format.
        
        sam_path : an input .sam/.bam (unsorted/name-sorted) file with paired-end sequence alignments of Hi-C molecules. 

        By default, the generated .pairs/.pairsam output is sorted by piping it through pairtools sort. You can disable this behavior by unchecking the "generate sorted output file" checkbox.

    ]]></help>
    <expand macro="citations"/>
    <expand macro="creator"/>
</tool>