diff dedup.xml @ 8:914533f3507e draft

planemo upload for repository https://github.com/open2c/pairtools commit d80cc18cb4f1f67065ed77c6225ef9cea906b727
author iuc
date Fri, 27 Jun 2025 20:33:05 +0000
parents 55a53b5e308d
children 41faa653d86c
line wrap: on
line diff
--- a/dedup.xml	Mon Mar 10 09:36:35 2025 +0000
+++ b/dedup.xml	Fri Jun 27 20:33:05 2025 +0000
@@ -15,14 +15,21 @@
             #if $output_stats:
                   --output-stats '$dedup_pairs_stats'
             #end if
+            #if $output_bytile_stats:
+                  --keep-parent-id
+                  --output-bytile-stats '$dedup_bytile_stats'
+            #end if
             --nproc-in \${GALAXY_SLOTS:-4}
             --nproc-out \${GALAXY_SLOTS:-4}
+            --max-mismatch ${max_mismatch}
     ]]></command>
     <inputs>      
         <param name="pairs_path" type="data" format="4dn_pairs,4dn_pairsam" label="Input pairs file" help="Input triu-flipped sorted .pairs or .pairsam file"/>
         <param argument="--mark-dups" type="boolean" truevalue="--mark-dups" falsevalue="" checked="True" label="Duplicate pairs are marked as DD in pair_type and as a duplicate in the SAM entries"/>
         <param argument="--output-dups" type="boolean" truevalue="--output-dups" falsevalue="" checked="False" label="Output file for duplicate pairs"/>
         <param argument="--output-stats" type="boolean" truevalue="--output-stats" falsevalue="" checked="False" label="Output file for duplicate statistics"/>
+        <param argument="--max-mismatch" type="integer" value="3" min="0" label="Maximum number of mismatches. Pairs with both sides mapped within this distance &quot;bp&quot; from each other are considered duplicates."/>
+        <param argument="--output-bytile-stats" type="boolean" truevalue="--output-bytile-stats" falsevalue="" checked="False" label="Output file for optical duplicate statistics for datasets with original Illumina-generated read IDs."/>
     </inputs>
     <outputs>
         <data name="output_dedup_pairs" format_source="pairs_path" label="${tool.name} on ${on_string}: Deduplicated Pairs"/>
@@ -32,8 +39,12 @@
         <data name="dedup_pairs_stats" format="tabular" label="${tool.name} on ${on_string}: Deduplicated stats">
             <filter>output_stats</filter>
         </data>
+        <data name="dedup_bytile_stats" format="tabular" label="${tool.name} on ${on_string}: By-tile stats">
+            <filter>output_bytile_stats</filter>
+        </data>
     </outputs>
     <tests>
+ 
         <!--Test 01 with default parameters-->
         <test expect_num_outputs="1">
             <param name="pairs_path" value="output_sorted_pairs.pairsam"/>
@@ -55,6 +66,25 @@
             <output name="output_dedup_pairs" file="output_dedup_pairs_markdups.pairsam" lines_diff="20"/>
             <output name="dedup_pairs_stats"  file="output_dedup_pairs.stats" lines_diff="20"/>
         </test>
+        <!--Test 04 mark_dups and output_stats enabled, max_mismatch set to 0-->
+        <test expect_num_outputs="2">
+            <param name="pairs_path" value="output_sorted_pairs.pairsam"/>
+            <param name="mark_dups" value="true"></param>
+            <param name="output_stats" value="true"></param>
+            <param name="max_mismatch"  value="0"></param>
+            <output name="output_dedup_pairs" file="output_dedup_max_mismatch0_sorted.pairsam" lines_diff="20"/>
+            <output name="dedup_pairs_stats"  file="output_dedup_max_mismatch0_sorted.stats" lines_diff="20"/>
+        </test>
+        <!--Test 05 mark_dups and output_stats + bytile_stats enabled-->
+        <test expect_num_outputs="3">
+            <param name="pairs_path" value="output_sorted_pairs.pairsam"/>
+            <param name="mark_dups" value="true"></param>
+            <param name="output_stats" value="true"></param>
+            <param name="output_bytile_stats" value="true"></param>
+            <output name="output_dedup_pairs" file="output_dedup_max_parent_id_bytile_sorted.pairsam" lines_diff="20"/>
+            <output name="dedup_pairs_stats"  file="output_dedup_max_parent_id_bytile_sorted.stats" lines_diff="20"/>
+            <output name="dedup_bytile_stats"  file="output_dedup_max_parent_id_bytile_sorted_tile_dups.stats" lines_diff="20"/>
+        </test>
     </tests>
     <help><![CDATA[
         **Pairtools dedup**