changeset 1:8e1fb7253c1d draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 613efcbc9de2f712eb2ad9540e3e63f7b968640c
author artbio
date Fri, 03 Oct 2025 00:04:22 +0000
parents 3a185b812f46
children 66a56502199d
files macros.xml snp_pileup_for_facets_wrapper.sh test-data/expected_pileup.csv.gz
diffstat 3 files changed, 14 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Wed Oct 01 01:39:32 2025 +0000
+++ b/macros.xml	Fri Oct 03 00:04:22 2025 +0000
@@ -1,7 +1,7 @@
 <macros>
     <token name="@pipefail@"><![CDATA[set -o | grep -q pipefail && set -o pipefail;]]></token>
     <token name="@TOOL_VERSION@">0.6.2</token>
-    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@VERSION_SUFFIX@">1</token>
     <token name="@PROFILE@">23.0</token>
     <xml name="requirements">
         <requirements>
@@ -10,6 +10,7 @@
             <requirement type="package" version="1.22.1">samtools</requirement>
             <requirement type="package" version="1.22.1">htslib</requirement>
             <requirement type="package" version="20250822">parallel</requirement>
+            <requirement type="package" version="9.5">coreutils</requirement>
         </requirements>
     </xml>    <xml name="citations">
         <citations>
--- a/snp_pileup_for_facets_wrapper.sh	Wed Oct 01 01:39:32 2025 +0000
+++ b/snp_pileup_for_facets_wrapper.sh	Fri Oct 03 00:04:22 2025 +0000
@@ -113,7 +113,7 @@
     "${SAMTOOLS_EXE}" index "${temp_tbam}"
 
     echo "Running snp-pileup on chromosome ${chrom}..."
-    "${SNP_PILEUP_EXE}" -q "${mapq}" -Q "${baseq}" ${count_orphans} "${temp_vcf}" "${temp_output}" "${temp_nbam}" "${temp_tbam}"
+    "${SNP_PILEUP_EXE}" --pseudo-snps=300 -q "${mapq}" -Q "${baseq}" ${count_orphans} "${temp_vcf}" "${temp_output}" "${temp_nbam}" "${temp_tbam}"
 }
 
 # Export all necessary variables AND the function so they are available to the sub-shells created by GNU Parallel.
@@ -125,17 +125,23 @@
 
 echo "Parallel processing finished. Concatenating results..."
 
-# The "gather" part remains the same
+# Gather the per-job outputs and sort them by chromosome.
 FIRST_FILE=$(ls -1v "${TMPDIR}"/*.csv 2>/dev/null | head -n 1)
 if [ -z "${FIRST_FILE}" ]; then
     echo "Error: No pileup files were generated." >&2
     exit 1
 fi
 
-(head -n 1 "${FIRST_FILE}" && \
- tail -n +2 -q "${TMPDIR}"/*.csv) | \
-bgzip > "${output_pileup}"
+# Group head and tail with { ...; } so their combined output feeds a single pipeline.
+# NOTE: the header line also passes through sort before bgzip writes the final file.
+{
+    # 1. Print the header once.
+    head -n 1 "${FIRST_FILE}";
 
-echo "Concatenation and compression complete."
+    # 2. Concatenate all files (skipping their headers).
+    tail -q -n +2 "${TMPDIR}"/*.csv;
+
+} | sort -t, -k1,1V -k2,2n | bgzip > "${output_pileup}"
+echo "Concatenation, sorting and compression complete."
 echo "Final output is in ${output_pileup}"
 echo "Script finished successfully. The temporary directory will be removed by the trap."
Binary file test-data/expected_pileup.csv.gz has changed