Mercurial > repos > iuc > craq
comparison craq.xml @ 0:429feea2cf6d draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/craq commit fe19727db664bcad91b66546149cbf34a6a012e7
| author | iuc |
|---|---|
| date | Wed, 18 Mar 2026 13:17:11 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:429feea2cf6d |
|---|---|
| 1 <tool id="craq" name="CRAQ" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>assess the accuracy of assembled genomic sequences</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements"/> | |
| 7 <command detect_errors="exit_code"><![CDATA[ | |
| 8 ## Stage every input under a fixed name in the job working directory before calling craq | |
| 9 ## CRAQ expects sorted and indexed BAM files, so BAM inputs are copied and then indexed with samtools | |
| 10 cp '$genome' genome.fa && | |
| 11 | |
| 12 ## Prepare SMS input: a BAM (first dataset only) is copied and indexed, otherwise each dataset is copied to sms_N.EXT | |
| 13 #if $sms_input: | |
| 14 #if $sms_input[0].is_of_type('bam'): | |
| 15 cp '${sms_input[0]}' 'sms_sorted.bam' && | |
| 16 samtools index 'sms_sorted.bam' && | |
| 17 #else: | |
| 18 #for $i, $f in enumerate($sms_input): | |
| 19 cp '$f' 'sms_${i}.${f.ext}' && | |
| 20 #end for | |
| 21 #end if | |
| 22 #end if | |
| 23 | |
| 24 ## Prepare NGS input: same staging rules as for SMS, using the ngs_ prefix | |
| 25 #if $ngs_input: | |
| 26 #if $ngs_input[0].is_of_type('bam'): | |
| 27 cp '${ngs_input[0]}' 'ngs_sorted.bam' && | |
| 28 samtools index 'ngs_sorted.bam' && | |
| 29 #else: | |
| 30 #for $i, $f in enumerate($ngs_input): | |
| 31 cp '$f' 'ngs_${i}.${f.ext}' && | |
| 32 #end for | |
| 33 #end if | |
| 34 #end if | |
| 35 | |
| 36 | |
| 37 ## Build the craq command line from the staged files and the tool parameters | |
| 38 craq | |
| 39 -g genome.fa | |
| 40 | |
| 41 ## SMS input: the staged BAM, or a comma-separated list of the staged read files | |
| 42 #if $sms_input: | |
| 43 -sms | |
| 44 #if $sms_input[0].is_of_type('bam'): | |
| 45 'sms_sorted.bam' | |
| 46 #else: | |
| 47 #set $sms_files = ','.join(['sms_%d.%s' % ($i, $f.ext) for $i, $f in enumerate($sms_input)]) | |
| 48 $sms_files | |
| 49 #end if | |
| 50 #end if | |
| 51 | |
| 52 ## NGS input: the staged BAM, or a comma-separated list of the staged read files | |
| 53 #if $ngs_input: | |
| 54 -ngs | |
| 55 #if $ngs_input[0].is_of_type('bam'): | |
| 56 'ngs_sorted.bam' | |
| 57 #else: | |
| 58 #set $ngs_files = ','.join(['ngs_%d.%s' % ($i, $f.ext) for $i, $f in enumerate($ngs_input)]) | |
| 59 $ngs_files | |
| 60 #end if | |
| 61 #end if | |
| 62 | |
| 63 ## Filter parameters: always passed explicitly with the values from the filter_params section | |
| 64 -sn $filter_params.sn | |
| 65 -sf $filter_params.sf | |
| 66 -ln $filter_params.ln | |
| 67 -lf $filter_params.lf | |
| 68 -hmin $filter_params.hmin | |
| 69 -hmax $filter_params.hmax | |
| 70 -mgs $filter_params.mgs | |
| 71 --sms_coverage $filter_params.sms_coverage | |
| 72 --ngs_coverage $filter_params.ngs_coverage | |
| 73 | |
| 74 ## Other parameters: boolean params expand to the text of their truevalue/falsevalue; plot_ids is a dataset path and is quoted like the other paths | |
| 75 $other_params.ser | |
| 76 $other_params.snv | |
| 77 --gapmodel $other_params.gapmodel | |
| 78 $other_params.break | |
| 79 --map $other_params.map | |
| 80 --mapq $other_params.mapq | |
| 81 --norm_window $other_params.norm_window | |
| 82 --regional_window $other_params.regional_window | |
| 83 $other_params.plot | |
| 84 #if $other_params.plot_ids: | |
| 85 --plot_ids '$other_params.plot_ids' | |
| 86 #end if | |
| 87 --thread "\${GALAXY_SLOTS:-8}" | |
| 88 -D outputs | |
| 89 ]]></command> | |
| 90 <inputs> <!-- sms_input and ngs_input are both optional so that either one can be omitted; their help text asks for at least one of them --> | |
| 91 <param name="genome" type="data" label="Assembly sequence file" format="fasta" help="The genome assembly to be evaluated in FASTA format"/> | |
| 92 <param name="sms_input" type="data" optional="true" multiple="true" label="SMS long-read alignment or sequences" format="bam,fastq,fastq.gz" help="Provide either a single BAM file OR multiple FASTQ files (uncompressed or gzipped). At least one of SMS or NGS input must be provided"/> | |
| 93 <param name="ngs_input" type="data" optional="true" multiple="true" label="NGS short-read alignment or sequences" format="bam,fastq,fastq.gz" help="Provide either a single BAM file OR multiple FASTQ files (uncompressed or gzipped). At least one of SMS or NGS input must be provided"/> | |
| 94 <section name="filter_params" title="Filter Parameters" expanded="False"> <!-- every value in this section is passed to craq unconditionally by the command block --> | |
| 95 <param argument="-sn" type="integer" min="0" value="2" label="Minimum number of NGS clipped-reads" help="Minimum number of NGS reads that must show clipping to flag potential errors"/> | |
| 96 <param argument="-sf" type="float" min="0.0" max="1.0" value="0.75" label="Minimum proportion of NGS clipped-reads" help="Minimum proportion of NGS reads that must show clipping relative to total coverage"/> | |
| 97 <param argument="-ln" type="integer" min="0" value="2" label="Minimum number of SMS clipped-reads" help="Minimum number of SMS long reads that must show clipping to flag potential errors"/> | |
| 98 <param argument="-lf" type="float" min="0.0" max="1.0" value="0.75" label="Minimum proportion of SMS clipped-reads" help="Minimum proportion of SMS reads that must show clipping relative to total coverage"/> | |
| 99 <param argument="-hmin" type="float" min="0.0" max="1.0" value="0.4" label="Lower clipping rate for heterozygous allele" help="Lower clipping rate threshold to identify heterozygous variants (CRHs)"/> | |
| 100 <param argument="-hmax" type="float" min="0.0" max="1.0" value="0.6" label="Upper clipping rate for heterozygous allele" help="Upper clipping rate threshold to identify heterozygous variants (CRHs)"/> | |
| 101 <param argument="-mgs" type="integer" min="1" value="10" label="Minimum gap size (bp)" help="Gap[N] sequences longer than this threshold will be treated as breakage"/> | |
| 102 <param argument="--sms_coverage" type="integer" min="0" value="100" label="Average SMS coverage" help="Expected average SMS long-read coverage depth for normalization"/> | |
| 103 <param argument="--ngs_coverage" type="integer" min="0" value="100" label="Average NGS coverage" help="Expected average NGS short-read coverage depth for normalization"/> | |
| 104 </section> | |
| 105 <section name="other_params" title="Other Parameters" expanded="False"> | |
| 106 <param argument="-ser" type="boolean" checked="true" truevalue="-ser T" falsevalue="-ser F" label="Search error regions near breakpoints" help="Search noisy error regions near CRE/CSE breakpoints"/> <!-- F is passed explicitly when unchecked: an empty falsevalue would leave craq on its own default. NOTE(review): the CRAQ README appears to list that default as T; confirm --> | |
| 107 <param argument="-snv" type="boolean" checked="false" truevalue="-snv T" falsevalue="" label="Report SNV/heterozygous variants" help="Report tiny indel errors or heterozygous variants under 40bp.(Resource intensive)"/> | |
| 108 <param argument="--gapmodel" type="select" label="Gap model" help="Gap[N] treatment"> | |
| 109 <option value="1" selected="true">CRE (regional error)</option> | |
| 110 <option value="2">CSE (structural error)</option> | |
| 111 </param> | |
| 112 <param argument="--break" type="boolean" checked="false" truevalue="--break T" falsevalue="" label="Break chimeric fragments" help="Detect and break chimeric contigs at conflict breakpoints"/> | |
| 113 <param argument="--map" type="select" label="Mapping preset" help="Ignored if .bam provided"> | |
| 114 <option value="map-hifi" selected="true">PacBio HiFi</option> | |
| 115 <option value="map-pb">PacBio CLR</option> | |
| 116 <option value="map-ont">Nanopore</option> | |
| 117 </param> | |
| 118 <param argument="--mapq" type="integer" min="0" max="60" value="20" label="Minimum mapping quality" help="Minimum read mapping quality threshold"/> | |
| 119 <param argument="--norm_window" type="float" min="0.0" max="1.0" value="0.0001" label="Normalization window fraction" help="Fraction of the total assembly length used as the window size for normalizing error counts"/> | |
| 120 <param argument="--regional_window" type="integer" min="1" value="500000" label="Regional quality window size (bp)" help="Window size in base pairs for regional quality benchmarking across the assembly"/> | |
| 121 <param argument="--plot" type="boolean" checked="false" truevalue="--plot T" falsevalue="" label="Generate plots" help="Create CRAQ visualization plots"/> | |
| 122 <param argument="--plot_ids" type="data" format="tabular,txt" optional="true" label="Selected assembly IDs for plotting" help="File listing specific assembly IDs to plot (default: all IDs)"/> | |
| 123 <param name="advanced_output" type="boolean" checked="false" label="Output advanced error region files" help="Output detailed CRE/CRH and CSE/CSH BED files for regional and structural error regions"/> <!-- not passed to craq; it only gates the regional_errors and structural_errors output collections --> | |
| 124 </section> | |
| 125 </inputs> | |
| 126 <outputs> | |
| 127 <collection name="runAQI_out" type="list" label="${tool.name} on ${on_string}: AQI Results"> <!-- no filter: always created, discovered from outputs/runAQI_out --> | |
| 128 <discover_datasets pattern="__designation_and_ext__" directory="outputs/runAQI_out"/> | |
| 129 </collection> | |
| 130 <collection name="sr_out" type="list" label="${tool.name} on ${on_string}: Short Reads outputs"> <!-- created only when ngs_input is set, discovered from outputs/SRout --> | |
| 131 <discover_datasets pattern="__designation_and_ext__" directory="outputs/SRout"/> | |
| 132 <filter>ngs_input</filter> | |
| 133 </collection> | |
| 134 <collection name="lr_out" type="list" label="${tool.name} on ${on_string}: Long Reads outputs"> <!-- created only when sms_input is set, discovered from outputs/LRout --> | |
| 135 <discover_datasets pattern="__designation_and_ext__" directory="outputs/LRout"/> | |
| 136 <filter>sms_input</filter> | |
| 137 </collection> | |
| 138 <collection name="regional_errors" type="list" label="${tool.name} on ${on_string}: Regional Error Regions"> <!-- created only when advanced_output is enabled, discovered from the locER_out subdirectory of runAQI_out --> | |
| 139 <discover_datasets pattern="__designation_and_ext__" directory="outputs/runAQI_out/locER_out"/> | |
| 140 <filter>other_params['advanced_output']</filter> | |
| 141 </collection> | |
| 142 <collection name="structural_errors" type="list" label="${tool.name} on ${on_string}: Structural Error Regions"> <!-- created only when advanced_output is enabled, discovered from the strER_out subdirectory of runAQI_out --> | |
| 143 <discover_datasets pattern="__designation_and_ext__" directory="outputs/runAQI_out/strER_out"/> | |
| 144 <filter>other_params['advanced_output']</filter> | |
| 145 </collection> | |
| 146 </outputs> | |
| 147 <tests> | |
| 148 <!-- Test 1: genome + SMS BAM only (no NGS input); checks the AQI and long-read collections --> | |
| 149 <test expect_num_outputs="2"> | |
| 150 <param name="genome" location="https://zenodo.org/records/19091739/files/genome.fa"/> | |
| 151 <param name="sms_input" location="https://zenodo.org/records/19091739/files/SMS_sort.bam"/> | |
| 152 <output_collection name="runAQI_out" type="list" count="3"/> | |
| 153 <output_collection name="lr_out" type="list" count="10"/> | |
| 154 </test> | |
| 155 <!-- Test 2: genome + NGS BAM only (no SMS input) with plotting enabled; checks the AQI and short-read collections --> | |
| 156 <test expect_num_outputs="2"> | |
| 157 <param name="genome" location="https://zenodo.org/records/19091739/files/genome.fa"/> | |
| 158 <param name="ngs_input" location="https://zenodo.org/records/19091739/files/NGS_sort.bam"/> | |
| 159 <section name="other_params"> | |
| 160 <param name="plot" value="true"/> | |
| 161 </section> | |
| 162 <output_collection name="runAQI_out" type="list" count="4"/> | |
| 163 <output_collection name="sr_out" type="list" count="11"/> | |
| 164 </test> | |
| 165 <!-- Test 3: NGS FASTQ pair + SMS BAM with break, snv and MAPQ 30; three collections are expected, of which lr_out is counted but not inspected --> | |
| 166 <test expect_num_outputs="3"> | |
| 167 <param name="genome" location="https://zenodo.org/records/19091739/files/genome.fa"/> | |
| 168 <param name="ngs_input" location="https://zenodo.org/records/19091739/files/NGS_R1.fq.gz,https://zenodo.org/records/19091739/files/NGS_R2.fq.gz"/> | |
| 169 <param name="sms_input" location="https://zenodo.org/records/19091739/files/SMS_sort.bam"/> | |
| 170 <section name="other_params"> | |
| 171 <param name="break" value="true"/> | |
| 172 <param name="snv" value="true"/> | |
| 173 <param name="mapq" value="30"/> | |
| 174 </section> | |
| 175 <output_collection name="runAQI_out" type="list" count="4"/> | |
| 176 <output_collection name="sr_out" type="list" count="9"/> | |
| 177 </test> | |
| 178 <!-- Test 4: genome + NGS paired FASTQ + SMS BAM with advanced outputs enabled; all five collections are checked --> | |
| 179 <test expect_num_outputs="5"> | |
| 180 <param name="genome" location="https://zenodo.org/records/19091739/files/genome.fa"/> | |
| 181 <param name="ngs_input" location="https://zenodo.org/records/19091739/files/NGS_R1.fq.gz,https://zenodo.org/records/19091739/files/NGS_R2.fq.gz"/> | |
| 182 <param name="sms_input" location="https://zenodo.org/records/19091739/files/SMS_sort.bam"/> | |
| 183 <section name="other_params"> | |
| 184 <param name="advanced_output" value="true"/> | |
| 185 </section> | |
| 186 <output_collection name="runAQI_out" type="list" count="3"/> | |
| 187 <output_collection name="lr_out" type="list" count="10"/> | |
| 188 <output_collection name="sr_out" type="list" count="9"/> | |
| 189 <output_collection name="regional_errors" type="list" count="5"/> | |
| 190 <output_collection name="structural_errors" type="list" count="6"/> | |
| 191 </test> | |
| 192 <!-- Test 5: plotting restricted to the assembly IDs listed in ids.txt. NOTE(review): SMS_sort.bam is supplied as ngs_input here; confirm this is intentional --> | |
| 193 <test expect_num_outputs="2"> | |
| 194 <param name="genome" location="https://zenodo.org/records/19091739/files/genome.fa"/> | |
| 195 <param name="ngs_input" location="https://zenodo.org/records/19091739/files/SMS_sort.bam"/> | |
| 196 <section name="other_params"> | |
| 197 <param name="plot" value="true"/> | |
| 198 <param name="plot_ids" value="ids.txt"/> | |
| 199 </section> | |
| 200 <output_collection name="runAQI_out" type="list" count="4"/> | |
| 201 <output_collection name="sr_out" type="list" count="11"/> | |
| 202 </test> | |
| 203 </tests> | |
| 204 <help><![CDATA[ | |
| 205 **What it does** | |
| 206 | |
| 207 CRAQ (Clipping Reveals Assembly Quality) is a reference-free genome assembly quality evaluator. | |
| 208 It identifies potential errors in assembled sequences by analysing how reads clip (fail to align continuously) | |
| 209 at specific positions - without requiring a reference genome. | |
| 210 | |
| 211 CRAQ produces two key quality scores: | |
| 212 | |
| 213 - **R-AQI** (Regional Assembly Quality Index): captures small-scale errors such as indels and local misassemblies detected by short reads | |
| 214 - **S-AQI** (Structural Assembly Quality Index): captures large-scale structural errors such as chimeric joins and inversions detected by long reads | |
| 215 | |
| 216 ----- | |
| 217 | |
| 218 **Inputs** | |
| 219 | |
| 220 +---------------------------+----------+----------------------------------------------------------+ | |
| 221 | Input | Required | Description | | |
| 222 +===========================+==========+==========================================================+ | |
| 223 | Assembly FASTA | Yes | Genome assembly to evaluate in FASTA format | | |
| 224 +---------------------------+----------+----------------------------------------------------------+ | |
| 225 | SMS long-read data        | No*      | PacBio or Nanopore data as one coordinate-sorted BAM     | |
| 226 | | | OR one or more FASTQ/FASTQ.GZ sequence files | | |
| 227 +---------------------------+----------+----------------------------------------------------------+ | |
| 228 | NGS short-read data       | No*      | Illumina data as one coordinate-sorted BAM               | |
| 229 | | | OR one or more FASTQ/FASTQ.GZ sequence files | | |
| 230 +---------------------------+----------+----------------------------------------------------------+ | |
| 231 | |
| 232 \* At least one of SMS or NGS input must be provided. Using both together gives the most complete assessment. | |
| 233 | |
| 234 .. class:: warningmark | |
| 235 | |
| 236 If providing sequence files (FASTQ) rather than alignments, CRAQ will perform the mapping internally | |
| 237 using minimap2 for SMS and BWA for NGS. Ensure the correct mapping preset is selected under | |
| 238 **Other Parameters** when using raw reads. | |
| 239 | |
| 240 ----- | |
| 241 | |
| 242 **Outputs** | |
| 243 | |
| 244 **1) AQI Results** *(always produced)* | |
| 245 | |
| 246 The primary output collection containing: | |
| 247 | |
| 248 - ``AQI_summary.txt`` - final R-AQI and S-AQI scores summarising overall assembly quality | |
| 249 - ``regional_statistics.txt`` - per-region breakdown of error counts and coverage | |
| 250 - ``circos_plot.pdf`` - visualisation of quality metrics across the assembly *(only if plotting is enabled)* | |
| 251 | |
| 252 **2) Long Read Outputs** *(produced when SMS input is provided)* | |
| 253 | |
| 254 - Filtered long-read alignment in BAM format with index | |
| 255 - Putative structural error (CSE) breakpoint coordinates | |
| 256 - Heterozygous variant (CSH) breakpoint coordinates flagged by long reads | |
| 257 | |
| 258 **3) Short Read Outputs** *(produced when NGS input is provided)* | |
| 259 | |
| 260 - Filtered short-read alignment in BAM format with index | |
| 261 - Putative regional error (CRE) coordinates flagged by short reads | |
| 262 - Heterozygous variant (CRH) coordinates from short-read clipping patterns | |
| 263 | |
| 264 **4) Regional Error Regions** *(advanced output, optional)* | |
| 265 | |
| 266 BED files with precise coordinates of: | |
| 267 | |
| 268 - CRE (Clipping-based Regional Errors): local assembly errors detected by short reads | |
| 269 - CRH (Clipping-based Regional Heterozygous variants): heterozygous positions in regional context | |
| 270 | |
| 271 **5) Structural Error Regions** *(advanced output, optional)* | |
| 272 | |
| 273 BED files with precise coordinates of: | |
| 274 | |
| 275 - CSE (Clipping-based Structural Errors): large-scale misassemblies detected by long reads | |
| 276 - CSH (Clipping-based Structural Heterozygous variants): heterozygous structural positions | |
| 277 - Low-coverage regions and ambiguous breakpoints | |
| 278 | |
| 279 ----- | |
| 280 | |
| 281 **Interpreting AQI Scores** | |
| 282 | |
| 283 Both R-AQI and S-AQI are scored from 0 to 100, where higher is better: | |
| 284 | |
| 285 +------------+-------------------+------------------------------------------+ | |
| 286 | Score | Quality | Interpretation | | |
| 287 +============+===================+==========================================+ | |
| 288 | 90 – 100 | Excellent | Very few errors, high-confidence assembly| | |
| 289 +------------+-------------------+------------------------------------------+ | |
| 290 | 70 – 89 | Good | Minor errors, suitable for most analyses | | |
| 291 +------------+-------------------+------------------------------------------+ | |
| 292 | 50 – 69 | Moderate | Noticeable errors, use with caution | | |
| 293 +------------+-------------------+------------------------------------------+ | |
| 294 | < 50 | Poor | Significant errors, reassembly advised | | |
| 295 +------------+-------------------+------------------------------------------+ | |
| 296 | |
| 297 ]]></help> | |
| 298 <expand macro="citations"/> | |
| 299 <expand macro="creators"/> | |
| 300 </tool> |
