Mercurial > repos > nml > pneumocat
diff pneumocat.xml @ 0:33de0245ed99 draft default tip
"planemo upload for repository https://github.com/phe-bioinformatics/PneumoCaT commit c1002f7ad15e676357c6489878291de07bbde841"
author | nml |
---|---|
date | Tue, 24 Mar 2020 13:27:46 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pneumocat.xml Tue Mar 24 13:27:46 2020 -0400
@@ -0,0 +1,180 @@
+<tool id="pneumocat" name="PneumoCaT" version="@VERSION@">
+    <description> Pneumococcal Capsular Typing of illumina reads</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code">
+<![CDATA[
+
+#import os
+#import re
+
+#def check_ending($name, $forward_read=True)
+    ## Pneumocat needs to have name_1.fastq and name_2.fastq (or name_R1/name_R2) to work.
+    ## Everything from the first '.' or '_' in the name is replaced by _R1.fastq or _R2.fastq.
+
+    #if re.search(r'(\.|_)\S*(1|2)*$', $name)
+        #if $forward_read
+            #return re.sub(r'(\.|_)\S*(1|2)*$', '_R1.fastq', $name)
+        #else
+            #return re.sub(r'(\.|_)\S*(1|2)*$', '_R2.fastq', $name)
+        #end if
+
+    #else
+        #if $forward_read
+            #return '{}_R1.fastq'.format($name)
+        #else
+            #return '{}_R2.fastq'.format($name)
+        #end if
+    #end if
+#end def
+
+## Both mates are linked under names derived from the forward read's sanitised base name.
+#if $input.type == 'paired'
+    #set $initial = re.sub('[^\w_]', '_', os.path.splitext($input.forward.name)[0])
+
+    #set $for_input = $check_ending($initial)
+    #set $rev_input = $check_ending($initial, forward_read=False)
+
+    ln -s '$input.forward' ./$for_input &&
+    ln -s '$input.reverse' ./$rev_input &&
+
+#elif $input.type == 'paired_collection'
+    #set $initial = re.sub('[^\w_]', '_', os.path.splitext($input.fastq_collection.forward.name)[0])
+
+    #set $for_input = $check_ending($initial)
+    #set $rev_input = $check_ending($initial, forward_read=False)
+
+    ln -s '$input.fastq_collection.forward' ./$for_input &&
+    ln -s '$input.fastq_collection.reverse' ./$rev_input &&
+
+#end if
+## GALAXY_SLOTS sits in double quotes: single quotes would stop the shell from expanding it.
+PneumoCaT.py -1 '$for_input' -2 '$rev_input' -o outputs --threads "\${GALAXY_SLOTS:-1}" --cleanup
+]]>
+    </command>
+    <inputs>
+        <conditional name="input">
+            <param name="type" type="select" label="Sequence Data Type">
+                <option value="paired">Paired-end reads (FASTQ)</option>
+                <option value="paired_collection">Paired-end reads collection (FASTQ)</option>
+            </param>
+            <when value="paired">
+                <param name="forward"
+                    type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz"
+                    optional="false"
+                    multiple="false"
+                    label="Forward reads (FASTQ)"
+                    help="Must have ASCII encoded quality scores"
+                />
+                <param name="reverse"
+                    type="data" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz"
+                    optional="false"
+                    label="Reverse reads (FASTQ)"
+                    help="File format must match the Forward FASTQ file"
+                />
+            </when>
+            <when value="paired_collection">
+                <param name="fastq_collection"
+                    type="data_collection" format="fastq,fastqsanger,fastq.gz,fastqsanger.gz"
+                    collection_type="paired"
+                    optional="false"
+                    label="Paired-end reads collection (FASTQ)"
+                />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="txt" name="coverage_summary" from_work_dir="outputs/coverage_summary.txt" label="PneumoCaT Coverage Summary.txt"/>
+        <data format="xml" name="results" from_work_dir="outputs/*.results.xml" label="PneumoCaT Results.xml"/>
+        <data format="xml" name="specific_results" from_work_dir="outputs/SNP_based_serotyping/*.results.xml" label="PneumoCaT Serotype Distinction.xml"/>
+        <data format="txt" name="variant_summary" from_work_dir="outputs/SNP_based_serotyping/variant_summary.yml" label="PneumoCaT Variant Summary.yml"/>
+    </outputs>
+    <tests>
+        <test>
+            <conditional name="input">
+                <param name="type" value="paired" />
+                <param name="forward" value="09N_R1.fastq" />
+                <param name="reverse" value="09N_R2.fastq" />
+            </conditional>
+            <output name="coverage_summary" file="coverage_summary.txt" />
+            <output name="results" file="results.xml" />
+        </test>
+        <test>
+            <conditional name="input">
+                <param name="type" value="paired_collection" />
+                <param name="fastq_collection">
+                    <collection type="paired">
+                        <element name="forward" value="09N_R1.fastq.gz" ftype="fastq.gz" />
+                        <element name="reverse" value="09N_R2.fastq.gz" ftype="fastq.gz" />
+                    </collection>
+                </param>
+            </conditional>
+            <output name="coverage_summary" file="coverage_summary.txt" />
+            <output name="results" file="results.xml" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+
+PneumoCaT
+---------
+
+PneumoCaT (Pneumococcal Capsular Typing) uses a two-step approach to assign capsular type to
+S.pneumoniae genomic data (Illumina). More info can be found at the `PneumoCaT github page <https://github.com/phe-bioinformatics/PneumoCaT>`_
+
+Program Steps
+#############
+
+- **Step 1:** Reads from each readset are mapped to capsular locus sequences for all known capsular types using bowtie2
+
+    - This step is considered successful if the readset matches > 90% to one or more capsular locus sequences
+
+    - If only a single capsular locus is matched, PneumoCaT terminates and reports that as the assigned capsular type
+
+    - If more than one locus is matched, the tool moves to step 2
+
+- **Step 2:** Variant calling with the capsular type variant database
+
+    - Used to distinguish serotypes within a serogroup/genogroup
+
+Please note PneumoCaT applies a quality metric requiring a mean depth of 20 reads across the mapped sequence
+and a minimum depth of 5 reads for mapping. The report will return "Failed" if these conditions are not met.
+
+Inputs
+######
+
+- **Paired-end Illumina reads** with one of the following example formats is preferred:
+
+    - <name>_1.fastq and <name>_2.fastq
+
+    - <name>_R1.fastq and <name>_R2.fastq
+
+    - <name>_R1.fastqsanger.gz and <name>_R2.fastqsanger.gz
+
+- If the reads are not formatted as above, the wrapper will append _R1.fastq and _R2.fastq to allow function
+
+Outputs
+#######
+
+Please see `PneumoCaT's interpreting results document <https://github.com/phe-bioinformatics/PneumoCaT/blob/master/Documentation/InterpretingResults.pdf>`_
+for full up-to-date information on how to interpret PneumoCaT results.
+
+Galaxy will output 4 results running PneumoCaT with 2 of the results only containing data if **Step 2** variant calling is done
+
+**1. Coverage Summary.txt** -- Always output unless fails --
+
+**2. Results.xml** -- Always output unless fails --
+
+**3. Serotype distinction.xml** -- Step 2 Required --
+
+**4. Variant Summary.yml** -- Step 2 Required --
+
+
+**Note** - Galaxy will always output files 3 and 4 even if step 2 is not done. In these cases, the files will have no data.
+
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>