Mercurial > repos > iuc > tetyper
diff tetyper.xml @ 0:8bc80c2a15b3 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tetyper commit ee0a9acc788f8c313bb9fd91a42c6f9c66ac5bef"
author | iuc |
---|---|
date | Thu, 28 Nov 2019 14:41:08 -0500 |
parents | |
children | 36093854bfc7 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tetyper.xml Thu Nov 28 14:41:08 2019 -0500 @@ -0,0 +1,170 @@ +<tool id="tetyper" name="TETyper" version="@TOOL_VERSION@+galaxy0"> + <description>Transposable Element Typer</description> + <macros> + <token name="@TOOL_VERSION@">1.1</token> + </macros> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">tetyper</requirement> + </requirements> + <command detect_errors="exit_code"> + <![CDATA[ + TETyper.py + --threads \${GALAXY_SLOTS:-1} + #if $collection_paired.selector == "paired" + --fq1 '${collection_paired.forward_input}' --fq2 '${collection_paired.reverse_input}' + #elif $collection_paired.selector == "collection": + --fq1 '${collection_paired.input_pair.forward}' --fq2 '${collection_paired.input_pair.reverse}' + #end if + --ref '${reference}' + --flank_len '${flank_length}' + --min_reads '${min_reads}' + --min_each_strand '${min_each_strand}' + --min_mapped_len '${min_mapped_len}' + --min_qual '${min_qual}' + #if $snp_profiles + --snp_profiles '${snp_profiles}' + #end if + #if $struct_profiles + --struct_profiles '${struct_profiles}' + #end if + --outprefix output + ]]> + </command> + <inputs> + <conditional name="collection_paired"> + <param name="selector" type="select" label="Collection or paired reads" > + <option value="collection">Collection</option> + <option value="paired" selected="True">Paired</option> + </param> + <when value="collection"> + <param format="fastq,fastq.gz" name="input_pair" type="data_collection" collection_type="paired" label="Collection of paired reads" help="FASTQ datasets" /> + </when> + <when value="paired"> + <param format="fastq,fastq.gz" name="forward_input" type="data" label="Forward strand" help="FASTQ dataset"/> + <param format="fastq,fastq.gz" name="reverse_input" type="data" label="Reverse strand" help="FASTQ dataset"/> + </when> + </conditional> + <param name="reference" type="data" format="fasta" label="Transposable Element Reference"/> + <param name="flank_length" type="integer" min="4" value="5" max="16" label="Flank Length" help="Length of flanking region to extract."/> + <param name="min_reads" type="integer" min="1" value="10" max="100" label="Minimum Reads" help="Minimum read number for including a specific flanking sequence."/> + <param name="min_each_strand" type="integer" min="1" value="1" max="100" label="Minimum Reads (each strand)" help="Minimum read number for each strand for including a specific flanking sequence."/> + <param name="min_mapped_len" type="integer" min="8" value="30" max="100" label="Minimum Mapped Length" help="Minimum length of mapping for a read to be used in determining flanking sequences. Higher values are more robust to spurious mapping. Lower values will recover more reads."/> + <param name="min_qual" type="integer" min="0" value="10" max="100" label="Minimum quality" help="Minimum quality value across extracted flanking sequence." /> + <param name="snp_profiles" type="data" format="text" label="SNP Profiles" optional="true" /> + <param name="struct_profiles" type="data" format="text" label="Structural Variant Profiles" optional="true" /> + <param name="include_log" type="boolean" label="Include log in output"/> + </inputs> + <outputs> + <data name="summary" format="tabular" from_work_dir="output_summary.txt" label="${tool.name} on ${on_string}: summary"/> + <data name="snps" format="vcf" from_work_dir="output.vcf" label="${tool.name} on ${on_string}: SNPs"/> + <data name="blast" format="tabular" from_work_dir="output_blast.txt" label="${tool.name} on ${on_string}: BLAST alignment"/> + <data name="alignment" format="bam" from_work_dir="output.bam" label="${tool.name} on ${on_string}: BWA alignment"/> + <data name="log" format="text" from_work_dir="output.log" label="${tool.name} on ${on_string}: log"> + <filter>include_log</filter> + </data> + </outputs> + <tests> + <test> + <param name="reference" value="Tn4401b-1.fasta" /> + <conditional name="collection_paired"> + <param name="selector" value="paired" /> + <param name="forward_input" value="ERR1911133_Tn4401b-1_mapped_subsampled_1.fastq" /> + <param name="reverse_input" value="ERR1911133_Tn4401b-1_mapped_subsampled_2.fastq" /> + </conditional> + <output name="summary" file="output_summary_1.txt" /> + </test> + <test> + <param name="reference" value="Tn4401b-1.fasta" /> + <conditional name="collection_paired"> + <param name="selector" value="paired" /> + <param name="forward_input" value="ERR1911133_Tn4401b-1_mapped_subsampled_1.fastq" /> + <param name="reverse_input" value="ERR1911133_Tn4401b-1_mapped_subsampled_2.fastq" /> + </conditional> + <param name="snp_profiles" value="Tn4401b_snp_profiles.txt" /> + <param name="struct_profiles" value="Tn4401b_struct_profiles.txt" /> + <output name="summary" file="output_summary_2.txt" /> + </test> + </tests> + <help> + <![CDATA[ +**What it does** + +TETyper is designed for typing a specific transposable element (TE) of interest from paired-end sequencing data. It determines single nucleotide variants (SNVs) and deletions within the TE, as well as flanking sequences surrounding the TE. + +**Input** + +**SNP Profiles**: A tab-delimited file with the following columns: + +1. Profile ID +2. Homozygous SNPs +3. Heterozygous SNPs + +SNPs are represented in the format [REF][POSITION][ALT], and separated by pipe (`|`) characters. SNPs should be ordered by position. Valid alt-bases for heterozygous SNPs are: `M,R,W,S,Y,K` + +For example: + +:: + + 1 none none + 2 C8015T none + 3 C8015T|T9621C none + 4 T7199A|C8015T|T9621C none + 6 C7509G|T7917G none + N2 none C8015Y + N4 none A5178R + N5 none C8015Y|T9663Y + +**Structural Variant Profiles**: A tab-delimited file with the following columns: + +1. Profile ID +2. Structural Variants + +Structural Variants are represented in the format [START-POSITION]-[END-POSITION], and separated by pipe (`|`) characters. + +For example: + +:: + + Tn4401b none + Tn4401a 7020-7118 + Tn4401h 6919-7106 + Tn4401_truncC 1-7127|9198-10006 + +**Output** + +TETyper will produce a tab-seperated output file with the following outputs: + ++--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Description | ++==========================+==================================================================================================================================================================================================================================================+ +| Deletions | A list of sequence ranges corresponding to regions of the reference classified as deletions for this sample, or "none" for no deletions. | ++--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Structural_variant | If --struct_profiles is specified and the pattern of deletions above corresponds to one of these profiles, then the profile name is given, otherwise "unknown". | ++--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| SNPs_homozygous | A list of homozygous SNPs identified, or "none". | ++--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| SNPs_heterozygous | A list of heterozygous SNPs identified, or "none". | ++--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Heterozygous_SNP_counts | For each heterozygous SNP, the number of reads supporting the reference and alternative calls, or "none" if there are no heterozygous SNPs. | ++--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| SNP_variant | If --snp_profiles is specified and the pattern of homozygous and heterozygous SNPs corresponds to one of these profiles, then the profile name is given. Otherwise "unknown". | ++--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Combined_variant | Single name combining Structural_variant and SNP_variant, separated by "-". | ++--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Left_flanks | A list of distinct sequences passing quality filters that flank the start position of the reference. | ++--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Right_flanks | A list of distinct sequences passing quality filters that flank the end position of the reference. | ++--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Left_flank_counts | The number of high quality reads supporting each of the left flanking sequences. | ++--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Right_flank_counts | The number of high quality reads supporting each of the right flanking sequences. | ++--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| X_Y_presence | If --show_region is specified as --show_region X-Y, this column shows 1 if the entirety of that region is classified as present (i.e. no overlap with deleted regions), or 0 otherwise. If --show_region is unspecified, this column is omitted. | ++--------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ + + ]]> + </help> + <citations> + <citation type="doi">10.1099/mgen.0.000232</citation> + </citations> +</tool>