Mercurial > repos > iuc > tasmanian_mismatch
diff Tasmanian.xml @ 0:bc0b40dec7d2 draft
"planemo upload for repository https://github.com/nebiolabs/tasmanian-mismatch commit a64a371ca1ed117c9bd8af743d847128fbce461c"
author | iuc |
---|---|
date | Wed, 20 May 2020 17:23:42 -0400 |
parents | |
children | b15fbf90db53 |
line wrap: on
line diff
<!-- Contents of b/Tasmanian.xml as added by this changeset (diffed against /dev/null,
     dated Wed May 20 17:23:42 2020 -0400), restored to one element per line. -->
<!-- Galaxy wrapper for tasmanian-mismatch: pipes a BAM through samtools view,
     optionally through run_intersections when a bed file is given, and into
     run_tasmanian, which writes the mismatch table. -->
<tool id="tasmanian_mismatch" name="Analysis of artifacts with Tasmanian" version="0.1.3" profile="18.09">
    <requirements>
        <requirement type="package" version="0.1.3">tasmanian-mismatch</requirement>
        <requirement type="package" version="1.9">samtools</requirement>
    </requirements>
    <!-- Command template (Cheetah). A reference taken from the history is symlinked to
         localref.fa in the working directory; a cached reference is used through the
         path column of the selected all_fasta entry. The run_intersections stage is
         only inserted when bed_filename is set. No XML comment may be placed inside
         the command element itself, because its text content is the command. -->
    <command detect_errors="exit_code">
        <![CDATA[

        #set $reference_fasta_filename = "localref.fa"

        #if str( $reference_source.reference_source_selector ) == "history":
            ln -s '${reference_source.ref_file}' '${reference_fasta_filename}' &&
        #else:
            #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
        #end if

        samtools view '${bam_input}' |

        #if $bed_filename
            run_intersections -b '$bed_filename' |
        #end if

        run_tasmanian
        -q '${basequality}'
        -s '${softclips}'
        -m '${mapquality}'
        -c '${confidence}'
        -r '${reference_fasta_filename}' > '${output_table}'

    ]]></command>
    <inputs>
        <!-- Bam alignment file -->
        <param type="data" name="bam_input" label="Bam/Sam alignment file" format="bam"
               help="Specify BAM/SAM dataset. If not using a bed file, this file MUST BE SORTED"/>
        <!-- Reference genome: either a cached all_fasta entry or a FASTA dataset from the history.
             Both branches expose the value as ref_file so the command template can use one name. -->
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Reference genome"
                   help="You can select a reference genome from your history or use a built-in index (Local cache)">
                <option value="cached">Local cache</option>
                <option value="history">History</option>
            </param>
            <when value="cached">
                <param name="ref_file" type="select" label="Select the reference genome from the list">
                    <options from_data_table="all_fasta">
                        <filter type="sort_by" column="2" />
                        <validator type="no_options" message="No indexes are available" />
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="ref_file" type="data" format="fasta" label="Use reference genome from history"
                       help="You can first upload a FASTA sequence to the history" />
            </when>
        </conditional>

        <!-- Optional bed file; when present the reads are first passed through run_intersections.
             Literal double quotes inside the attribute value are written as &quot; so the
             document stays well-formed. -->
        <param name="bed_filename" type="data" format="bed" optional="true" label="Select a bed file"
               help="The bed file should contain at least: &quot;chrN&quot;, &quot;start&quot; and &quot;stop&quot;, and is tab separated."/>

        <!-- Additional parameters, passed to run_tasmanian as -c, -s, -m and -q respectively -->
        <param name="confidence" label="Boundary" type="integer" value="20" min="0" max="100"
               help="Number of bases in boundary region, from 0 to length of the read (read help below). Default=20"/>
        <param name="softclips" label="Choose an action with softclips" type="select" display="radio"
               help="How softclips should be treated. Values include 0,1 or 2 (read the help below). Default=0">
            <option value="1">Never use softclipped bases</option>
            <option value="2">Always use softclipped bases</option>
            <option value="0" selected="True">Automatic decision (Default)</option>
        </param>
        <param name="mapquality" label="Map quality" type="integer" min="0" max="70" value="20"
               help="Exclude reads with lower mapQ than this number. Default=20"/>
        <param name="basequality" label="Base quality" type="integer" min="0" max="70" value="20"
               help="Exclude bases with lower Base quality than this number. Default=20"/>
        <!-- Only controls whether the HTML report is kept as an output (see the output filter);
             it is not passed on the command line. -->
        <param name="keepHTML_conditional" type="select" label="keep HTML output file?">
            <option value="yes">Yes</option>
            <option value="no">No</option>
        </param>
    </inputs>

    <outputs>
        <!-- Mismatch table: stdout of run_tasmanian -->
        <data name="output_table" format="txt" />
        <!-- HTML report picked up from the working directory, kept only when requested -->
        <data format="html" name="html_file" from_work_dir="Tasmanian_artifact_report.html" label="tasmanian-mismatch results table">
            <filter>keepHTML_conditional == "yes"</filter>
        </data>
    </outputs>

    <tests>
        <!-- test when reference from history with bed -->
        <test>
            <param name="bam_input" value="test2.bam" ftype="bam"/>
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" value="small_region.fa"/>
            <param name="bed_filename" value="test2.bed" ftype="bed"/>
            <output name="output_table" file="test2-bed.output" lines_diff="4"/>
        </test>
        <!-- test when reference from history without bed -->
        <test>
            <param name="bam_input" value="test2.bam" ftype="bam"/>
            <param name="reference_source_selector" value="history"/>
            <param name="ref_file" value="small_region.fa"/>
            <output name="output_table" file="test2-nobed.output" lines_diff="4"/>
        </test>
        <!-- test when reference from cached -->
        <test>
            <param name="bam_input" value="test2.bam" ftype="bam" dbkey="hg38"/>
            <param name="reference_source_selector" value="cached"/>
            <param name="ref_file" value="hg38"/>
            <output name="output_table" file="test2-nobed.output" lines_diff="4"/>
        </test>
    </tests>

    <!-- Help text is reStructuredText and must stay flush with column 0 inside the CDATA section -->
    <help>
        <![CDATA[

**What it does**

This tool counts the number/proportion of mismatches per position along the read,
for each read (see figure below).

.. image:: ${static_path}/images/snapshot_good.jpg
   :height: 350
   :width: 650

-----

**What is special**

By providing a bed file, tasmanian-mismatch will count mismatches from all regions depicted in the figure below,
and will report them separately. Also, a parameter defined as *"confidence"* allows including reads with at least
*confidence* bases in the boundary region in a separate group. This is useful when the bed refers to repeat regions. Since these
regions might not have been correctly placed in the assembly or are not the same in different individuals, we can
include these *confidence* repeat regions where we have high confidence on the reference genome to which we mapped the reads.

.. image:: ${static_path}/images/intersections_tasmanian.jpg
   :height: 150
   :width: 650

Softclips are critical in FFPE (Formalin-fixed paraffin-embedded) experiments as mismatches tend to accumulate at the ends of the reads. Most often, softclips
are all accepted during the analysis and many real mismatches are indirectly excluded from the analysis. Hence, this tool
provides different ways to deal with this:

The *softclips* field allows for 3 different ways of treating softclips:

- **0**: Exclude these regions if there is less than 2/3 identity with the reference genome
- **1**: Exclude all softclipped bases
- **2**: Include all softclipped bases

.. class:: warningmark

BAM/SAM file must be **sorted** if not using a bed file.

    ]]>
    </help>
    <citations>
        <citation type="bibtex">
            @misc{githubtasmanian,
            author = {Langhorst B., Others, Erijman A.},
            year = {2020},
            title = {TBD},
            publisher = {GitHub},
            journal = {GitHub repository},
            url = {https://github.com/nebiolabs/tasmanian-mismatch},
            }
        </citation>
    </citations>
</tool>