Mercurial > repos > iuc > samtools_cram_to_bam
changeset 0:0637018367e0 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/samtools/cram_to_bam commit 411130b45dc30f7f24f41cdeec5e148c5d8faf40
author | iuc |
---|---|
date | Tue, 09 May 2017 11:18:56 -0400 |
parents | |
children | d459230531aa |
files | macros.xml samtools_cram_to_bam.xml tool-data/fasta_indexes.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 5 files changed, 227 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue May 09 11:18:56 2017 -0400
@@ -0,0 +1,69 @@
+<macros>
+    <xml name="requirements"> <!-- package requirement: samtools 1.3.1 -->
+        <requirements>
+            <requirement type="package" version="1.3.1">samtools</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <token name="@TOOL_VERSION@">1.3.1</token> <!-- keep in sync with the requirement version above -->
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @misc{SAM_def,
+                title={Definition of SAM/BAM format},
+                url = {https://samtools.github.io/hts-specs/},}
+            </citation>
+            <citation type="doi">10.1093/bioinformatics/btp352</citation>
+            <citation type="doi">10.1093/bioinformatics/btr076</citation>
+            <citation type="doi">10.1093/bioinformatics/btr509</citation>
+            <citation type="bibtex">
+                @misc{Danecek_et_al,
+                Author={Danecek, P., Schiffels, S., Durbin, R.},
+                title={Multiallelic calling model in bcftools (-m)},
+                url = {http://samtools.github.io/bcftools/call-m.pdf},}
+            </citation>
+            <citation type="bibtex">
+                @misc{Durbin_VCQC,
+                Author={Durbin, R.},
+                title={Segregation based metric for variant call QC},
+                url = {http://samtools.github.io/bcftools/rd-SegBias.pdf},}
+            </citation>
+            <citation type="bibtex">
+                @misc{Li_SamMath,
+                Author={Li, H.},
+                title={Mathematical Notes on SAMtools Algorithms},
+                url = {http://www.broadinstitute.org/gatk/media/docs/Samtools.pdf},}
+            </citation>
+            <citation type="bibtex">
+                @misc{SamTools_github,
+                title={SAMTools GitHub page},
+                url = {https://github.com/samtools/samtools},}
+            </citation>
+        </citations>
+    </xml>
+    <xml name="version_command"> <!-- reports the "Version" line of the samtools usage output -->
+        <version_command><![CDATA[samtools 2>&1 | grep Version]]></version_command>
+    </xml>
+    <xml name="stdio"> <!-- any exit code of 1 or higher is treated as a fatal error -->
+        <stdio>
+            <exit_code range="1:" level="fatal" description="Error" />
+        </stdio>
+    </xml>
+    <token name="@no-chrom-options@">
+-----
+
+.. class:: warningmark
+
+**No options available? How to re-detect metadata**
+
+If you see a "No options available" within the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop down, you need to re-detect metadata for the dataset you are trying to process. To do this follow these steps:
+
+1. Click on the **pencil** icon adjacent to the dataset in the history
+2. A new menu will appear in the center pane of the interface
+3. Click **Datatype** tab
+4. Set **New Type** to **BAM**
+5. Click **Save**
+
+The metadata will be re-detected and you will be able to see the list of reference sequences in the "**Select references (chromosomes and contigs) you would like to restrict bam to**" drop-down.
+    </token>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/samtools_cram_to_bam.xml	Tue May 09 11:18:56 2017 -0400
@@ -0,0 +1,117 @@
+<tool id="samtools_cram_to_bam" name="samtools CRAM to BAM" version="@TOOL_VERSION@">
+    <description>convert CRAM alignments to BAM format</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="stdio"/>
+    <expand macro="version_command"/>
+
+    <command><![CDATA[
+        #if str( $input_alignment.metadata.cram_index ) != "None":
+            ln -f -s '${input_alignment.metadata.cram_index}' '${input_alignment}.crai' &&
+        #end if
+        ## Reference FASTA: a history dataset is linked as ref.fa, a cached one is read from the data table "path" field.
+        #if $reference_source.reference_source_selector == 'history':
+            #set ref_fa = 'ref.fa'
+            ln -s '${reference_source.input_reference}' ref.fa &&
+        #else:
+            #set ref_fa = str( $reference_source.input_reference.fields.path )
+        #end if
+        ## Conversion: -L and the trailing region argument are only emitted for the matching "target_region" choice.
+        samtools view
+            #if $parameter_regions.target_region == "regions_bed_file"
+                -L '${parameter_regions.regions_bed_file}'
+            #end if
+            -@ \${GALAXY_SLOTS:-2}
+            -b
+            -T '$ref_fa'
+            -o '$output_alignment'
+            '$input_alignment'
+            #if $parameter_regions.target_region == "region"
+                '${parameter_regions.region_string}'
+            #end if
+    ]]></command>
+
+    <inputs>
+        <param name="input_alignment" type="data" format="cram" label="CRAM alignment file"/>
+        <conditional name="reference_source"> <!-- reference FASTA: cached data table entry or history dataset -->
+            <param name="reference_source_selector" type="select" label="Load reference genome from">
+                <option value="cached">Local cache</option>
+                <option value="history">History</option>
+            </param>
+            <when value="cached">
+                <param name="input_reference" type="select" label="Reference genome">
+                    <options from_data_table="fasta_indexes">
+                        <filter type="data_meta" ref="input_alignment" key="dbkey" column="1" /> <!-- column 1 of fasta_indexes is dbkey -->
+                    </options>
+                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+                </param>
+            </when>
+            <when value="history">
+                <param name="input_reference" type="data" format="fasta" label="Reference FASTA file"/>
+            </when>
+        </conditional>
+        <conditional name="parameter_regions"> <!-- optional restriction of the conversion to one region or to BED intervals -->
+            <param name="target_region" type="select" label="Choose conversion within specific genomic region(s)">
+                <option value="entire_input_file">Entire CRAM alignment file</option>
+                <option value="region">Specific region</option>
+                <option value="regions_bed_file">List of specific regions (BED file)</option>
+            </param>
+            <when value="entire_input_file" />
+            <when value="region">
+                <param name="region_string" type="text" label="Samtools: region to extract alignments from" help="e.g. chrX or chr:start-end" />
+            </when>
+            <when value="regions_bed_file">
+                <param name="regions_bed_file" argument="-L" type="data" format="bed" label="Only include reads overlapping this BED file" />
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data name="output_alignment" format="bam" label="$tool.name on ${on_string}.bam"></data>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="input_alignment" value="test.cram" ftype="cram" />
+            <param name="reference_source_selector" value="history" />
+            <param name="input_reference" value="test.fa" />
+
+            <output name="output_alignment" file="test.bam" compare="sim_size" delta="250" />
+        </test>
+        <test>
+            <param name="input_alignment" value="test.cram" ftype="cram" />
+            <param name="reference_source_selector" value="history" />
+            <param name="input_reference" value="test.fa" />
+            <param name="target_region" value="region" />
+            <param name="region_string" value="CHROMOSOME_I" />
+
+            <output name="output_alignment" file="test.bam" compare="sim_size" delta="250" />
+        </test>
+        <test>
+            <param name="input_alignment" value="test.cram" ftype="cram" />
+            <param name="reference_source_selector" value="history" />
+            <param name="input_reference" value="test.fa" />
+            <param name="target_region" value="regions_bed_file" />
+            <param name="regions_bed_file" value="test.bed" ftype="bed" />
+
+            <output name="output_alignment" file="test.bam" compare="sim_size" delta="250" />
+        </test>
+        <test>
+            <param name="input_alignment" value="test2.cram" dbkey="equCab2" ftype="cram" />
+            <param name="reference_source_selector" value="cached" />
+            <param name="input_reference" value="equCab2chrM" />
+            <param name="target_region" value="entire_input_file" />
+            <output name="output_alignment" file="sam_to_bam_out2.bam" compare="sim_size" delta="250" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+**What this tool does**
+
+Converts alignments from the CRAM format to the BAM format using the ``samtools view`` command.
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample	Tue May 09 11:18:56 2017 -0400
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools indexed sequences data files. You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+#So, for example, if you had the hg19 Canonical index stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored. The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file. For example:
+#
+#hg18canon	hg18	Human (Homo sapiens): hg18 Canonical	/depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full	hg18	Human (Homo sapiens): hg18 Full	/depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/depot/data2/galaxy/hg19/sam/hg19full.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue May 09 11:18:56 2017 -0400
@@ -0,0 +1,6 @@
+<tables>
+    <table comment_char="#" name="fasta_indexes"> <!-- data table read by the "cached" reference selector -->
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc"/>
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Tue May 09 11:18:56 2017 -0400
@@ -0,0 +1,6 @@
+<tables>
+    <table comment_char="#" name="fasta_indexes"> <!-- test variant: loc file is resolved under test-data/cached_locally -->
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/cached_locally/fasta_indexes.loc"/>
+    </table>
+</tables>