Mercurial > repos > rnateam > footprint
view footprint.xml @ 1:0d94a529f925 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/footprint commit 6767a5ffb02052c844e9d862c79912f998f39d8e
author | rnateam |
---|---|
date | Mon, 20 Nov 2017 05:04:27 -0500 |
parents | 4bff424dfa47 |
children |
line wrap: on
line source
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Galaxy tool wrapper around find_footprints.sh.

    The command symlinks the input BAM into the working directory, calls
    find_footprints.sh with positional arguments, and then renames the
    *.PARAM / *.RESULTS / *.plot1.png / *.plot2.png files to fixed names
    so that the <outputs> section can collect them via from_work_dir.
-->
<tool id="footprint" name="footprint" version="1.0.0">
    <requirements>
        <requirement type="package" version="1.0.0">footprint</requirement>
    </requirements>

    <!--
        Cheetah directives (#if / #else / #end if) and the "##" comment must
        each sit on their own line: "##" comments out the rest of its line.
    -->
    <command detect_errors="aggressive"><![CDATA[
        ln -s '$bam_file' ./bam_file.bam &&

        find_footprints.sh
            ./bam_file.bam
            '$chrom_sizes'
            '$motif_coords'

            ## genome source
            #if $refGenomeSource.genomeSource == "history":
                '$refGenomeSource.ownFile'
            #else
                '$refGenomeSource.builtin.fields.path'
            #end if

            '$factor_name'
            '$bias_file'
            '$peak_file'
            $no_of_components
            $background
            $fixed_bg &&

        mv *.PARAM PARAM &&
        mv *.RESULTS RESULTS &&
        mv *.plot2.png plot2.png &&
        mv *.plot1.png plot1.png
    ]]></command>

    <inputs>
        <!-- Datatype extensions are written in lowercase, like the rest of the file. -->
        <param name="bam_file" type="data" format="bam" label="alignment bam file" />
        <param name="chrom_sizes" type="data" format="tabular" label="chromosome length" />
        <param name="motif_coords" type="data" format="bed" label="coordinates of motif" />

        <!-- Reference genome: either a built-in index or a FASTA dataset from the history. -->
        <conditional name="refGenomeSource">
            <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in genome?" help="The version of genome against which the reads were aligned.">
                <option value="fai" selected="true">Use a built-in genome</option>
                <option value="history">Use a genome from my current history</option>
            </param>
            <when value="fai">
                <param name="builtin" type="select" label="Select a reference genome">
                    <options from_data_table="sam_fa_indices">
                        <filter type="sort_by" column="1" />
                        <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" />
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="ownFile" type="data" format="fasta" label="Select the reference genome" help="Genome sequences in FASTA format" />
            </when>
        </conditional>

        <param name="factor_name" type="text" label="transcription factor" help="e.g. CTCF" />
        <param name="bias_file" type="data" format="tabular,txt" label="cleavage/transposition bias" />
        <param name="peak_file" type="data" format="tabular" label="coordinates of ChIP-seq peaks" />

        <!-- Model options; the selected values are passed verbatim as the last three positional arguments. -->
        <param name="no_of_components" type="select" label="number of components">
            <option value="2" selected="true">2</option>
            <option value="3">3</option>
        </param>
        <param name="background" type="select" label="background components">
            <option value="Seq" selected="true">Seq</option>
            <option value="Flat">Flat</option>
        </param>
        <param name="fixed_bg" type="select" label="fixed background component">
            <option value="TRUE" selected="true">TRUE</option>
            <option value="FALSE">FALSE</option>
        </param>
    </inputs>

    <outputs>
        <!-- from_work_dir names match the targets of the "mv" commands above. -->
        <data name="RESULTS" format="tabular" from_work_dir="RESULTS" label="${tool.name} on ${on_string}: results" />
        <data name="PARAM" format="txt" from_work_dir="PARAM" label="${tool.name} on ${on_string}: parameters" />
        <data name="plot1" format="png" from_work_dir="plot1.png" label="${tool.name} on ${on_string}: plot 1" />
        <data name="plot2" format="png" from_work_dir="plot2.png" label="${tool.name} on ${on_string}: plot 2" />
    </outputs>

    <tests>
        <!-- Single run with a history genome; outputs are compared by size only. -->
        <test>
            <param name="bam_file" value="input_ATAC_HEK293_hg19_chr1.bam" />
            <param name="chrom_sizes" value="input_hg19.chr1.chrom.size" />
            <param name="motif_coords" value="input_CTCF_motifs_hg19_chr1.bed" />
            <param name="genomeSource" value="history" />
            <param name="ownFile" value="input_hg19_chr1.fa" />
            <param name="factor_name" value="CTCF" />
            <param name="bias_file" value="input_SeqBias_ATAC.txt" />
            <param name="peak_file" value="input_CTCF_HEK293_chip_hg19_chr1.bed" />
            <param name="no_of_components" value="2" />
            <param name="background" value="Seq" />
            <param name="fixed_bg" value="TRUE" />
            <output name="RESULTS" file="output.RESULTS" ftype="tabular" compare="sim_size" />
            <output name="PARAM" file="output.PARAM" ftype="txt" compare="sim_size" />
            <output name="plot1" file="output_plot1.png" ftype="png" compare="sim_size" delta="15000" />
            <output name="plot2" file="output_plot2.png" ftype="png" compare="sim_size" delta="15000" />
        </test>
    </tests>

    <!-- The help body is reStructuredText: it is whitespace-sensitive and must stay at column 0. -->
    <help><![CDATA[
.. class:: infomark

**Purpose**

This is a pipeline to find transcription factor footprints in ATAC-seq or DNase-seq data.

-----

.. class:: infomark

**Inputs**

alignment bam file
  * A bam file from the ATAC-seq or DNase-seq experiment.

chromosome length
  * A tab delimited file with 2 columns.
  * The first column is the chromosome name and the second column is the chromosome length for the appropriate organism and genome build.
  * Example: chr1 10000000

coordinates of motif
  * A 6-column bed file with the coordinates of motif matches (eg resulting from scanning the genome with a PWM) for the transcription factor of interest.
  * The 6 columns should contain chromosome, start coordinate, end coordinate, name, score and strand information in this order. The coordinates should be closed (1-based).
  * Example: chr1 24782 24800 . 11.60 -
  * There should not be any additional columns.

transcription factor
  * The name of the transcription factor of interest supplied by the user, e.g. CTCF.

cleavage/transposition bias
  * The cleavage/transposition bias of the different protocols, for all 6-mers.
  * Provided `options`_: ATAC, DNase double hit or DNase single hit protocols.

.. _options: https://ohlerlab.mdc-berlin.de/software/Reproducible_footprinting_139/

coordinates of ChIP-seq peaks
  * A file with the coordinates of the ChIP-seq peaks for the transcription factor of interest.
  * The format is flexible as long as the first 3 columns (chromosome, start coordinate, end coordinate) are present.
  * Example: chr1 237622 237882

number of components
  * Total number of footprint and background components that should be learned from the data.
  * Options are 2 (1 fp and 1 bg) and 3 (2 fp and 1 bg) components.

background components
  * The mode of initialization for the background component. Options are "Flat" or "Seq".
  * Choosing "Flat" initializes this component as a uniform distribution.
  * Choosing "Seq" initializes it as the signal profile that would be expected solely due to the protocol bias (given by the cleavage/transposition bias file).

fixed background component
  * Whether the background component should be kept fixed.
  * Options are TRUE or FALSE.
  * Setting "TRUE" keeps this component fixed, whereas setting "FALSE" lets it be reestimated during training.
  * In general, if the background is estimated from bias (option "Seq"), it is recommended to keep it fixed.

-----

.. class:: infomark

**Outputs**

results
  * The results of the footprinting analysis.
  * The first 6 columns harbor the motif information (identical to the 'coordinates of motif').
  * The 7th column has the footprint score (log-odds of footprint versus background) for each motif instance.
  * The following columns show the probabilities for the individual footprint and background components.

parameters
  * Gives the trained parameters for the footprint and background components.
  * It includes as many lines as components (eg the first line has the parameters for the first component).

plot 1
  * A plot with two panels, showing the initial components above and the final trained components below.
  * The plotted values for the final components are given in the 'parameters' output file explained above.

plot 2
  * A plot only with the final trained components.
  * In a model where 2 components are used, this plot is identical to the bottom panel in plot1.
  * When 3 components are used, this plot shows the weighted average of the 2 footprint components as the final footprint profile.
    ]]></help>

    <!--
        BibTeX separates author names with " and ", not commas.
        TODO: title/journal/year are placeholders; fill them in once the
        reference is published.
    -->
    <citations>
        <citation type="bibtex">@ARTICLE{footprint,
    author = {Aslihan Karabacak and Galip Gurkan Yardimci and Ricardo Wurmus and Dilmurat Yusuf and Uwe Ohler},
    title = {To submit},
    journal = {},
    year = {},
    volume = {},
    pages = {}
}</citation>
    </citations>
</tool>