# HG changeset patch # User timpalpant # Date 1340158509 14400 # Node ID a77e126ae856eefe20dfacb9daefbee228abb74f # Parent 01d5d20eaadd41354d3381880e2edea5418c7c6c Reupload since last upload did not load correctly diff -r 01d5d20eaadd -r a77e126ae856 dist/java-genomics-toolkit.jar Binary file dist/java-genomics-toolkit.jar has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._Add.xml Binary file galaxy-conf/._Add.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._Autocorrelation.xml Binary file galaxy-conf/._Autocorrelation.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._Average.xml Binary file galaxy-conf/._Average.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._BaseAlignCounts.xml Binary file galaxy-conf/._BaseAlignCounts.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._Correlate.xml Binary file galaxy-conf/._Correlate.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._DNAPropertyCalculator.xml Binary file galaxy-conf/._DNAPropertyCalculator.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._Divide.xml Binary file galaxy-conf/._Divide.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._Downsample.xml Binary file galaxy-conf/._Downsample.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._DynaPro.xml Binary file galaxy-conf/._DynaPro.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._FastqIlluminaToSanger.xml Binary file galaxy-conf/._FastqIlluminaToSanger.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._FindAbsoluteMaxima.xml Binary file galaxy-conf/._FindAbsoluteMaxima.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._FindBoundaryNucleosomes.xml Binary file galaxy-conf/._FindBoundaryNucleosomes.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._FindNMers.xml Binary file galaxy-conf/._FindNMers.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 
galaxy-conf/._FindOutlierRegions.xml Binary file galaxy-conf/._FindOutlierRegions.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._GaussianSmooth.xml Binary file galaxy-conf/._GaussianSmooth.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._GeneTrackToBedGraph.xml Binary file galaxy-conf/._GeneTrackToBedGraph.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._GeneTrackToWig.xml Binary file galaxy-conf/._GeneTrackToWig.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._GreedyCaller.xml Binary file galaxy-conf/._GreedyCaller.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._InterpolateDiscontinuousData.xml Binary file galaxy-conf/._InterpolateDiscontinuousData.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._IntervalAverager.xml Binary file galaxy-conf/._IntervalAverager.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._IntervalLengthDistribution.xml Binary file galaxy-conf/._IntervalLengthDistribution.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._IntervalStats.xml Binary file galaxy-conf/._IntervalStats.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._IntervalToBed.xml Binary file galaxy-conf/._IntervalToBed.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._IntervalToWig.xml Binary file galaxy-conf/._IntervalToWig.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._KMeans.xml Binary file galaxy-conf/._KMeans.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._LogTransform.xml Binary file galaxy-conf/._LogTransform.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._MapDyads.xml Binary file galaxy-conf/._MapDyads.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._MatrixAligner.xml Binary file galaxy-conf/._MatrixAligner.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._MovingAverageSmooth.xml Binary file 
galaxy-conf/._MovingAverageSmooth.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._Multiply.xml Binary file galaxy-conf/._Multiply.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._PercusDecomposition.xml Binary file galaxy-conf/._PercusDecomposition.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._Phasogram.xml Binary file galaxy-conf/._Phasogram.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._PowerSpectrum.xml Binary file galaxy-conf/._PowerSpectrum.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._ReadLengthDistributionMatrix.xml Binary file galaxy-conf/._ReadLengthDistributionMatrix.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._RollingReadLength.xml Binary file galaxy-conf/._RollingReadLength.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._RomanNumeralize.xml Binary file galaxy-conf/._RomanNumeralize.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._Scale.xml Binary file galaxy-conf/._Scale.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._Shift.xml Binary file galaxy-conf/._Shift.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._StripMatrix.xml Binary file galaxy-conf/._StripMatrix.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._Subtract.xml Binary file galaxy-conf/._Subtract.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._Summary.xml Binary file galaxy-conf/._Summary.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._ValueDistribution.xml Binary file galaxy-conf/._ValueDistribution.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._WaveletTransform.xml Binary file galaxy-conf/._WaveletTransform.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._ZScore.xml Binary file galaxy-conf/._ZScore.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._galaxyToolRunner.sh Binary file 
galaxy-conf/._galaxyToolRunner.sh has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._log4j.properties Binary file galaxy-conf/._log4j.properties has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/._matrix2png.xml Binary file galaxy-conf/._matrix2png.xml has changed diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/Add.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/Add.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,27 @@ + + multiple (Big)Wig files + + galaxyToolRunner.sh wigmath.Add -o $output + #for $input in $inputs + ${input.file} + #end for + + + + + + + + + + + + +This tool will add all values in the specified Wig files base pair by base pair. + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/Autocorrelation.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/Autocorrelation.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,40 @@ + + of data in a Wiggle file + galaxyToolRunner.sh ngs.Autocorrelation -i $input -l $windows -m $max -o $output + + + + + + + + + + + +This tool computes the unnormalized autocovariance_ of intervals of data in a Wig file. + +.. _autocovariance: http://en.wikipedia.org/wiki/Autocorrelation + +----- + +**Syntax** + +- **Input data** is the genomic data on which to compute the autocorrelation. +- **List of intervals:** The autocorrelation will be computed for each genomic interval specified in this list. +- **Maximum shift:** In computing the autocorrelation, the data will be phase-shifted up to this limit. + +----- + +.. class:: infomark + +**TIP:** For more information, see Wikipedia_ (right click to open this link in another window). + +.. _Wikipedia: http://en.wikipedia.org/wiki/Autocorrelation + +.. 
class:: infomark + +**TIP:** If your input data does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format. Similarly, the intervals must be in either Bed, BedGraph, or GFF format. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/Average.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/Average.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,33 @@ + + multiple (Big)Wig files + + galaxyToolRunner.sh wigmath.Average -o $output $file1 $file2 + #for $input in $inputs + ${input.file} + #end for + + + + + + + + + + + + + + + + + + +This tool will average the values of the provided Wig files, base pair by base pair. + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/BaseAlignCounts.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/BaseAlignCounts.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,43 @@ + + of sequencing reads + galaxyToolRunner.sh ngs.BaseAlignCounts -i $input -a ${chromInfo} -x $X -o $output + + + + + + + + + + + +This tool produces a new Wig file with the number of reads/intervals overlapping each base pair. Reads can be artificially extended to match known fragment lengths. If you wish to count the number of reads starting at each base pair, set the read extension to 1. If you wish to count the number of intervals overlapping each base pair, set the extension to -1. + +----- + +.. class:: warningmark + +This tool requires sequencing reads in SAM, BAM, Bed, or BedGraph format. If you are artificially extending reads, ensure that the strand is set correctly in SAM, BAM, and Bed files. + +.. 
class:: warningmark + +Paired-end reads are considered to be the entire fragment (the distance from the 5' end of mate 1 to the 5' end of mate 2) if the extension is set to -1. + +.. class:: infomark + +If you would like to convert valued interval data (e.g. BedGraph files from microarrays) to Wig format, use the Converters -> Interval to Wig converter. + +.. class:: infomark + +**TIP:** If you are going to be using reads in SAM format for multiple analyses, it is often more efficient to first convert it into BAM format using NGS: SAM Tools -> SAM-to-BAM. + +----- + +**Syntax** + +- **Sequencing reads** are mapped reads from a high-throughput sequencing experiment. +- **In silico extension:** Reads will be artificially extended from their 5' end to be this length. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/Correlate.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/Correlate.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,60 @@ + + multiple (Big)Wig files + + galaxyToolRunner.sh wigmath.Correlate -w $window -s $step -t $type -o $output $file1 $file2 + #for $input in $inputs + ${input.file} + #end for + + + + + + + + + + + + + + + + + + + + +This tool will compute a correlation matrix between the supplied Wig or BigWig files. Each row/column in the matrix is added in the order that files are added above, starting from the top left. The Wig file is downsampled into sliding windows with the specified bin size and shift by computing the mean value in each window. These windows are then correlated using either Pearson_'s Product-Moment correlation coefficient or Spearman_'s rank correlation coefficient. If the window size is set to 1, the correlation is calculated between all base pairs in the genome. + +.. _Pearson: http://en.wikipedia.org/wiki/Pearson_product-moment_correlation_coefficient + +.. 
_Spearman: http://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient + +----- + +**Syntax** + +- **Inputs** are the genomic data to correlate +- **Window size** is the size of the window to bin data into +- **Sliding step size** is the shift step size of the sliding window used during binning +- **Correlation metric** is the type of correlation to calculate + +----- + +.. class:: warningmark + +**WARN:** In order to calculate the correlation coefficient, the data is loaded entirely into memory. For large genomes, this may require a lot of RAM unless comparably large window sizes are used. + +----- + +**Citation** + +This tool was inspired by ACT_ from the Gerstein lab. + +.. _ACT: http://act.gersteinlab.org + +J Jee*, J Rozowsky*, KY Yip*, L Lochovsky, R Bjornson, G Zhong, Z Zhang, Y Fu, J Wang, Z Weng, M Gerstein. ACT: Aggregation and Correlation Toolbox for Analyses of Genome Tracks. (2011) Bioinformatics 27(8): 1152-4. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/DNAPropertyCalculator.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/DNAPropertyCalculator.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,114 @@ + + for a genome + galaxyToolRunner.sh dna.DNAPropertyCalculator -i + #if $refGenomeSource.genomeSource == "history": + $refGenomeSource.ownFile + #else + ${refGenomeSource.index.fields.path} + #end if + $N -p $property -o $output + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +This tool will create a new Wig file with genome-wide calculations of sequence-specific DNA properties determined from local n-nucleotide sequences. DNA properties are calculated using AJT_. + +.. _AJT: http://www.abeel.be/ajt + +----- + +**Example** + +To calculate GC-content, choose your genome assembly and select "GC" as the property. 
This will create a new Wig file in which G and C nucleotides are represented by 1, while A and T nucleotides are represented by -1. If you would like to compute GC-content in 10-bp windows, use the WigMath -> Moving average tool to compute a moving average with 10bp windows. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/Divide.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/Divide.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,36 @@ + + two (Big)Wig files + galaxyToolRunner.sh wigmath.Divide -n $dividend -d $divisor -o $output + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/Downsample.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/Downsample.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,61 @@ + + a (Big)Wig file + galaxyToolRunner.sh wigmath.Downsample -i $input -m $metric -w $window -o $output + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +This tool can be used to reduce the resolution and file size of Wig files for easier upload to UCSC. Data is downsampled in non-overlapping windows starting from the beginning of each chromosome. Each window can be downsampled as the mean, minimum, maximum, total, or coverage of the original data. + +----- + +**Downsampling Methods** + +- **Mean:** the arithmetic mean of the values in the original data window +- **Minimum:** the least value in the original data window +- **Maximum:** the greatest value in the original data window +- **Coverage:** the fraction of bases with values in the original window +- **Total:** the sum of all values in the original data window + +----- + +.. 
class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/DynaPro.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/DynaPro.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,48 @@ + + using DynaPro + galaxyToolRunner.sh nucleosomes.DynaPro -i $input -n $N + #if str( $mean ) != '' + -m $mean + #end if + + #if str( $variance ) != '' + -v $variance + #end if + -o $output + + + + + + + + + + + + +.. class:: warningmark + +At present, this tool is only suitable for small genomes (yeast) since entire chromosomes must be loaded into memory. + +----- + +Equilibrium nucleosome distribution is modeled as a one-dimensional fluid of hard rods adsorbing and moving within an external potential. This tool provides a simplified version of the DynaPro_ algorithm for a single factor interacting with hard-core repulsion. + +.. _DynaPro: http://nucleosome.rutgers.edu/nucleosome/ + +----- + +**Syntax** + +- **Energy landscape** is the external potential function for each genomic base pair, and must be in Wig format. +- **Nucleosome size** is the hard-core interaction size. + +----- + +**Citation** + +Morozov AV, Fortney K, Gaykalova DA, Studitsky VM, Widom J and Siggia ED (2009) Using DNA mechanics to predict in vitro nucleosome positions and formation energies. Nucleic Acids Res 37: 4707–4722. 
+ + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/ExtractDataFromRegion.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/ExtractDataFromRegion.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,27 @@ + + for a genomic interval + + galaxyToolRunner.sh ngs.ExtractDataFromRegion --chr $chr --start $start --stop $stop -o $output $file1 + #for $input in $inputs + ${input.file} + #end for + + + + + + + + + + + + + + + + +This tool will extract data from Wig or BigWig file(s) for a specific region of the genome. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/FastqIlluminaToSanger.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/FastqIlluminaToSanger.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,29 @@ + + from Illumina to Sanger + galaxyToolRunner.sh converters.FastqIlluminaToSanger -i $input -o $output + + + + + + + + + + + + + + +This tool will convert a FASTQ file with ASCII quality scores encoded in Illumina 1.3-1.7 format (Phred+64) to Sanger format (Phred+33) for use with Bowtie and other Galaxy tools. Illumina CASAVA >= 1.8 already produces FASTQ files in Sanger format, so this tool should not be used on new Illumina sequencing data. This tool is a simpler, faster version of the FASTQ Groomer that does little error checking but performs much faster. If you are unsure what format your file is in, or need to do other conversions, use the FASTQ Groomer instead. + +For more information, read about FASTQ formats_ (right-click to open in new window). + +.. _formats: http://en.wikipedia.org/wiki/FASTQ_format + +.. class:: warningmark + +This tool requires fastqillumina formatted data. If you have fastq data that was not correctly autodetected, change the metadata by clicking on the pencil icon for the dataset. 
+ + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/FindAbsoluteMaxima.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/FindAbsoluteMaxima.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,52 @@ + + in intervals + + galaxyToolRunner.sh ngs.FindAbsoluteMaxima -l $window -o $output + #for $input in $inputs + ${input.file} + #end for + + + + + + + + + + + + + +This tool can be used to find the location of the maximum value in genomic intervals, such as finding the peak summit inside a set of peak calls. + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. Intervals must be provided in Bed, BedGraph, or GFF format. + +----- + +**Example** + + +if **Intervals** are genes :: + + chr11 5203271 5204877 NM_000518 0 - + chr11 5210634 5212434 NM_000519 0 - + chr11 5226077 5227663 NM_000559 0 - + +and **Wig files** are :: + + Data1.wig + Data2.wig + +this tool will find the location of the maximum value in each interval for each of the provided Wig/BigWig files, and append them in columns in the order that they were added :: + + chr11 5203271 5204877 NM_000518 0 - 5203374 5204300 + chr11 5210634 5212434 NM_000519 0 - 5210638 5212450 + chr11 5226077 5227663 NM_000559 0 - 5226800 5226241 + +where column 7 is the location of the maximum value in that interval for Data1.wig, and column 8 is the location of the maximum value in that interval for Data2.wig. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/FindBoundaryNucleosomes.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/FindBoundaryNucleosomes.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,30 @@ + + in windows + galaxyToolRunner.sh nucleosomes.FindBoundaryNucleosomes -i $input -l $loci -o $output + + + + + + + + + + +.. 
class:: infomark + +Use the Call Nucleosomes tool to create a file of called nucleosomes, then use this tool to identify the first nucleosome's dyad position (peak maximum) from the 5' and 3' end of the gene. + +.. class:: infomark + +**TIP:** Nucleosome calls must be in tabular format of the kind produced by the Nucleosomes -> Call nucleosomes tool. Intervals must be in either Bed, BedGraph, or GFF format. + +----- + +**Syntax** + +- **Nucleosome calls** is a list of stereotypic nucleosome position calls. +- **List of intervals:** The 5' and 3' boundary nucleosomes will be found for each interval in this list + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/FindNMers.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/FindNMers.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,52 @@ + + in a DNA sequence + galaxyToolRunner.sh dna.FindNMers -i + #if $refGenomeSource.genomeSource == "history": + $refGenomeSource.ownFile + #else + ${refGenomeSource.index.fields.path} + #end if + -m $mismatches -n $nmer $rc -o $output + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +This tool will find all matches of a given NMer in a DNA sequence. Sequences may be provided in FASTA format or selected from available reference genomes. Mismatches are allowed, but not insertions/deletions. The output is a Bed file with the locations of matches in the reference sequence. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/FindOutlierRegions.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/FindOutlierRegions.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,33 @@ + + such as CNVs + galaxyToolRunner.sh ngs.FindOutlierRegions -i $input -w $window -t $threshold $below -o $output + + + + + + + + + + + + +This tool identifies regions of the genome that may be repetitive elements or CNVs by scanning for windows that have an exceptionally high mean relative to the genome-wide mean. + +----- + +.. 
class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. + +----- + +**Syntax** + +- **Input data** is Wig or BigWig formatted data from a high-throughput sequencing experiment. +- **Window size** is the size of the moving average to use. +- **Threshold** is the fold times the genome-wide mean that a window's mean must be in order to be considered an outlier region. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/GaussianSmooth.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/GaussianSmooth.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,40 @@ + + a (Big)Wig file + galaxyToolRunner.sh wigmath.GaussianSmooth -i $input -s $S -o $output + + + + + + + + + + + + + + + + + + + + + + + + + + + +This tool smooths genomic data with an area-preserving Gaussian_ filter. The Gaussian filter is computed out to +/- 3 standard deviations. + +.. _Gaussian: http://en.wikipedia.org/wiki/Gaussian_filter + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/GeneTrackToBedGraph.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/GeneTrackToBedGraph.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,27 @@ + + converter + galaxyToolRunner.sh converters.GeneTrackToBedGraph -i $input -o $output + + + + + + + + + + + + + + +This tool will sum the counts from the forward and reverse strands in a GeneTrack_ index to create a BedGraph file. + +.. _GeneTrack: http://atlas.bx.psu.edu/genetrack/docs/genetrack.html + +.. class:: warningmark + +This tool requires GeneTrack formatted data. 
If you have tabular data that was not correctly autodetected, change the metadata by clicking on the pencil icon for the dataset. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/GeneTrackToWig.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/GeneTrackToWig.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,39 @@ + + converter + galaxyToolRunner.sh converters.GeneTrackToWig -i $input -s $shift $zero -a ${chromInfo} -o $output + + + + + + + + + + + + + + + + + + + + + + + + + + +This tool will convert GeneTrack_ format files into Wig files, optionally offsetting the + and - strand counts by a specified value before merging them. + +.. _GeneTrack: http://atlas.bx.psu.edu/genetrack/docs/genetrack.html + +.. class:: warningmark + +This tool requires GeneTrack formatted data. If you have tabular data that was not correctly autodetected, change the metadata by clicking on the pencil icon for the dataset. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/GreedyCaller.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/GreedyCaller.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,49 @@ + + in an MNase experiment + galaxyToolRunner.sh nucleosomes.GreedyCaller -d $dyads -s $smoothed -n $N -o $output + + + + + + + + + + + +Stereotypic nucleosome positions are identified from dyad density maps using an approach similar to the previously reported greedy algorithm in GeneTrack_ (Albert, et al. 2008). Nucleosome calls are identified at peak maxima (p) in the smoothed dyad density map, and then excluded in the surrounding window [p–N, p+N], where N is the assumed nucleosome size in base pairs. This process is continued until all possible sterically hindered nucleosome positions are identified. + +.. _GeneTrack: http://atlas.bx.psu.edu/genetrack/docs/genetrack.html + +.. class:: warningmark + +This tool requires dyad counts and smoothed dyad counts in Wig or BigWig format. 
Smoothed dyad counts can be generated from dyad counts using the WigMath -> Gaussian smooth tool. + +----- + +**Syntax** + +- **Dyad counts** is the relative number of nucleosomes positioned at each base pair. +- **Smoothed dyad counts** should correspond to a smoothed version of the **Dyad counts** +- **Assumed nucleosome size** is the window size used while identifying maxima to restrict overlapping calls. + +----- + +**Output** + +The output format has 10 columns defined as follows + +- 1. **Chromosome:** the chromosome of this nucleosome call +- 2. **Start:** the lower coordinate of the call window, equal to the dyad position - N/2 +- 3. **Stop:** the higher coordinate of the call window, equal to the dyad position + N/2 +- 4. **Length:** the window size (N) of the nucleosome call, equal to the value specified when the tool was run +- 5. **Length standard deviation:** the standard deviation of the nucleosome call length (equal to 0 because it is not currently calculated) +- 6. **Dyad:** the location of the peak maximum (p) in the smoothed dyad density data +- 7. **Dyad standard deviation:** the standard deviation of dyad density around the dyad mean in the dyad counts data +- 8. **Conditional position:** the probability that a nucleosome is at this exact dyad location as opposed to anywhere else in the nucleosome call window [p-N/2, p+N/2] +- 9. **Dyad mean:** the mean of the dyad counts in the window [p-N/2, p+N/2] +- 10. 
**Occupancy:** the sum of the dyad counts in the window [p-N/2, p+N/2] + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/InterpolateDiscontinuousData.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/InterpolateDiscontinuousData.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,40 @@ + + missing values in a (Big)Wig file + galaxyToolRunner.sh converters.InterpolateDiscontinousData -i $input -t $type -m $max -o $output + + + + + + + + + + + + + + + +This tool will attempt to interpolate missing values (NaN) in a Wig file that result when converting discontinuous microarray probe data to Wig format. Stretches of missing data that extend longer than the allowed maximum will be left as NaN. + +----- + +**Interpolation types** + +- **Nearest** uses the value of the nearest base pair that has data +- **Linear** uses a linear interpolant between the values of the nearest two probes +- **Cubic** uses a cubic interpolant between the values of the nearest two probes + +For more information, see Wikipedia_. + +.. _Wikipedia: http://en.wikipedia.org/wiki/Interpolation + +----- + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use the Converters -> IntervalToWig tool to convert Bed, BedGraph, or GFF-formatted microarray data to Wig format, then use this tool to interpolate the missing values between probes. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/IntervalAverager.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/IntervalAverager.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,40 @@ + + that have been aligned + + galaxyToolRunner.sh visualization.IntervalAverager -l $loci -o $output $file1 + #for $input in $inputs + ${input.file} + #end for + + + + + + + + + + + + + + +This tool calculates the average signal for a set of aligned intervals. 
Intervals are lined up on their alignment point (column 5 in the Bed file), flipped if on the - strand, and averaged. The output is equivalent to aligning the data in a matrix and then taking the columnwise average of the matrix. + +Intervals with alignment points must be provided in the following extended Bed format :: + + chr low high id alignment strand + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. + +----- + +**Syntax** + +- **Sequencing data** is the genomic data used to create the average +- **List of intervals** is a list of intervals in Bed format with alignment points + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/IntervalLengthDistribution.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/IntervalLengthDistribution.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,38 @@ + + of read lengths + galaxyToolRunner.sh ngs.IntervalLengthDistribution -i $input $freq -o $output + + + + + + + + + + + + + + + + + + + + + + + + + + + +This tool calculates the distribution of interval lengths from a list of intervals or reads in SAM, BAM, Bed, BedGraph, or GFF format. + +.. class:: warningmark + +For paired-end sequencing reads, the length is the length of the fragment (5' end of read 1 to 5' end of read 2) + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/IntervalStats.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/IntervalStats.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,44 @@ + + of data in a (Big)Wig file + + galaxyToolRunner.sh ngs.IntervalStats -l $windows -s $stat -o $output + #for $input in $inputs + ${input.file} + #end for + + + + + + + + + + + + + + + + + + + +This tool calculates the arithmetic mean, maximum, or minimum value for the Wig data in each interval. 
For each Wig file provided, an additional column is added to the output file in the order that they are added above. + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. + +----- + +**Example** + +Calculate the mean change in nucleosome occupancy for each gene in the yeast genome: + +- 1. Create a "change in occupancy" dataset by subtracting the normalized occupancy Wig files from your two conditions using the WigMath -> Subtract tool. +- 2. Upload a list of intervals corresponding to the genes in the yeast genome, or pull the data from UCSC using Get Data -> UCSC Main. +- 3. Calculate the mean change in occupancy for each gene using this tool and the datasets from (1) and (2). + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/IntervalToBed.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/IntervalToBed.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,19 @@ + + converter + galaxyToolRunner.sh converters.IntervalToBed -i $input -o $output + + + + + + + + +This tool will convert any file in SAM, BAM, GFF, BedGraph, BigBed, or VCF format to Bed format. + +.. class:: warningmark + +For SAM/BAM data, paired-end reads are converted to Bed format as the entire fragment (5' end of mate 1 to the 5' end of mate 2). Single-end reads are converted to Bed format as the read itself, with strand information. If your SAM/BAM file contains both mate alignments from a paired-end sequencing run (i.e. two entries for each fragment), you should first filter out reads from either the + or - strand with the SAM Tools -> Filter SAM tool to avoid producing redundant entries in the output Bed file. 
+ + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/IntervalToWig.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/IntervalToWig.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,21 @@ + + converter + galaxyToolRunner.sh converters.IntervalToWig -i $input $zero -a ${chromInfo} -o $output + + + + + + + + + + +This tool converts data from an interval format, such as Bed, BedGraph or GFF, to Wig format. This can be used to convert data from microarrays to Wig format. The value of each interval is mapped into the Wig file. Intervals that overlap in the original file (multiple-valued base pairs) are averaged, and bases without data in the original interval file are set to NaN. + +.. class:: warningmark + +This tool requires Bed, BedGraph, or GFF formatted data. If you have tabular data that was not correctly autodetected, change the metadata by clicking on the pencil icon for the dataset. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/KMeans.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/KMeans.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,41 @@ + + an aligned matrix + galaxyToolRunner.sh visualization.KMeans -i $input -k $K -1 $min -2 $max -o $output + + + + + + + + + + + + + + +.. class:: warningmark + +This tool requires tabular data in matrix2png format (with column AND row headers). For more information about the required format and usage instructions, see the matrix2png_ website. + +.. _matrix2png: http://bioinformatics.ubc.ca/matrix2png/dataformat.html + +.. class:: infomark + +You can use the "Align values in a matrix" tool to create a matrix, then use this tool to cluster the matrix with k-means. + +.. class:: infomark + +**TIP:** You can use the **min** and **max** columns to cluster a large matrix based on a subset of the columns. For example, you could cluster a 4000x4000 matrix on columns 200-300 by setting min = 200 and max = 300. 
This will greatly increase the efficiency of distance calculations during the k-means EM, and also allows you to cluster based on specific regions, such as promoters or coding sequences. + +----- + +This tool will cluster the rows in an aligned matrix with KMeans_. The implementation builds upon the KMeansPlusPlusClusterer available in commons-math3_. + +.. _KMeans: http://en.wikipedia.org/wiki/K-means_clustering + +.. _commons-math3: http://commons.apache.org/math/ + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/LogTransform.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/LogTransform.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,51 @@ + + a (Big)Wig file + galaxyToolRunner.sh wigmath.LogTransform -i $input -b $base -o $output + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/MapDyads.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/MapDyads.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,41 @@ + + from sequencing reads + + galaxyToolRunner.sh nucleosomes.MapDyads -i $input -a ${chromInfo} -o $output + #if $type.read == 'single' + -s $type.size + #end if + + + + + + + + + + + + + + + + + + + + + + +This tool produces a Wig file with the number of dyads at each base pair. For paired-end MNase data, dyads are approximated using the center of the fragment. For Bed/BedGraph formatted input, this means the center of the interval; for SAM/BAM formatted input, this means the middle between the 5' end of mate 1 and the 5' end of mate 2. For single-end data, the estimated mononucleosome fragment length (N) must be specified, which will be used to offset reads from the + and - strands by +/- N/2. + +.. 
class:: warningmark + +This tool requires sequencing reads in SAM, BAM, Bed, or BedGraph format. + +.. class:: warningmark + +Since BedGraph format does not contain strand information, all reads in BedGraph format are considered to be on the + strand. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/MatrixAligner.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/MatrixAligner.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,80 @@ + + for a heatmap + galaxyToolRunner.sh visualization.MatrixAligner -i $input -l $loci -m $M -o $output + + + + + + + + + + + + +This tool aligns sequencing data into a rectangular matrix for creating a heatmap with matrix2png. Data from each interval is lined up on the specified alignment point (column 5 in the Bed file), and flipped if on the - strand so that all intervals are 5'-to-3' from left-to-right. + +Intervals with alignment points must be provided in the following extended Bed format :: + + chr low high id alignment strand + +The heatmap is created by taking each interval in the **List of Intervals**, retrieving the data for that interval from the Wig file, and adding it as a new row in the matrix. Intervals are processed in their original order. + +----- + +**Syntax** + +- **Sequencing data** is the genomic data used to create the matrix +- **List of intervals** is a list of intervals in Bed format with alignment points +- **Maximum row length** is the maximum allowed width of the matrix. If aligned intervals extend outside of this width, they will be truncated. + +----- + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. + +.. class:: warningmark + +Large heatmap matrices may require a long time to generate in Galaxy because it validates that the output is in correct tab-delimited format.
To reduce the size of an MxN matrix with large M, rows (N) can be truncated using the maximum row length parameter. Rows are truncated from the alignment point (symmetrically) if possible, or as nearly symmetrically as possible. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/MovingAverageSmooth.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/MovingAverageSmooth.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,71 @@ + + a (Big)Wig file + galaxyToolRunner.sh wigmath.MovingAverageSmooth -i $input -w $W -o $output + + + + + + + + + + + + + +This tool smooths genomic data with a mean_ filter of the specified width. + +.. _mean: http://en.wikipedia.org/wiki/Moving_average + +.. class:: warningmark + +Note that for the moving average to be perfectly symmetric, the window should be an odd number of base pairs. + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/Multiply.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/Multiply.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,27 @@ + + (Big)Wig files + + galaxyToolRunner.sh wigmath.Multiply -o $output + #for $input in $inputs + ${input.file} + #end for + + + + + + + + + + + + +This tool multiplies Wig or BigWig files base pair by base pair. + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. 
+ + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/PairOverlappingNucleosomes.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/PairOverlappingNucleosomes.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,18 @@ + + by overlap + galaxyToolRunner.sh nucleosomes.PairOverlappingNucleosomes -a $input1 -b $input2 -m $N -o $output + + + + + + + + + + + +This tool will pair overlapping nucleosomes from two sets of nucleosome calls. In the event that multiple calls overlap, the one with the largest overlap is selected as a match. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/PercusDecomposition.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/PercusDecomposition.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,25 @@ + + from occupancy data + galaxyToolRunner.sh nucleosomes.PercusDecomposition -d $dyads -n $N -o $output + + + + + + + + + + +This tool derives an external potential energy function from experimental nucleosome positioning data by assuming that nucleosomes interact with DNA like a fluid of hard rods. This energy function can then be used to derive sequence-specific nucleosome formation preferences, while accounting for hard-core steric restriction by adjacent nucleosomes. This tool is a reimplementation of the algorithm described in (Locke et al. 2010). + +----- + +**Citations** + +Locke G, Tolkunov D, Moqtaderi Z, Struhl K and Morozov AV (2010) High-throughput sequencing reveals a simple model of nucleosome energetics. Proceedings of the National Academy of Sciences 107: 20998–21003 + +Percus JK (1976) Equilibrium state of a classical fluid of hard rods in an external field. 
J Stat Phys 15: 505–511 + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/Phasogram.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/Phasogram.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,27 @@ + + of dyads + galaxyToolRunner.sh nucleosomes.Phasogram -i $input -m $max -o $output + + + + + + + + + + +This tool calculates the phase distribution of sequencing data. It can be used to identify genome-wide periodicities. Phase counts are aggregated for each base pair across the genome. This is equivalent to summing the autocovariance of a sliding window across the genome. The tool is a reimplementation of the algorithm described in (Valouev et al. 2011). + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. + +----- + +**Citation** + +Valouev A, Johnson SM, Boyd SD, Smith CL, Fire AZ and Sidow A (2011) Determinants of nucleosome organization in primary human cells. Nature 474: 516–520 + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/PowerSpectrum.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/PowerSpectrum.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,59 @@ + + of data in a Wiggle file + galaxyToolRunner.sh ngs.PowerSpectrum -i $input -l $windows -m $max -o $output + + + + + + + + + + + +This tool computes the power spectrum of intervals of sequencing data. For each interval provided, the normalized power spectrum is calculated, representing the relative power in each frequency. Power spectra are normalized to have total power 1, with the DC component (0 frequency) removed. Power spectra are computed using the FFT_ implementation in JTransforms_. + +.. _FFT: http://en.wikipedia.org/wiki/Fast_Fourier_transform + +.. 
_JTransforms: http://sites.google.com/site/piotrwendykier/software/jtransforms + +----- + +**Syntax** + +- **Input data** is the genomic data on which to compute the power spectrum. +- **List of intervals:** The power spectrum will be computed for each genomic interval specified in this list. +- **Number of frequencies:** The power spectrum will be truncated at this frequency in the output + +----- + +**Output** + +The output has the following format :: + + chr start stop id alignment strand freq1 freq2 ... + +up to the maximum frequency specified. Frequencies are truncated to reduce the size of the output since signals are often band-limited. + +----- + +.. class:: warningmark + +**NOTE:** Even though frequencies may be truncated in the output, all frequencies in the power spectrum are computed and used for normalization. + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. Intervals must be provided in Bed, BedGraph, or GFF format. + +----- + +This tool is equivalent to the following Matlab commands, where x is a vector with the interval of sequencing data :: + + N = length(x); + f = fft(x); + p = abs(f(2:N/2)).^2; + p = p / sum(p); + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/ReadLengthDistributionMatrix.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/ReadLengthDistributionMatrix.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,63 @@ + + across a genomic interval + galaxyToolRunner.sh ngs.ReadLengthDistributionMatrix -i $input --chr $chr --start $start --stop $stop --min $min --max $max --bin $bin -o $output + + + + + + + + + + + + + + + +This tool will create a matrix (in matrix2png_ format) with the distribution of read lengths over each base pair. 
Reads are binned by genomic location and length to create a matrix where each column represents the distribution of read lengths over that base pair. The resulting matrix can be turned into a heatmap using the Visualization -> Make heatmap with matrix2png tool. + +.. _matrix2png: http://bioinformatics.ubc.ca/matrix2png/dataformat.html + +.. class:: warningmark + +This tool requires paired-end SAM, BAM, Bed, or BedGraph formatted data. Using single-end data will result in a constant read length. + +----- + +**Syntax** + +- **Mapped reads** are the mapped paired-end reads used to make the histograms +- **Chromosome** a locus in the genome +- **Start base pair** a locus in the genome +- **Stop base pair** a locus in the genome +- **Minimum fragment length** is the lowest fragment length bin. Reads shorter than this will be ignored. +- **Maximum fragment length** is the highest fragment length bin. Reads longer than this will be ignored. +- **Fragment length bin size** is the bin size used when making the fragment length histograms + +----- + +**Example** + +Make a matrix with the read length distribution across the region chrI:5001-6000, looking at reads 100-200bp in length in bins of 1bp: + +- **Chromosome:** chrI +- **Start:** 5001 +- **Stop:** 6000 +- **Minimum fragment length:** 100 +- **Maximum fragment length:** 200 +- **Fragment length bin size:** 1 + +The resulting matrix will be 1000x101, with each column representing a base pair and each row representing a read length. The column headers give the base pair and the row headers give the read length. + +----- + +**Citation** + +This tool was inspired by the analysis and figures in + +Floer M, Wang X, Prabhu V, Berrozpe G, Narayan S, Spagna D, Alvarez D, Kendall J, Krasnitz A, Stepansky A, Hicks J, Bryant GO and Ptashne M (2010) A RSC/nucleosome complex determines chromatin architecture and facilitates activator binding.
Cell 141: 407–418 + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/RollingReadLength.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/RollingReadLength.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,20 @@ + + over each locus + galaxyToolRunner.sh ngs.RollingReadLength -i $input -a ${chromInfo} -o $output + + + + + + + + + +This tool will compute the mean length of all fragments overlapping a given locus, and can be used to identify sites with exceptionally long or short reads. + +.. class:: warningmark + +This tool requires paired-end SAM, BAM, Bed, or BedGraph formatted data. Using single-end data will result in a constant read length. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/RomanNumeralize.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/RomanNumeralize.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,22 @@ + + on any file + galaxyToolRunner.sh converters.RomanNumeralize -i $input -o $output + + + + + + + + + + + + + + + +This tool scans any file with chromosomal coordinates of the form "chr5" and replaces them with "chrV". + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/Scale.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/Scale.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,60 @@ + + a (Big)Wig file + galaxyToolRunner.sh wigmath.Scale -i $input -m $M $chr -o $output + + + + + + + + + + + + + + +This tool will multiply all values in a Wig file by a scale factor. For example, this can be used to normalize to read depth by multiplying by 1/(# reads). By default, the tool will scale to 1/(mean value), which is equivalent to dividing by coverage and multiplying by the size of the genome. The resulting output file should have mean 1. + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. 
+ + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/Shift.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/Shift.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,21 @@ + + a (Big)Wig file + galaxyToolRunner.sh wigmath.Shift -i $input -m $M $chr -o $output + + + + + + + + + + +This tool will shift all values in a Wig file by a scalar so that the output has the desired mean. + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/StripMatrix.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/StripMatrix.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,37 @@ + + from an aligned matrix + galaxyToolRunner.sh visualization.StripMatrix -i $input -o $output + + + + + + + + + + + + + + + +This tool is intended to strip the column/row headers off of an aligned matrix (in matrix2png format) for easy import into Matlab or other software where only the data values are required. It removes the first row and first column from a tabular file. + +----- + +**Example** + +If the following tabular matrix is used as input :: + + ID col1 col2 col3 + row1 2 4 5 + row2 5 1 1 + +then the following tabular matrix will be produced as output :: + + 2 4 5 + 5 1 1 + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/Subtract.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/Subtract.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,47 @@ + + two (Big)Wig files + galaxyToolRunner.sh wigmath.Subtract -m $minuend -s $subtrahend -o $output + + + + + + + + + + + + + +This tool will subtract the values in one Wig file from another, base pair by base pair. + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. 
Use "edit attributes" to set the correct format if it was not detected correctly. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/Summary.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/Summary.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,83 @@ + + of a (Big)Wig file + galaxyToolRunner.sh wigmath.Summary -i $input -o $output + + + + + + + + + + + + + + + + + + + + + + + +This tool will output a summary of a Wig or BigWig file, including information about the chromosomes and types of contigs in the Wig file, as well as basic descriptive statistics. + +----- + +**Example:** + +The following is an example of the output of this tool :: + + ASCII Text Wiggle file: track type=wiggle_0 + Chromosomes: + 2micron start=1 stop=6318 + chrVI start=1 stop=270148 + chrI start=1 stop=230208 + chrIII start=1 stop=316617 + chrXII start=1 stop=1078175 + chrXV start=1 stop=1091289 + chrXVI start=1 stop=948062 + chrII start=1 stop=813178 + chrVIII start=1 stop=562643 + chrX start=1 stop=745742 + chrXIII start=1 stop=924429 + chrV start=1 stop=576869 + chrXIV start=1 stop=784333 + chrIV start=1 stop=1531919 + chrXI start=1 stop=666454 + chrIX start=1 stop=439885 + chrM start=1 stop=85779 + chrVII start=1 stop=1090947 + Contigs: + fixedStep chrom=2micron start=1 span=1 step=1 + fixedStep chrom=chrVI start=1 span=1 step=1 + fixedStep chrom=chrI start=1 span=1 step=1 + fixedStep chrom=chrIII start=1 span=1 step=1 + fixedStep chrom=chrXII start=1 span=1 step=1 + fixedStep chrom=chrXVI start=1 span=1 step=1 + fixedStep chrom=chrXV start=1 span=1 step=1 + fixedStep chrom=chrII start=1 span=1 step=1 + fixedStep chrom=chrVIII start=1 span=1 step=1 + fixedStep chrom=chrXIII start=1 span=1 step=1 + fixedStep chrom=chrX start=1 span=1 step=1 + fixedStep chrom=chrV start=1 span=1 step=1 + fixedStep chrom=chrXIV start=1 span=1 step=1 + fixedStep chrom=chrIV start=1 span=1 step=1 + fixedStep chrom=chrXI start=1 span=1 step=1 + fixedStep chrom=chrIX start=1 span=1 step=1 + 
fixedStep chrom=chrM start=1 span=1 step=1 + fixedStep chrom=chrVII start=1 span=1 step=1 + Basic Statistics: + Mean: 1.000000164913575 + Standard Deviation: 1.8843731523620193 + Total: 1.2162997005843896E7 + Bases Covered: 12162995 + Min value: 0.0 + Max value: 277.98996 + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/ValueDistribution.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/ValueDistribution.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,53 @@ + + of a (Big)Wig file + galaxyToolRunner.sh wigmath.ValueDistribution -i $input + #if str( $min ) != '' + --min $min + #end if + + #if str( $max ) != '' + --max $max + #end if + + -n $bins -o $output + + + + + + + + + + + + + +This tool computes a histogram of the values in a Wig file, as well as the moments of the distribution. + +----- + +**Syntax** + +- **Input data** is the genomic data used to compute the histogram. +- **Minimum bin value** is the smallest bin. If unset, it is equal to the minimum value in the input data +- **Maximum bin value** is the largest bin. If unset, it is equal to the maximum value in the input data +- **Number of bins** is the number of bins to use. The bin size will be equal to (max - min) / (# bins). + +----- + +**Output** + +The output is in 2-column tabular format, where the first column represents the lower edge of a bin interval and the second column represents the number of values that fell in that bin. For example if the **minimum bin value** is 0, the **maximum bin value** is 0.3, and the **number of bins** is 3, then the following output might be produced :: + + bin count + <0 3 + 0 1 + 0.1 10 + 0.2 4 + >0.3 12 + +where there were 3 values in (-inf, 0), 1 value in [0, 0.1), 10 values in [0.1, 0.2), 4 values in [0.2, 0.3), and 12 values in [0.3, inf).
+ + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/WaveletTransform.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/WaveletTransform.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,40 @@ + + across a genomic interval + galaxyToolRunner.sh ngs.WaveletTransform -i $input -w $wavelet --chr $chr --start $start --stop $stop --min $min --max $max --step $N -o $output + + + + + + + + + + + + + + + + +This tool will perform a Wavelet_ scaling_ analysis on an interval of genomic data. For each base pair in the interval, the similarity (correlation) is calculated between the data and the wavelet over a range of scales. This can be used to identify high-frequency and low-frequency features in the data. The output is a matrix in matrix2png format that can be used to generate a heatmap: along the x-axis (columns) are the base pairs in the interval, along the y-axis (rows) are the correlation coefficients for each Wavelet size, with the largest Wavelet scale at the top and the smallest scale at the bottom. + +.. _Wavelet: http://en.wikipedia.org/wiki/Wavelet + +.. _scaling: http://en.wikipedia.org/wiki/Scaleogram + +----- + +**Syntax** + +- **Input data** is the genomic data on which to compute the Wavelet scaling analysis. +- **Wavelet** a single column of values representing a discrete Wavelet. +- **Chromosome** a locus in the genome +- **Start base pair** a locus in the genome +- **Stop base pair** a locus in the genome +- **Minimum wavelet size** the smallest Wavelet to analyze +- **Maximum wavelet size** the largest Wavelet to analyze +- **Scaling step size** the step size for the range of wavelet scales. The provided wavelet will be scaled to each size in the set min:step:max by interpolating the provided wavelet.
+ + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/ZScore.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/ZScore.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,51 @@ + + a (Big)Wig file + galaxyToolRunner.sh wigmath.ZScore -i $input $chr -o $output + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +This tool will compute normal scores (Z-scores) for each of the values in a Wig file. For each base pair, the Z-scored value is equal to the deviance from the mean divided by the standard deviation (i.e. the number of standard deviations a value is away from the mean). The output file should have mean 0 and standard deviation 1. + +.. class:: infomark + +This tool is equivalent to using the **Mean Shift** tool to shift a Wig file to mean 0, then using the **Scale** tool to scale by 1/(standard deviation). + +.. class:: infomark + +**TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. + + + diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/galaxyToolRunner.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/galaxyToolRunner.sh Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +if [ $# -eq 0 ] +then + echo "USAGE: galaxyToolRunner.sh APPNAME [ARGS]"; + exit; +fi + +# Verify that the user has Java 7 installed +# Otherwise there will be an obscure UnsupportedClassVersion error +version=$(java -version 2>&1 | awk -F '"' '/version/ {print $2}') +if [[ "$version" < "1.7" ]]; then + echo "Need Java 7 or greater. You have Java $version installed." 
+ exit +fi + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +java -Dlog4j.configuration=log4j.properties -cp $DIR:$DIR/../build:$DIR/../dist/*:$DIR/../lib/* edu.unc.genomics."$@" \ No newline at end of file diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/log4j.properties --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/log4j.properties Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,12 @@ +log4j.rootLogger=info, stdout + +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout + +# Pattern to output the caller's file name and line number. +log4j.appender.stdout.layout.ConversionPattern=%m%n + +# Only output errors from the BigWig library +log4j.logger.org.broad.igv.bbfile=ERROR +# Only output errors from java-genomics-io +log4j.logger.edu.unc.genomics.io=ERROR \ No newline at end of file diff -r 01d5d20eaadd -r a77e126ae856 galaxy-conf/matrix2png.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/galaxy-conf/matrix2png.xml Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,114 @@ + + using matrix2png + matrix2png + matrix2png -data $input + #if str( $range ) != '' + -range $range + #end if + + -con $con + -size $size + -numcolors $numcolors + -minsize $minsize + -mincolor $mincolor + -maxcolor $maxcolor + -bkgcolor $bkgcolor + -missingcolor $missingcolor + + #if str( $map ) != '' + -map $map + #end if + + #if str( $discrete_color_maps.discrete ) != 'false' + -discrete + -dmap $dmap + #end if + + #if str( $numr ) != '' + -numr $numr + #end if + + #if str( $numc ) != '' + -numc $numc + #end if + + -startrow $startrow + -startcol $startcol + + #if str( $trim ) != '0' + -trim $trim + #end if + + #if str( $title ) != '' + -title '$title' + #end if + + $z $b $d $s $r $c $f $e $l $u + + > $output + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: warningmark + +This tool requires that matrix2png be installed and available in Galaxy's PATH. 
+ +.. class:: warningmark + +This tool requires tabular data with column AND row headers. For more information about the required format and usage instructions, see the matrix2png_ website. + +.. _matrix2png: http://bioinformatics.ubc.ca/matrix2png/dataformat.html + +.. class:: warningmark + +It is recommended to specify the colorspace range since outliers will often skew it otherwise. + + + diff -r 01d5d20eaadd -r a77e126ae856 lib/BigWig.jar Binary file lib/BigWig.jar has changed diff -r 01d5d20eaadd -r a77e126ae856 lib/commons-lang3-3.1.jar Binary file lib/commons-lang3-3.1.jar has changed diff -r 01d5d20eaadd -r a77e126ae856 lib/commons-math3-3.0.jar Binary file lib/commons-math3-3.0.jar has changed diff -r 01d5d20eaadd -r a77e126ae856 lib/dnaproperties-1732.jar Binary file lib/dnaproperties-1732.jar has changed diff -r 01d5d20eaadd -r a77e126ae856 lib/hamcrest-core-1.1.0.jar Binary file lib/hamcrest-core-1.1.0.jar has changed diff -r 01d5d20eaadd -r a77e126ae856 lib/java-genomics-io.jar Binary file lib/java-genomics-io.jar has changed diff -r 01d5d20eaadd -r a77e126ae856 lib/jcommander-1.27.jar Binary file lib/jcommander-1.27.jar has changed diff -r 01d5d20eaadd -r a77e126ae856 lib/jtransforms-2.4.jar Binary file lib/jtransforms-2.4.jar has changed diff -r 01d5d20eaadd -r a77e126ae856 lib/junit.jar Binary file lib/junit.jar has changed diff -r 01d5d20eaadd -r a77e126ae856 lib/log4j-1.2.15.jar Binary file lib/log4j-1.2.15.jar has changed diff -r 01d5d20eaadd -r a77e126ae856 lib/picard-1.67.jar Binary file lib/picard-1.67.jar has changed diff -r 01d5d20eaadd -r a77e126ae856 lib/sam-1.67.jar Binary file lib/sam-1.67.jar has changed diff -r 01d5d20eaadd -r a77e126ae856 sam_fa_indices.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sam_fa_indices.loc.sample Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,28 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. 
You will need +#to create these data files and then create a sam_fa_indices.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The sam_fa_indices.loc +#file has this format (white space characters are TAB characters): +# +#index +# +#So, for example, if you had hg18 indexed stored in +#/depot/data2/galaxy/sam/, +#then the sam_fa_indices.loc entry would look like this: +# +#index hg18 /depot/data2/galaxy/sam/hg18.fa +# +#and your /depot/data2/galaxy/sam/ directory +#would contain hg18.fa and hg18.fa.fai files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.fa +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.fa.fai +# +#Your sam_fa_indices.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#index hg18 /depot/data2/galaxy/sam/hg18.fa +#index hg19 /depot/data2/galaxy/sam/hg19.fa diff -r 01d5d20eaadd -r a77e126ae856 tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue Jun 19 22:15:09 2012 -0400 @@ -0,0 +1,118 @@ + + + + + value, dbkey, name, path + +
+ + + value, dbkey, formats, name, path + +
+ + + value, name, path + +
+ + + value, name, path + +
+ + + value, dbkey, name, path + +
+ + + value, dbkey, name, path + +
+ + + value, dbkey, name, path + +
+ + + value, dbkey, name, path + +
+ + + value, dbkey, name, path + +
+ + + name, value, dbkey, species + +
+ + + value, dbkey, name, path + +
+ + + value, name, path + +
+ + + value, name, path + +
+ + + value, name, path + +
+ + + line_type, value, path + +
+ + + value, dbkey, name, path + +
+ + + value, dbkey, name, path + +
+ + + value, name, gatk_value, tools_valid_for + +
+ + + value, dbkey, name, path + +
+ + + value, dbkey, name, path + +
+ + + value, dbkey, name, path + +
+ + + value, name, path + +
+ + + value, dbkey, name, path + +
+