# HG changeset patch # User lparsons # Date 1381172473 14400 # Node ID 580ee0c4bc4e123bc4b2574f57f33e09b3e3872d # Parent cc5eaa9376d85d5ae9d83d44a27ca6d3d452174a Fixes from Bjorn Gruning: create symlinks under $TMP and clean them up afterwards, replace R dependency with the Tool Shed R3 package, add --install-scripts, prepend tool-ids with rseqc diff -r cc5eaa9376d8 -r 580ee0c4bc4e RPKM_count.xml --- a/RPKM_count.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/RPKM_count.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,75 +1,75 @@ - - calculates raw count and RPKM values for transcript at exon, intron, and mRNA level - - numpy - rseqc - + + calculates raw count and RPKM values for transcript at exon, intron, and mRNA level + + numpy + rseqc + ln -s "${input}" "local_input.bam" && ln -s "${input.metadata.bam_index}" "local_input.bam.bai" && RPKM_count.py -i "local_input.bam" -o output -r $refgene - #if str($strand_type.strand_specific) == "pair" - -d - #if str($strand_type.pair_type) == "sd" - '1++,1--,2+-,2-+' - #else - '1+-,1-+,2++,2--' - #end if - #end if - - #if str($strand_type.strand_specific) == "single" - -d - #if str($strand_type.single_type) == "s" - '++,--' - #else - '+-,-+' - #end if - #end if - - #if $skiphits - -u - #end if - - #if $onlyexonic - -e - #end if + #if str($strand_type.strand_specific) == "pair" + -d + #if str($strand_type.pair_type) == "sd" + '1++,1--,2+-,2-+' + #else + '1+-,1-+,2++,2--' + #end if + #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + #if str($strand_type.strand_specific) == "single" + -d + #if str($strand_type.single_type) == "s" + '++,--' + #else + '+-,-+' + #end if + #end if + + #if $skiphits + -u + #end if + + #if $onlyexonic + -e + #end if + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RPKM_count.py +++++++++++++ @@ -83,22 +83,22 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Reference gene model - Gene model in BED format. + Gene model in BED format. Strand sequencing type (default=none) - See Infer Experiment tool if uncertain. + See Infer Experiment tool if uncertain. Options ++++++++++++++ Skip Multiple Hit Reads - Use Multiple hit reads or use only uniquely mapped reads. + Use Multiple hit reads or use only uniquely mapped reads. Only use exonic reads - Renders program only used exonic (UTR exons and CDS exons) reads, otherwise use all reads. + Renders program only used exonic (UTR exons and CDS exons) reads, otherwise use all reads. Sample Output ++++++++++++++ @@ -113,7 +113,7 @@ chr1 29313959 29314417 NM_001166007_exon_2 0 '+' 1699 4 74.158 0.175 chr1 29319841 29320054 NM_001166007_exon_3 0 '+' 528 1 49.554 0.094 ===== ======== ======== ===================== ===== =========== ============= ============= ======== ========= - + ----- About RSeQC @@ -128,5 +128,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - + diff -r cc5eaa9376d8 -r 580ee0c4bc4e RPKM_saturation.xml --- a/RPKM_saturation.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/RPKM_saturation.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,72 +1,72 @@ - - calculates raw count and RPKM values for transcript at exon, intron, and mRNA level - - R - numpy - rseqc - - RPKM_saturation.py -i $input -o output -r $refgene - - #if str($strand_type.strand_specific) == "pair" - -d - #if str($strand_type.pair_type) == "sd" - '1++,1--,2+-,2-+' - #else - '1+-,1-+,2++,2--' - #end if - #end if - - #if str($strand_type.strand_specific) == "single" - -d - #if str($strand_type.single_type) == "s" - '++,--' - #else - '+-,-+' - #end if - #end if - - -l $percentileFloor -u $percentileCeiling -s $percentileStep -c $rpkmCutoff + + calculates raw count and RPKM values for transcript at exon, intron, and mRNA level + + R + numpy + rseqc + + RPKM_saturation.py -i $input -o output -r $refgene - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + #if str($strand_type.strand_specific) == "pair" + -d + #if str($strand_type.pair_type) == "sd" + '1++,1--,2+-,2-+' + #else + '1+-,1-+,2++,2--' + #end if + #end if + + #if str($strand_type.strand_specific) == "single" + -d + #if str($strand_type.single_type) == "s" + '++,--' + #else + '+-,-+' + #end if + #end if + + -l $percentileFloor -u $percentileCeiling -s $percentileStep -c $rpkmCutoff + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RPKM_saturation.py ++++++++++++++++++ @@ -93,22 +93,22 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Reference gene model - Gene model in BED format. + Gene model in BED format. Strand sequencing type (default=none) - See Infer Experiment tool if uncertain. + See Infer Experiment tool if uncertain. Options ++++++++++++++ Skip Multiple Hit Reads - Use Multiple hit reads or use only uniquely mapped reads. + Use Multiple hit reads or use only uniquely mapped reads. Only use exonic reads - Renders program only used exonic (UTR exons and CDS exons) reads, otherwise use all reads. + Renders program only used exonic (UTR exons and CDS exons) reads, otherwise use all reads. Output ++++++++++++++ @@ -124,10 +124,10 @@ :scale: 80 % - All transcripts were sorted in ascending order according to expression level (RPKM). Then they are divided into 4 groups: - 1. Q1 (0-25%): Transcripts with expression level ranked below 25 percentile. - 2. Q2 (25-50%): Transcripts with expression level ranked between 25 percentile and 50 percentile. - 3. Q3 (50-75%): Transcripts with expression level ranked between 50 percentile and 75 percentile. - 4. Q4 (75-100%): Transcripts with expression level ranked above 75 percentile. + 1. Q1 (0-25%): Transcripts with expression level ranked below 25 percentile. + 2. Q2 (25-50%): Transcripts with expression level ranked between 25 percentile and 50 percentile. + 3. Q3 (50-75%): Transcripts with expression level ranked between 50 percentile and 75 percentile. + 4. Q4 (75-100%): Transcripts with expression level ranked above 75 percentile. - BAM/SAM file containing more than 100 million alignments will make module very slow. - Follow example below to visualize a particular transcript (using R console):: @@ -156,5 +156,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - + diff -r cc5eaa9376d8 -r 580ee0c4bc4e bam2wig.xml --- a/bam2wig.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/bam2wig.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,92 +1,101 @@ - - - converts all types of RNA-seq data from .bam to .wig - - - R - numpy - rseqc - - - ln -s "${input}" "local_input.bam" && - ln -s "${input.metadata.bam_index}" "local_input.bam.bai" && - bam2wig.py -i "local_input.bam" -s $chromsize -o outfile + + + converts all types of RNA-seq data from .bam to .wig + + + R + numpy + rseqc + + + + #import tempfile, os + #set $tmp_input = tempfile.NamedTemporaryFile() + #set $tmp_input_name = $input_singles_tmp_handle.name + #silent $tmp_input.close() + + ln -s "${input}" $tmp_input_name && + ln -s "${input.metadata.bam_index}" $tmp_input_name + ".bai" && + bam2wig.py -i "local_input.bam" -s $chromsize -o outfile - #if str($strand_type.strand_specific) == "pair" - -d - #if str($strand_type.pair_type) == "sd" - '1++,1--,2+-,2-+' - #else - '1+-,1-+,2++,2--' - #end if - #end if + #if str($strand_type.strand_specific) == "pair" + -d + #if str($strand_type.pair_type) == "sd" + '1++,1--,2+-,2-+' + #else + '1+-,1-+,2++,2--' + #end if + #end if - #if str($strand_type.strand_specific) == "single" - -d - #if str($strand_type.single_type) == "s" - '++,--' - #else - '+-,-+' - #end if - #end if + #if str($strand_type.strand_specific) == "single" + -d + #if str($strand_type.single_type) == "s" + '++,--' + #else + '+-,-+' + #end if + #end if - #if $wigsum.wigsum_type - -t $wigsum.totalwig - #end if + #if $wigsum.wigsum_type + -t $wigsum.totalwig + #end if - #if $skipmultihits - -u - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - strand_type['strand_specific'] == 'none' - - - strand_type['strand_specific'] != 'none' - - - strand_type['strand_specific'] != 'none' - - + #if $skipmultihits + -u + #end if + ; + rm $tmp_input_name + ".bai" ; + rm $tmp_input_name + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + strand_type['strand_specific'] == 'none' + + + strand_type['strand_specific'] != 'none' + + + strand_type['strand_specific'] != 'none' + + - + bam2wig.py ++++++++++ @@ -103,19 +112,19 @@ ++++++++++++++ Input BAM file - Alignment file in BAM format (SAM is not supported). BAM file will be sorted and indexed using samTools. + Alignment file in BAM format (SAM is not supported). BAM file will be sorted and indexed using samTools. Chromosome size file - Tab or space separated text file with 2 columns: first column is chromosome name, second column is size of the chromosome. Chromosome names (such as "chr1") should be consistent between this file and BAM file. + Tab or space separated text file with 2 columns: first column is chromosome name, second column is size of the chromosome. Chromosome names (such as "chr1") should be consistent between this file and BAM file. Specified wigsum (default=none) - Specified wigsum. Wigsum of 100000000 equals to coverage achieved by 1 million 100nt reads. Ignore this option to disable normalization. + Specified wigsum. Wigsum of 100000000 equals to coverage achieved by 1 million 100nt reads. Ignore this option to disable normalization. Skip multiple Hit reads - skips multiple hit reads or only use uniquely mapped reads + skips multiple hit reads or only use uniquely mapped reads Strand-specific (default=none) - How read(s) were stranded during sequencing. If you are not sure about the strand rule, run infer_experiment.py + How read(s) were stranded during sequencing. If you are not sure about the strand rule, run infer_experiment.py Outputs ++++++++++++++ @@ -143,5 +152,5 @@ .. _wiggle: http://genome.ucsc.edu/goldenPath/help/wiggle.html .. _bigwig: http://genome.ucsc.edu/FAQ/FAQformat.html#format6.1 - + diff -r cc5eaa9376d8 -r 580ee0c4bc4e bam_stat.xml --- a/bam_stat.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/bam_stat.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,26 +1,26 @@ - - - reads mapping statistics for a provided BAM or SAM file. - - - numpy - rseqc - s - - bam_stat.py -i $input -q $mapqual 2> $output - - - - - - - - + + + reads mapping statistics for a provided BAM or SAM file. + + + numpy + rseqc + s + + bam_stat.py -i $input -q $mapqual 2> $output + - + + + + + + + + bam_stat.py +++++++++++ @@ -61,5 +61,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ .. _`mapping quality`: http://genome.sph.umich.edu/wiki/Mapping_Quality_Scores - + diff -r cc5eaa9376d8 -r 580ee0c4bc4e clipping_profile.xml --- a/clipping_profile.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/clipping_profile.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,27 +1,27 @@ - - - estimates clipping profile of RNA-seq reads from BAM or SAM file - - - R - numpy - rseqc - - - clipping_profile.py -i $input -o output - - - - - - - - + + + estimates clipping profile of RNA-seq reads from BAM or SAM file + + + R + numpy + rseqc + + + clipping_profile.py -i $input -o output + - + + + + + + + + clipping_profile.py +++++++++++++++++++ @@ -42,7 +42,7 @@ .. image:: http://rseqc.sourceforge.net/_images/clipping_good.png :height: 600 px :width: 600 px - :scale: 80 % + :scale: 80 % ----- @@ -57,5 +57,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - + diff -r cc5eaa9376d8 -r 580ee0c4bc4e geneBody_coverage.xml --- a/geneBody_coverage.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/geneBody_coverage.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,29 +1,29 @@ - - - Read coverage over gene body. - - - R - numpy - rseqc - - - geneBody_coverage.py -i $input -r $refgene -o output - - - - - - - - - - + + + Read coverage over gene body. + + + R + numpy + rseqc + + + geneBody_coverage.py -i $input -r $refgene -o output + - + + + + + + + + + + geneBody_coverage.py ++++++++++++++++++++ @@ -54,7 +54,7 @@ .. image:: http://rseqc.sourceforge.net/_images/geneBody_coverage.png :height: 600 px :width: 600 px - :scale: 80 % + :scale: 80 % ----- diff -r cc5eaa9376d8 -r 580ee0c4bc4e geneBody_coverage2.xml --- a/geneBody_coverage2.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/geneBody_coverage2.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,29 +1,29 @@ - - - Read coverage over gene body. - - - R - numpy - rseqc - - - geneBody_coverage2.py -i $input -r $refgene -o output - - - - - - - - - - + + + Read coverage over gene body + + + R + numpy + rseqc + + + geneBody_coverage2.py -i $input -r $refgene -o output + - + + + + + + + + + + geneBody_coverage2.py +++++++++++++++++++++ @@ -35,10 +35,10 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Reference gene model - Gene Model in BED format. + Gene Model in BED format. Outputs @@ -67,5 +67,5 @@ - + diff -r cc5eaa9376d8 -r 580ee0c4bc4e infer_experiment.xml --- a/infer_experiment.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/infer_experiment.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,35 +1,35 @@ - - speculates how RNA-seq were configured - - numpy - rseqc - - infer_experiment.py -i $input -r $refgene - - #if $sample_size.boolean - -s $sample_size.size - #end if - - > $output - - - - - - - - - - - - - - + + speculates how RNA-seq were configured + + numpy + rseqc + + + infer_experiment.py -i $input -r $refgene + #if $sample_size.boolean + -s $sample_size.size + #end if + + > $output + - + + + + + + + + + + + + + + infer_experiment.py +++++++++++++++++++ @@ -42,13 +42,13 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Reference gene model - Gene model in BED format. + Gene model in BED format. Number of usable sampled reads (default=200000) - Number of usable reads sampled from SAM/BAM file. More reads will give more accurate estimation, but make program little slower. + Number of usable reads sampled from SAM/BAM file. More reads will give more accurate estimation, but make program little slower. Outputs +++++++ @@ -88,37 +88,37 @@ **Example1** :: - ========================================================= - This is PairEnd Data :: + ========================================================= + This is PairEnd Data :: - Fraction of reads explained by "1++,1--,2+-,2-+": 0.4992 - Fraction of reads explained by "1+-,1-+,2++,2--": 0.5008 - Fraction of reads explained by other combinations: 0.0000 - ========================================================= + Fraction of reads explained by "1++,1--,2+-,2-+": 0.4992 + Fraction of reads explained by "1+-,1-+,2++,2--": 0.5008 + Fraction of reads explained by other combinations: 0.0000 + ========================================================= *Conclusion*: We can infer that this is NOT a strand specific because 50% of reads can be explained by "1++,1--,2+-,2-+", while the other 50% can be explained by "1+-,1-+,2++,2--". **Example2** :: - ============================================================ - This is PairEnd Data + ============================================================ + This is PairEnd Data - Fraction of reads explained by "1++,1--,2+-,2-+": 0.9644 :: - Fraction of reads explained by "1+-,1-+,2++,2--": 0.0356 - Fraction of reads explained by other combinations: 0.0000 - ============================================================ - + Fraction of reads explained by "1++,1--,2+-,2-+": 0.9644 :: + Fraction of reads explained by "1+-,1-+,2++,2--": 0.0356 + Fraction of reads explained by other combinations: 0.0000 + ============================================================ + *Conclusion*: We can infer that this is a strand-specific RNA-seq data. strandness of read1 is consistent with that of gene model, while strandness of read2 is opposite to the strand of reference gene model. **Example3** :: - ========================================================= - This is SingleEnd Data :: + ========================================================= + This is SingleEnd Data :: - Fraction of reads explained by "++,--": 0.9840 :: - Fraction of reads explained by "+-,-+": 0.0160 - Fraction of reads explained by other combinations: 0.0000 - ========================================================= + Fraction of reads explained by "++,--": 0.9840 :: + Fraction of reads explained by "+-,-+": 0.0160 + Fraction of reads explained by other combinations: 0.0000 + ========================================================= *Conclusion*: This is single-end, strand specific RNA-seq data. Strandness of reads are concordant with strandness of reference gene. @@ -137,5 +137,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - + diff -r cc5eaa9376d8 -r 580ee0c4bc4e inner_distance.xml --- a/inner_distance.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/inner_distance.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,57 +1,58 @@ - - calculate the inner distance (or insert size) between two paired RNA reads - - R - numpy - rseqc - - inner_distance.py -i $input -o output -r $refgene - - #if $bounds.hasLowerBound - -l $bounds.lowerBound - #end if - - #if $bounds2.hasUpperBound - -u $bounds2.upperBound - #end if + + calculate the inner distance (or insert size) between two paired RNA reads + + R + numpy + rseqc + + + inner_distance.py -i $input -o output -r $refgene - #if $steps.step - -s $steps.stepSize - #end if - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + #if $bounds.hasLowerBound + -l $bounds.lowerBound + #end if + + #if $bounds2.hasUpperBound + -u $bounds2.upperBound + #end if + + #if $steps.step + -s $steps.stepSize + #end if + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + inner_distance.py +++++++++++++++++ @@ -71,16 +72,16 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Reference gene model - Gene model in BED format. + Gene model in BED format. Estimated Upper/Lower Bounds (defaults=250 and -250) - Estimated upper/lower bounds of inner distance (bp). + Estimated upper/lower bounds of inner distance (bp). Step size (default=5) - Step size of histogram + Step size of histogram Output @@ -118,5 +119,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - + diff -r cc5eaa9376d8 -r 580ee0c4bc4e junction_annotation.xml --- a/junction_annotation.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/junction_annotation.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,38 +1,38 @@ - - compares detected splice junctions to reference gene model - - R - numpy - rseqc - - junction_annotation.py -i $input -o output -r $refgene - - #if $intron.hasIntron - -m $intron.min_Intron - #end if - - - - - - - - - - - - - - - - - - + + compares detected splice junctions to reference gene model + + R + numpy + rseqc + + + junction_annotation.py + -i $input -o output -r $refgene + #if $intron.hasIntron + -m $intron.min_Intron + #end if + - + + + + + + + + + + + + + + + + + junction_annotation.py ++++++++++++++++++++++ @@ -54,13 +54,13 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Reference gene model - Gene model in BED format. + Gene model in BED format. Minimum intron length (default=50) - Minimum intron length (bp). + Minimum intron length (bp). Output @@ -97,5 +97,5 @@ - + diff -r cc5eaa9376d8 -r 580ee0c4bc4e junction_saturation.xml --- a/junction_saturation.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/junction_saturation.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,40 +1,40 @@ - - detects splice junctions from each subset and compares them to reference gene model - - R - numpy - rseqc - - junction_saturation.py -i $input -o output -r $refgene -m $intronSize -v $minSplice - - #if $percentiles.specifyPercentiles - -l $percentiles.lowBound -u $percentiles.upBound -s $percentiles.percentileStep - #end if + + detects splice junctions from each subset and compares them to reference gene model + + R + numpy + rseqc + + junction_saturation.py -i $input -o output -r $refgene -m $intronSize -v $minSplice - - - - - - - - - - - - - - - - - - - + #if $percentiles.specifyPercentiles + -l $percentiles.lowBound -u $percentiles.upBound -s $percentiles.percentileStep + #end if + + - + + + + + + + + + + + + + + + + + + + junction_saturation.py ++++++++++++++++++++++ @@ -52,19 +52,19 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Reference gene model - Gene model in BED format. + Gene model in BED format. Sampling Percentiles - Upper Bound, Lower Bound, Sampling Increment (defaults= 100, 5, and 5) - Sampling starts from the Lower Bound and increments to the Upper Bound at the rate of the Sampling Increment. + Sampling starts from the Lower Bound and increments to the Upper Bound at the rate of the Sampling Increment. Minimum intron length (default=50) - Minimum intron length (bp). + Minimum intron length (bp). Minimum coverage (default=1) - Minimum number of supportting reads to call a junction. + Minimum number of supportting reads to call a junction. Output ++++++++++++++ @@ -95,5 +95,5 @@ - + diff -r cc5eaa9376d8 -r 580ee0c4bc4e read_GC.xml --- a/read_GC.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/read_GC.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,25 +1,26 @@ - - determines GC% and read count - - R - numpy - rseqc - - read_GC.py -i $input -o output - - - - - - - - - + + determines GC% and read count + + R + numpy + rseqc + + + read_GC.py -i $input -o output + - + + + + + + + + + read_GC.py ++++++++++ @@ -28,7 +29,7 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Output ++++++++++++++ @@ -56,5 +57,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - + diff -r cc5eaa9376d8 -r 580ee0c4bc4e read_NVC.xml --- a/read_NVC.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/read_NVC.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,30 +1,27 @@ - - to check the nucleotide composition bias - - R - numpy - rseqc - - read_NVC.py -i $input -o output - - #if $nx - -x - #end if - - - - - - - - - - + + to check the nucleotide composition bias + + R + numpy + rseqc + + + read_NVC.py -i $input -o output $nx + - + + + + + + + + + + read_NVC.py +++++++++++ @@ -41,10 +38,10 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Include N,X in NVC plot - Plots N and X alongside A, T, C, and G in plot. + Plots N and X alongside A, T, C, and G in plot. Output ++++++++++++++ @@ -76,5 +73,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - + diff -r cc5eaa9376d8 -r 580ee0c4bc4e read_distribution.xml --- a/read_distribution.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/read_distribution.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,23 +1,24 @@ - - calculates how mapped reads were distributed over genome feature - - numpy - rseqc - - read_distribution.py -i $input -r $refgene > $output - - - - - - - - + + calculates how mapped reads were distributed over genome feature + + numpy + rseqc + + + read_distribution.py -i $input -r $refgene > $output + - + + + + + + + + read_distribution.py ++++++++++++++++++++ @@ -45,10 +46,10 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Reference gene model - Gene model in BED format. + Gene model in BED format. Sample Output ++++++++++++++ @@ -85,5 +86,5 @@ - + diff -r cc5eaa9376d8 -r 580ee0c4bc4e read_duplication.xml --- a/read_duplication.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/read_duplication.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,27 +1,28 @@ - - determines reads duplication rate with sequence-based and mapping-based strategies - - R - numpy - rseqc - - read_duplication.py -i $input -o output -u $upLimit - - - - - - - - - - - + + determines reads duplication rate with sequence-based and mapping-based strategies + + R + numpy + rseqc + + + read_duplication.py -i $input -o output -u $upLimit + - + + + + + + + + + + + read_duplication.py +++++++++++++++++++ @@ -34,10 +35,10 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Upper Limit of Plotted Duplicated Times (default=500) - Only used for plotting. + Only used for plotting. Output ++++++++++++++ @@ -66,5 +67,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - + diff -r cc5eaa9376d8 -r 580ee0c4bc4e read_quality.xml --- a/read_quality.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/read_quality.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,26 +1,27 @@ - - determines Phred quality score - - R - numpy - rseqc - - read_quality.py -i $input -o output -r $reduce - - - - - - - - - - + + determines Phred quality score + + R + numpy + rseqc + + + read_quality.py -i $input -o output -r $reduce + - + + + + + + + + + + read_quality.py +++++++++++++++ @@ -37,10 +38,10 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Ignore phred scores less than this number (default=1000) - To avoid making huge vector in R, nucleotide with certain phred score represented less than this number will be ignored. Increase this number save more memory while reduce precision. This option only applies to the 'boxplot'. + To avoid making huge vector in R, nucleotide with certain phred score represented less than this number will be ignored. Increase this number save more memory while reduce precision. This option only applies to the 'boxplot'. Output ++++++++++++++ @@ -73,5 +74,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - + diff -r cc5eaa9376d8 -r 580ee0c4bc4e samtoolshelper.py --- a/samtoolshelper.py Wed Oct 02 02:20:04 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -import sys -import subprocess as sp -import os - -# Creates the sorted and indexed bam/bai files that are requried for both bam2wig and RSEQC_count -def samtools_sorted(bam): - sortedbam = bam + ".sorted" - indexedbam = ".".join([sortedbam,"bam.bai"]) - sp.call(['samtools', 'sort', '-m 1000000000', bam, sortedbam]) - sortedbam = sortedbam + '.bam' - sp.call(['samtools', 'index', sortedbam, indexedbam]) - return sortedbam - -def main(args): - args[2] = samtools_sorted(args[2]) - sp.call(args) - - -if __name__ == "__main__": - main(sys.argv[1:]) \ No newline at end of file diff -r cc5eaa9376d8 -r 580ee0c4bc4e tool_dependencies.xml --- a/tool_dependencies.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/tool_dependencies.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,49 +1,25 @@ - + + + + + + + - http://cran.rstudio.com/src/base/R-2/R-2.11.0.tar.gz - - ./configure --enable-R-shlib \ - --with-readline=no \ - --with-x=no \ - --prefix=$INSTALL_DIR \ - --libdir=$INSTALL_DIR/lib \ - --disable-R-framework - - make && make install + http://sourceforge.net/projects/rseqc/files/RSeQC-2.3.7.tar.gz + python setup.py install --install-lib $INSTALL_DIR/lib/python --install-scripts $INSTALL_DIR/bin - $INSTALL_DIR/lib/R - $INSTALL_DIR/lib/R/library - $INSTALL_DIR/lib/R/bin + $INSTALL_DIR/lib/python + $INSTALL_DIR/bin - R is a free software environment for statistical computing and graphics. - NOTE: See custom compilation options above - - - - - - - - - http://sourceforge.net/projects/rseqc/files/RSeQC-2.3.7.tar.gz - python setup.py install --root $INSTALL_DIR --prefix . --install-lib lib - - $INSTALL_DIR/lib - - - $INSTALL_DIR/bin - - - - - RSeQC version 2.3.7, documentation available at http://dldcc-web.brc.bcm.edu/lilab/liguow/CGI/rseqc/_build/html/index.html#. - Requires gcc, python, numpy, and R + RSeQC version 2.3.7, documentation available at http://dldcc-web.brc.bcm.edu/lilab/liguow/CGI/rseqc/_build/html/index.html. + Requires gcc and python.