Mercurial > repos > nilesh > rseqc
changeset 51:09846d5169fa draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rseqc commit 37fb1988971807c6a072e1afd98eeea02329ee83
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/FPKM_count.xml Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,132 @@ +<tool id="rseqc_FPKM_count" name="FPKM Count" version="@WRAPPER_VERSION@"> + <description>calculates raw read count, FPM, and FPKM for each gene</description> + + <macros> + <import>rseqc_macros.xml</import> + </macros> + + <expand macro="requirements" /> + + <expand macro="stdio" /> + + <version_command><![CDATA[FPKM_count.py --version]]></version_command> + + <command><![CDATA[ + ln -sf '${input}' 'local_input.bam' && + ln -sf '${input.metadata.bam_index}' 'local_input.bam.bai' && + FPKM_count.py -i 'local_input.bam' -o output -r '${refgene}' + + #if str($strand_type.strand_specific) == "pair" + -d + #if str($strand_type.pair_type) == "sd" + '1++,1--,2+-,2-+' + #else + '1+-,1-+,2++,2--' + #end if + #end if + + #if str($strand_type.strand_specific) == "single" + -d + #if str($strand_type.single_type) == "s" + '++,--' + #else + '+-,-+' + #end if + #end if + + @MULTIHITS@ + + $onlyexonic + --single-read="${singleread}" + ]]> + </command> + + <inputs> + <expand macro="bam_param" /> + <expand macro="refgene_param" /> + <expand macro="strand_type_param" /> + <expand macro="multihits_param" /> + <param name="onlyexonic" type="boolean" value="false" truevalue="--only-exonic" falsevalue="" label="Only use exonic (UTR exons and CDS exons) reads, otherwise use all reads" help="(--only-exonic)"/> + <param name="singleread" type="select" label="How should read-pairs that only have one end mapped be counted?" 
help="(--single-read)"> + <option value="1" selected="true">Treat it as a whole fragment (1)</option> + <option value="0.5">Treat it as a half fragment (0.5)</option> + <option value="0">Ignore it (0)</option> + </param> + </inputs> + + <outputs> + <data format="xls" name="outputxls" from_work_dir="output.FPKM.xls"/> + </outputs> + + <tests> + <test> + <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> + <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed"/> + <output name="outputxls" file="output.FPKM.xls"/> + </test> + </tests> + + <help><![CDATA[ +FPKM_count.py ++++++++++++++ + +Given a BAM file and reference gene model, this program will calculate the raw +read count, FPM (fragments per million), and FPKM (fragments per million +mapped reads per kilobase exon) for each gene in a BED file. For strand +specific RNA-seq data, program will assign read to its parental gene according +to strand rule, if you don't know the strand rule, run infer_experiment.py. +Please note that chromosome ID, genome coordinates should be concordant +between BAM and BED files. + +Inputs +++++++++++++++ + +Input BAM/SAM file + Alignment file in BAM/SAM format. + +Reference gene model + Gene model in BED format. + +Strand sequencing type (default=none) + See Infer Experiment tool if uncertain. + +Options +++++++++++++++ + +Skip Multiple Hit Reads + Use Multiple hit reads or use only uniquely mapped reads. + +Minimum mapping quality + Minimum mapping quality (phred scaled) for an alignment to be called + "uniquely mapped". default=30 + +Only use exonic reads + Makes the program use only exonic (UTR exons and CDS exons) reads; + otherwise all reads are used. + +Single Reads + How to count read-pairs that only have one end mapped. 0: ignore it. 0.5: + treat it as half fragment. 1: treat it as whole fragment. 
default=1 + +Sample Output +++++++++++++++ + +====== ========= ========= ========= ========= =========== ========== ============ ============ +#chrom st end accession mRNA_size gene_strand Frag_count FPM FPKM +====== ========= ========= ========= ========= =========== ========== ============ ============ +chr1 100652477 100715409 NM_001918 10815.0 '-' 5498.0 191.73788949 17.728884835 +chr1 175913961 176176380 NM_022457 2789.0 '-' 923.0 32.188809021 11.541344217 +chr1 150980972 151008189 NM_021222 2977.0 '+' 687.0 23.958517657 8.0478729115 +chr1 6281252 6296044 NM_012405 4815.0 '-' 1396.0 48.684265866 10.11095864 +chr1 20959947 20978004 NM_032409 2660.0 '+' 509.0 17.750925018 6.6732800821 +chr1 32479294 32509482 NM_006559 2891.0 '+' 2151.0 75.014223408 25.947500314 +====== ========= ========= ========= ========= =========== ========== ============ ============ + +@ABOUT@ + +]]> + </help> + + <expand macro="citations" /> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/RNA_fragment_size.xml Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,76 @@ +<tool id="rseqc_RNA_fragment_size" name="RNA fragment size" version="@WRAPPER_VERSION@"> + <description> + calculates the fragment size for each gene/transcript + </description> + + <macros> + <import>rseqc_macros.xml</import> + </macros> + + <expand macro="requirements" /> + + <expand macro="stdio" /> + + <version_command><![CDATA[RNA_fragment_size.py --version]]></version_command> + + <command><![CDATA[ + ln -sf '${input}' 'input.bam' && + ln -sf '$input.metadata.bam_index' 'input.bam.bai' && + RNA_fragment_size.py -i 'input.bam' --refgene='${refgene}' --mapq=${mapq} --frag-num=${fragnum} > '${output}' + ]]> + </command> + + <inputs> + <expand macro="bam_param" /> + <expand macro="refgene_param" /> + <expand macro="mapq_param" /> + <param name="fragnum" type="integer" value="3" label="Minimum number of fragments (default: 3)" help="(--frag-num)" /> + </inputs> + + <outputs> + <data format="tabular" name="output" /> + </outputs> + + <tests> + <test> + <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" /> + <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed" /> + <output name="output" file="output.RNA_fragment_size.txt" /> + </test> + </tests> + + <help><![CDATA[ +RNA_fragment_size.py +++++++++++++++++++++ + +Calculate fragment size for each gene/transcript. For each transcript, it will +report: 1) Number of fragments that were used to estimate mean, median, std (see +below). 2) mean of fragment size 3) median of fragment size 4) stdev of fragment +size. + +Inputs +++++++ + +Input BAM/SAM file + Alignment file in BAM/SAM format. + +Reference gene model + Reference gene model in BED format. Must be a standard 12-column BED file. + [required] + +Minimum mapping quality + Minimum mapping quality for an alignment to be considered as "uniquely + mapped". 
default=30 + +Minimum number of fragments + Minimum number of fragments. default=3 + +@ABOUT@ + +]]> + + </help> + + <expand macro="citations" /> + +</tool>
--- a/RPKM_count.xml Tue May 03 16:36:57 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,158 +0,0 @@ -<tool id="rseqc_RPKM_count" name="RPKM Count" version="2.4galaxy1"> - <description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description> - - <macros> - <import>rseqc_macros.xml</import> - </macros> - - <requirements> - <expand macro="requirement_package_numpy" /> - <expand macro="requirement_package_rseqc" /> - </requirements> - - <expand macro="stdio" /> - - <version_command><![CDATA[RPKM_count.py --version]]></version_command> - - <command><![CDATA[ - ln -s "${input}" "local_input.bam" && - ln -s "${input.metadata.bam_index}" "local_input.bam.bai" && - RPKM_count.py -i "local_input.bam" -o output -r $refgene - - #if str($strand_type.strand_specific) == "pair" - -d - #if str($strand_type.pair_type) == "sd" - '1++,1--,2+-,2-+' - #else - '1+-,1-+,2++,2--' - #end if - #end if - - #if str($strand_type.strand_specific) == "single" - -d - #if str($strand_type.single_type) == "s" - '++,--' - #else - '+-,-+' - #end if - #end if - - #if $multihits.skipmultihits - --skip-multi-hits - --mapq=$multihits.mapq - #end if - - $onlyexonic - ]]> - </command> - - <inputs> - <param name="input" type="data" label="Input .bam File" format="bam" help="(--input-file)"/> - <param name="refgene" type="data" format="bed" label="reference gene model" help="(--refgene)"/> - <conditional name="strand_type"> - <param name="strand_specific" type="select" label="Strand-specific?" 
value="None"> - <option value="none">None</option> - <option value="pair">Pair-End RNA-seq</option> - <option value="single">Single-End RNA-seq</option> - </param> - <when value="pair"> - <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd" help="(--strand)"> - <option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option> - <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option> - </param> - </when> - <when value="single"> - <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s" help="(--strand)"> - <option value="s">positive --> positive; negative --> negative</option> - <option value="d">positive --> negative; negative --> positive</option> - </param> - </when> - <when value="none"></when> - </conditional> - - <conditional name="multihits"> - <param name="skipmultihits" type="boolean" label="Skip Multiple Hit Reads/Only Use Uniquely Mapped Reads" value="false" help="(--skip-multi-hits)" /> - <when value="true"> - <param name="mapq" value="30" type="integer" label="Minimum mapping quality for an alignment to be called 'uniquly mapped'" help="(--mapq)" /> - </when> - <when value="false" /> - </conditional> - - <param name="onlyexonic" type="boolean" value="false" truevalue="--only-exonic" falsevalue="" label="Only use exonic (UTR exons and CDS exons) reads, otherwise use all reads" help="(--only-exonic)"/> - </inputs> - - <outputs> - <data format="xls" name="outputxls" from_work_dir="output_read_count.xls"/> - </outputs> - - <tests> - <test> - <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> - <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed"/> - <output name="outputxls" file="output_read_count.xls"/> - </test> - </tests> - - 
<help><![CDATA[ -RPKM_count.py -+++++++++++++ - -Given a BAM file and reference gene model, this program will calculate the raw count and RPKM -values for transcript at exon, intron and mRNA level. For strand specific RNA-seq data, -program will assign read to its parental gene according to strand rule, if you don't know the -strand rule, run infer_experiment.py. Please note that chromosome ID, genome cooridinates -should be concordant between BAM and BED files. - -Inputs -++++++++++++++ - -Input BAM/SAM file - Alignment file in BAM/SAM format. - -Reference gene model - Gene model in BED format. - -Strand sequencing type (default=none) - See Infer Experiment tool if uncertain. - -Options -++++++++++++++ - -Skip Multiple Hit Reads - Use Multiple hit reads or use only uniquely mapped reads. - -Only use exonic reads - Renders program only used exonic (UTR exons and CDS exons) reads, otherwise use all reads. - -Sample Output -++++++++++++++ - -===== ======== ======== ===================== ===== =========== ============= ============= ======== ========= -chrom start end accession score gene strand tag count (+) tag count (-) RPKM (+) RPKM (-) -===== ======== ======== ===================== ===== =========== ============= ============= ======== ========= -chr1 29213722 29313959 NM_001166007_intron_1 0 '+' 431 4329 0.086 0.863 -chr1 29314417 29319841 NM_001166007_intron_2 0 '+' 31 1 0.114 0.004 -chr1 29320054 29323726 NM_001166007_intron_3 0 '+' 32 0 0.174 0.000 -chr1 29213602 29213722 NM_001166007_exon_1 0 '+' 164 0 27.321 0.000 -chr1 29313959 29314417 NM_001166007_exon_2 0 '+' 1699 4 74.158 0.175 -chr1 29319841 29320054 NM_001166007_exon_3 0 '+' 528 1 49.554 0.094 -===== ======== ======== ===================== ===== =========== ============= ============= ======== ========= - ------ - -About RSeQC -+++++++++++ - -The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. 
"Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. - -The RSeQC package is licensed under the GNU GPL v3 license. - -.. image:: http://rseqc.sourceforge.net/_static/logo.png - -.. _RSeQC: http://rseqc.sourceforge.net/ -]]> - </help> - - <expand macro="citations" /> - -</tool>
--- a/RPKM_saturation.xml Tue May 03 16:36:57 2016 -0400 +++ b/RPKM_saturation.xml Tue Mar 14 10:23:21 2017 -0400 @@ -1,22 +1,18 @@ -<tool id="rseqc_RPKM_saturation" name="RPKM Saturation" version="2.4galaxy1"> +<tool id="rseqc_RPKM_saturation" name="RPKM Saturation" version="@WRAPPER_VERSION@"> <description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description> <macros> <import>rseqc_macros.xml</import> </macros> - <requirements> - <expand macro="requirement_package_r" /> - <expand macro="requirement_package_numpy" /> - <expand macro="requirement_package_rseqc" /> - </requirements> + <expand macro="requirements" /> <expand macro="stdio" /> <version_command><![CDATA[RPKM_saturation.py --version]]></version_command> <command><![CDATA[ - RPKM_saturation.py -i $input -o output -r $refgene + RPKM_saturation.py -i '${input}' -o output -r '${refgene}' #if str($strand_type.strand_specific) == "pair" -d @@ -36,51 +32,34 @@ #end if #end if - -l $percentileFloor -u $percentileCeiling -s $percentileStep -c $rpkmCutoff + -l ${percentileFloor} -u ${percentileCeiling} -s ${percentileStep} -c ${rpkmCutoff} ]]> </command> <inputs> - <param name="input" type="data" label="Input .bam File" format="bam" help="(--input-file)"/> - <param name="refgene" type="data" format="bed" label="reference gene model" help="(--refgene)"/> - <conditional name="strand_type"> - <param name="strand_specific" type="select" label="Strand-specific?" 
value="None"> - <option value="none">None</option> - <option value="pair">Pair-End RNA-seq</option> - <option value="single">Single-End RNA-seq</option> - </param> - <when value="pair"> - <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd" help="(--strand)"> - <option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option> - <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option> - </param> - </when> - <when value="single"> - <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s" help="(--strand)"> - <option value="s">positive --> positive; negative --> negative</option> - <option value="d">positive --> negative; negative --> positive</option> - </param> - </when> - <when value="none"></when> - </conditional> + <expand macro="bam_param" /> + <expand macro="refgene_param" /> + <expand macro="strand_type_param" /> <param name="percentileFloor" type="integer" value="5" label="Begin sampling from this percentile (default=5)" help="(--percentile-floor)"/> <param name="percentileCeiling" type="integer" value="100" label="End sampling at this percentile (default=100)" help="(--percentile-ceiling)" /> <param name="percentileStep" type="integer" value="5" label="Sampling step size (default=5)" help="(--percentile-step)" /> <param name="rpkmCutoff" type="text" value="0.01" label="Ignore transcripts with RPKM smaller than this number (default=0.01)" help="(--rpkm-cutoff)" /> - <param name="mapq" value="30" type="integer" label="Minimum mapping quality for an alignment to be called 'uniquly mapped'" help="(--mapq)" /> + <expand macro="mapq_param" /> + <expand macro="rscript_output_param" /> </inputs> <outputs> - <data format="xls" name="outputxls" from_work_dir="output.eRPKM.xls" 
label="${tool.name} on ${on_string} (RPKM XLS)"/> - <data format="xls" name="outputrawxls" from_work_dir="output.rawCount.xls" label="${tool.name} on ${on_string} (Raw Count XLS)"/> - <data format="txt" name="outputr" from_work_dir="output.saturation.r" label="${tool.name} on ${on_string} (R Script)"/> - <data format="pdf" name="outputpdf" from_work_dir="output.saturation.pdf" label="${tool.name} on ${on_string} (PDF)"/> + <expand macro="pdf_output_data" filename="output.saturation.pdf" /> + <data format="xls" name="outputxls" from_work_dir="output.eRPKM.xls" label="${tool.name} on ${on_string} (RPKM xls)"/> + <data format="xls" name="outputrawxls" from_work_dir="output.rawCount.xls" label="${tool.name} on ${on_string} (Raw Count xls)"/> + <expand macro="rscript_output_data" filename="output.saturation.r" /> </outputs> <tests> <test> <param name="input" value="pairend_strandspecific_51mer_hg19_random.bam"/> <param name="refgene" value="hg19.HouseKeepingGenes_30.bed"/> + <param name="rscript_output" value="true" /> <output name="outputxls"> <assert_contents> <has_n_columns n="26" /> @@ -99,6 +78,7 @@ <has_line_matching expression="S5=c\(\d+\.\d+\)" /> </assert_contents> </output> + <output name="outputpdf" file="output.saturation.pdf" compare="sim_size" /> </test> </tests> @@ -120,7 +100,7 @@ expression level (i.e. RPKMreal). However, in practice one cannot know the RPKMreal. As a proxy, we use the RPKM estimated from total reads to approximate RPKMreal. -.. image:: http://rseqc.sourceforge.net/_images/RelativeError.png +.. image:: $PATH_TO_IMAGES/RelativeError.png :height: 80 px :width: 400 px :scale: 100 % @@ -154,7 +134,7 @@ 3. output.saturation.r: R script to generate plot 4. output.saturation.pdf: -.. image:: http://rseqc.sourceforge.net/_images/saturation.png +.. 
image:: $PATH_TO_IMAGES/saturation.png :height: 600 px :width: 600 px :scale: 80 % @@ -173,23 +153,13 @@ scatter.smooth(x,100*abs(rpkm-rpkm[length(rpkm)])/(rpkm[length(rpkm)]),type="p",ylab="Precent Relative Error",xlab="Resampling Percentage") dev.off() #close graphical device -.. image:: http://rseqc.sourceforge.net/_images/saturation_eg.png +.. image:: $PATH_TO_IMAGES/saturation_eg.png :height: 600 px :width: 600 px :scale: 80 % ------ - -About RSeQC -+++++++++++ +@ABOUT@ -The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. - -The RSeQC package is licensed under the GNU GPL v3 license. - -.. image:: http://rseqc.sourceforge.net/_static/logo.png - -.. _RSeQC: http://rseqc.sourceforge.net/ ]]> </help>
--- a/bam2wig.xml Tue May 03 16:36:57 2016 -0400 +++ b/bam2wig.xml Tue Mar 14 10:23:21 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="rseqc_bam2wig" name="BAM to Wiggle" version="2.4galaxy1"> +<tool id="rseqc_bam2wig" name="BAM to Wiggle" version="@WRAPPER_VERSION@"> <description> converts all types of RNA-seq data from .bam to .wig </description> @@ -7,20 +7,16 @@ <import>rseqc_macros.xml</import> </macros> - <requirements> - <expand macro="requirement_package_r" /> - <expand macro="requirement_package_numpy" /> - <expand macro="requirement_package_rseqc" /> - </requirements> + <expand macro="requirements" /> <expand macro="stdio" /> <version_command><![CDATA[bam2wig.py --version]]></version_command> <command><![CDATA[ - ln -sfn '${input}' 'input.bam' && - ln -sfn '${input.metadata.bam_index}' 'input.bam.bai' && - bam2wig.py -i input.bam -s $chromsize -o outfile + ln -sf '${input}' 'input.bam' && + ln -sf '${input.metadata.bam_index}' 'input.bam.bai' && + bam2wig.py -i 'input.bam' -s '${chromsize}' -o outfile #if str($strand_type.strand_specific) == "pair" -d @@ -40,57 +36,28 @@ #end if #end if - #if $wigsum.wigsum_type - -t $wigsum.totalwig + #if str($wigsum_type.wigsum_type_selector) == "normalize": + -t ${wigsum_type.totalwig} #end if - #if $multihits.skipmultihits - --skip-multi-hits - --mapq=$multihits.mapq - #end if - 2>&1 + + @MULTIHITS@ ]]> </command> <inputs> - <param name="input" type="data" label="Input .bam File" format="bam" help="(--input-file)"/> + <expand macro="bam_param" /> <param name="chromsize" type="data" label="Chromosome size file (tab or space separated)" format="txt,tabular" help="(--chromSize)"/> - - <conditional name="multihits"> - <param name="skipmultihits" type="boolean" label="Skip Multiple Hit Reads/Only Use Uniquely Mapped Reads" value="false" help="(--skip-multi-hits)" /> - <when value="true"> - <param name="mapq" value="30" type="integer" label="Minimum mapping quality for an alignment to be called 'uniquly mapped'" help="(--mapq)" /> + 
<expand macro="multihits_param" /> + <conditional name="wigsum_type"> + <param name="wigsum_type_selector" type="select" label="Normalization"> + <option value="normalize">Normalize to specified sum</option> + <option value="raw" selected="true">Do not normalize</option> + </param> + <when value="normalize"> + <param name="totalwig" value="" type="integer" label="specified wigsum" help="(--wigsum)"/> </when> - <when value="false" /> - </conditional> - - <conditional name="wigsum"> - <param name="wigsum_type" type="boolean" label="Specify wigsum?" value="false"> - </param> - <when value="true"> - <param name="totalwig" value="0" type="integer" label="specified wigsum" help="(--wigsum)"/> - </when> - <when value="false"/> + <when value="raw"/> </conditional> - - <conditional name="strand_type"> - <param name="strand_specific" type="select" label="Strand-specific?" value="none"> - <option value="none">none</option> - <option value="pair">Pair-End RNA-seq</option> - <option value="single">Single-End RNA-seq</option> - </param> - <when value="pair"> - <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd" help="(--strand)"> - <option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option> - <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option> - </param> - </when> - <when value="single"> - <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s" help="(--strand)"> - <option value="s">positive --> positive; negative --> negative</option> - <option value="d">positive --> negative; negative --> positive</option> - </param> - </when> - <when value="none"></when> - </conditional> + <expand macro="strand_type_param" /> </inputs> <outputs> @@ -114,8 +81,8 @@ <test> <param name="input" 
value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> <param name="chromsize" value="hg19.chrom.sizes"/> - <param name="skipmultihits" value="True"/> - <param name="mapq" value="20"/> + <param name="multihits_type.multihits_type_selector" value="skipmultihits"/> + <param name="multihits_type.mapq" value="20"/> <output name="output" file="testwig.wig"/> </test> <test> @@ -165,22 +132,11 @@ If RNA-seq is not strand specific, one wig file will be generated, if RNA-seq is strand specific, two wig files corresponding to Forward and Reverse will be generated. ------ - -About RSeQC -+++++++++++ - -The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. - -The RSeQC package is licensed under the GNU GPL v3 license. - -.. image:: http://rseqc.sourceforge.net/_static/logo.png - -.. _RSeQC: http://rseqc.sourceforge.net/ +@ABOUT@ .. _UCSC: http://genome.ucsc.edu/index.html .. _IGB: http://bioviz.org/igb/ -.. _IGV: http://www.broadinstitute.org/igv/home +.. _IGV: http://software.broadinstitute.org/software/igv/ .. _BAM: http://genome.ucsc.edu/goldenPath/help/bam.html .. _wiggle: http://genome.ucsc.edu/goldenPath/help/wiggle.html .. _bigwig: http://genome.ucsc.edu/FAQ/FAQformat.html#format6.1
--- a/bam_stat.xml Tue May 03 16:36:57 2016 -0400 +++ b/bam_stat.xml Tue Mar 14 10:23:21 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="rseqc_bam_stat" name="BAM/SAM Mapping Stats" version="2.4galaxy1"> +<tool id="rseqc_bam_stat" name="BAM/SAM Mapping Stats" version="@WRAPPER_VERSION@"> <description> reads mapping statistics for a provided BAM or SAM file. </description> @@ -7,23 +7,20 @@ <import>rseqc_macros.xml</import> </macros> - <requirements> - <expand macro="requirement_package_numpy" /> - <expand macro="requirement_package_rseqc" /> - </requirements> + <expand macro="requirements" /> <expand macro="stdio" /> <version_command><![CDATA[bam_stat.py --version]]></version_command> <command><![CDATA[ - bam_stat.py -i $input -q $mapq 2> $output + bam_stat.py -i '${input}' -q ${mapq} > '${output}' ]]> </command> <inputs> - <param name="input" type="data" label="Input .bam File" format="bam" help="(--input-file)"/> - <param name="mapq" value="30" type="integer" label="Minimum mapping quality for an alignment to be called 'uniquly mapped'" help="(--mapq)" /> + <expand macro="bam_param" /> + <expand macro="mapq_param" /> </inputs> <outputs> @@ -32,8 +29,8 @@ <tests> <test> - <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> - <output name="output" file="bamstats.txt"/> + <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" /> + <output name="output" file="output.bamstats.txt" /> </test> </tests> @@ -52,10 +49,10 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. 
Minimum mapping quality - Minimum mapping quality for an alignment to be called "uniquely mapped" (default=30) + Minimum mapping quality for an alignment to be called "uniquely mapped" (default=30) Output ++++++++++++++ @@ -65,19 +62,7 @@ - Uniquely mapped Reads = {Reads map to '+'} + {Reads map to '-'} - Uniquely mapped Reads = {Splice reads} + {Non-splice reads} ------ - -About RSeQC -+++++++++++ - - -The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. - -The RSeQC package is licensed under the GNU GPL v3 license. - -.. image:: http://rseqc.sourceforge.net/_static/logo.png - -.. _RSeQC: http://rseqc.sourceforge.net/ +@ABOUT@ ]]> </help>
--- a/clipping_profile.xml Tue May 03 16:36:57 2016 -0400 +++ b/clipping_profile.xml Tue Mar 14 10:23:21 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="rseqc_clipping_profile" name="Clipping Profile" version="2.4galaxy1"> +<tool id="rseqc_clipping_profile" name="Clipping Profile" version="@WRAPPER_VERSION@"> <description> estimates clipping profile of RNA-seq reads from BAM or SAM file </description> @@ -7,37 +7,42 @@ <import>rseqc_macros.xml</import> </macros> - <requirements> - <expand macro="requirement_package_r" /> - <expand macro="requirement_package_numpy" /> - <expand macro="requirement_package_rseqc" /> - </requirements> + <expand macro="requirements" /> <expand macro="stdio" /> <version_command><![CDATA[clipping_profile.py --version]]></version_command> <command><![CDATA[ - clipping_profile.py -i $input -o output + clipping_profile.py -i '${input}' -o output -q ${mapq} -s "${layout}" ]]> </command> <inputs> - <param name="input" type="data" label="Input .bam File" format="bam" help="(--input-file)"/> + <expand macro="bam_param" /> + <expand macro="mapq_param" /> + <expand macro="layout_param" /> + <expand macro="rscript_output_param" /> </inputs> <outputs> - <data format="pdf" name="outputpdf" from_work_dir="output.clipping_profile.pdf" /> - <data format="xls" name="outputxls" from_work_dir="output.clipping_profile.xls" /> - <data format="txt" name="outputr" from_work_dir="output.clipping_profile.r" /> + <expand macro="pdf_output_data" filename="output.clipping_profile.pdf" /> + <expand macro="xls_output_data" filename="output.clipping_profile.xls" /> + <expand macro="rscript_output_data" filename="output.clipping_profile.r" /> </outputs> <tests> <test> - <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> - <output name="outputpdf" file="output.clipping_profile.pdf"/> - <output name="outputxls" file="output.clipping_profile.xls"/> - <output name="outputr" file="output.clipping_profile.r"/> + <param name="input" 
value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" /> + <output name="outputpdf" file="output.clipping_profile.pdf" compare="sim_size" /> + <output name="outputxls" file="output.clipping_profile.xls" /> + </test> + <test> + <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" /> + <param name="rscript_output" value="true" /> + <output name="outputpdf" file="output.clipping_profile.pdf" compare="sim_size" /> + <output name="outputxls" file="output.clipping_profile.xls" /> + <output name="outputr" file="output.clipping_profile.r" /> </test> </tests> @@ -53,29 +58,25 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. +Minimum mapping quality + Minimum mapping quality for an alignment to be considered as "uniquely + mapped". default=30 + +Sequencing layout + Denotes whether the sequencing was single-end (SE) or paired-end (PE). Sample Output ++++++++++++++ -.. image:: http://rseqc.sourceforge.net/_images/clipping_good.png +.. image:: $PATH_TO_IMAGES/clipping_good.png :height: 600 px :width: 600 px :scale: 80 % ------ - -About RSeQC -+++++++++++ +@ABOUT@ -The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. - -The RSeQC package is licensed under the GNU GPL v3 license. - -.. image:: http://rseqc.sourceforge.net/_static/logo.png - -.. _RSeQC: http://rseqc.sourceforge.net/ ]]> </help>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deletion_profile.xml Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,87 @@ +<tool id="rseqc_deletion_profile" name="Deletion Profile" version="@WRAPPER_VERSION@"> + <description> + calculates the distributions of deleted nucleotides across reads + </description> + + <macros> + <import>rseqc_macros.xml</import> + </macros> + + <expand macro="requirements" /> + + <expand macro="stdio" /> + + <version_command><![CDATA[deletion_profile.py --version]]></version_command> + + <command><![CDATA[ + deletion_profile.py -i '${input}' -o output -l ${readlength} -n ${readnum} -q ${mapq} + ]]> + </command> + + <inputs> + <expand macro="bam_param" /> + <expand macro="readlength_param" /> + <expand macro="readnum_param" /> + <expand macro="mapq_param" /> + <expand macro="rscript_output_param" /> + </inputs> + + <outputs> + <expand macro="pdf_output_data" filename="output.deletion_profile.pdf" /> + <expand macro="xls_output_data" filename="output.deletion_profile.txt" /> + <expand macro="rscript_output_data" filename="output.deletion_profile.r" /> + </outputs> + + <tests> + <test> + <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" /> + <param name="readlength" value="101" /> + <param name="rscript_output" value="true" /> + <output name="outputpdf" file="output.deletion_profile.pdf" compare="sim_size" /> + <output name="outputxls" file="output.deletion_profile.txt" /> + <output name="outputr" file="output.deletion_profile.r" /> + </test> + </tests> + + <help><![CDATA[ +deletion_profile.py ++++++++++++++++++++ + +Calculate the distributions of deleted nucleotides across reads. + +Inputs +++++++ + +Input BAM/SAM file + Alignment file in BAM/SAM format. + +Alignment length of read + It is usually set to the original read length. For example, all these cigar + strings ("101M", "68M140N33M", "53M1D48M") suggest the read alignment + length is 101. 
[required] + +Number of aligned reads used + Number of aligned reads with deletions used to calculate the deletion + profile. default=1000000 + +Minimum mapping quality + Minimum mapping quality for an alignment to be considered as "uniquely + mapped". default=30 + +Sample Output +++++++++++++++ + +.. image:: $PATH_TO_IMAGES/out.deletion_profile.png + :height: 600 px + :width: 600 px + :scale: 80 % + +@ABOUT@ + +]]> + + </help> + + <expand macro="citations" /> + +</tool>
--- a/geneBody_coverage.xml Tue May 03 16:36:57 2016 -0400 +++ b/geneBody_coverage.xml Tue Mar 14 10:23:21 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="rseqc_geneBody_coverage" name="Gene Body Converage (BAM)" version="2.4galaxy2"> +<tool id="rseqc_geneBody_coverage" name="Gene Body Converage (BAM)" version="@WRAPPER_VERSION@"> <description> Read coverage over gene body. </description> @@ -7,121 +7,117 @@ <import>rseqc_macros.xml</import> </macros> - <requirements> - <expand macro="requirement_package_r" /> - <expand macro="requirement_package_numpy" /> - <expand macro="requirement_package_rseqc" /> - </requirements> + <expand macro="requirements" /> <expand macro="stdio" /> <version_command><![CDATA[geneBody_coverage.py --version]]></version_command> <command><![CDATA[ + #import re + #set $input_list = [] #for $i, $input in enumerate($inputs): - #set $index = $i+1 - #set $safename = ''.join(c in '_0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' and c or '_' for c in $input.display_name) - #set $fname = 'd' + str($index) + '_' + str($safename) + ".bam" - ln -s '$input' '${fname}' && - ln -s '$input.metadata.bam_index' '${fname}.bai' && - echo '${fname}' >> input_list.txt && + #set $safename = re.sub('[^\w\-_]', '_', $input.element_identifier) + #if $safename in $input_list: + #set $safename = str($safename) + "." 
+ str($i) + #end if + $input_list.append($safename) + ln -sf '${input}' '${safename}.bam' && + ln -sf '${input.metadata.bam_index}' '${safename}.bam.bai' && + echo '${safename}.bam' >> 'input_list.txt' && #end for - geneBody_coverage.py -i input_list.txt -r $refgene --minimum_length $minimum_length -o output + geneBody_coverage.py -i 'input_list.txt' -r '${refgene}' --minimum_length ${minimum_length} -o output ]]> </command> <inputs> - <param name="inputs" type="data" label="Input .bam File(s)" format="bam" help="(--input-file)" multiple="true"/> - <param name="refgene" type="data" format="bed" label="reference gene model" help="(--refgene)"/> - <param name="minimum_length" type="integer" value="100" label="Minimum mRNA length in bp (default: 100)" help="mRNA that are shorter than this value will be skipped (--minimum_length)." /> + <param name="inputs" type="data" label="Input .bam file(s)" format="bam" help="(--input-file)" multiple="true"/> + <expand macro="refgene_param" /> + <param name="minimum_length" type="integer" value="100" label="Minimum mRNA length (default: 100)" help="Minimum mRNA length in bp, mRNA that are shorter than this value will be skipped (--minimum_length)." 
/> + <expand macro="rscript_output_param" /> </inputs> <outputs> - <data name="outputcurvespdf" format="pdf" from_work_dir="output.geneBodyCoverage.curves.pdf" label="${tool.name} on ${on_string} (Curves PDF)" /> - <data name="outputheatmappdf" format="pdf" from_work_dir="output.geneBodyCoverage.heatMap.pdf" label="${tool.name} on ${on_string} (HeatMap PDF)"> + <data name="outputcurvespdf" format="pdf" from_work_dir="output.geneBodyCoverage.curves.pdf" label="${tool.name} on ${on_string} (Curves pdf)" /> + <data name="outputheatmappdf" format="pdf" from_work_dir="output.geneBodyCoverage.heatMap.pdf" label="${tool.name} on ${on_string} (HeatMap pdf)"> <filter>len(inputs) >= 3</filter> </data> - <data name="outputr" format="txt" from_work_dir="output.geneBodyCoverage.r" label="${tool.name} on ${on_string} (R Script)" /> - <data name="outputtxt" format="txt" from_work_dir="output.geneBodyCoverage.txt" label="${tool.name} on ${on_string} (Text)" /> + <expand macro="rscript_output_data" filename="output.geneBodyCoverage.r" /> + <data name="outputtxt" format="txt" from_work_dir="output.geneBodyCoverage.txt" label="${tool.name} on ${on_string} (text)" /> </outputs> <!-- PDF Files contain R version, must avoid checking for diff --> <tests> <test> - <param name="inputs" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> - <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed"/> - <!-- <output name="outputcurvespdf" file="output.geneBodyCoverage.curves.pdf"/> --> - <output name="outputr" file="output.geneBodyCoverage.r"/> - <output name="outputtxt" file="output.geneBodyCoverage.txt"/> + <param name="inputs" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" /> + <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed" /> + <param name="rscript_output" value="true" /> + <output name="outputcurvespdf" file="output.geneBodyCoverage.curves.pdf" compare="sim_size" /> + <output name="outputr" file="output.geneBodyCoverage.r" /> + <output 
name="outputtxt" file="output.geneBodyCoverage.txt" /> </test> <test> - <param name="inputs" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam,pairend_strandspecific_51mer_hg19_chr1_1-100000.bam,pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> - <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed"/> - <!-- <output name="outputcurvespdf" file="output2.geneBodyCoverage.curves.pdf"/> --> - <!-- <output name="outputheatmappdf" file="output2.geneBodyCoverage.heatMap.pdf"/> --> - <output name="outputr" file="output2.geneBodycoverage.r"/> - <output name="outputtxt" file="output2.geneBodyCoverage.txt"/> + <param name="inputs" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam,pairend_strandspecific_51mer_hg19_chr1_1-100000.bam,pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" /> + <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed" /> + <param name="rscript_output" value="true" /> + <output name="outputcurvespdf" file="output2.geneBodyCoverage.curves.pdf" compare="sim_size" /> + <output name="outputheatmappdf" file="output2.geneBodyCoverage.heatMap.pdf" compare="sim_size" /> + <output name="outputr" file="output2.geneBodyCoverage.r" /> + <output name="outputtxt" file="output2.geneBodyCoverage.txt" /> </test> </tests> <help><![CDATA[ - ## geneBody_coverage.py +## geneBody_coverage.py - Read coverage over gene body. This module is used to check if read coverage is uniform and if there is any 5\'/3\' bias. This module scales all transcripts to 100 nt and calculates the number of reads covering each nucleotide position. Finally, it generates plots illustrating the coverage profile along the gene body. +Read coverage over gene body. This module is used to check if read coverage is uniform and if there is any 5\'/3\' bias. This module scales all transcripts to 100 nt and calculates the number of reads covering each nucleotide position. Finally, it generates plots illustrating the coverage profile along the gene body. 
- If 3 or more BAM files were provided. This program generates a lineGraph and a heatmap. If fewer than 3 BAM files were provided, only lineGraph is generated. See below for examples. +If 3 or more BAM files were provided. This program generates a lineGraph and a heatmap. If fewer than 3 BAM files were provided, only lineGraph is generated. See below for examples. - When heatmap is generated, samples are ranked by the "skewness" of the coverage: Sample with best (worst) coverage will be displayed at the top (bottom) of the heatmap. - Coverage skewness was measured by `Pearson’s skewness coefficients <http://en.wikipedia.org/wiki/Skewness#Pearson.27s_skewness_coefficients>`_ +When heatmap is generated, samples are ranked by the "skewness" of the coverage: Sample with best (worst) coverage will be displayed at the top (bottom) of the heatmap. +Coverage skewness was measured by `Pearson’s skewness coefficients <http://en.wikipedia.org/wiki/Skewness#Pearson.27s_skewness_coefficients>`_ - .. image:: http://rseqc.sourceforge.net/_images/geneBody_workflow.png - :width: 800 px - :scale: 80 % +.. image:: $PATH_TO_IMAGES/geneBody_workflow.png +:width: 800 px +:scale: 80 % - ## Inputs +## Inputs - Input BAM/SAM file +Input BAM/SAM file Alignment file in BAM/SAM format. - Reference gene model +Reference gene model Gene Model in BED format. - Minimum mRNA length +Minimum mRNA length Minimum mRNA length (bp). mRNA that are shorter than this value will be skipped (default is 100). ## Outputs - Text +Text Table that includes the data used to generate the plots - R Script +R Script R script file that reads the data and generates the plot - PDF +PDF The final plot, in PDF format - Example plots: - .. image:: http://rseqc.sourceforge.net/_images/Aug_26.geneBodyCoverage.curves.png - :height: 600 px - :width: 600 px - :scale: 80 % +Example plots: +.. image:: $PATH_TO_IMAGES/Aug_26.geneBodyCoverage.curves.png +:height: 600 px +:width: 600 px +:scale: 80 % - .. 
image:: http://rseqc.sourceforge.net/_images/Aug_26.geneBodyCoverage.heatMap.png - :height: 600 px - :width: 600 px - :scale: 80 % - - ## About RSeQC +.. image:: $PATH_TO_IMAGES/Aug_26.geneBodyCoverage.heatMap.png +:height: 600 px +:width: 600 px +:scale: 80 % - The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. - - The RSeQC package is licensed under the GNU GPL v3 license. +@ABOUT@ - .. image:: http://rseqc.sourceforge.net/_static/logo.png - - .. _RSeQC: http://rseqc.sourceforge.net/ ]]> </help>
--- a/geneBody_coverage2.xml Tue May 03 16:36:57 2016 -0400 +++ b/geneBody_coverage2.xml Tue Mar 14 10:23:21 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="rseqc_geneBody_coverage2" name="Gene Body Converage (Bigwig)" version="2.4galaxy1"> +<tool id="rseqc_geneBody_coverage2" name="Gene Body Converage (Bigwig)" version="@WRAPPER_VERSION@"> <description> Read coverage over gene body </description> @@ -7,43 +7,38 @@ <import>rseqc_macros.xml</import> </macros> - <requirements> - <expand macro="requirement_package_r" /> - <expand macro="requirement_package_numpy" /> - <expand macro="requirement_package_rseqc" /> - </requirements> + <expand macro="requirements" /> <expand macro="stdio" /> <version_command><![CDATA[geneBody_coverage2.py --version]]></version_command> <command><![CDATA[ - geneBody_coverage2.py -i $input -r $refgene -o output + geneBody_coverage2.py -i '${input}' -r '${refgene}' -o output ]]> </command> <inputs> <param name="input" type="data" label="Input bigwig file" format="bigwig" /> - <param name="refgene" type="data" label="Reference Genome" format="bed" /> + <expand macro="refgene_param" /> </inputs> <outputs> - <data name="outputpdf" format="pdf" from_work_dir="output.geneBodyCoverage.pdf" label="${tool.name} on ${on_string} (PDF)" /> - <data name="outputr" format="txt" from_work_dir="output.geneBodyCoverage_plot.r" label="${tool.name} on ${on_string} (R Script)" /> - <data name="outputtxt" format="txt" from_work_dir="output.geneBodyCoverage.txt" label="${tool.name} on ${on_string} (Text)" /> + <expand macro="pdf_output_data" filename="output.geneBodyCoverage.pdf" /> + <data name="outputtxt" format="txt" from_work_dir="output.geneBodyCoverage.txt" label="${tool.name} on ${on_string} (text)" /> + <expand macro="rscript_output_data" filename="output.geneBodyCoverage_plot.r" /> </outputs> - <!-- Unable to succefully run this script, it seems deprecated and should probably be dropped <tests> <test> - <param name="input" 
value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bigwig"/> - <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed"/> - <output name="outputcurvespdf" file="output.geneBodyCoverage.curves.pdf"/> - <output name="outputr" file="output.geneBodyCoverage.r"/> - <output name="outputtxt" file="output.geneBodyCoverage.txt"/> + <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bigwig" /> + <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed" /> + <param name="rscript_output" value="true" /> + <output name="outputpdf" file="output.geneBodyCoverage2.curves.pdf" compare="sim_size" /> + <output name="outputr" file="output.geneBodyCoverage2.r" /> + <output name="outputtxt" file="output.geneBodyCoverage2.txt" /> </test> </tests> - --> <help><![CDATA[ geneBody_coverage2.py @@ -69,23 +64,12 @@ Read coverage over gene body. This module is used to check if reads coverage is uniform and if there is any 5’/3’ bias. This module scales all transcripts to 100 nt and calculates the number of reads covering each nucleotide position. Finally, it generates a plot illustrating the coverage profile along the gene body. NOTE: this module requires lots of memory for large BAM files, because it load the entire BAM file into memory. We add another script "geneBody_coverage2.py" into v2.3.1 which takes bigwig (instead of BAM) as input. It only use 200M RAM, but users need to convert BAM into WIG, and then WIG into BigWig. Example output: - .. image:: http://dldcc-web.brc.bcm.edu/lilab/liguow/RSeQC/figure/geneBody_coverage.png + .. image:: $PATH_TO_IMAGES/geneBody_coverage.png :height: 600 px :width: 600 px :scale: 80 % ------ - -About RSeQC -+++++++++++ - -The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. 
"Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. - -The RSeQC package is licensed under the GNU GPL v3 license. - -.. image:: http://rseqc.sourceforge.net/_static/logo.png - -.. _RSeQC: http://rseqc.sourceforge.net/ +@ABOUT@ ]]> </help>
--- a/infer_experiment.xml Tue May 03 16:36:57 2016 -0400 +++ b/infer_experiment.xml Tue Mar 14 10:23:21 2017 -0400 @@ -1,32 +1,29 @@ -<tool id="rseqc_infer_experiment" name="Infer Experiment" version="2.4galaxy1"> +<tool id="rseqc_infer_experiment" name="Infer Experiment" version="@WRAPPER_VERSION@"> <description>speculates how RNA-seq were configured</description> <macros> <import>rseqc_macros.xml</import> </macros> - <requirements> - <expand macro="requirement_package_numpy" /> - <expand macro="requirement_package_rseqc" /> - </requirements> + <expand macro="requirements" /> <expand macro="stdio" /> <version_command><![CDATA[infer_experiment.py --version]]></version_command> <command><![CDATA[ - infer_experiment.py -i $input -r $refgene - --sample-size $sample_size - --mapq $mapq - > $output + infer_experiment.py -i '${input}' -r '${refgene}' + --sample-size ${sample_size} + --mapq ${mapq} + > '${output}' ]]> </command> <inputs> - <param name="input" type="data" format="bam,sam" label="Input BAM/SAM file" help="(--input-file)"/> - <param name="refgene" type="data" format="bed" label="Reference gene model in bed format" help="(--refgene)" /> - <param name="sample_size" type="integer" label="Number of reads sampled from SAM/BAM file (default = 200000)" value="200000" help="(--sample-size)"/> - <param name="mapq" type="integer" label="Minimum mapping quality (default=30)" help="Minimum phred scale mapping quality to consider a read 'uniquely mapped' (--mapq)" value="30" /> + <expand macro="bam_param" /> + <expand macro="refgene_param" /> + <expand macro="sample_size_param" /> + <expand macro="mapq_param" /> </inputs> <outputs> @@ -134,19 +131,7 @@ *Conclusion*: This is single-end, strand specific RNA-seq data. Strandness of reads are concordant with strandness of reference gene. - ------ - -About RSeQC -+++++++++++ - -The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. 
"Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. - -The RSeQC package is licensed under the GNU GPL v3 license. - -.. image:: http://rseqc.sourceforge.net/_static/logo.png - -.. _RSeQC: http://rseqc.sourceforge.net/ +@ABOUT@ ]]> </help>
--- a/inner_distance.xml Tue May 03 16:36:57 2016 -0400 +++ b/inner_distance.xml Tue Mar 14 10:23:21 2017 -0400 @@ -1,55 +1,53 @@ -<tool id="rseqc_inner_distance" name="Inner Distance" version="2.4galaxy1"> +<tool id="rseqc_inner_distance" name="Inner Distance" version="@WRAPPER_VERSION@"> <description>calculate the inner distance (or insert size) between two paired RNA reads</description> <macros> <import>rseqc_macros.xml</import> </macros> - <requirements> - <expand macro="requirement_package_r" /> - <expand macro="requirement_package_numpy" /> - <expand macro="requirement_package_rseqc" /> - </requirements> + <expand macro="requirements" /> <expand macro="stdio" /> <version_command><![CDATA[inner_distance.py --version]]></version_command> <command><![CDATA[ - inner_distance.py -i $input -o output -r $refgene - --sample-size $sample_size - --lower-bound $lowerBound - --upper-bound $upperBound - --step $step - --mapq $mapq + inner_distance.py -i '${input}' -o output -r '${refgene}' + --sample-size ${sample_size} + --lower-bound ${lowerBound} + --upper-bound ${upperBound} + --step ${step} + --mapq ${mapq} ]]> </command> <inputs> - <param name="input" type="data" format="bam,sam" label="input bam/sam file" help="(--input-file)" /> - <param name="refgene" type="data" format="bed" label="reference gene model" help="(--refgene)" /> - <param name="sample_size" type="integer" label="Number of read-pairs used to estimate inner distance (default = 1000000)" value="1000000" help="(--sample-size)"/> + <expand macro="bam_sam_param" /> + <expand macro="refgene_param" /> + <expand macro="sample_size_param" /> <param name="lowerBound" type="integer" value="-250" label="Lower bound (bp, default=-250)" help="Used for plotting histogram (--lower-bound)"/> <param name="upperBound" type="integer" value="250" label="Upper bound (bp, default=250)" help="Used for plotting histogram (--upper-bound)"/> <param name="step" type="integer" value="5" label="Step size of histogram (bp, 
default=5)" help="(--step)"/> - <param name="mapq" type="integer" label="Minimum mapping quality (default=30)" help="Minimum phred scale mapping quality to consider a read 'uniquely mapped' (--mapq)" value="30" /> + <expand macro="mapq_param" /> + <expand macro="rscript_output_param" /> </inputs> <outputs> - <data format="txt" name="outputtxt" from_work_dir="output.inner_distance.txt" label="${tool.name} on ${on_string} (Text)"/> - <data format="txt" name="outputfreqtxt" from_work_dir="output.inner_distance_freq.txt" label="${tool.name} on ${on_string} (Freq Text)" /> - <data format="pdf" name="outputpdf" from_work_dir="output.inner_distance_plot.pdf" label="${tool.name} on ${on_string} (PDF)" /> - <data format="txt" name="outputr" from_work_dir="output.inner_distance_plot.r" label="${tool.name} on ${on_string} (R Script)" /> + <expand macro="pdf_output_data" filename="output.inner_distance_plot.pdf" /> + <data format="txt" name="outputtxt" from_work_dir="output.inner_distance.txt" label="${tool.name} on ${on_string} (text)"/> + <data format="txt" name="outputfreqtxt" from_work_dir="output.inner_distance_freq.txt" label="${tool.name} on ${on_string} (frequency text)" /> + <expand macro="rscript_output_data" filename="output.inner_distance_plot.r" /> </outputs> <tests> <test> <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed"/> - <output name="outputtxt" file="output.inner_distance.txt"/> - <output name="outputfreqtxt" file="output.inner_distance_freq.txt"/> - <output name="outputpdf" file="output.inner_distance_plot.pdf"/> - <output name="outputr" file="output.inner_distance_plot.r"/> + <param name="rscript_output" value="true" /> + <output name="outputtxt" file="output.inner_distance.txt" /> + <output name="outputfreqtxt" file="output.inner_distance_freq.txt" /> + <output name="outputpdf" file="output.inner_distance_plot.pdf" compare="sim_size"/> + <output name="outputr" 
file="output.inner_distance_plot.r" /> </test> </tests> @@ -100,24 +98,12 @@ 3. output.inner_distance_plot.r: R script to generate histogram 4. output.inner_distance_plot.pdf: histogram plot -.. image:: http://rseqc.sourceforge.net/_images/inner_distance.png +.. image:: $PATH_TO_IMAGES/inner_distance.png :height: 600 px :width: 600 px :scale: 80 % - ------ - -About RSeQC -+++++++++++ - -The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. - -The RSeQC package is licensed under the GNU GPL v3 license. - -.. image:: http://rseqc.sourceforge.net/_static/logo.png - -.. _RSeQC: http://rseqc.sourceforge.net/ +@ABOUT@ ]]> </help>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/insertion_profile.xml Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,88 @@ +<tool id="rseqc_insertion_profile" name="Insertion Profile" version="@WRAPPER_VERSION@"> + <description> + calculates the distribution of inserted nucleotides across reads + </description> + + <macros> + <import>rseqc_macros.xml</import> + </macros> + + <expand macro="requirements" /> + + <expand macro="stdio" /> + + <version_command><![CDATA[insertion_profile.py --version]]></version_command> + + <command><![CDATA[ + insertion_profile.py -i '${input}' -o output -q ${mapq} -s "${layout}" + ]]> + </command> + + <inputs> + <expand macro="bam_param" /> + <expand macro="mapq_param" /> + <expand macro="layout_param" /> + <expand macro="rscript_output_param" /> + </inputs> + + <outputs> + <expand macro="pdf_output_data" filename="output.insertion_profile.pdf" /> + <expand macro="xls_output_data" filename="output.insertion_profile.xls" /> + <expand macro="rscript_output_data" filename="output.insertion_profile.r" /> + </outputs> + + <tests> + <test> + <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" /> + <param name="rscript_output" value="true" /> + <output name="outputpdf" file="output.insertion_profile.pdf" compare="sim_size" /> + <output name="outputxls" file="output.insertion_profile.xls" /> + <output name="outputr" file="output.insertion_profile.r" /> + </test> + </tests> + + <help><![CDATA[ +insertion_profile.py +++++++++++++++++++++ + +Calculate the distributions of inserted nucleotides across reads. Note that to +use this function, CIGAR strings within SAM/BAM file should have ‘I’ operation. + +Inputs +++++++ + +Input BAM/SAM file + Alignment file in BAM/SAM format. + +Minimum mapping quality + Minimum mapping quality for an alignment to be considered as "uniquely + mapped". default=30 + +Sequencing layout + Denotes whether the sequencing was single-end (SE) or paired-end (PE).
+ +Sample Output +++++++++++++++ + +Read-1 insertion profile: + +.. image:: $PATH_TO_IMAGES/out.insertion_profile.R1.png + :height: 600 px + :width: 600 px + :scale: 80 % + +Read-2 insertion profile: + +.. image:: $PATH_TO_IMAGES/out.insertion_profile.R2.png + :height: 600 px + :width: 600 px + :scale: 80 % + +@ABOUT@ + +]]> + </help> + + <expand macro="citations" /> + +</tool>
--- a/junction_annotation.xml Tue May 03 16:36:57 2016 -0400 +++ b/junction_annotation.xml Tue Mar 14 10:23:21 2017 -0400 @@ -1,15 +1,11 @@ -<tool id="rseqc_junction_annotation" name="Junction Annotation" version="2.4galaxy1"> +<tool id="rseqc_junction_annotation" name="Junction Annotation" version="@WRAPPER_VERSION@"> <description>compares detected splice junctions to reference gene model</description> <macros> <import>rseqc_macros.xml</import> </macros> - <requirements> - <expand macro="requirement_package_r" /> - <expand macro="requirement_package_numpy" /> - <expand macro="requirement_package_rseqc" /> - </requirements> + <expand macro="requirements" /> <expand macro="stdio" /> @@ -17,36 +13,38 @@ <command><![CDATA[ junction_annotation.py - --input-file $input - --refgene $refgene + --input-file '${input}' + --refgene '${refgene}' --out-prefix output - --min-intron $min_intron - --mapq $mapq + --min-intron ${min_intron} + --mapq ${mapq} ]]> </command> <inputs> - <param name="input" type="data" format="bam,sam" label="input bam/sam file" help="(--input-file)"/> - <param name="refgene" type="data" format="bed" label="reference gene model" help="(--refgene)"/> - <param name="min_intron" type="integer" value="50" label="Minimum intron length (bp, default=50)" help="(--min-intron)" /> - <param name="mapq" type="integer" label="Minimum mapping quality (default=30)" help="Minimum phred scale mapping quality to consider a read 'uniquely mapped' (--mapq)" value="30" /> + <expand macro="bam_sam_param" /> + <expand macro="refgene_param" /> + <expand macro="min_intron_param" /> + <expand macro="mapq_param" /> + <expand macro="rscript_output_param" /> </inputs> <outputs> - <data format="xls" name="outputxls" from_work_dir="output.junction.xls" label="${tool.name} on ${on_string} (XLS)"/> - <data format="txt" name="outputr" from_work_dir="output.junction_plot.r" label="${tool.name} on ${on_string} (R Script)" /> - <data format="pdf" name="outputpdf" 
from_work_dir="output.splice_events.pdf" label="${tool.name} on ${on_string} (Splice Events PDF)"/> - <data format="pdf" name="outputjpdf" from_work_dir="output.splice_junction.pdf" label="${tool.name} on ${on_string} (Splice Junction PDF)" /> + <data format="pdf" name="outputpdf" from_work_dir="output.splice_events.pdf" label="${tool.name} on ${on_string} (Splice Events pdf)"/> + <data format="pdf" name="outputjpdf" from_work_dir="output.splice_junction.pdf" label="${tool.name} on ${on_string} (Splice Junction pdf)" /> + <expand macro="xls_output_data" filename="output.junction.xls" /> + <expand macro="rscript_output_data" filename="output.junction_plot.r" /> </outputs> <tests> <test> - <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> - <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed"/> - <output name="outputxls" file="output.junction.xls"/> - <output name="outputr" file="output.junction_plot.r"/> - <output name="outputpdf" file="output.splice_events.pdf"/> - <output name="outputjpdf" file="output.splice_junction.pdf"/> + <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" /> + <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed" /> + <param name="rscript_output" value="true" /> + <output name="outputxls" file="output.junction.xls" /> + <output name="outputr" file="output.junction_plot.r" /> + <output name="outputpdf" file="output.splice_events.pdf" compare="sim_size" /> + <output name="outputjpdf" file="output.splice_junction.pdf" compare="sim_size" /> </test> </tests> @@ -94,23 +92,13 @@ 3. output.splice_junction.pdf: plot of splice junctions 4. output.splice_events.pdf: plot of splice events -.. image:: http://rseqc.sourceforge.net/_images/junction.png +.. 
image:: $PATH_TO_IMAGES/junction.png :height: 400 px :width: 850 px :scale: 80 % ------ - -About RSeQC -+++++++++++ +@ABOUT@ -The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. - -The RSeQC package is licensed under the GNU GPL v3 license. - -.. image:: http://rseqc.sourceforge.net/_static/logo.png - -.. _RSeQC: http://rseqc.sourceforge.net/ ]]> </help>
--- a/junction_saturation.xml Tue May 03 16:36:57 2016 -0400 +++ b/junction_saturation.xml Tue Mar 14 10:23:21 2017 -0400 @@ -1,15 +1,11 @@ -<tool id="rseqc_junction_saturation" name="Junction Saturation" version="2.4galaxy1"> +<tool id="rseqc_junction_saturation" name="Junction Saturation" version="@WRAPPER_VERSION@"> <description>detects splice junctions from each subset and compares them to reference gene model</description> <macros> <import>rseqc_macros.xml</import> </macros> - <requirements> - <expand macro="requirement_package_r" /> - <expand macro="requirement_package_numpy" /> - <expand macro="requirement_package_rseqc" /> - </requirements> + <expand macro="requirements" /> <expand macro="stdio" /> @@ -17,29 +13,32 @@ <command><![CDATA[ junction_saturation.py - --input-file $input - --refgene $refgene + --input-file '${input}' + --refgene '${refgene}' --out-prefix output - --min-intron $min_intron - --min-coverage $min_coverage - --mapq $mapq - #if $percentiles.specifyPercentiles - --percentile-floor $percentiles.lowBound - --percentile-ceiling $percentiles.upBound - --percentile-step $percentiles.percentileStep + --min-intron ${min_intron} + --min-coverage ${min_coverage} + --mapq ${mapq} + #if str($percentiles_type.percentiles_type_selector) == "specify": + --percentile-floor ${percentiles_type.lowBound} + --percentile-ceiling ${percentiles_type.upBound} + --percentile-step ${percentiles_type.percentileStep} #end if ]]> </command> <inputs> - <param name="input" type="data" format="bam,sam" label="input bam/sam file" help="(--input-file)"/> - <param name="refgene" type="data" format="bed" label="reference gene model" help="(--refgene)"/> - <param name="min_intron" type="integer" value="50" label="Minimum intron length (bp, default=50)" help="(--min-intron)" /> + <expand macro="bam_sam_param" /> + <expand macro="refgene_param" /> + <expand macro="min_intron_param" /> <param name="min_coverage" type="integer" label="Minimum number of supporting reads to call 
a junction (default=1)" value="1" help="(--min-coverage)" /> - <param name="mapq" type="integer" label="Minimum mapping quality (default=30)" help="Minimum phred scale mapping quality to consider a read 'uniquely mapped' (--mapq)" value="30" /> - <conditional name="percentiles"> - <param name="specifyPercentiles" type="boolean" label="Specify sampling bounds and frequency" value="false"/> - <when value="true"> + <expand macro="mapq_param" /> + <conditional name="percentiles_type"> + <param name="percentiles_type_selector" type="select" label="Sampling bounds and frequency"> + <option value="default" selected="true">Default sampling bounds and frequency</option> + <option value="specify">Specify sampling bounds and frequency</option> + </param> + <when value="specify"> <param name="lowBound" type="integer" value="5" label="Lower Bound Sampling Frequency (bp, default=5)" help="(--percentile-floor)"> <validator type="in_range" min="0" max="100" /> </param> @@ -50,20 +49,28 @@ <validator type="in_range" min="0" max="100" /> </param> </when> - <when value="false"/> + <when value="default"/> </conditional> + <expand macro="rscript_output_param" /> </inputs> <outputs> - <data format="txt" name="outputr" from_work_dir="output.junctionSaturation_plot.r" label="${tool.name} on ${on_string} (R Script)"/> - <data format="pdf" name="outputpdf" from_work_dir="output.junctionSaturation_plot.pdf" label="${tool.name} on ${on_string} (PDF)"/> + <expand macro="pdf_output_data" filename="output.junctionSaturation_plot.pdf" /> + <expand macro="rscript_output_data" filename="output.junctionSaturation_plot.r" /> </outputs> <tests> <test> - <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> - <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed"/> - <output name="outputr" file="output.junctionSaturation_plot.r"/> + <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" /> + <param name="refgene" 
value="hg19_RefSeq_chr1_1-100000.bed" /> + <param name="rscript_output" value="true" /> + <output name="outputr" file="output.junctionSaturation_plot.r" compare="sim_size"> + <assert_contents> + <has_line line="pdf('output.junctionSaturation_plot.pdf')" /> + <has_line line="x=c(5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100)" /> + </assert_contents> + </output> + <output name="outputpdf" file="output.junctionSaturation_plot.pdf" compare="sim_size" /> </test> </tests> @@ -105,26 +112,15 @@ 1. output.junctionSaturation_plot.r: R script to generate plot 2. output.junctionSaturation_plot.pdf -.. image:: http://rseqc.sourceforge.net/_images/junction_saturation.png +.. image:: $PATH_TO_IMAGES/junction_saturation.png :height: 600 px :width: 600 px :scale: 80 % In this example, current sequencing depth is almost saturated for "known junction" (red line) detection because the number of "known junction" reaches a plateau. In other words, nearly all "known junctions" (expressed in this particular tissue) have already been detected, and continue sequencing will not detect additional "known junction" and will only increase junction coverage (i.e. junction covered by more reads). While current sequencing depth is not saturated for novel junctions (green). - ------ - -About RSeQC -+++++++++++ +@ABOUT@ -The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. - -The RSeQC package is licensed under the GNU GPL v3 license. - -.. image:: http://rseqc.sourceforge.net/_static/logo.png - -.. 
_RSeQC: http://rseqc.sourceforge.net/ ]]> </help>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mismatch_profile.xml Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,89 @@ +<tool id="rseqc_mismatch_profile" name="Mismatch Profile" version="@WRAPPER_VERSION@"> + <description> + calculates the distribution of mismatches across reads + </description> + + <macros> + <import>rseqc_macros.xml</import> + </macros> + + <expand macro="requirements" /> + + <expand macro="stdio" /> + + <version_command><![CDATA[mismatch_profile.py --version]]></version_command> + + <command><![CDATA[ + mismatch_profile.py -i '${input}' -o output -l ${readlength} -n ${readnum} -q ${mapq} + ]]> + </command> + + <inputs> + <expand macro="bam_param" /> + <expand macro="readlength_param" /> + <expand macro="readnum_param" /> + <expand macro="mapq_param" /> + <expand macro="rscript_output_param" /> + </inputs> + + <outputs> + <expand macro="pdf_output_data" filename="output.mismatch_profile.pdf" /> + <expand macro="xls_output_data" filename="output.mismatch_profile.xls" /> + <expand macro="rscript_output_data" filename="output.mismatch_profile.r" /> + </outputs> + + <tests> + <test> + <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> + <param name="readlength" value="101" /> + <param name="rscript_output" value="true" /> + <output name="outputpdf" file="output.mismatch_profile.pdf" compare="sim_size" /> + <output name="outputxls" file="output.mismatch_profile.xls"/> + <output name="outputr" file="output.mismatch_profile.r"/> + </test> + </tests> + + <help><![CDATA[ +mismatch_profile.py ++++++++++++++++++++ + +Calculate the distribution of mismatches across reads. + +Note that the “MD” tag must exist in BAM file. + +Inputs +++++++ + +Input BAM/SAM file + Alignment file in BAM/SAM format. + +Alignment length of read + It is usually set to the orignial read length. For example, all these cigar + strings ("101M", "68M140N33M", "53M1D48M") suggest the read alignment + length is 101. 
[required] + +Number of aligned reads used + Number of aligned reads with mismatches used to calculate the mismatch + profile. default=1000000 + +Minimum mapping quality + Minimum mapping quality for an alignment to be considered as "uniquely + mapped". default=30 + +Sample Output +++++++++++++++ + +.. image:: $PATH_TO_IMAGES/mismatch_profile.png + :height: 600 px + :width: 600 px + :scale: 80 % + +@ABOUT@ + +]]> + + </help> + + <expand macro="citations" /> + +</tool>
--- a/read_GC.xml Tue May 03 16:36:57 2016 -0400 +++ b/read_GC.xml Tue Mar 14 10:23:21 2017 -0400 @@ -1,15 +1,11 @@ -<tool id="rseqc_read_GC" name="Read GC" version="2.4galaxy1"> +<tool id="rseqc_read_GC" name="Read GC" version="@WRAPPER_VERSION@"> <description>determines GC% and read count</description> <macros> <import>rseqc_macros.xml</import> </macros> - <requirements> - <expand macro="requirement_package_r" /> - <expand macro="requirement_package_numpy" /> - <expand macro="requirement_package_rseqc" /> - </requirements> + <expand macro="requirements" /> <expand macro="stdio" /> @@ -17,28 +13,31 @@ <command><![CDATA[ read_GC.py - --input-file $input + --input-file '${input}' --out-prefix output - --mapq $mapq + --mapq ${mapq} ]]> </command> <inputs> - <param name="input" type="data" format="bam,sam" label="input bam/sam file" help="(--input-file)"/> - <param name="mapq" type="integer" label="Minimum mapping quality (default=30)" help="Minimum phred scale mapping quality to consider a read 'uniquely mapped' (--mapq)" value="30" /> + <expand macro="bam_sam_param" /> + <expand macro="mapq_param" /> + <expand macro="rscript_output_param" /> </inputs> <outputs> - <data format="xls" name="outputxls" from_work_dir="output.GC.xls" label="${tool.name} on ${on_string} (XLS)"/> - <data format="txt" name="outputr" from_work_dir="output.GC_plot.r" label="${tool.name} on ${on_string} (R Script)" /> - <data format="pdf" name="outputpdf" from_work_dir="output.GC_plot.pdf" label="${tool.name} on ${on_string} (PDF)" /> + <expand macro="pdf_output_data" filename="output.GC_plot.pdf" /> + <expand macro="xls_output_data" filename="output.GC.xls" /> + <expand macro="rscript_output_data" filename="output.GC_plot.r" /> </outputs> <tests> <test> - <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> - <output name="outputxls" file="output.GC.xls"/> - <output name="outputr" file="output.GC_plot.r"/> + <param name="input" 
value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" /> + <param name="rscript_output" value="true" /> + <output name="outputxls" file="output.GC.xls" /> + <output name="outputr" file="output.GC_plot.r" /> + <output name="outputpdf" file="output.GC_plot.pdf" compare="sim_size" /> </test> </tests> @@ -60,23 +59,13 @@ 2. output.GC_plot.r: R script to generate pdf file. 3. output.GC_plot.pdf: graphical output generated from R script. -.. image:: http://rseqc.sourceforge.net/_images/read_gc.png +.. image:: $PATH_TO_IMAGES/read_gc.png :height: 600 px :width: 600 px :scale: 80 % ------ - -About RSeQC -+++++++++++ +@ABOUT@ -The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. - -The RSeQC package is licensed under the GNU GPL v3 license. - -.. image:: http://rseqc.sourceforge.net/_static/logo.png - -.. _RSeQC: http://rseqc.sourceforge.net/ ]]> </help>
--- a/read_NVC.xml Tue May 03 16:36:57 2016 -0400 +++ b/read_NVC.xml Tue Mar 14 10:23:21 2017 -0400 @@ -1,45 +1,45 @@ -<tool id="rseqc_read_NVC" name="Read NVC" version="2.4galaxy1"> +<tool id="rseqc_read_NVC" name="Read NVC" version="@WRAPPER_VERSION@"> <description>to check the nucleotide composition bias</description> <macros> <import>rseqc_macros.xml</import> </macros> - <requirements> - <expand macro="requirement_package_r" /> - <expand macro="requirement_package_numpy" /> - <expand macro="requirement_package_rseqc" /> - </requirements> + <expand macro="requirements" /> <expand macro="stdio" /> <version_command><![CDATA[read_NVC.py --version]]></version_command> - <command> + <command><![CDATA[ read_NVC.py - --input-file $input + --input-file '${input}' --out-prefix output - $nx - --mapq $mapq + ${nx} + --mapq ${mapq} + ]]> </command> <inputs> - <param name="input" type="data" format="bam,sam" label="input bam/sam file" help="(--input-file)"/> + <expand macro="bam_sam_param" /> <param name="nx" type="boolean" value="false" truevalue="--nx" falsevalue="" label="Include N,X in NVC plot" help="(--nx)"/> - <param name="mapq" type="integer" label="Minimum mapping quality (default=30)" help="Minimum phred scale mapping quality to consider a read 'uniquely mapped' (--mapq)" value="30" /> + <expand macro="mapq_param" /> + <expand macro="rscript_output_param" /> </inputs> <outputs> - <data format="xls" name="outputxls" from_work_dir="output.NVC.xls" label="${tool.name} on ${on_string} (XLS)" /> - <data format="txt" name="outputr" from_work_dir="output.NVC_plot.r" label="${tool.name} on ${on_string} (R Script)" /> - <data format="pdf" name="outputpdf" from_work_dir="output.NVC_plot.pdf" label="${tool.name} on ${on_string} (PDF)" /> + <expand macro="pdf_output_data" filename="output.NVC_plot.pdf" /> + <expand macro="xls_output_data" filename="output.NVC.xls" /> + <expand macro="rscript_output_data" filename="output.NVC_plot.r" /> </outputs> <tests> <test> - <param 
name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> - <output name="outputxls" file="output.NVC.xls"/> - <output name="outputr" file="output.NVC_plot.r"/> + <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" /> + <param name="rscript_output" value="true" /> + <output name="outputxls" file="output.NVC.xls" /> + <output name="outputr" file="output.NVC_plot.r" /> + <output name="outputpdf" file="output.NVC_plot.pdf" compare="sim_size" /> </test> </tests> @@ -76,23 +76,13 @@ 3. output.NVC_plot.pdf: NVC plot. -.. image:: http://rseqc.sourceforge.net/_images/NVC_plot.png +.. image:: $PATH_TO_IMAGES/NVC_plot.png :height: 600 px :width: 600 px :scale: 80 % ------ - -About RSeQC -+++++++++++ +@ABOUT@ -The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. - -The RSeQC package is licensed under the GNU GPL v3 license. - -.. image:: http://rseqc.sourceforge.net/_static/logo.png - -.. _RSeQC: http://rseqc.sourceforge.net/ ]]> </help>
--- a/read_distribution.xml Tue May 03 16:36:57 2016 -0400 +++ b/read_distribution.xml Tue Mar 14 10:23:21 2017 -0400 @@ -1,27 +1,24 @@ -<tool id="rseqc_read_distribution" name="Read Distribution" version="2.4galaxy1"> +<tool id="rseqc_read_distribution" name="Read Distribution" version="@WRAPPER_VERSION@"> <description>calculates how mapped reads were distributed over genome feature</description> <macros> <import>rseqc_macros.xml</import> </macros> - <requirements> - <expand macro="requirement_package_numpy" /> - <expand macro="requirement_package_rseqc" /> - </requirements> + <expand macro="requirements" /> <expand macro="stdio" /> <version_command><![CDATA[read_distribution.py --version]]></version_command> <command><![CDATA[ - read_distribution.py -i $input -r $refgene > $output + read_distribution.py -i '${input}' -r '${refgene}' > '${output}' ]]> </command> <inputs> - <param name="input" type="data" format="bam,sam" label="input bam/sam file" help="(--input-file)"/> - <param name="refgene" type="data" format="bed" label="reference gene model" help="(--refgene)"/> + <expand macro="bam_sam_param" /> + <expand macro="refgene_param" /> </inputs> <outputs> @@ -89,18 +86,8 @@ TES_down_10kb 140361190 896882 6.39 =============== ============ =========== =========== ------ - -About RSeQC -+++++++++++ +@ABOUT@ -The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. - -The RSeQC package is licensed under the GNU GPL v3 license. - -.. 
image:: http://rseqc.sourceforge.net/_static/logo.png - -.. _RSeQC: http://rseqc.sourceforge.net/ ]]> </help>
--- a/read_duplication.xml Tue May 03 16:36:57 2016 -0400 +++ b/read_duplication.xml Tue Mar 14 10:23:21 2017 -0400 @@ -1,43 +1,43 @@ -<tool id="rseqc_read_duplication" name="Read Duplication" version="2.4galaxy1"> +<tool id="rseqc_read_duplication" name="Read Duplication" version="@WRAPPER_VERSION@"> <description>determines reads duplication rate with sequence-based and mapping-based strategies</description> <macros> <import>rseqc_macros.xml</import> </macros> - <requirements> - <expand macro="requirement_package_r" /> - <expand macro="requirement_package_numpy" /> - <expand macro="requirement_package_rseqc" /> - </requirements> + <expand macro="requirements" /> <expand macro="stdio" /> <version_command><![CDATA[read_duplication.py --version]]></version_command> <command><![CDATA[ - read_duplication.py -i $input -o output -u $upLimit + read_duplication.py -i '${input}' -o output -u ${upLimit} -q ${mapq} ]]> </command> <inputs> - <param name="input" type="data" format="bam,sam" label="input bam/sam file" help="(--input-file)"/> + <expand macro="bam_sam_param" /> <param name="upLimit" type="integer" label="Upper Limit of Plotted Duplicated Times (default=500)" value="500" help="(--up-limit)"/> + <expand macro="mapq_param" /> + <expand macro="rscript_output_param" /> </inputs> <outputs> - <data format="xls" name="outputxls" from_work_dir="output.pos.DupRate.xls" label="${tool.name} on ${on_string} (Position XLS)"/> - <data format="xls" name="outputseqxls" from_work_dir="output.seq.DupRate.xls" label="${tool.name} on ${on_string} (Sequence XLS)"/> - <data format="txt" name="outputr" from_work_dir="output.DupRate_plot.r" label="${tool.name} on ${on_string} (R Script)" /> - <data format="pdf" name="outputpdf" from_work_dir="output.DupRate_plot.pdf" label="${tool.name} on ${on_string} (PDF)" /> + <expand macro="pdf_output_data" filename="output.DupRate_plot.pdf" /> + <data format="xls" name="outputxls" from_work_dir="output.pos.DupRate.xls" label="${tool.name} on 
${on_string} (Position xls)"/> + <data format="xls" name="outputseqxls" from_work_dir="output.seq.DupRate.xls" label="${tool.name} on ${on_string} (Sequence xls)"/> + <expand macro="rscript_output_data" filename="output.DupRate_plot.r" /> </outputs> <tests> <test> - <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> - <output name="outputxls" file="output.pos.DupRate.xls"/> - <output name="outputseqxls" file="output.seq.DupRate.xls"/> - <output name="outputr" file="output.DupRate_plot.r"/> + <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam" /> + <param name="rscript_output" value="true" /> + <output name="outputxls" file="output.pos.DupRate.xls" /> + <output name="outputseqxls" file="output.seq.DupRate.xls" /> + <output name="outputr" file="output.DupRate_plot.r" /> + <output name="outputpdf" file="output.DupRate_plot.pdf" compare="sim_size" /> </test> </tests> @@ -67,23 +67,13 @@ 3. output.DupRate_plot.r: R script to generate pdf file 4. output.DupRate_plot.pdf: graphical output generated from R script -.. image:: http://rseqc.sourceforge.net/_images/duplicate.png +.. image:: $PATH_TO_IMAGES/duplicate.png :height: 600 px :width: 600 px :scale: 80 % ------ - -About RSeQC -+++++++++++ +@ABOUT@ -The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. - -The RSeQC package is licensed under the GNU GPL v3 license. - -.. image:: http://rseqc.sourceforge.net/_static/logo.png - -.. _RSeQC: http://rseqc.sourceforge.net/ ]]> </help>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/read_hexamer.xml Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,137 @@ +<tool id="rseqc_read_hexamer" name="Hexamer frequency" version="@WRAPPER_VERSION@"> + <description> + calculates hexamer (6mer) frequency for reads, genomes, and mRNA sequences + </description> + + <macros> + <import>rseqc_macros.xml</import> + </macros> + + <expand macro="requirements" /> + + <expand macro="stdio" /> + + <version_command><![CDATA[read_hexamer.py --version]]></version_command> + + <command><![CDATA[ + #import re + #set $input_list = [] + #for $i, $input in enumerate($inputs): + + #set $safename = re.sub('[^\w\-_]', '_', $input.element_identifier) + #if $safename in $input_list: + #set $safename = str($safename) + "." + str($i) + #end if + $input_list.append($safename) + + #if $input.is_of_type("fastq.gz", "fastqsanger.gz"): + gunzip -c '${input}' > "${safename}" && + #else: + ln -sf '${input}' "${safename}" && + #end if + #end for + read_hexamer.py -i '${ ','.join( [ $name for $name in $input_list ] ) }' + #if $refgenome: + -r '${refgenome}' + #end if + #if $refgene: + -g '${refgene}' + #end if + > '${output}' + ]]> + </command> + + <inputs> + <param name="inputs" type="data" label="Read sequences in fasta or fastq format" format="fasta,fastq,fastqsanger,fastq.gz,fastqsanger.gz" help="(--input)" multiple="true" /> + <param name="refgenome" type="data" label="Reference genome seqeunce (fasta)" format="fasta" optional="true" help="(--refgenome)" /> + <param name="refgene" type="data" label="Reference mRNA sequence (fasta)" format="fasta" optional="true" help="(--refgene)" /> + </inputs> + + <outputs> + <data name="output" format="tabular" label="${tool.name} on ${on_string}" /> + </outputs> + + <tests> + <test> + <param name="inputs" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.R1.fastq"/> + <output name="output"> + <assert_contents> + <has_line line="Hexamer	pairend_strandspecific_51mer_hg19_chr1_1-100000_R1_fastq" /> + 
<has_line line="AAAAAA	0.00217391304348" /> + </assert_contents> + </output> + </test> + <test> + <param name="inputs" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.R1.fastq.gz" ftype="fastqsanger.gz"/> + <output name="output"> + <assert_contents> + <has_line line="Hexamer	pairend_strandspecific_51mer_hg19_chr1_1-100000_R1_fastq_gz" /> + <has_line line="AAAAAA	0.00217391304348" /> + </assert_contents> + </output> + </test> + <test> + <param name="inputs" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.R1.fastq,pairend_strandspecific_51mer_hg19_chr1_1-100000.R2.fastq"/> + <output name="output"> + <assert_contents> + <has_line line="Hexamer	pairend_strandspecific_51mer_hg19_chr1_1-100000_R1_fastq	pairend_strandspecific_51mer_hg19_chr1_1-100000_R2_fastq" /> + <has_line line="AAAAAA	0.00217391304348	0.00534759358289" /> + </assert_contents> + </output> + </test> + <test> + <param name="inputs" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.R1.fastq,pairend_strandspecific_51mer_hg19_chr1_1-100000.R1.fastq"/> + <output name="output"> + <assert_contents> + <has_line line="Hexamer	pairend_strandspecific_51mer_hg19_chr1_1-100000_R1_fastq	pairend_strandspecific_51mer_hg19_chr1_1-100000_R1_fastq.1" /> + <has_line line="AAAAAA	0.00217391304348	0.00217391304348" /> + </assert_contents> + </output> + </test> + <!-- Unable to test with collections at the moment (requires type="data_collection" on the input) + <test> + <param name="inputs"> + <collection type="list"> + <element name="read_1" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.R1.fastq" /> + <element name="read_2" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.R2.fastq" /> + </collection> + </param> + <output name="output" file="output.read_hexamer.2.txt" /> + </test> + --> + </tests> + + <help><![CDATA[ +read_hexamer.py ++++++++++++++++++++++ + +Calculate hexamer (6mer) frequency. If ‘-r’ was specified, hexamer frequency +is also calculated for the reference genome. 
If ‘-g’ was provided, hexamer +frequency is also calculated for the mRNA sequences. + +Inputs +++++++++++++++ + +Input reads file + Read sequences in fasta or fastq format. + +Reference Genome + Reference genome sequence in fasta format. + +Reference Gene + Reference mRNA sequences in fasta format. + + +Outputs +++++++++++++++ + +Tabular file of hexamer frequencies for each input. + +@ABOUT@ + +]]> + </help> + + <expand macro="citations" /> + +</tool>
--- a/read_quality.xml Tue May 03 16:36:57 2016 -0400 +++ b/read_quality.xml Tue Mar 14 10:23:21 2017 -0400 @@ -1,15 +1,11 @@ -<tool id="rseqc_read_quality" name="Read Quality" version="2.4galaxy1"> +<tool id="rseqc_read_quality" name="Read Quality" version="@WRAPPER_VERSION@"> <description>determines Phred quality score</description> <macros> <import>rseqc_macros.xml</import> </macros> - <requirements> - <expand macro="requirement_package_r" /> - <expand macro="requirement_package_numpy" /> - <expand macro="requirement_package_rseqc" /> - </requirements> + <expand macro="requirements" /> <expand macro="stdio" /> @@ -17,29 +13,33 @@ <command><![CDATA[ read_quality.py - --input-file $input + --input-file '${input}' --out-prefix output - -r $reduce - --mapq $mapq + -r ${reduce} + --mapq ${mapq} ]]> </command> <inputs> - <param name="input" type="data" format="bam,sam" label="input bam/sam file" help="(--input-file)"/> + <expand macro="bam_sam_param" /> <param name="reduce" type="integer" label="Ignore Phred scores less than this amount (only applies to 'boxplot', default=1000)" value="1000" help="(--reduce)"/> - <param name="mapq" type="integer" label="Minimum mapping quality (default=30)" help="Minimum phred scale mapping quality to consider a read 'uniquely mapped' (--mapq)" value="30" /> + <expand macro="mapq_param" /> + <expand macro="rscript_output_param" /> </inputs> <outputs> - <data format="txt" name="outputr" from_work_dir="output.qual.r" label="${tool.name} on ${on_string} (R Script)" /> - <data format="pdf" name="outputheatpdf" from_work_dir="output.qual.heatmap.pdf" label="${tool.name} on ${on_string} (Heatmap PDF)" /> - <data format="pdf" name="outputboxpdf" from_work_dir="output.qual.boxplot.pdf" label="${tool.name} on ${on_string} (Boxplot PDF)" /> + <data format="pdf" name="outputheatpdf" from_work_dir="output.qual.heatmap.pdf" label="${tool.name} on ${on_string} (Heatmap pdf)" /> + <data format="pdf" name="outputboxpdf" 
from_work_dir="output.qual.boxplot.pdf" label="${tool.name} on ${on_string} (Boxplot pdf)" /> + <expand macro="rscript_output_data" filename="output.qual.r" /> </outputs> <tests> <test> <param name="input" value="pairend_strandspecific_51mer_hg19_random.bam"/> + <param name="rscript_output" value="true" /> <output name="outputr" file="output.qual.r"/> + <output name="outputheatpdf" file="output.qual.heatmap.pdf" compare="sim_size" /> + <output name="outputboxpdf" file="output.qual.boxplot.pdf" compare="sim_size" /> </test> </tests> @@ -70,30 +70,20 @@ 1. output.qual.r 2. output.qual.boxplot.pdf - .. image:: http://rseqc.sourceforge.net/_images/36mer.qual.plot.png + .. image:: $PATH_TO_IMAGES/36mer.qual.plot.png :height: 600 px :width: 600 px :scale: 80 % 3. output.qual.heatmap.pdf - .. image:: http://rseqc.sourceforge.net/_images/36mer.qual.heatmap.png + .. image:: $PATH_TO_IMAGES/36mer.qual.heatmap.png :height: 600 px :width: 600 px :scale: 80 % Heatmap: use different color to represent nucleotide density ("blue"=low density,"orange"=median density,"red"=high density") ------ - -About RSeQC -+++++++++++ +@ABOUT@ -The RSeQC_ package provides a number of useful modules that can comprehensively evaluate high throughput sequence data especially RNA-seq data. "Basic modules" quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC bias, while "RNA-seq specific modules" investigate sequencing saturation status of both splicing junction detection and expression estimation, mapped reads clipping profile, mapped reads distribution, coverage uniformity over gene body, reproducibility, strand specificity and splice junction annotation. - -The RSeQC package is licensed under the GNU GPL v3 license. - -.. image:: http://rseqc.sourceforge.net/_static/logo.png - -.. _RSeQC: http://rseqc.sourceforge.net/ ]]> </help>
--- a/rseqc_macros.xml Tue May 03 16:36:57 2016 -0400 +++ b/rseqc_macros.xml Tue Mar 14 10:23:21 2017 -0400 @@ -1,8 +1,12 @@ <macros> - <xml name="requirement_package_r"><requirement type="package" version="3.0.3">R</requirement></xml> - <xml name="requirement_package_numpy"><requirement type="package" version="1.7.1">numpy</requirement></xml> - <xml name="requirement_package_rseqc"><requirement type="package" version="2.4">rseqc</requirement></xml> + <token name="@WRAPPER_VERSION@">2.6.4</token> + + <xml name="requirements"> + <requirements> + <requirement type="package" version="2.6.4">rseqc</requirement> + </requirements> + </xml> <xml name="stdio"> <stdio> @@ -11,32 +15,141 @@ </stdio> </xml> + <!-- Params --> + <xml name="bam_param"> + <param name="input" type="data" label="Input .bam file" format="bam" help="(--input-file)"/> + </xml> + + <xml name="bam_sam_param"> + <param name="input" type="data" label="Input .bam/.sam file" format="bam,sam" help="(--input-file)"/> + </xml> + + <xml name="refgene_param"> + <param name="refgene" type="data" format="bed" label="Reference gene model" help="(--refgene)"/> + </xml> + + <xml name="mapq_param"> + <param name="mapq" type="integer" label="Minimum mapping quality" value="30" help="Minimum mapping quality for an alignment to be considered as "uniquely mapped" (--mapq)"/> + </xml> + + <xml name="readlength_param"> + <param name="readlength" type="integer" value="" label="Alignment length" optional="false" help="Alignment length of read, usually set to the orignial read length (--read-align-length)"/> + </xml> + + <xml name="readnum_param"> + <param name="readnum" type="integer" label="Number of aligned reads" value="1000000" help="Number of aligned reads with mismatches used to calculate the mismatch profile (--read-num)"/> + </xml> + + <xml name="sample_size_param"> + <param name="sample_size" type="integer" label="Number of reads sampled from SAM/BAM file (default = 200000)" value="200000" min="1" 
help="(--sample-size)"/> + </xml> + + <xml name="min_intron_param"> + <param name="min_intron" type="integer" value="50" label="Minimum intron length (bp, default=50)" help="(--min-intron)" /> + </xml> + + <xml name="layout_param"> + <param name="layout" type="select" label="Sequencing layout" help="(--sequencing)"> + <option value="SE" selected="true">Single-end</option> + <option value="PE">Paired-end</option> + </param> + </xml> + + <xml name="strand_type_param"> + <conditional name="strand_type"> + <param name="strand_specific" type="select" label="Strand-specific?"> + <option value="none" selected="true">None</option> + <option value="pair">Pair-End RNA-seq</option> + <option value="single">Single-End RNA-seq</option> + </param> + <when value="pair"> + <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" help="(--strand)"> + <option value="sd" selected="true"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option> + <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option> + </param> + </when> + <when value="single"> + <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" help="(--strand)"> + <option value="s" selected="true">positive --> positive; negative --> negative</option> + <option value="d">positive --> negative; negative --> positive</option> + </param> + </when> + <when value="none"></when> + </conditional> + </xml> + + <xml name="multihits_param"> + <conditional name="multihits_type"> + <param name="multihits_type_selector" type="select" label="Reads with multiple hits" help="(--skip-multi-hits)"> + <option value="use_multihits" selected="true">Count Mutliple Hit Reads</option> + <option value="skip_multihits">Skip Multiple Hit Reads/Only Use Uniquely Mapped Reads</option> + </param> + <when 
value="skip_multihits"> + <expand macro="mapq_param" /> + </when> + <when value="use_multihits" /> + </conditional> + </xml> + + <xml name="rscript_output_param"> + <param name="rscript_output" type="boolean" value="false" label="Output R-Script" + help="Output the R-Script used to generate the plots" /> + </xml> + + + <!-- Output --> + + <xml name="pdf_output_data" token_filename="output.pdf"> + <data format="pdf" name="outputpdf" from_work_dir="@FILENAME@" label="${tool.name} on ${on_string} (pdf)" /> + </xml> + + <xml name="xls_output_data" token_filename="output.xls"> + <data format="xls" name="outputxls" from_work_dir="@FILENAME@" label="${tool.name} on ${on_string} (xls)" /> + </xml> + + <xml name="rscript_output_data" token_filename="output.r"> + <data format="txt" name="outputr" from_work_dir="@FILENAME@" label="${tool.name} on ${on_string} (rscript)"> + <filter>rscript_output</filter> + </data> + </xml> + + <!-- Command --> + <token name="@MULTIHITS@"> +<![CDATA[ +#if str($multihits_type.multihits_type_selector) == "skip_multihits" + --skip-multi-hits + --mapq=${multihits_type.mapq} +#end if +]]> + </token> + + <token name="@ABOUT@"> + +----- + +About RSeQC ++++++++++++ + +The RSeQC_ package provides a number of useful modules that can comprehensively +evaluate high throughput sequence data especially RNA-seq data. "Basic modules" +quickly inspect sequence quality, nucleotide composition bias, PCR bias and GC +bias, while "RNA-seq specific modules" investigate sequencing saturation status +of both splicing junction detection and expression estimation, mapped reads +clipping profile, mapped reads distribution, coverage uniformity over gene +body, reproducibility, strand specificity and splice junction annotation. + +The RSeQC package is licensed under the GNU GPL v3 license. + +.. image:: $PATH_TO_IMAGES/logo.png + +.. 
_RSeQC: http://rseqc.sourceforge.net/ + + + </token> + <xml name="citations"> <citations> - <citation type="bibtex"> - @article{wang_rseqc:_2012, - title = {{RSeQC}: quality control of {RNA}-seq experiments}, - volume = {28}, - issn = {1367-4803, 1460-2059}, - shorttitle = {{RSeQC}}, - url = {http://bioinformatics.oxfordjournals.org/content/28/16/2184}, - doi = {10.1093/bioinformatics/bts356}, - abstract = {Motivation: RNA-seq has been extensively used for transcriptome study. Quality control (QC) is critical to ensure that RNA-seq data are of high quality and suitable for subsequent analyses. However, QC is a time-consuming and complex task, due to the massive size and versatile nature of RNA-seq data. Therefore, a convenient and comprehensive QC tool to assess RNA-seq quality is sorely needed. - Results: We developed the RSeQC package to comprehensively evaluate different aspects of RNA-seq experiments, such as sequence quality, GC bias, polymerase chain reaction bias, nucleotide composition bias, sequencing depth, strand specificity, coverage uniformity and read distribution over the genome structure. RSeQC takes both SAM and BAM files as input, which can be produced by most RNA-seq mapping tools as well as BED files, which are widely used for gene models. Most modules in RSeQC take advantage of R scripts for visualization, and they are notably efficient in dealing with large BAM/SAM files containing hundreds of millions of alignments. - Availability and implementation: RSeQC is written in Python and C. Source code and a comprehensive user's manual are freely available at: http://code.google.com/p/rseqc/. 
- Contact: WL1\{at\}bcm.edu - Supplementary Information: Supplementary data are available at Bioinformatics online.}, - language = {en}, - number = {16}, - urldate = {2015-06-30}, - journal = {Bioinformatics}, - author = {Wang, Liguo and Wang, Shengqin and Li, Wei}, - month = aug, - year = {2012}, - pmid = {22743226}, - pages = {2184--2185}, - } - </citation> + <citation type="doi">10.1093/bioinformatics/bts356</citation> </citations> </xml> </macros>
--- a/test-data/bamstats.txt Tue May 03 16:36:57 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -Load BAM file ... Done - -#================================================== -#All numbers are READ count -#================================================== - -Total records: 40 - -QC failed: 0 -Optical/PCR duplicate: 0 -Non primary hits 0 -Unmapped reads: 0 -mapq < mapq_cut (non-unique): 0 - -mapq >= mapq_cut (unique): 40 -Read-1: 20 -Read-2: 20 -Reads map to '+': 20 -Reads map to '-': 20 -Non-splice reads: 36 -Splice reads: 4 -Reads mapped in proper pairs: 39 -Proper-paired reads map to different chrom:0
--- a/test-data/output.DupRate_plot.r Tue May 03 16:36:57 2016 -0400 +++ b/test-data/output.DupRate_plot.r Tue Mar 14 10:23:21 2017 -0400 @@ -4,11 +4,11 @@ seq_uniqRead=c(40) pos_occ=c(1) pos_uniqRead=c(40) -plot(pos_occ,log10(pos_uniqRead),ylab='Number of Reads (log10)',xlab='Frequency',pch=4,cex=0.8,col='blue',xlim=c(1,500),yaxt='n') +plot(pos_occ,log10(pos_uniqRead),ylab='Number of Reads (log10)',xlab='Occurrence of read',pch=4,cex=0.8,col='blue',xlim=c(1,500),yaxt='n') points(seq_occ,log10(seq_uniqRead),pch=20,cex=0.8,col='red') ym=floor(max(log10(pos_uniqRead))) -legend(300,ym,legend=c('Sequence-base','Mapping-base'),col=c('blue','red'),pch=c(4,20)) +legend(300,ym,legend=c('Sequence-based','Mapping-based'),col=c('blue','red'),pch=c(4,20)) axis(side=2,at=0:ym,labels=0:ym) -axis(side=4,at=c(log10(pos_uniqRead[1]),log10(pos_uniqRead[2]),log10(pos_uniqRead[3]),log10(pos_uniqRead[4])), labels=c(round(pos_uniqRead[1]*100/sum(pos_uniqRead)),round(pos_uniqRead[2]*100/sum(pos_uniqRead)),round(pos_uniqRead[3]*100/sum(pos_uniqRead)),round(pos_uniqRead[4]*100/sum(pos_uniqRead)))) +axis(side=4,at=c(log10(pos_uniqRead[1]),log10(pos_uniqRead[2]),log10(pos_uniqRead[3]),log10(pos_uniqRead[4])), labels=c(round(pos_uniqRead[1]*100/sum(pos_uniqRead*pos_occ)),round(pos_uniqRead[2]*100/sum(pos_uniqRead*pos_occ)),round(pos_uniqRead[3]*100/sum(pos_uniqRead*pos_occ)),round(pos_uniqRead[4]*100/sum(pos_uniqRead*pos_occ)))) mtext(4, text = "Reads %", line = 2) dev.off()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.FPKM.xls Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,8 @@ +#chrom st end accession mRNA_size gene_strand Frag_count FPM FPKM +chr1 11873 14409 NR_046018 1652.0 + 1.0 50000.0 30266.3438257 +chr1 14361 29370 NR_024540 1769.0 - 2.0 100000.0 56529.1124929 +chr1 17368 17436 NR_106918 68.0 - 0.0 0.0 0.0 +chr1 17368 17436 NR_107062 68.0 - 0.0 0.0 0.0 +chr1 34610 36081 NR_026818 1130.0 - 0.0 0.0 0.0 +chr1 34610 36081 NR_026820 1130.0 - 0.0 0.0 0.0 +chr1 69090 70008 NM_001005484 918.0 + 0.0 0.0 0.0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.RNA_fragment_size.txt Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,8 @@ +chrom tx_start tx_end symbol frag_count frag_mean frag_median frag_std +chr1 11873 14409 NR_046018 1 0 0 0 +chr1 14361 29370 NR_024540 14 66.5 51.0 41.1195990809 +chr1 17368 17436 NR_106918 0 0 0 0 +chr1 17368 17436 NR_107062 0 0 0 0 +chr1 34610 36081 NR_026818 0 0 0 0 +chr1 34610 36081 NR_026820 0 0 0 0 +chr1 69090 70008 NM_001005484 0 0 0 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.bamstats.txt Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,22 @@ + +#================================================== +#All numbers are READ count +#================================================== + +Total records: 40 + +QC failed: 0 +Optical/PCR duplicate: 0 +Non primary hits 0 +Unmapped reads: 0 +mapq < mapq_cut (non-unique): 0 + +mapq >= mapq_cut (unique): 40 +Read-1: 20 +Read-2: 20 +Reads map to '+': 20 +Reads map to '-': 20 +Non-splice reads: 36 +Splice reads: 4 +Reads mapped in proper pairs: 39 +Proper-paired reads map to different chrom:0
--- a/test-data/output.clipping_profile.r Tue May 03 16:36:57 2016 -0400 +++ b/test-data/output.clipping_profile.r Tue Mar 14 10:23:21 2017 -0400 @@ -1,5 +1,6 @@ pdf("output.clipping_profile.pdf") -read_pos=c(0,1,2,3,4,5,6,7,8,9,44,45,46,47,48,49,50) -count=c(16,12,11,8,6,5,1,1,1,1,1,2,2,2,3,4,4) -plot(read_pos,1-(count/40),col="blue",main="clipping profile",xlab="Position of reads",ylab="Mappability",type="b") +read_pos=c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50) +clip_count=c(16.0,12.0,11.0,8.0,7.0,6.0,1.0,1.0,1.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1.0,1.0,1.0,2.0,3.0,4.0,4.0) +nonclip_count= 40 - clip_count +plot(read_pos, nonclip_count*100/(clip_count+nonclip_count),col="blue",main="clipping profile",xlab="Position of read",ylab="Non-clipped %",type="b") dev.off()
--- a/test-data/output.clipping_profile.xls Tue May 03 16:36:57 2016 -0400 +++ b/test-data/output.clipping_profile.xls Tue Mar 14 10:23:21 2017 -0400 @@ -1,18 +1,52 @@ -Position Read_Total Read_clipped -0 40 16 -1 40 12 -2 40 11 -3 40 8 -4 40 6 -5 40 5 -6 40 1 -7 40 1 -8 40 1 -9 40 1 -44 40 1 -45 40 2 -46 40 2 -47 40 2 -48 40 3 -49 40 4 -50 40 4 +Position Clipped_nt Non_clipped_nt +0 16.0 24.0 +1 12.0 28.0 +2 11.0 29.0 +3 8.0 32.0 +4 7.0 33.0 +5 6.0 34.0 +6 1.0 39.0 +7 1.0 39.0 +8 1.0 39.0 +9 1.0 39.0 +10 0 40.0 +11 0 40.0 +12 0 40.0 +13 0 40.0 +14 0 40.0 +15 0 40.0 +16 0 40.0 +17 0 40.0 +18 0 40.0 +19 0 40.0 +20 0 40.0 +21 0 40.0 +22 0 40.0 +23 0 40.0 +24 0 40.0 +25 0 40.0 +26 0 40.0 +27 0 40.0 +28 0 40.0 +29 0 40.0 +30 0 40.0 +31 0 40.0 +32 0 40.0 +33 0 40.0 +34 0 40.0 +35 0 40.0 +36 0 40.0 +37 0 40.0 +38 0 40.0 +39 0 40.0 +40 0 40.0 +41 0 40.0 +42 0 40.0 +43 0 40.0 +44 1.0 39.0 +45 1.0 39.0 +46 1.0 39.0 +47 2.0 38.0 +48 3.0 37.0 +49 4.0 36.0 +50 4.0 36.0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.deletion_profile.r Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,5 @@ +pdf("output.deletion_profile.pdf") +pos=c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100) +value=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0) +plot(pos,value,type='b', col='blue',xlab="Read position (5'->3')", ylab='Deletion count') +dev.off()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.deletion_profile.txt Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,102 @@ +read_position deletion_count +0 0 +1 0 +2 0 +3 0 +4 0 +5 0 +6 0 +7 0 +8 0 +9 0 +10 0 +11 0 +12 0 +13 0 +14 0 +15 0 +16 0 +17 0 +18 0 +19 0 +20 0 +21 0 +22 0 +23 0 +24 0 +25 0 +26 0 +27 0 +28 0 +29 0 +30 0 +31 0 +32 0 +33 0 +34 0 +35 0 +36 0 +37 0 +38 0 +39 0 +40 0 +41 0 +42 0 +43 0 +44 0 +45 0 +46 0 +47 0 +48 0 +49 0 +50 0 +51 0 +52 0 +53 0 +54 0 +55 0 +56 0 +57 0 +58 0 +59 0 +60 0 +61 0 +62 0 +63 0 +64 0 +65 0 +66 0 +67 0 +68 0 +69 0 +70 0 +71 0 +72 0 +73 0 +74 0 +75 0 +76 0 +77 0 +78 0 +79 0 +80 0 +81 0 +82 0 +83 0 +84 0 +85 0 +86 0 +87 0 +88 0 +89 0 +90 0 +91 0 +92 0 +93 0 +94 0 +95 0 +96 0 +97 0 +98 0 +99 0 +100 0
--- a/test-data/output.geneBodyCoverage.r Tue May 03 16:36:57 2016 -0400 +++ b/test-data/output.geneBodyCoverage.r Tue Mar 14 10:23:21 2017 -0400 @@ -1,8 +1,8 @@ -d1_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam <- c(0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0) +pairend_strandspecific_51mer_hg19_chr1_1_100000_bam <- c(0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0) pdf("output.geneBodyCoverage.curves.pdf") x=1:100 icolor = colorRampPalette(c("#7fc97f","#beaed4","#fdc086","#ffff99","#386cb0","#f0027f"))(1) -plot(x,d1_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam,type='l',xlab="Gene body percentile (5'->3')", ylab="Coverage",lwd=0.8,col=icolor[1]) +plot(x,pairend_strandspecific_51mer_hg19_chr1_1_100000_bam,type='l',xlab="Gene body percentile (5'->3')", ylab="Coverage",lwd=0.8,col=icolor[1]) dev.off()
--- a/test-data/output.geneBodyCoverage.txt Tue May 03 16:36:57 2016 -0400 +++ b/test-data/output.geneBodyCoverage.txt Tue Mar 14 10:23:21 2017 -0400 @@ -1,2 +1,2 @@ Percentile 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 -d1_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +pairend_strandspecific_51mer_hg19_chr1_1_100000_bam 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.geneBodyCoverage2.r Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,5 @@ +pdf('output.geneBodyCoverage.pdf') +x=1:100 +y=c(0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0) +plot(x,y/7,xlab="percentile of gene body (5'->3')",ylab='average wigsum',type='s') +dev.off()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.geneBodyCoverage2.txt Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,101 @@ +percentile count +0 0.0 +1 0.0 +2 0.0 +3 0.0 +4 0.0 +5 0.0 +6 0.0 +7 0.0 +8 0.0 +9 0.0 +10 0.0 +11 0.0 +12 0.0 +13 0.0 +14 0.0 +15 0.0 +16 0.0 +17 0.0 +18 0.0 +19 0.0 +20 0.0 +21 0.0 +22 0.0 +23 0.0 +24 0.0 +25 1.0 +26 0.0 +27 0.0 +28 1.0 +29 0.0 +30 0.0 +31 0.0 +32 0.0 +33 0.0 +34 0.0 +35 0.0 +36 0.0 +37 0.0 +38 1.0 +39 1.0 +40 1.0 +41 0.0 +42 0.0 +43 1.0 +44 1.0 +45 1.0 +46 0.0 +47 0.0 +48 0.0 +49 0.0 +50 0.0 +51 0.0 +52 0.0 +53 0.0 +54 0.0 +55 0.0 +56 0.0 +57 0.0 +58 0.0 +59 0.0 +60 0.0 +61 0.0 +62 0.0 +63 0.0 +64 0.0 +65 0.0 +66 0.0 +67 0.0 +68 0.0 +69 0.0 +70 0.0 +71 0.0 +72 0.0 +73 0.0 +74 0.0 +75 0.0 +76 0.0 +77 0.0 +78 0.0 +79 1.0 +80 1.0 +81 1.0 +82 0.0 +83 1.0 +84 1.0 +85 1.0 +86 0.0 +87 0.0 +88 0.0 +89 0.0 +90 0.0 +91 0.0 +92 0.0 +93 0.0 +94 0.0 +95 0.0 +96 0.0 +97 0.0 +98 0.0 +99 0.0
--- a/test-data/output.infer_experiment.txt Tue May 03 16:36:57 2016 -0400 +++ b/test-data/output.infer_experiment.txt Tue Mar 14 10:23:21 2017 -0400 @@ -1,6 +1,6 @@ This is PairEnd Data +Fraction of reads failed to determine: 0.0000 Fraction of reads explained by "1++,1--,2+-,2-+": 1.0000 Fraction of reads explained by "1+-,1-+,2++,2--": 0.0000 -Fraction of reads explained by other combinations: 0.0000
--- a/test-data/output.inner_distance_plot.r Tue May 03 16:36:57 2016 -0400 +++ b/test-data/output.inner_distance_plot.r Tue Mar 14 10:23:21 2017 -0400 @@ -1,11 +1,11 @@ +out_file = 'output' pdf('output.inner_distance_plot.pdf') fragsize=rep(c(-248,-243,-238,-233,-228,-223,-218,-213,-208,-203,-198,-193,-188,-183,-178,-173,-168,-163,-158,-153,-148,-143,-138,-133,-128,-123,-118,-113,-108,-103,-98,-93,-88,-83,-78,-73,-68,-63,-58,-53,-48,-43,-38,-33,-28,-23,-18,-13,-8,-3,2,7,12,17,22,27,32,37,42,47,52,57,62,67,72,77,82,87,92,97,102,107,112,117,122,127,132,137,142,147,152,157,162,167,172,177,182,187,192,197,202,207,212,217,222,227,232,237,242,247),times=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,2,0,0,2,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,1,1,0,1,0,1,0,0,0)) frag_sd = sd(fragsize) frag_mean = mean(fragsize) frag_median = median(fragsize) -write(c("Mean insert size",frag_mean), stdout()) -write(c("Median insert size",frag_median), stdout()) -write(c("Standard deviation",frag_sd), stdout()) +write(x=c("Name","Mean","Median","sd"), sep=" ", file=stdout(),ncolumns=4) +write(c(out_file,frag_mean,frag_median,frag_sd),sep=" ", file=stdout(),ncolumns=4) hist(fragsize,probability=T,breaks=100,xlab="mRNA insert size (bp)",main=paste(c("Mean=",frag_mean,";","SD=",frag_sd),collapse=""),border="blue") lines(density(fragsize,bw=10),col='red') dev.off()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.insertion_profile.r Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,6 @@ +pdf("output.insertion_profile.pdf") +read_pos=c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50) +insert_count=c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0) +noninsert_count= 40 - insert_count +plot(read_pos, insert_count*100/(insert_count+noninsert_count),col="blue",main="Insertion profile",xlab="Position of read",ylab="Insertion %",type="b") +dev.off()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.insertion_profile.xls Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,52 @@ +Position Insert_nt Non_insert_nt +0 0 40.0 +1 0 40.0 +2 0 40.0 +3 0 40.0 +4 0 40.0 +5 0 40.0 +6 0 40.0 +7 0 40.0 +8 0 40.0 +9 0 40.0 +10 0 40.0 +11 0 40.0 +12 0 40.0 +13 0 40.0 +14 0 40.0 +15 0 40.0 +16 0 40.0 +17 0 40.0 +18 0 40.0 +19 0 40.0 +20 0 40.0 +21 0 40.0 +22 0 40.0 +23 0 40.0 +24 0 40.0 +25 0 40.0 +26 0 40.0 +27 0 40.0 +28 0 40.0 +29 0 40.0 +30 0 40.0 +31 0 40.0 +32 0 40.0 +33 0 40.0 +34 0 40.0 +35 0 40.0 +36 0 40.0 +37 0 40.0 +38 0 40.0 +39 0 40.0 +40 0 40.0 +41 0 40.0 +42 0 40.0 +43 0 40.0 +44 0 40.0 +45 0 40.0 +46 0 40.0 +47 0 40.0 +48 0 40.0 +49 0 40.0 +50 0 40.0
--- a/test-data/output.junctionSaturation_plot.r Tue May 03 16:36:57 2016 -0400 +++ b/test-data/output.junctionSaturation_plot.r Tue Mar 14 10:23:21 2017 -0400 @@ -1,8 +1,8 @@ pdf('output.junctionSaturation_plot.pdf') x=c(5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100) -y=c(0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1) +y=c(0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1) z=c(0,0,0,0,0,0,1,1,1,1,1,1,1,2,2,2,2,2,2,3) -w=c(0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,2) +w=c(0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,2) m=max(0,0,0) n=min(0,0,0) plot(x,z/1000,xlab='percent of total reads',ylab='Number of splicing junctions (x1000)',type='o',col='blue',ylim=c(n,m))
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.mismatch_profile.xls Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,1 @@ +Total reads used: 0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.qual.heatmap.pdf Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,62 @@ +pdf('output.qual.boxplot.pdf') +p0<-rep(c(33,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(119,2,3,2,5,6,8,6,2,3,11,16,6,26,11,13,25,39,7,40,33,33,58,51,116,87,55,256,54,323,263,140,812,654,1119)/1000) +p1<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(105,2,2,2,4,8,6,21,3,1,1,8,13,13,16,16,14,29,32,18,50,30,57,66,73,97,105,60,253,57,330,270,142,801,630,1069)/1000) +p2<-rep(c(33,35,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(109,1,1,3,2,7,11,14,13,2,4,3,8,14,21,27,17,14,26,39,11,37,28,74,64,55,86,106,62,234,56,326,269,147,787,645,1081)/1000) +p3<-rep(c(33,37,38,39,40,41,42,43,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(108,1,6,4,2,9,12,7,3,3,9,14,13,24,20,8,24,46,14,43,28,59,67,75,88,107,51,285,56,293,239,139,802,660,1084)/1000) +p4<-rep(c(33,35,37,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(97,1,1,3,9,8,11,5,4,2,4,10,16,19,24,7,8,35,43,19,49,29,51,67,51,93,107,43,306,65,345,223,123,789,661,1075)/1000) +p5<-rep(c(33,37,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(96,3,2,5,11,6,8,2,7,2,12,17,15,16,12,11,25,31,12,32,36,59,70,69,74,99,56,277,59,343,249,111,845,650,1081)/1000) +p6<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(86,2,4,2,6,12,8,10,1,7,7,9,11,14,26,14,9,14,53,17,34,41,55,71,76,76,117,62,238,62,339,229,155,798,607,1131)/1000) 
+p7<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(76,1,4,4,6,11,9,13,5,5,6,6,17,19,20,17,8,19,45,15,30,33,60,68,58,76,99,59,291,54,349,251,129,818,602,1120)/1000) +p8<-rep(c(33,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(74,1,2,3,1,5,6,6,7,7,2,4,11,11,16,24,13,9,24,48,19,33,39,63,67,68,78,104,66,284,62,329,240,147,749,649,1132)/1000) +p9<-rep(c(33,37,38,39,40,41,42,43,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(98,1,2,2,6,11,19,10,5,3,8,18,19,24,14,5,18,53,21,41,39,56,79,64,70,93,57,291,42,334,259,143,795,616,1087)/1000) +p10<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(71,3,3,4,3,5,6,7,2,3,5,13,14,6,17,21,12,27,40,16,34,39,46,64,78,103,103,63,279,37,314,239,118,805,674,1129)/1000) +p11<-rep(c(33,34,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(76,1,4,3,3,7,10,11,8,5,6,3,12,13,18,21,16,18,21,46,21,32,41,77,56,77,103,105,54,269,40,320,247,144,796,621,1098)/1000) +p12<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(87,3,2,1,7,12,8,6,5,13,8,11,9,16,23,13,14,22,40,21,53,48,51,59,77,84,126,75,282,48,306,254,151,808,586,1074)/1000) +p13<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(76,5,1,3,6,3,7,8,4,3,10,12,14,13,23,12,19,25,43,17,52,42,63,57,92,91,114,61,281,45,342,256,132,812,586,1073)/1000) +p14<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(81,1,5,4,4,10,11,9,3,5,5,6,18,21,29,26,14,27,51,17,54,47,51,65,84,84,118,66,291,46,316,244,149,782,579,1080)/1000) 
+p15<-rep(c(33,36,37,38,39,40,41,42,43,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(87,1,2,5,2,10,17,12,9,7,2,10,10,12,20,18,21,27,50,17,50,54,42,82,57,84,103,54,285,41,342,265,115,822,582,1085)/1000) +p16<-rep(c(33,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(118,4,3,7,7,11,10,5,6,8,11,18,19,30,34,13,34,47,14,62,49,55,83,82,96,101,51,283,45,346,249,152,843,521,985)/1000) +p17<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(73,2,5,6,7,9,11,7,5,4,11,19,13,15,33,18,17,42,57,25,46,65,67,94,68,93,117,67,279,53,306,295,132,844,504,993)/1000) +p18<-rep(c(33,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(72,1,2,3,5,4,16,13,14,2,8,2,18,19,27,37,27,18,29,57,21,47,57,62,87,81,89,111,57,293,49,319,270,142,858,495,990)/1000) +p19<-rep(c(33,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(78,1,1,5,3,3,13,10,13,5,7,5,14,15,24,30,23,23,24,57,19,72,49,70,70,72,91,124,60,298,52,347,270,147,841,486,980)/1000) +p20<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(71,4,5,3,10,9,10,12,5,9,6,23,14,19,33,27,21,34,60,16,47,57,55,82,84,109,117,44,305,45,335,265,146,856,510,954)/1000) +p21<-rep(c(33,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(80,5,4,5,4,6,10,7,4,11,14,19,18,32,25,29,32,75,19,58,56,66,81,79,102,133,52,332,44,306,260,152,879,486,917)/1000) +p22<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(80,2,5,2,11,11,13,4,4,5,8,12,15,21,34,27,18,44,58,26,72,62,72,90,84,97,137,51,324,54,332,254,143,857,492,881)/1000) 
+p23<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(78,3,4,1,9,9,3,8,9,9,5,12,20,19,37,30,23,38,69,29,64,51,71,95,92,99,133,52,320,51,340,275,152,868,467,856)/1000) +p24<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(73,1,2,3,6,14,19,7,3,3,10,24,15,26,38,27,15,34,71,17,62,72,75,86,84,108,128,65,304,41,356,239,139,864,494,876)/1000) +p25<-rep(c(33,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(100,1,1,6,3,6,14,19,11,9,7,11,5,8,25,21,35,16,18,39,61,19,65,42,62,91,83,80,105,38,318,50,372,289,135,847,504,884)/1000) +p26<-rep(c(33,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(72,1,1,4,2,12,10,17,11,5,3,3,21,25,29,34,34,19,38,55,20,55,59,82,96,99,106,133,45,299,71,339,265,157,822,474,882)/1000) +p27<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(73,1,2,5,3,17,16,6,5,7,11,7,16,14,30,31,16,45,71,29,50,62,72,78,77,107,132,62,273,47,366,277,161,892,462,877)/1000) +p28<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(91,3,2,2,7,10,6,9,9,6,11,15,17,19,33,20,10,30,54,20,68,48,73,84,72,114,131,60,321,60,356,270,159,874,496,840)/1000) +p29<-rep(c(33,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(92,1,5,5,4,6,8,7,10,4,7,7,16,26,24,33,20,22,36,49,15,53,65,71,79,80,112,127,63,320,49,359,292,141,837,455,900)/1000) +p30<-rep(c(33,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(105,1,2,5,4,2,15,8,13,8,6,7,19,24,21,30,22,17,35,53,13,52,61,45,93,74,87,120,60,302,41,331,272,131,877,513,931)/1000) 
+p31<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(117,3,5,3,11,23,24,10,5,7,6,8,13,12,40,18,18,40,41,12,45,57,72,86,71,75,125,68,299,55,302,264,154,874,464,973)/1000) +p32<-rep(c(33,35,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(120,1,7,3,4,9,13,19,8,5,3,10,17,25,13,19,18,23,33,49,25,41,51,72,74,56,95,112,60,291,58,281,267,145,916,463,993)/1000) +p33<-rep(c(33,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(89,1,4,9,13,7,8,7,8,6,8,8,18,20,12,34,26,14,31,50,17,45,65,58,68,77,84,110,66,289,54,284,282,164,871,489,1003)/1000) +p34<-rep(c(33,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(162,1,3,4,4,14,15,24,17,2,6,9,16,15,20,37,20,12,34,49,12,42,50,54,66,62,81,121,56,265,50,292,258,127,878,506,1015)/1000) +p35<-rep(c(33,37,38,39,40,41,42,43,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(79,5,1,3,3,13,8,6,7,11,15,19,11,17,25,16,34,49,16,37,45,69,72,76,79,101,48,303,38,326,254,140,811,619,1043)/1000) +p36<-rep(c(33,37,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(78,3,4,3,8,5,6,8,6,5,13,18,18,32,19,22,39,43,17,39,45,68,77,74,69,118,47,272,47,332,262,139,833,562,1068)/1000) +p37<-rep(c(33,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(96,1,3,1,5,5,6,6,8,4,7,3,12,9,18,35,15,24,27,57,20,40,53,70,81,89,91,117,46,262,44,298,251,130,817,588,1059)/1000) +p38<-rep(c(33,37,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(102,5,5,4,10,11,12,3,3,7,8,18,15,22,20,17,20,50,21,46,43,71,70,80,91,110,51,239,34,339,258,119,820,614,1058)/1000) 
+p39<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(100,3,5,3,6,10,12,12,5,5,10,6,21,18,28,14,16,33,38,18,45,56,58,71,65,79,109,57,253,47,310,263,129,854,616,1017)/1000) +p40<-rep(c(33,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(97,1,4,3,1,5,14,7,11,2,5,2,9,6,19,21,18,21,30,37,22,37,64,40,69,53,89,104,66,281,42,355,233,137,771,615,1095)/1000) +p41<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(130,5,2,6,10,10,19,20,5,5,4,12,16,17,28,14,12,25,42,16,42,39,57,61,73,84,110,49,261,48,315,254,125,761,643,1028)/1000) +p42<-rep(c(33,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(108,2,1,2,1,5,4,12,13,7,6,7,3,6,14,19,20,22,15,22,52,14,50,45,57,67,72,78,119,51,272,45,284,226,127,831,604,1054)/1000) +p43<-rep(c(33,35,36,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(124,2,2,1,2,5,13,17,10,1,6,4,14,11,19,31,14,14,24,30,12,42,41,54,64,74,82,112,68,250,49,308,261,142,775,557,1060)/1000) +p44<-rep(c(33,35,37,38,39,40,41,42,43,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(106,1,1,4,4,10,9,9,7,9,6,8,8,16,21,12,18,24,50,14,43,43,56,55,100,87,109,51,261,51,308,217,139,759,562,1041)/1000) +p45<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(120,3,3,2,9,7,12,9,6,4,1,10,9,15,26,11,16,22,35,16,26,45,50,60,56,67,74,62,247,50,282,243,123,747,618,1061)/1000) +p46<-rep(c(33,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(116,1,2,3,2,10,15,10,1,2,6,6,10,9,13,8,14,29,26,12,31,42,59,41,57,88,92,58,257,43,304,236,133,707,612,1016)/1000) 
+p47<-rep(c(33,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(130,1,2,4,2,6,8,18,21,3,5,5,7,12,15,17,7,7,23,43,9,28,32,44,42,56,68,83,54,225,38,289,181,133,713,594,991)/1000) +p48<-rep(c(33,35,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(105,1,5,2,6,9,10,23,1,5,3,3,9,9,30,13,7,18,27,12,28,24,49,42,63,75,81,45,226,43,274,217,147,676,571,925)/1000) +p49<-rep(c(33,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(99,3,1,3,5,5,16,3,3,6,7,4,13,15,11,3,10,34,16,20,37,46,41,52,66,85,35,201,45,253,201,119,685,497,913)/1000) +p50<-rep(c(33,43,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71),times=c(82,2,3,5,3,4,5,13,9,5,11,14,3,35,17,21,41,34,67,67,31,184,49,241,167,93,639,515,797)/1000) +boxplot(p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13,p14,p15,p16,p17,p18,p19,p20,p21,p22,p23,p24,p25,p26,p27,p28,p29,p30,p31,p32,p33,p34,p35,p36,p37,p38,p39,p40,p41,p42,p43,p44,p45,p46,p47,p48,p49,p50,xlab="Position of Read(5'->3')",ylab="Phred Quality Score",outline=F) +dev.off() + + +pdf('output.qual.heatmap.pdf') 
+qual=c(119,0,0,0,2,3,2,5,6,8,6,2,0,3,11,16,6,26,11,13,25,39,7,40,33,33,58,51,116,87,55,256,54,323,263,140,812,654,1119,105,0,0,0,2,2,2,4,8,6,21,3,1,1,8,13,13,16,16,14,29,32,18,50,30,57,66,73,97,105,60,253,57,330,270,142,801,630,1069,109,0,1,0,1,3,2,7,11,14,13,2,4,3,8,14,21,27,17,14,26,39,11,37,28,74,64,55,86,106,62,234,56,326,269,147,787,645,1081,108,0,0,0,1,6,4,2,9,12,7,0,3,3,9,14,13,24,20,8,24,46,14,43,28,59,67,75,88,107,51,285,56,293,239,139,802,660,1084,97,0,1,0,1,0,3,9,8,11,5,4,2,4,10,16,19,24,7,8,35,43,19,49,29,51,67,51,93,107,43,306,65,345,223,123,789,661,1075,96,0,0,0,3,0,2,5,11,6,8,2,7,2,12,17,15,16,12,11,25,31,12,32,36,59,70,69,74,99,56,277,59,343,249,111,845,650,1081,86,0,0,0,2,4,2,6,12,8,10,1,7,7,9,11,14,26,14,9,14,53,17,34,41,55,71,76,76,117,62,238,62,339,229,155,798,607,1131,76,0,0,0,1,4,4,6,11,9,13,5,5,6,6,17,19,20,17,8,19,45,15,30,33,60,68,58,76,99,59,291,54,349,251,129,818,602,1120,74,0,0,1,2,3,1,5,6,6,7,7,2,4,11,11,16,24,13,9,24,48,19,33,39,63,67,68,78,104,66,284,62,329,240,147,749,649,1132,98,0,0,0,1,2,2,6,11,19,10,0,5,3,8,18,19,24,14,5,18,53,21,41,39,56,79,64,70,93,57,291,42,334,259,143,795,616,1087,71,0,0,0,3,3,4,3,5,6,7,2,3,5,13,14,6,17,21,12,27,40,16,34,39,46,64,78,103,103,63,279,37,314,239,118,805,674,1129,76,1,0,0,4,3,3,7,10,11,8,5,6,3,12,13,18,21,16,18,21,46,21,32,41,77,56,77,103,105,54,269,40,320,247,144,796,621,1098,87,0,0,0,3,2,1,7,12,8,6,5,13,8,11,9,16,23,13,14,22,40,21,53,48,51,59,77,84,126,75,282,48,306,254,151,808,586,1074,76,0,0,0,5,1,3,6,3,7,8,4,3,10,12,14,13,23,12,19,25,43,17,52,42,63,57,92,91,114,61,281,45,342,256,132,812,586,1073,81,0,0,0,1,5,4,4,10,11,9,3,5,5,6,18,21,29,26,14,27,51,17,54,47,51,65,84,84,118,66,291,46,316,244,149,782,579,1080,87,0,0,1,2,5,2,10,17,12,9,0,7,2,10,10,12,20,18,21,27,50,17,50,54,42,82,57,84,103,54,285,41,342,265,115,822,582,1085,118,0,0,0,0,4,3,7,7,11,10,5,6,8,11,18,19,30,34,13,34,47,14,62,49,55,83,82,96,101,51,283,45,346,249,152,843,521,985,73,0,0,0,2,5,6,7,9,11,7,5,4,11,19,13,15,33,18,17,42,57,25,46
,65,67,94,68,93,117,67,279,53,306,295,132,844,504,993,72,0,0,1,2,3,5,4,16,13,14,2,8,2,18,19,27,37,27,18,29,57,21,47,57,62,87,81,89,111,57,293,49,319,270,142,858,495,990,78,0,0,1,1,5,3,3,13,10,13,5,7,5,14,15,24,30,23,23,24,57,19,72,49,70,70,72,91,124,60,298,52,347,270,147,841,486,980,71,0,0,0,4,5,3,10,9,10,12,5,9,6,23,14,19,33,27,21,34,60,16,47,57,55,82,84,109,117,44,305,45,335,265,146,856,510,954,80,0,0,0,5,4,5,4,6,10,7,4,0,11,14,19,18,32,25,29,32,75,19,58,56,66,81,79,102,133,52,332,44,306,260,152,879,486,917,80,0,0,0,2,5,2,11,11,13,4,4,5,8,12,15,21,34,27,18,44,58,26,72,62,72,90,84,97,137,51,324,54,332,254,143,857,492,881,78,0,0,0,3,4,1,9,9,3,8,9,9,5,12,20,19,37,30,23,38,69,29,64,51,71,95,92,99,133,52,320,51,340,275,152,868,467,856,73,0,0,0,1,2,3,6,14,19,7,3,3,10,24,15,26,38,27,15,34,71,17,62,72,75,86,84,108,128,65,304,41,356,239,139,864,494,876,100,0,1,1,6,3,6,14,19,11,9,7,11,5,8,25,21,35,16,18,39,61,19,65,42,62,91,83,80,105,38,318,50,372,289,135,847,504,884,72,0,0,1,1,4,2,12,10,17,11,5,3,3,21,25,29,34,34,19,38,55,20,55,59,82,96,99,106,133,45,299,71,339,265,157,822,474,882,73,0,0,0,1,2,5,3,17,16,6,5,7,11,7,16,14,30,31,16,45,71,29,50,62,72,78,77,107,132,62,273,47,366,277,161,892,462,877,91,0,0,0,3,2,2,7,10,6,9,9,6,11,15,17,19,33,20,10,30,54,20,68,48,73,84,72,114,131,60,321,60,356,270,159,874,496,840,92,0,0,1,5,5,4,6,8,7,10,4,7,7,16,26,24,33,20,22,36,49,15,53,65,71,79,80,112,127,63,320,49,359,292,141,837,455,900,105,0,0,1,2,5,4,2,15,8,13,8,6,7,19,24,21,30,22,17,35,53,13,52,61,45,93,74,87,120,60,302,41,331,272,131,877,513,931,117,0,0,0,3,5,3,11,23,24,10,5,7,6,8,13,12,40,18,18,40,41,12,45,57,72,86,71,75,125,68,299,55,302,264,154,874,464,973,120,0,1,0,7,3,4,9,13,19,8,5,3,10,17,25,13,19,18,23,33,49,25,41,51,72,74,56,95,112,60,291,58,281,267,145,916,463,993,89,0,0,1,4,9,13,7,8,7,8,6,8,8,18,20,12,34,26,14,31,50,17,45,65,58,68,77,84,110,66,289,54,284,282,164,871,489,1003,162,0,0,1,3,4,4,14,15,24,17,2,6,9,16,15,20,37,20,12,34,49,12,42,50,54,66,62,81,121,56,265,50,292,258,127
,878,506,1015,79,0,0,0,5,1,3,3,13,8,6,0,7,11,15,19,11,17,25,16,34,49,16,37,45,69,72,76,79,101,48,303,38,326,254,140,811,619,1043,78,0,0,0,3,0,4,3,8,5,6,8,6,5,13,18,18,32,19,22,39,43,17,39,45,68,77,74,69,118,47,272,47,332,262,139,833,562,1068,96,0,0,1,3,1,5,5,6,6,8,4,7,3,12,9,18,35,15,24,27,57,20,40,53,70,81,89,91,117,46,262,44,298,251,130,817,588,1059,102,0,0,0,5,0,5,4,10,11,12,3,3,7,8,18,15,22,20,17,20,50,21,46,43,71,70,80,91,110,51,239,34,339,258,119,820,614,1058,100,0,0,0,3,5,3,6,10,12,12,5,5,10,6,21,18,28,14,16,33,38,18,45,56,58,71,65,79,109,57,253,47,310,263,129,854,616,1017,97,0,0,1,4,3,1,5,14,7,11,2,5,2,9,6,19,21,18,21,30,37,22,37,64,40,69,53,89,104,66,281,42,355,233,137,771,615,1095,130,0,0,0,5,2,6,10,10,19,20,5,5,4,12,16,17,28,14,12,25,42,16,42,39,57,61,73,84,110,49,261,48,315,254,125,761,643,1028,108,0,2,1,2,1,5,4,12,13,7,6,7,3,6,14,19,20,22,15,22,52,14,50,45,57,67,72,78,119,51,272,45,284,226,127,831,604,1054,124,0,2,2,0,1,2,5,13,17,10,1,6,4,14,11,19,31,14,14,24,30,12,42,41,54,64,74,82,112,68,250,49,308,261,142,775,557,1060,106,0,1,0,1,4,4,10,9,9,7,0,9,6,8,8,16,21,12,18,24,50,14,43,43,56,55,100,87,109,51,261,51,308,217,139,759,562,1041,120,0,0,0,3,3,2,9,7,12,9,6,4,1,10,9,15,26,11,16,22,35,16,26,45,50,60,56,67,74,62,247,50,282,243,123,747,618,1061,116,0,0,0,1,2,3,2,10,15,10,1,2,6,6,10,9,13,8,14,29,26,12,31,42,59,41,57,88,92,58,257,43,304,236,133,707,612,1016,130,0,0,1,2,4,2,6,8,18,21,3,5,5,7,12,15,17,7,7,23,43,9,28,32,44,42,56,68,83,54,225,38,289,181,133,713,594,991,105,0,1,0,0,5,2,6,9,10,23,1,5,3,3,9,9,30,13,7,18,27,12,28,24,49,42,63,75,81,45,226,43,274,217,147,676,571,925,99,0,0,0,0,3,1,3,5,5,16,3,3,6,7,4,13,15,11,3,10,34,16,20,37,46,41,52,66,85,35,201,45,253,201,119,685,497,913,82,0,0,0,0,0,0,0,0,0,2,0,3,5,3,4,5,13,9,5,11,14,3,35,17,21,41,34,67,67,31,184,49,241,167,93,639,515,797) +mat=matrix(qual,ncol=51,byrow=F) +Lab.palette <- colorRampPalette(c("blue", "orange", "red3","red2","red1","red"), space = "rgb",interpolate=c('spline')) 
+heatmap(mat,Rowv=NA,Colv=NA,xlab="Position of Read",ylab="Phred Quality Score",labRow=seq(from=33,to=71),col = Lab.palette(256),scale="none" ) +dev.off()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.tin.summary.txt Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,2 @@ +Bam_file TIN(mean) TIN(median) TIN(stdev) +input.bam 8.87096774194 8.87096774194 0.0
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output.tin.xls Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,8 @@ +geneID chrom tx_start tx_end TIN +NR_046018 chr1 11873 14409 0.0 +NR_024540 chr1 14361 29370 8.87096774194 +NR_106918 chr1 17368 17436 0.0 +NR_107062 chr1 17368 17436 0.0 +NR_026818 chr1 34610 36081 0.0 +NR_026820 chr1 34610 36081 0.0 +NM_001005484 chr1 69090 70008 0.0
--- a/test-data/output2.geneBodyCoverage.r Tue May 03 16:36:57 2016 -0400 +++ b/test-data/output2.geneBodyCoverage.r Tue Mar 14 10:23:21 2017 -0400 @@ -1,8 +1,8 @@ -d1_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam <- c(0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0) -d2_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam <- c(0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0) -d3_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam <- c(0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0) -data_matrix <- matrix(c(d1_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam,d2_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam,d3_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam), byrow=T, ncol=100) -rowLabel <- c("d1_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam","d2_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam","d3_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam") 
+pairend_strandspecific_51mer_hg19_chr1_1_100000_bam <- c(0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0) +pairend_strandspecific_51mer_hg19_chr1_1_100000_bam.1 <- c(0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0) +pairend_strandspecific_51mer_hg19_chr1_1_100000_bam.2 <- c(0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0) +data_matrix <- matrix(c(pairend_strandspecific_51mer_hg19_chr1_1_100000_bam,pairend_strandspecific_51mer_hg19_chr1_1_100000_bam.1,pairend_strandspecific_51mer_hg19_chr1_1_100000_bam.2), byrow=T, ncol=100) +rowLabel <- c("pairend_strandspecific_51mer_hg19_chr1_1_100000_bam","pairend_strandspecific_51mer_hg19_chr1_1_100000_bam.1","pairend_strandspecific_51mer_hg19_chr1_1_100000_bam.2") pdf("output.geneBodyCoverage.heatMap.pdf") @@ -14,8 +14,8 @@ pdf("output.geneBodyCoverage.curves.pdf") x=1:100 icolor = colorRampPalette(c("#7fc97f","#beaed4","#fdc086","#ffff99","#386cb0","#f0027f"))(3) 
-plot(x,d1_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam,type='l',xlab="Gene body percentile (5'->3')", ylab="Coverage",lwd=0.8,col=icolor[1]) -lines(x,d2_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam,type='l',col=icolor[2]) -lines(x,d3_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam,type='l',col=icolor[3]) -legend(0,1,fill=icolor[1:3], legend=c('d1_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam','d2_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam','d3_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam')) +plot(x,pairend_strandspecific_51mer_hg19_chr1_1_100000_bam,type='l',xlab="Gene body percentile (5'->3')", ylab="Coverage",lwd=0.8,col=icolor[1]) +lines(x,pairend_strandspecific_51mer_hg19_chr1_1_100000_bam.1,type='l',col=icolor[2]) +lines(x,pairend_strandspecific_51mer_hg19_chr1_1_100000_bam.2,type='l',col=icolor[3]) +legend(0,1,fill=icolor[1:3], legend=c('pairend_strandspecific_51mer_hg19_chr1_1_100000_bam','pairend_strandspecific_51mer_hg19_chr1_1_100000_bam.1','pairend_strandspecific_51mer_hg19_chr1_1_100000_bam.2')) dev.off()
--- a/test-data/output2.geneBodyCoverage.txt Tue May 03 16:36:57 2016 -0400 +++ b/test-data/output2.geneBodyCoverage.txt Tue Mar 14 10:23:21 2017 -0400 @@ -1,4 +1,4 @@ Percentile 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 -d1_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -d2_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -d3_pairend_strandspecific_51mer_hg19_chr1_1_100000_bam 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +pairend_strandspecific_51mer_hg19_chr1_1_100000_bam 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 
0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +pairend_strandspecific_51mer_hg19_chr1_1_100000_bam.1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 +pairend_strandspecific_51mer_hg19_chr1_1_100000_bam.2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
--- a/test-data/output_read_count.xls Tue May 03 16:36:57 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,47 +0,0 @@ -#chrom st end accession score gene_strand tag_count RPKM -chr1 12227 12612 NR_046018_intron_1 0 + 0 0.000 -chr1 12721 13220 NR_046018_intron_2 0 + 0 0.000 -chr1 11873 12227 NR_046018_exon_1 0 + 0 0.000 -chr1 12612 12721 NR_046018_exon_2 0 + 1 208507.089 -chr1 13220 14409 NR_046018_exon_3 0 + 2 38229.222 -chr1 11873 14409 NR_046018_mRNA 0 + 3 41272.287 -chr1 14829 14969 NR_024540_intron_10 0 - 0 0.000 -chr1 15038 15795 NR_024540_intron_9 0 - 0 0.000 -chr1 15947 16606 NR_024540_intron_8 0 - 2 68975.031 -chr1 16765 16857 NR_024540_intron_7 0 - 0 0.000 -chr1 17055 17232 NR_024540_intron_6 0 - 0 0.000 -chr1 17368 17605 NR_024540_intron_5 0 - 1 95895.666 -chr1 17742 17914 NR_024540_intron_4 0 - 0 0.000 -chr1 18061 18267 NR_024540_intron_3 0 - 0 0.000 -chr1 18366 24737 NR_024540_intron_2 0 - 22 78480.615 -chr1 24891 29320 NR_024540_intron_1 0 - 2 10262.936 -chr1 14361 14829 NR_024540_exon_11 0 - 2 97125.097 -chr1 14969 15038 NR_024540_exon_10 0 - 0 0.000 -chr1 15795 15947 NR_024540_exon_9 0 - 0 0.000 -chr1 16606 16765 NR_024540_exon_8 0 - 0 0.000 -chr1 16857 17055 NR_024540_exon_7 0 - 1 114784.206 -chr1 17232 17368 NR_024540_exon_6 0 - 1 167112.299 -chr1 17605 17742 NR_024540_exon_5 0 - 0 0.000 -chr1 17914 18061 NR_024540_exon_4 0 - 0 0.000 -chr1 18267 18366 NR_024540_exon_3 0 - 0 0.000 -chr1 24737 24891 NR_024540_exon_2 0 - 0 0.000 -chr1 29320 29370 NR_024540_exon_1 0 - 0 0.000 -chr1 14361 29370 NR_024540_mRNA 0 - 4 51390.102 -chr1 17368 17436 NR_106918_exon_1 0 - 0 0.000 -chr1 17368 17436 NR_106918_mRNA 0 - 0 0.000 -chr1 17368 17436 NR_107062_exon_1 0 - 0 0.000 -chr1 17368 17436 NR_107062_mRNA 0 - 0 0.000 -chr1 35174 35276 NR_026818_intron_2 0 - 0 0.000 -chr1 35481 35720 NR_026818_intron_1 0 - 0 0.000 -chr1 34610 35174 NR_026818_exon_3 0 - 0 0.000 -chr1 35276 35481 NR_026818_exon_2 0 - 0 0.000 -chr1 35720 36081 NR_026818_exon_1 0 - 0 0.000 
-chr1 34610 36081 NR_026818_mRNA 0 - 0 0.000 -chr1 35174 35276 NR_026820_intron_2 0 - 0 0.000 -chr1 35481 35720 NR_026820_intron_1 0 - 0 0.000 -chr1 34610 35174 NR_026820_exon_3 0 - 0 0.000 -chr1 35276 35481 NR_026820_exon_2 0 - 0 0.000 -chr1 35720 36081 NR_026820_exon_1 0 - 0 0.000 -chr1 34610 36081 NR_026820_mRNA 0 - 0 0.000 -chr1 69090 70008 NM_001005484_exon_1 0 + 0 0.000 -chr1 69090 70008 NM_001005484_mRNA 0 + 0 0.000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pairend_strandspecific_51mer_hg19_chr1_1-100000.R1.fastq Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,80 @@ +@seq.11990047/1 +ACGGCCGACTTGGATCACACTCTTGCAGGGCCATCAGGCACCAAAGGGATT ++ +hgffghhhhfhhchfhghhhhhhhggahh[chffhfhghhhhhhafehfeh +@seq.14614493/1 +AGAGGAGGACGAGGACGACTGGGAATCGTAGGGGGCTCCATGACACCTTCC ++ +hhgghehehghgghhaffffcggchghhhhgfhfhahghhhghhghhhhhe +@seq.24018133/1 +CGGGTGGATTTTCTGTGGGTTTGTTAAGTGGTCAGAAATTCTCAATTTTTT ++ +`aggggecfffa\\^Ua``\af`fffcffffafaffcffffec``fWfffe +@seq.10608403/1 +GTATGGCCAGAGGGCAGGGCCGAGGGGTGTGGGCGGGAGGCCCGGCCTGGC ++ +_dd_]bfggfgcg[egdbdbdXc`cfggaagdgggf^ggfdfggggggggg +@seq.10820209/1 +GTGCTGGCCCCAGTTTTCTAACCAGGTGTTGAATGAACTGGATGGACTCTG ++ +ghhccgfgghhhdhfhhghhhfffdf_hfhhhffhhhgdchhhhhgfhahh +@seq.1537155/1 +GGGAGTGTGCAGAGACTGGAGGGGATGACAGTCACCCTCTGTTTTCTGTGG ++ +aag`hhhhhgghhhhhhfhhchgfacchhhhah]hhdcafhhhhffachhg +@seq.25274725/1 +AGGGTGTGGGGCAAGGCAGTGAGTGAAGAGTTGGGATGAGTGAGTTAGGGC ++ +hhhhdhhhhhhffffcfdffff_fdffffffhchahhhhchgfhhfhfhhh +@seq.26326595/1 +GGGAAGGGGGTGCTTCTGCATGGGAAGCACAGACAGCGCTGCCTCTCCCTT ++ +Wbfdf]ddggggdgggdfdfWggdggf]fffadfffffVfffgfgfdgggg +@seq.28833653/1 +TGGGGCCAGGGGACTATGACACACCACTTGGCTTAGACTGAGGAGCTCTGT ++ +_cffafhdghhhhhhhhhhhghhaffhhhhdhfhhehhhhhhgghfhghhh +@seq.25049090/1 +AGGGCGAGATTGATTGTTAATTGCTAGCATGAACCGCGTGGGCTTCTCAGG ++ +fdffbfdddb[_afffacdggafdbc[fcfcgggfgffccfgagggggfgc +@seq.23476912/1 +GGCCTCTCCACCATGTGCTCCACCTCGTGCTGGACCTTAAGAGATACCAAT ++ +fgggggeggecfefffd^^aY]fdfcaggggfdefdggggggggggggggg +@seq.28059536/1 +GGGATGAGGAGAGGGCAGGAAGGCATTTCCTGGGTAGTGGAGTGCTGTGTT ++ +B_bbea[_V[WZVY`\Pacaaebecd]]fddbaed[decbe]fd`fggggg +@seq.13270875/1 +TGCCCCGAGTTTGTCAAGAATGTCCCAGTAACCAGGGGACACACAGTGAAG ++ +ffffafgagcggggfgfcfffccfcffg]ggggfgcgggggggggggggcf +@seq.2214586/1 +GTGGGAGGGGCTGAAGTGAGAGCCCAACTTGGAAGCTTTTACTCCTGGGAG ++ +gghghghhhhhhhhhhffefafhfhhhhhgghhhghhehhhhehhhhhhhh +@seq.31061198/1 +GAGGAGCTAGGGTTTCTCATAAAACTCCCTGATAGAAGACGACTTTTGATA 
++ +cd\WaaaRcaacJdd[dff_f_ffcfddfff_dafffcd[cd\aW\eedcc +@seq.13835843/1 +GGGGAGGCAGAGGTTGCAGTGAGCCGAGATCATGTCACTGCACTCCAGGCT ++ +ggcgafggfgggggggfgggagggagggefgdffffdadeggggggggegg +@seq.13539256/1 +GTGGGGAAACCTAGAATTGCTGTAGAGAAAATGCCCTAGAGCAGCTCTAGA ++ +hfhhchfhhhfhghhhhhhhhghghhhgfhhhhhhhhhghhhhfhhhhfhh +@seq.5556605/1 +GGGATGAGGCCAAATCTTTCTGAATCTGAGATAGCCTCTCAGCCTATGCAT ++ +hfhchhchhhehhhhhhhhdhh_hghchgfhdhhhhhhhhhchhghhhhhh +@seq.32597077/1 +GAGAGACGGGGTTTCACCATGTTGGCCAGGATGGTCTTGATCTCTTGACCT ++ +dcba]fffcdfdWccf\``S_da_cdc_fdafggggfffcfcfcfddafff +@seq.20367385/1 +TTAAGTGCACTCAAATAATGTGATTTTATGAGGCTATAGGAGAAAAAAATT ++ +fdddZ`dc[cdadJ_ddScad[[^\^ddadad__daa^a^\]QY\T^ZZZY
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pairend_strandspecific_51mer_hg19_chr1_1-100000.R2.fastq Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,80 @@ +@seq.11990047/2 +CCCGTGGGCAGAGCAAAGGAAGGGCACAGCGCCAGGCAGTGGTGCAGCTGC ++ +hfadfddd`]f[fa_fh]hhcagffhWhhhh]eeehhhha^hfhhhhghhf +@seq.14614493/2 +TCCCCTCCCAAGGAAGTAGGTCTGAGCAGCTTGTCCTGGCTGTGTCCATGT ++ +hhhhhhhhhehhhhehehhhghhehfhahhdhfdhhfhhhdf]f_ffbdfa +@seq.24018133/2 +TACCACTATTTTATGAACCAAGTATAACAAGATACTTGAAATGGAAACTAT ++ +fLffddhhhhag_gefafffhaefhfffffchffhhggahhhhRhhgccgh +@seq.10608403/2 +GGGACCTGCTGTTCCATCGGCTCTCTCTTGCTGATGGACAAGGGGGCATCA ++ +hefchhhfda`]]b]a^aLa^[Za^WdWb[faff]fd]defQacffdRd]f +@seq.10820209/2 +GCCCGGGGAAAACATGCATCACAGTTCATCTCGAGTCAGCAGGATTTTGAC ++ +ffcddeed]eaTfccfffffceee]ffdcdcee[efdaffffdSfhhdc]d +@seq.1537155/2 +TNNATCAATCAGCAGGNNNCGTGCACTCTCTTTGAGCCACCACAGAAAACA ++ +VBBVT^WZ^^I[]V]YBBBIVS[W[eeKceccaccUfaffff_afghg`gd +@seq.25274725/2 +GGCNCCTCCNTGCCCTNCTNAAAANNCAATCACAGCTCCCTAACAGTCCTG ++ +^UZB]]]Y]B][IS[[B]]BW][XBB\WaadddddhhghgffffaGVV[Se +@seq.26326595/2 +CATGCGTGCCCTGCTCGANATCCAATCACAGCTCCCTAACACTCCTGAATC ++ +^K^K_YT[YVe_eLe[INBTYZUV^S`babhacfhhccghhahghhdaghW +@seq.28833653/2 +CAGCAGCTATTTCCTGNTNACTCAANCAATGGCCCCATTTCCCTGGTGGAA ++ +fLdYfeYdddXbbabSBWBTY[[[]BdeedfffffghhhghdfgLfbfddg +@seq.25049090/2 +CTNCCCTTANTCCGAANGCNGCTCNNCTGATTGGTTAATTTTTGCGTAGCT ++ +VVBNZT[]]BSHZWS[BZHBTQPOBBZUZO]bZ^^hfehffff[fcfd_]g +@seq.23476912/2 +TNACTGATTNCTCTCCACTNTAGANNCTGAGAAGCCCACGCTGTTCATGCT ++ +`Ba_a^]^\Bbab\aa`b`BV]^VBBZV[Z`a^abffYfaa^e^dedbdd] +@seq.28059536/2 +CGTNTGACTCTAGACCNTNNGAAGCCCACGCGGTTCATTCTAGCAAGTAAC ++ +ZXZBY`\`]][dcKcUBVBBWNVV]ghchfdcc]ecccLa`edecf_cfdf +@seq.13270875/2 +TAGATTATCAACAGGGGAGAGATAGCATTTCCTGAAGGCTTCCTAGGTGCC ++ +eghggd_hhhfahhg\K^[[ffafchehg_ffWffhgceghhhhhffLfcY +@seq.2214586/2 +GNNTGCANANATAGANNTTNCCACACTGCCTTGCACAGGAGCACTGCGGGG ++ +VBBSITZBVBTTRHXBBZYBVUUVHH[QV[chhghacKaa_eeeeghgghg +@seq.31061198/2 +GNCGGAAAAAAAAATTNNNNAAAATNCGTCTGCTATCAGGGAGTTTTATGA 
++ +VB[NV^\_]_`hfccXBBBBTUT[TBa_aLTRNQQaYcaeaKa^adcS\Vd +@seq.13539256/2 +CCTATTTTTTTTTTTTTTTTTGACACAGGTTCTCTGTCACCCAGCCTGGGG ++ +hhhhhhhhhhhghhhghhghfccWKVVYZRd[_[aZQYZ^``WT`[L^^Q\ +@seq.13835843/2 +TCCATCTTTTTTTTTTTTTTTTTGACACAGGTTCTCTGTCACCCAGCCTGG ++ +ghhhggffhhhhhhhhhhhghhh]fMPLPVW^^WXUZ\WXL[VVJY`\Wbb +@seq.5556605/2 +GCAGCTANGNCCATCNNNTTTGAAANCCAGATTTCGTTTTAAACCAGAGGA ++ +fLfLf]VBTBI]]]]BBB[^WW[]XBdede`eedeffhhehffhhhhahee +@seq.20367385/2 +ATTTGGCAGAGAAGCAAACACCAGTCGGAGAGCTGGGGCCCTCCCAGCCCT ++ +W_\_^W___WdcfceVIW[T^aa\[aaacaQYYSZY`KK````^[GaQZ\Y +@seq.17373919/2 +TTTTTGTTTTTTTTTTTTTTTGAGTCAGAATCTCGCTCTGTTGCCCAGGCT ++ +hgghhhhhhSfffffhgh`h__Wb`ZZZ_]PVPUSVYVQVaWWacaQa^BB
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tin.xml Tue Mar 14 10:23:21 2017 -0400 @@ -0,0 +1,144 @@ +<tool id="rseqc_tin" name="Transcript Integrity Number" version="@WRAPPER_VERSION@"> + <description> + evaluates RNA integrity at a transcript level + </description> + + <macros> + <import>rseqc_macros.xml</import> + </macros> + + <expand macro="requirements" /> + + <expand macro="stdio" /> + + <version_command><![CDATA[tin.py --version]]></version_command> + + <!-- Generate output files here because tin.py removes all instances of "bam" + in the filename --> + <command><![CDATA[ + #import re + ln -sf '${input}' 'input.bam' && + ln -sf '${input.metadata.bam_index}' 'input.bam.bai' && + tin.py -i 'input.bam' --refgene='${refgene}' --minCov=${minCov} + --sample-size=${samplesize} ${subtractbackground} + ]]> + </command> + + <inputs> + <expand macro="bam_param" /> + <expand macro="refgene_param" /> + <param name="minCov" type="integer" value="10" label="Minimum coverage (default=10)" + help="Minimum number of reads mapped to a transcript (--minCov)." /> + <param name="samplesize" type="integer" value="100" label="Sample size (default=100)" + help="Number of equal-spaced nucleotide positions picked from mRNA. + Note: if this number is larger than the length of mRNA (L), it will + be halved until it's smaller than L. (--sample-size)." /> + <param name="subtractbackground" type="boolean" value="false" falsevalue="" + truevalue="--subtract-background" label="Subtract background noise + (default=No)" help="Subtract background noise (estimated from + intronic reads). Only use this option if there are substantial + intronic reads (--subtract-background)."
/> + </inputs> + + <outputs> + <data name="outputsummary" format="tabular" from_work_dir="input.summary.txt" label="TIN on ${on_string} (summary)" /> + <data name="outputxls" format="xls" from_work_dir="input.tin.xls" label="TIN on ${on_string} (tin)" /> + </outputs> + + <!-- PDF Files contain R version, must avoid checking for diff --> + <tests> + <test> + <param name="input" value="pairend_strandspecific_51mer_hg19_chr1_1-100000.bam"/> + <param name="refgene" value="hg19_RefSeq_chr1_1-100000.bed"/> + <output name="outputsummary" file="output.tin.summary.txt"/> + <output name="outputxls" file="output.tin.xls"/> + </test> + </tests> + + <help><![CDATA[ +## tin.py + +This program is designed to evaluate RNA integrity at transcript level. TIN +(transcript integrity number) is named by analogy to RIN (RNA integrity +number). RIN (RNA integrity number) is the most widely used metric to +evaluate RNA integrity at sample (or transcriptome) level. It is a very +useful preventive measure to ensure good RNA quality and robust, +reproducible RNA sequencing. However, it has several weaknesses: + +* RIN score (1 <= RIN <= 10) is not a direct measurement of mRNA quality. + RIN score heavily relies on the amount of 18S and 28S ribosome RNAs, which + was demonstrated by the four features used by the RIN algorithm: the + “total RNA ratio” (i.e. the fraction of the area in the region of 18S and + 28S compared to the total area under the curve), 28S-region height, 28S + area ratio and the 18S:28S ratio. To a large extent, RIN score was a + measure of ribosome RNA integrity. However, in most RNA-seq experiments, + ribosome RNAs were depleted from the library to enrich mRNA through either + ribo-minus or polyA selection procedure. + +* RIN only measures the overall RNA quality of an RNA sample.
However, in real + situation, the degradation rate may differ significantly among + transcripts, depending on factors such as “AU-rich sequence”, “transcript + length”, “GC content”, “secondary structure” and the “RNA-protein + complex”. Therefore, RIN is practically not very useful in downstream + analysis such as adjusting the gene expression count. + +* RIN has very limited sensitivity to measure substantially degraded RNA + samples such as preserved clinical tissues. (ref: + http://www.illumina.com/documents/products/technotes/technote-truseq-rna-access.pdf). + +To overcome these limitations, we developed TIN, an algorithm that is able +to measure RNA integrity at transcript level. TIN calculates a score (0 <= +TIN <= 100) for each expressed transcript, however, the medTIN (i.e. +median TIN score across all the transcripts) can also be used to measure +the RNA integrity at sample level. Below plots demonstrated TIN is a +useful metric to measure RNA integrity in both transcriptome-wise and +transcript-wise, as demonstrated by the high concordance with both RIN and +RNA fragment size (estimated from RNA-seq read pairs). + + +## Inputs + +Input BAM/SAM file + Alignment file in BAM/SAM format. + +Reference gene model + Gene Model in BED format. Must be standard 12-column BED file. + +Minimum coverage + Minimum number of reads mapped to a transcript (default is 10). + +Sample size + Number of equal-spaced nucleotide positions picked from mRNA. Note: if + this number is larger than the length of mRNA (L), it will be halved until + it’s smaller than L (default is 100). + +Subtract background + Subtract background noise (estimated from intronic reads). Only use this + option if there are substantial intronic reads. + + +## Outputs + +Text + Table that includes the gene identifier (geneID), chromosome (chrom), + transcript start (tx_start), transcript end (tx_end), and transcript + integrity number (TIN).
+ +Example output: + +------ ----- ---------- --------- ------------- +geneID chrom tx_start tx_end TIN +------ ----- ---------- --------- ------------- +ABCC2 chr10 101542354 101611949 67.6446525761 +IPMK chr10 59951277 60027694 86.383618429 +RUFY2 chr10 70100863 70167051 43.8967503948 +------ ----- ---------- --------- ------------- + +@ABOUT@ + +]]> + </help> + + <expand macro="citations" /> + +</tool>
--- a/tool_dependencies.xml Tue May 03 16:36:57 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="R" version="3.0.3"> - <repository changeset_revision="afc48696ee5c" name="package_r_3_0_3" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> - <package name="numpy" version="1.7.1"> - <repository changeset_revision="300877695495" name="package_numpy_1_7" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> - <package name="rseqc" version="2.4"> - <repository changeset_revision="8e7baa602cec" name="package_rseqc_2_4" owner="lparsons" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>