Mercurial > repos > nilesh > rseqc
changeset 32:580ee0c4bc4e
Fixes from Bjorn Gruning: create symlinks under $TMP and clean them up afterwards, replace R dependency with the Tool Shed R3 package, add --install-scripts, prepend tool-ids with rseqc
author | lparsons |
---|---|
date | Mon, 07 Oct 2013 15:01:13 -0400 |
parents | cc5eaa9376d8 |
children | 073c77ce5e94 |
files | RPKM_count.xml RPKM_saturation.xml bam2wig.xml bam_stat.xml clipping_profile.xml geneBody_coverage.xml geneBody_coverage2.xml infer_experiment.xml inner_distance.xml junction_annotation.xml junction_saturation.xml read_GC.xml read_NVC.xml read_distribution.xml read_duplication.xml read_quality.xml samtoolshelper.py tool_dependencies.xml |
diffstat | 18 files changed, 632 insertions(+), 665 deletions(-) [+] |
line wrap: on
line diff
--- a/RPKM_count.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/RPKM_count.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,75 +1,75 @@ -<tool id="RPKM_count" name="RPKM Count" version="1.1"> - <description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description> - <requirements> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.3.7">rseqc</requirement> - </requirements> +<tool id="rseqc_RPKM_count" name="RPKM Count" version="1.1"> + <description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description> + <requirements> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="2.3.7">rseqc</requirement> + </requirements> <command> ln -s "${input}" "local_input.bam" && ln -s "${input.metadata.bam_index}" "local_input.bam.bai" && RPKM_count.py -i "local_input.bam" -o output -r $refgene - #if str($strand_type.strand_specific) == "pair" - -d - #if str($strand_type.pair_type) == "sd" - '1++,1--,2+-,2-+' - #else - '1+-,1-+,2++,2--' - #end if - #end if - - #if str($strand_type.strand_specific) == "single" - -d - #if str($strand_type.single_type) == "s" - '++,--' - #else - '+-,-+' - #end if - #end if - - #if $skiphits - -u - #end if - - #if $onlyexonic - -e - #end if + #if str($strand_type.strand_specific) == "pair" + -d + #if str($strand_type.pair_type) == "sd" + '1++,1--,2+-,2-+' + #else + '1+-,1-+,2++,2--' + #end if + #end if - </command> - <inputs> - <param name="input" type="data" format="bam" label="input bam/sam file" /> - <param name="refgene" type="data" format="bed" label="Reference gene model" /> - <conditional name="strand_type"> - <param name="strand_specific" type="select" label="Strand-specific?" value="None"> - <option value="none">None</option> - <option value="pair">Pair-End RNA-seq</option> - <option value="single">Single-End RNA-seq</option> - </param> - <when value="pair"> - <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd"> - <option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option> - <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option> - </param> - </when> - <when value="single"> - <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s"> - <option value="s">positive --> positive; negative --> negative</option> - <option value="d">positive --> negative; negative --> positive</option> - </param> - </when> - <when value="none"></when> - </conditional> - <param name="skiphits" type="boolean" value="false" label="Skip Multiple Hit Reads" /> - <param name="onlyexonic" type="boolean" value="false" label="Only use exonic (UTR exons and CDS exons) reads, otherwise use all reads" /> - </inputs> - <outputs> - <data format="xls" name="outputxls" from_work_dir="output_read_count.xls"/> - </outputs> + #if str($strand_type.strand_specific) == "single" + -d + #if str($strand_type.single_type) == "s" + '++,--' + #else + '+-,-+' + #end if + #end if + + #if $skiphits + -u + #end if + + #if $onlyexonic + -e + #end if + + </command> <stdio> <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> </stdio> - <help> + <inputs> + <param name="input" type="data" format="bam" label="input bam/sam file" /> + <param name="refgene" type="data" format="bed" label="Reference gene model" /> + <conditional name="strand_type"> + <param name="strand_specific" type="select" label="Strand-specific?" value="None"> + <option value="none">None</option> + <option value="pair">Pair-End RNA-seq</option> + <option value="single">Single-End RNA-seq</option> + </param> + <when value="pair"> + <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd"> + <option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option> + <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option> + </param> + </when> + <when value="single"> + <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s"> + <option value="s">positive --> positive; negative --> negative</option> + <option value="d">positive --> negative; negative --> positive</option> + </param> + </when> + <when value="none"></when> + </conditional> + <param name="skiphits" type="boolean" value="false" label="Skip Multiple Hit Reads" /> + <param name="onlyexonic" type="boolean" value="false" label="Only use exonic (UTR exons and CDS exons) reads, otherwise use all reads" /> + </inputs> + <outputs> + <data format="xls" name="outputxls" from_work_dir="output_read_count.xls"/> + </outputs> + <help> RPKM_count.py +++++++++++++ @@ -83,22 +83,22 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Reference gene model - Gene model in BED format. + Gene model in BED format. Strand sequencing type (default=none) - See Infer Experiment tool if uncertain. + See Infer Experiment tool if uncertain. Options ++++++++++++++ Skip Multiple Hit Reads - Use Multiple hit reads or use only uniquely mapped reads. + Use Multiple hit reads or use only uniquely mapped reads. Only use exonic reads - Renders program only used exonic (UTR exons and CDS exons) reads, otherwise use all reads. + Renders program only used exonic (UTR exons and CDS exons) reads, otherwise use all reads. Sample Output ++++++++++++++ @@ -113,7 +113,7 @@ chr1 29313959 29314417 NM_001166007_exon_2 0 '+' 1699 4 74.158 0.175 chr1 29319841 29320054 NM_001166007_exon_3 0 '+' 528 1 49.554 0.094 ===== ======== ======== ===================== ===== =========== ============= ============= ======== ========= - + ----- About RSeQC @@ -128,5 +128,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - </help> + </help> </tool>
--- a/RPKM_saturation.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/RPKM_saturation.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,72 +1,72 @@ -<tool id="RPKM_saturation" name="RPKM Saturation" version="1.1"> - <description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description> - <requirements> - <requirement type="package" version="2.11.0">R</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.3.7">rseqc</requirement> - </requirements> - <command> RPKM_saturation.py -i $input -o output -r $refgene - - #if str($strand_type.strand_specific) == "pair" - -d - #if str($strand_type.pair_type) == "sd" - '1++,1--,2+-,2-+' - #else - '1+-,1-+,2++,2--' - #end if - #end if - - #if str($strand_type.strand_specific) == "single" - -d - #if str($strand_type.single_type) == "s" - '++,--' - #else - '+-,-+' - #end if - #end if - - -l $percentileFloor -u $percentileCeiling -s $percentileStep -c $rpkmCutoff +<tool id="rseqc_RPKM_saturation" name="RPKM Saturation" version="1.1"> + <description>calculates raw count and RPKM values for transcript at exon, intron, and mRNA level</description> + <requirements> + <requirement type="package" version="3.0.1">R</requirement> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="2.3.7">rseqc</requirement> + </requirements> + <command> RPKM_saturation.py -i $input -o output -r $refgene - </command> - <inputs> - <param name="input" type="data" format="bam" label="input bam/sam file" /> - <param name="refgene" type="data" format="bed" label="Reference gene model" /> - <conditional name="strand_type"> - <param name="strand_specific" type="select" label="Strand-specific?" value="None"> - <option value="none">None</option> - <option value="pair">Pair-End RNA-seq</option> - <option value="single">Single-End RNA-seq</option> - </param> - <when value="pair"> - <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd"> - <option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option> - <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option> - </param> - </when> - <when value="single"> - <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s"> - <option value="s">positive --> positive; negative --> negative</option> - <option value="d">positive --> negative; negative --> positive</option> - </param> - </when> - <when value="none"></when> - </conditional> - <param name="percentileFloor" type="integer" value="5" label="Begin sampling from this percentile (default=5)" /> - <param name="percentileCeiling" type="integer" value="100" label="End sampling at this percentile (default=100)" /> - <param name="percentileStep" type="integer" value="5" label="Sampling step size (default=5)" /> - <param name="rpkmCutoff" type="text" value="0.01" label="Ignore transcripts with RPKM smaller than this number (default=0.01)" /> - </inputs> - <outputs> - <data format="xls" name="outputxls" from_work_dir="output.eRPKM.xls" label="${tool.name} on ${on_string} (RPKM XLS)"/> - <data format="xls" name="outputrawxls" from_work_dir="output.rawCount.xls" label="${tool.name} on ${on_string} (Raw Count XLS)"/> - <data format="r" name="outputr" from_work_dir="output.saturation.r" label="${tool.name} on ${on_string} (R Script)"/> - <data format="pdf" name="outputpdf" from_work_dir="output.saturation.pdf" label="${tool.name} on ${on_string} (PDF)"/> - </outputs> + #if str($strand_type.strand_specific) == "pair" + -d + #if str($strand_type.pair_type) == "sd" + '1++,1--,2+-,2-+' + #else + '1+-,1-+,2++,2--' + #end if + #end if + + #if str($strand_type.strand_specific) == "single" + -d + #if str($strand_type.single_type) == "s" + '++,--' + #else + '+-,-+' + #end if + #end if + + -l $percentileFloor -u $percentileCeiling -s $percentileStep -c $rpkmCutoff + + </command> <stdio> <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> </stdio> - <help> + <inputs> + <param name="input" type="data" format="bam" label="input bam/sam file" /> + <param name="refgene" type="data" format="bed" label="Reference gene model" /> + <conditional name="strand_type"> + <param name="strand_specific" type="select" label="Strand-specific?" value="None"> + <option value="none">None</option> + <option value="pair">Pair-End RNA-seq</option> + <option value="single">Single-End RNA-seq</option> + </param> + <when value="pair"> + <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd"> + <option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option> + <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option> + </param> + </when> + <when value="single"> + <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s"> + <option value="s">positive --> positive; negative --> negative</option> + <option value="d">positive --> negative; negative --> positive</option> + </param> + </when> + <when value="none"></when> + </conditional> + <param name="percentileFloor" type="integer" value="5" label="Begin sampling from this percentile (default=5)" /> + <param name="percentileCeiling" type="integer" value="100" label="End sampling at this percentile (default=100)" /> + <param name="percentileStep" type="integer" value="5" label="Sampling step size (default=5)" /> + <param name="rpkmCutoff" type="text" value="0.01" label="Ignore transcripts with RPKM smaller than this number (default=0.01)" /> + </inputs> + <outputs> + <data format="xls" name="outputxls" from_work_dir="output.eRPKM.xls" label="${tool.name} on ${on_string} (RPKM XLS)"/> + <data format="xls" name="outputrawxls" from_work_dir="output.rawCount.xls" label="${tool.name} on ${on_string} (Raw Count XLS)"/> + <data format="txt" name="outputr" from_work_dir="output.saturation.r" label="${tool.name} on ${on_string} (R Script)"/> + <data format="pdf" name="outputpdf" from_work_dir="output.saturation.pdf" label="${tool.name} on ${on_string} (PDF)"/> + </outputs> + <help> RPKM_saturation.py ++++++++++++++++++ @@ -93,22 +93,22 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Reference gene model - Gene model in BED format. + Gene model in BED format. Strand sequencing type (default=none) - See Infer Experiment tool if uncertain. + See Infer Experiment tool if uncertain. Options ++++++++++++++ Skip Multiple Hit Reads - Use Multiple hit reads or use only uniquely mapped reads. + Use Multiple hit reads or use only uniquely mapped reads. Only use exonic reads - Renders program only used exonic (UTR exons and CDS exons) reads, otherwise use all reads. + Renders program only used exonic (UTR exons and CDS exons) reads, otherwise use all reads. Output ++++++++++++++ @@ -124,10 +124,10 @@ :scale: 80 % - All transcripts were sorted in ascending order according to expression level (RPKM). Then they are divided into 4 groups: - 1. Q1 (0-25%): Transcripts with expression level ranked below 25 percentile. - 2. Q2 (25-50%): Transcripts with expression level ranked between 25 percentile and 50 percentile. - 3. Q3 (50-75%): Transcripts with expression level ranked between 50 percentile and 75 percentile. - 4. Q4 (75-100%): Transcripts with expression level ranked above 75 percentile. + 1. Q1 (0-25%): Transcripts with expression level ranked below 25 percentile. + 2. Q2 (25-50%): Transcripts with expression level ranked between 25 percentile and 50 percentile. + 3. Q3 (50-75%): Transcripts with expression level ranked between 50 percentile and 75 percentile. + 4. Q4 (75-100%): Transcripts with expression level ranked above 75 percentile. - BAM/SAM file containing more than 100 million alignments will make module very slow. - Follow example below to visualize a particular transcript (using R console):: @@ -156,5 +156,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - </help> + </help> </tool>
--- a/bam2wig.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/bam2wig.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,92 +1,101 @@ -<tool id="bam2wig" name="BAM to Wiggle" version="1.1"> - <description> - converts all types of RNA-seq data from .bam to .wig - </description> - <requirements> - <requirement type="package" version="2.11.0">R</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.3.7">rseqc</requirement> - </requirements> - <command> - ln -s "${input}" "local_input.bam" && - ln -s "${input.metadata.bam_index}" "local_input.bam.bai" && - bam2wig.py -i "local_input.bam" -s $chromsize -o outfile +<tool id="rseqc_bam2wig" name="BAM to Wiggle" version="1.1"> + <description> + converts all types of RNA-seq data from .bam to .wig + </description> + <requirements> + <requirement type="package" version="3.0.1">R</requirement> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="2.3.7">rseqc</requirement> + </requirements> + <command> + + #import tempfile, os + #set $tmp_input = tempfile.NamedTemporaryFile() + #set $tmp_input_name = $input_singles_tmp_handle.name + #silent $tmp_input.close() + + ln -s "${input}" $tmp_input_name && + ln -s "${input.metadata.bam_index}" $tmp_input_name + ".bai" && + bam2wig.py -i "local_input.bam" -s $chromsize -o outfile - #if str($strand_type.strand_specific) == "pair" - -d - #if str($strand_type.pair_type) == "sd" - '1++,1--,2+-,2-+' - #else - '1+-,1-+,2++,2--' - #end if - #end if + #if str($strand_type.strand_specific) == "pair" + -d + #if str($strand_type.pair_type) == "sd" + '1++,1--,2+-,2-+' + #else + '1+-,1-+,2++,2--' + #end if + #end if - #if str($strand_type.strand_specific) == "single" - -d - #if str($strand_type.single_type) == "s" - '++,--' - #else - '+-,-+' - #end if - #end if + #if str($strand_type.strand_specific) == "single" + -d + #if str($strand_type.single_type) == "s" + '++,--' + #else + '+-,-+' + #end if + #end if - #if $wigsum.wigsum_type - -t $wigsum.totalwig - #end if + #if $wigsum.wigsum_type + -t $wigsum.totalwig + #end if - #if $skipmultihits - -u - #end if - </command> - <inputs> - <param name="input" type="data" label="Input .bam File" format="bam" /> - <param name="chromsize" type="data" label="Chromosome size file (tab or space separated)" format="txt,tabular" /> - <param name="skipmultihits" type="boolean" label="Skip Multiple Hit Reads/Only Use Uniquely Mapped Reads" value="false" /> - <conditional name="wigsum"> - <param name="wigsum_type" type="boolean" label="Specify wigsum?" value="false"> - </param> - <when value="true"> - <param name="totalwig" value="0" type="integer" label="specified wigsum" /> - </when> - <when value="false"></when> - </conditional> - <conditional name="strand_type"> - <param name="strand_specific" type="select" label="Strand-specific?" value="none"> - <option value="none">none</option> - <option value="pair">Pair-End RNA-seq</option> - <option value="single">Single-End RNA-seq</option> - </param> - <when value="pair"> - <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd"> - <option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option> - <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option> - </param> - </when> - <when value="single"> - <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s"> - <option value="s">positive --> positive; negative --> negative</option> - <option value="d">positive --> negative; negative --> positive</option> - </param> - </when> - <when value="none"></when> - </conditional> - </inputs> - <outputs> - <data format="wig" name="output" from_work_dir="outfile.wig"> - <filter>strand_type['strand_specific'] == 'none'</filter> - </data> - <data format="wig" name="outputfwd" from_work_dir="outfile_Forward.wig" label="${tool.name} on ${on_string} (Forward Reads)"> - <filter>strand_type['strand_specific'] != 'none'</filter> - </data> - <data format="wig" name="outputrv" from_work_dir="outfile_Reverse.wig" label="${tool.name} on ${on_string} (Reverse Reads)"> - <filter>strand_type['strand_specific'] != 'none'</filter> - </data> - </outputs> + #if $skipmultihits + -u + #end if + ; + rm $tmp_input_name + ".bai" ; + rm $tmp_input_name + </command> + <inputs> + <param name="input" type="data" label="Input .bam File" format="bam" /> + <param name="chromsize" type="data" label="Chromosome size file (tab or space separated)" format="txt,tabular" /> + <param name="skipmultihits" type="boolean" label="Skip Multiple Hit Reads/Only Use Uniquely Mapped Reads" value="false" /> + <conditional name="wigsum"> + <param name="wigsum_type" type="boolean" label="Specify wigsum?" value="false"> + </param> + <when value="true"> + <param name="totalwig" value="0" type="integer" label="specified wigsum" /> + </when> + <when value="false"/> + </conditional> + <conditional name="strand_type"> + <param name="strand_specific" type="select" label="Strand-specific?" value="none"> + <option value="none">none</option> + <option value="pair">Pair-End RNA-seq</option> + <option value="single">Single-End RNA-seq</option> + </param> + <when value="pair"> + <param name="pair_type" type="select" display="radio" label="Pair-End Read Type (format: mapped --> parent)" value="sd"> + <option value="sd"> read1 (positive --> positive; negative --> negative), read2 (positive --> negative; negative --> positive)</option> + <option value="ds">read1 (positive --> negative; negative --> positive), read2 (positive --> positive; negative --> negative)</option> + </param> + </when> + <when value="single"> + <param name="single_type" type="select" display="radio" label="Single-End Read Type (format: mapped --> parent)" value="s"> + <option value="s">positive --> positive; negative --> negative</option> + <option value="d">positive --> negative; negative --> positive</option> + </param> + </when> + <when value="none"></when> + </conditional> + </inputs> + <outputs> + <data format="wig" name="output" from_work_dir="outfile.wig"> + <filter>strand_type['strand_specific'] == 'none'</filter> + </data> + <data format="wig" name="outputfwd" from_work_dir="outfile_Forward.wig" label="${tool.name} on ${on_string} (Forward Reads)"> + <filter>strand_type['strand_specific'] != 'none'</filter> + </data> + <data format="wig" name="outputrv" from_work_dir="outfile_Reverse.wig" label="${tool.name} on ${on_string} (Reverse Reads)"> + <filter>strand_type['strand_specific'] != 'none'</filter> + </data> + </outputs> <stdio> <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> </stdio> - <help> + <help> bam2wig.py ++++++++++ @@ -103,19 +112,19 @@ ++++++++++++++ Input BAM file - Alignment file in BAM format (SAM is not supported). BAM file will be sorted and indexed using samTools. + Alignment file in BAM format (SAM is not supported). BAM file will be sorted and indexed using samTools. Chromosome size file - Tab or space separated text file with 2 columns: first column is chromosome name, second column is size of the chromosome. Chromosome names (such as "chr1") should be consistent between this file and BAM file. + Tab or space separated text file with 2 columns: first column is chromosome name, second column is size of the chromosome. Chromosome names (such as "chr1") should be consistent between this file and BAM file. Specified wigsum (default=none) - Specified wigsum. Wigsum of 100000000 equals to coverage achieved by 1 million 100nt reads. Ignore this option to disable normalization. + Specified wigsum. Wigsum of 100000000 equals to coverage achieved by 1 million 100nt reads. Ignore this option to disable normalization. Skip multiple Hit reads - skips multiple hit reads or only use uniquely mapped reads + skips multiple hit reads or only use uniquely mapped reads Strand-specific (default=none) - How read(s) were stranded during sequencing. If you are not sure about the strand rule, run infer_experiment.py + How read(s) were stranded during sequencing. If you are not sure about the strand rule, run infer_experiment.py Outputs ++++++++++++++ @@ -143,5 +152,5 @@ .. _wiggle: http://genome.ucsc.edu/goldenPath/help/wiggle.html .. _bigwig: http://genome.ucsc.edu/FAQ/FAQformat.html#format6.1 - </help> + </help> </tool>
--- a/bam_stat.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/bam_stat.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,26 +1,26 @@ -<tool id="bam_stat" name="BAM/SAM Mapping Stats" version="1.1"> - <description> - reads mapping statistics for a provided BAM or SAM file. - </description> - <requirements> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.3.7">rseqc</requirement> - </requirements>s - <command> - bam_stat.py -i $input -q $mapqual 2> $output - </command> - <inputs> - <param name="input" type="data" label="Input .bam/.sam File" format="bam,sam" /> - <param label="Minimum mapping quality (default=30" type="integer" value="30" name="mapqual" /> - </inputs> - <outputs> - <data format="txt" name="output" /> - </outputs> +<tool id="rseqc_bam_stat" name="BAM/SAM Mapping Stats" version="1.1"> + <description> + reads mapping statistics for a provided BAM or SAM file. + </description> + <requirements> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="2.3.7">rseqc</requirement> + </requirements>s + <command> + bam_stat.py -i $input -q $mapqual 2> $output + </command> <stdio> <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> </stdio> - <help> + <inputs> + <param name="input" type="data" label="Input .bam/.sam File" format="bam,sam" /> + <param label="Minimum mapping quality (default=30" type="integer" value="30" name="mapqual" /> + </inputs> + <outputs> + <data format="txt" name="output" /> + </outputs> + <help> bam_stat.py +++++++++++ @@ -61,5 +61,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ .. _`mapping quality`: http://genome.sph.umich.edu/wiki/Mapping_Quality_Scores - </help> + </help> </tool>
--- a/clipping_profile.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/clipping_profile.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,27 +1,27 @@ -<tool id="clipping_profile" name="Clipping Profile" version="1.1"> - <description> - estimates clipping profile of RNA-seq reads from BAM or SAM file - </description> - <requirements> - <requirement type="package" version="2.11.0">R</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.3.7">rseqc</requirement> - </requirements> - <command> - clipping_profile.py -i $input -o output - </command> - <inputs> - <param name="input" type="data" label="Input .bam/.sam File" format="bam,sam" /> - </inputs> - <outputs> - <data format="xls" name="outputxls" from_work_dir="output.clipping_profile.xls" /> - <data format="r" name="outputr" from_work_dir="output.clipping_profile.r" /> - </outputs> +<tool id="rseqc_clipping_profile" name="Clipping Profile" version="1.1"> + <description> + estimates clipping profile of RNA-seq reads from BAM or SAM file + </description> + <requirements> + <requirement type="package" version="3.0.1">R</requirement> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="2.3.7">rseqc</requirement> + </requirements> + <command> + clipping_profile.py -i $input -o output + </command> <stdio> <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> </stdio> - <help> + <inputs> + <param name="input" type="data" label="Input .bam/.sam File" format="bam,sam" /> + </inputs> + <outputs> + <data format="xls" name="outputxls" from_work_dir="output.clipping_profile.xls" /> + <data format="txt" name="outputr" from_work_dir="output.clipping_profile.r" /> + </outputs> + <help> clipping_profile.py +++++++++++++++++++ @@ -42,7 +42,7 @@ .. image:: http://rseqc.sourceforge.net/_images/clipping_good.png :height: 600 px :width: 600 px - :scale: 80 % + :scale: 80 % ----- @@ -57,5 +57,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - </help> + </help> </tool>
--- a/geneBody_coverage.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/geneBody_coverage.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,29 +1,29 @@ -<tool id="geneBody_coverage" name="Gene Body Converage (BAM)" version="1.1"> - <description> - Read coverage over gene body. - </description> - <requirements> - <requirement type="package" version="2.11.0">R</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.3.7">rseqc</requirement> - </requirements> - <command> - geneBody_coverage.py -i $input -r $refgene -o output - </command> - <inputs> - <param name="input" type="data" label="Input .bam file" format="bam" /> - <param name="refgene" type="data" label="Reference Genome" format="bed" /> - </inputs> - <outputs> - <data name="outputpdf" format="pdf" from_work_dir="output.geneBodyCoverage.pdf" label="${tool.name} on ${on_string} (PDF)" /> - <data name="outputr" format="r" from_work_dir="output.geneBodyCoverage_plot.r" label="${tool.name} on ${on_string} (R Script)" /> - <data name="outputtxt" format="txt" from_work_dir="output.geneBodyCoverage.txt" label="${tool.name} on ${on_string} (Text)" /> - </outputs> +<tool id="rseqc_geneBody_coverage" name="Gene Body Converage (BAM)" version="1.1"> + <description> + Read coverage over gene body. + </description> + <requirements> + <requirement type="package" version="3.0.1">R</requirement> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="2.3.7">rseqc</requirement> + </requirements> + <command> + geneBody_coverage.py -i $input -r $refgene -o output + </command> <stdio> <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> </stdio> - <help> + <inputs> + <param name="input" type="data" label="Input .bam file" format="bam" /> + <param name="refgene" type="data" label="Reference Genome" format="bed" /> + </inputs> + <outputs> + <data name="outputpdf" format="pdf" from_work_dir="output.geneBodyCoverage.pdf" label="${tool.name} on ${on_string} (PDF)" /> + <data name="outputr" format="txt" from_work_dir="output.geneBodyCoverage_plot.r" label="${tool.name} on ${on_string} (R Script)" /> + <data name="outputtxt" format="txt" from_work_dir="output.geneBodyCoverage.txt" label="${tool.name} on ${on_string} (Text)" /> + </outputs> + <help> geneBody_coverage.py ++++++++++++++++++++ @@ -54,7 +54,7 @@ .. image:: http://rseqc.sourceforge.net/_images/geneBody_coverage.png :height: 600 px :width: 600 px - :scale: 80 % + :scale: 80 % -----
--- a/geneBody_coverage2.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/geneBody_coverage2.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,29 +1,29 @@ -<tool id="geneBody_coverage2" name="Gene Body Converage (Bigwig)" version="1.1"> - <description> - Read coverage over gene body. - </description> - <requirements> - <requirement type="package" version="2.11.0">R</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.3.7">rseqc</requirement> - </requirements> - <command> - geneBody_coverage2.py -i $input -r $refgene -o output - </command> - <inputs> - <param name="input" type="data" label="Input bigwig file" format="bigwig" /> - <param name="refgene" type="data" label="Reference Genome" format="bed" /> - </inputs> - <outputs> - <data name="outputpdf" format="pdf" from_work_dir="output.geneBodyCoverage.pdf" label="${tool.name} on ${on_string} (PDF)" /> - <data name="outputr" format="r" from_work_dir="output.geneBodyCoverage_plot.r" label="${tool.name} on ${on_string} (R Script)" /> - <data name="outputtxt" format="txt" from_work_dir="output.geneBodyCoverage.txt" label="${tool.name} on ${on_string} (Text)" /> - </outputs> +<tool id="rseqc_geneBody_coverage2" name="Gene Body Converage (Bigwig)" version="1.1"> + <description> + Read coverage over gene body + </description> + <requirements> + <requirement type="package" version="3.0.1">R</requirement> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="2.3.7">rseqc</requirement> + </requirements> + <command> + geneBody_coverage2.py -i $input -r $refgene -o output + </command> <stdio> <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> </stdio> - <help> + <inputs> + <param name="input" type="data" label="Input bigwig file" format="bigwig" /> + <param name="refgene" type="data" label="Reference Genome" format="bed" /> + </inputs> + <outputs> + <data name="outputpdf" format="pdf" from_work_dir="output.geneBodyCoverage.pdf" label="${tool.name} on ${on_string} (PDF)" /> + <data name="outputr" format="txt" from_work_dir="output.geneBodyCoverage_plot.r" label="${tool.name} on ${on_string} (R Script)" /> + <data name="outputtxt" format="txt" from_work_dir="output.geneBodyCoverage.txt" label="${tool.name} on ${on_string} (Text)" /> + </outputs> + <help> geneBody_coverage2.py +++++++++++++++++++++ @@ -35,10 +35,10 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Reference gene model - Gene Model in BED format. + Gene Model in BED format. Outputs @@ -67,5 +67,5 @@ - </help> + </help> </tool>
--- a/infer_experiment.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/infer_experiment.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,35 +1,35 @@ -<tool id="infer_experiment" name="Infer Experiment" version="1.1"> - <description>speculates how RNA-seq were configured</description> - <requirements> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.3.7">rseqc</requirement> - </requirements> - <command> infer_experiment.py -i $input -r $refgene - - #if $sample_size.boolean - -s $sample_size.size - #end if - - > $output - </command> - <inputs> - <param name="input" type="data" format="bam,sam" label="Input BAM/SAM file" /> - <param name="refgene" type="data" format="bed" label="Reference gene model in bed format" /> - <conditional name="sample_size"> - <param name="boolean" type="boolean" label="Modify usable sampled reads" value="false" /> - <when value="true"> - <param name="size" type="integer" label="Number of usable sampled reads (default = 200000)" value="200000" /> - </when> - </conditional> - </inputs> - <outputs> - <data format="txt" name="output" /> - </outputs> +<tool id="rseqc_infer_experiment" name="Infer Experiment" version="1.1"> + <description>speculates how RNA-seq were configured</description> + <requirements> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="2.3.7">rseqc</requirement> + </requirements> + <command> + infer_experiment.py -i $input -r $refgene + #if $sample_size.boolean + -s $sample_size.size + #end if + + > $output + </command> <stdio> <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> </stdio> - <help> + <inputs> + <param name="input" type="data" format="bam,sam" label="Input BAM/SAM file" /> + <param name="refgene" type="data" format="bed" label="Reference gene model in bed format" /> + <conditional name="sample_size"> + <param name="boolean" type="boolean" label="Modify usable sampled reads" value="false" /> + <when value="true"> + <param name="size" type="integer" label="Number of usable sampled reads (default = 200000)" value="200000" /> + </when> + </conditional> + </inputs> + <outputs> + <data format="txt" name="output" /> + </outputs> + <help> infer_experiment.py +++++++++++++++++++ @@ -42,13 +42,13 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Reference gene model - Gene model in BED format. + Gene model in BED format. Number of usable sampled reads (default=200000) - Number of usable reads sampled from SAM/BAM file. More reads will give more accurate estimation, but make program little slower. + Number of usable reads sampled from SAM/BAM file. More reads will give more accurate estimation, but make program little slower. Outputs +++++++ @@ -88,37 +88,37 @@ **Example1** :: - ========================================================= - This is PairEnd Data :: + ========================================================= + This is PairEnd Data :: - Fraction of reads explained by "1++,1--,2+-,2-+": 0.4992 - Fraction of reads explained by "1+-,1-+,2++,2--": 0.5008 - Fraction of reads explained by other combinations: 0.0000 - ========================================================= + Fraction of reads explained by "1++,1--,2+-,2-+": 0.4992 + Fraction of reads explained by "1+-,1-+,2++,2--": 0.5008 + Fraction of reads explained by other combinations: 0.0000 + ========================================================= *Conclusion*: We can infer that this is NOT a strand specific because 50% of reads can be explained by "1++,1--,2+-,2-+", while the other 50% can be explained by "1+-,1-+,2++,2--". **Example2** :: - ============================================================ - This is PairEnd Data + ============================================================ + This is PairEnd Data - Fraction of reads explained by "1++,1--,2+-,2-+": 0.9644 :: - Fraction of reads explained by "1+-,1-+,2++,2--": 0.0356 - Fraction of reads explained by other combinations: 0.0000 - ============================================================ - + Fraction of reads explained by "1++,1--,2+-,2-+": 0.9644 :: + Fraction of reads explained by "1+-,1-+,2++,2--": 0.0356 + Fraction of reads explained by other combinations: 0.0000 + ============================================================ + *Conclusion*: We can infer that this is a strand-specific RNA-seq data. strandness of read1 is consistent with that of gene model, while strandness of read2 is opposite to the strand of reference gene model. **Example3** :: - ========================================================= - This is SingleEnd Data :: + ========================================================= + This is SingleEnd Data :: - Fraction of reads explained by "++,--": 0.9840 :: - Fraction of reads explained by "+-,-+": 0.0160 - Fraction of reads explained by other combinations: 0.0000 - ========================================================= + Fraction of reads explained by "++,--": 0.9840 :: + Fraction of reads explained by "+-,-+": 0.0160 + Fraction of reads explained by other combinations: 0.0000 + ========================================================= *Conclusion*: This is single-end, strand specific RNA-seq data. Strandness of reads are concordant with strandness of reference gene. @@ -137,5 +137,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - </help> + </help> </tool>
--- a/inner_distance.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/inner_distance.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,57 +1,58 @@ -<tool id="inner_distance" name="Inner Distance" version="1.1"> - <description>calculate the inner distance (or insert size) between two paired RNA reads</description> - <requirements> - <requirement type="package" version="2.11.0">R</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.3.7">rseqc</requirement> - </requirements> - <command> inner_distance.py -i $input -o output -r $refgene - - #if $bounds.hasLowerBound - -l $bounds.lowerBound - #end if - - #if $bounds2.hasUpperBound - -u $bounds2.upperBound - #end if +<tool id="rseqc_inner_distance" name="Inner Distance" version="1.1"> + <description>calculate the inner distance (or insert size) between two paired RNA reads</description> + <requirements> + <requirement type="package" version="3.0.1">R</requirement> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="2.3.7">rseqc</requirement> + </requirements> + <command> + inner_distance.py -i $input -o output -r $refgene - #if $steps.step - -s $steps.stepSize - #end if - </command> - <inputs> - <param name="input" type="data" format="bam,sam" label="input bam/sam file" /> - <param name="refgene" type="data" format="bed" label="reference gene model" /> - <conditional name="bounds"> - <param name="hasLowerBound" type="boolean" label="Specify lower bound" value="false"/> - <when value="true"> - <param name="lowerBound" type="integer" value="-250" label="Estimated Lower Bound (bp, default=-250)" /> - </when> - </conditional> - <conditional name="bounds2"> - <param name="hasUpperBound" type="boolean" label="Specify upper bound" value="false" /> - <when value="true"> - <param name="upperBound" type="integer" value="250" label="Estimated Upper Bound (bp, default=250)" /> - </when> - </conditional> - <conditional name="steps"> - <param name="step" type="boolean" label="Specify step size" value="false" /> - <when value="true"> - <param name="stepSize" type="integer" value="5" label="Step size (bp, default=5)" /> - </when> - </conditional> - </inputs> - <outputs> - <data format="txt" name="outputtxt" from_work_dir="output.inner_distance.txt" label="${tool.name} on ${on_string} (Text)"/> - <data format="txt" name="outputfreqtxt" from_work_dir="output.inner_distance_freq.txt" label="${tool.name} on ${on_string} (Freq Text)" /> - <data format="pdf" name="outputpdf" from_work_dir="output.inner_distance_plot.pdf" label="${tool.name} on ${on_string} (PDF)" /> - <data format="r" name="outputr" from_work_dir="output.inner_distance_plot.r" label="${tool.name} on ${on_string} (R Script)" /> - </outputs> + #if $bounds.hasLowerBound + -l $bounds.lowerBound + #end if + + #if $bounds2.hasUpperBound + -u $bounds2.upperBound + #end if + + #if $steps.step + -s $steps.stepSize + #end if + </command> <stdio> <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> </stdio> - <help> + <inputs> + <param name="input" type="data" format="bam,sam" label="input bam/sam file" /> + <param name="refgene" type="data" format="bed" label="reference gene model" /> + <conditional name="bounds"> + <param name="hasLowerBound" type="boolean" label="Specify lower bound" value="false"/> + <when value="true"> + <param name="lowerBound" type="integer" value="-250" label="Estimated Lower Bound (bp, default=-250)" /> + </when> + </conditional> + <conditional name="bounds2"> + <param name="hasUpperBound" type="boolean" label="Specify upper bound" value="false" /> + <when value="true"> + <param name="upperBound" type="integer" value="250" label="Estimated Upper Bound (bp, default=250)" /> + </when> + </conditional> + <conditional name="steps"> + <param name="step" type="boolean" label="Specify step size" value="false" /> + <when value="true"> + <param name="stepSize" type="integer" value="5" label="Step size (bp, default=5)" /> + </when> + </conditional> + </inputs> + <outputs> + <data format="txt" name="outputtxt" from_work_dir="output.inner_distance.txt" label="${tool.name} on ${on_string} (Text)"/> + <data format="txt" name="outputfreqtxt" from_work_dir="output.inner_distance_freq.txt" label="${tool.name} on ${on_string} (Freq Text)" /> + <data format="pdf" name="outputpdf" from_work_dir="output.inner_distance_plot.pdf" label="${tool.name} on ${on_string} (PDF)" /> + <data format="txt" name="outputr" from_work_dir="output.inner_distance_plot.r" label="${tool.name} on ${on_string} (R Script)" /> + </outputs> + <help> inner_distance.py +++++++++++++++++ @@ -71,16 +72,16 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Reference gene model - Gene model in BED format. + Gene model in BED format. Estimated Upper/Lower Bounds (defaults=250 and -250) - Estimated upper/lower bounds of inner distance (bp). + Estimated upper/lower bounds of inner distance (bp). Step size (default=5) - Step size of histogram + Step size of histogram Output @@ -118,5 +119,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - </help> + </help> </tool>
--- a/junction_annotation.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/junction_annotation.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,38 +1,38 @@ -<tool id="junction_annotation" name="Junction Annotation" version="1.1"> - <description>compares detected splice junctions to reference gene model</description> - <requirements> - <requirement type="package" version="2.11.0">R</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.3.7">rseqc</requirement> - </requirements> - <command> junction_annotation.py -i $input -o output -r $refgene - - #if $intron.hasIntron - -m $intron.min_Intron - #end if - - </command> - <inputs> - <param name="input" type="data" format="bam,sam" label="input bam/sam file" /> - <param name="refgene" type="data" format="bed" label="reference gene model" /> - <conditional name="intron"> - <param name="hasIntron" type="boolean" label="Specify minimum intron length" value="false"/> - <when value="true"> - <param name="min_Intron" type="integer" value="50" label="Minimum intron length (bp, default=50)" /> - </when> - </conditional> - </inputs> - <outputs> - <data format="xls" name="outputxls" from_work_dir="output.junction.xls" label="${tool.name} on ${on_string} (XLS)"/> - <data format="r" name="outputr" from_work_dir="output.junction_plot.r" label="${tool.name} on ${on_string} (R Script)" /> - <data format="pdf" name="outputpdf" from_work_dir="output.splice_events.pdf" label="${tool.name} on ${on_string} (Splice Events PDF)"/> - <data format="pdf" name="outputjpdf" from_work_dir="output.splice_junction.pdf" label="${tool.name} on ${on_string} (Splice Junction PDF)" /> - </outputs> +<tool id="rseqc_junction_annotation" name="Junction Annotation" version="1.1"> + <description>compares detected splice junctions to reference gene model</description> + <requirements> + <requirement type="package" version="3.0.1">R</requirement> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="2.3.7">rseqc</requirement> + </requirements> + <command> + junction_annotation.py + -i $input -o output -r $refgene + #if $intron.hasIntron + -m $intron.min_Intron + #end if + </command> <stdio> <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> </stdio> - <help> + <inputs> + <param name="input" type="data" format="bam,sam" label="input bam/sam file" /> + <param name="refgene" type="data" format="bed" label="reference gene model" /> + <conditional name="intron"> + <param name="hasIntron" type="boolean" label="Specify minimum intron length" value="false"/> + <when value="true"> + <param name="min_Intron" type="integer" value="50" label="Minimum intron length (bp, default=50)" /> + </when> + </conditional> + </inputs> + <outputs> + <data format="xls" name="outputxls" from_work_dir="output.junction.xls" label="${tool.name} on ${on_string} (XLS)"/> + <data format="txt" name="outputr" from_work_dir="output.junction_plot.r" label="${tool.name} on ${on_string} (R Script)" /> + <data format="pdf" name="outputpdf" from_work_dir="output.splice_events.pdf" label="${tool.name} on ${on_string} (Splice Events PDF)"/> + <data format="pdf" name="outputjpdf" from_work_dir="output.splice_junction.pdf" label="${tool.name} on ${on_string} (Splice Junction PDF)" /> + </outputs> + <help> junction_annotation.py ++++++++++++++++++++++ @@ -54,13 +54,13 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Reference gene model - Gene model in BED format. + Gene model in BED format. Minimum intron length (default=50) - Minimum intron length (bp). + Minimum intron length (bp). Output @@ -97,5 +97,5 @@ - </help> + </help> </tool>
--- a/junction_saturation.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/junction_saturation.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,40 +1,40 @@ -<tool id="junction_saturation" name="Junction Saturation" version="1.1"> - <description>detects splice junctions from each subset and compares them to reference gene model</description> - <requirements> - <requirement type="package" version="2.11.0">R</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.3.7">rseqc</requirement> - </requirements> - <command> junction_saturation.py -i $input -o output -r $refgene -m $intronSize -v $minSplice - - #if $percentiles.specifyPercentiles - -l $percentiles.lowBound -u $percentiles.upBound -s $percentiles.percentileStep - #end if +<tool id="rseqc_junction_saturation" name="Junction Saturation" version="1.1"> + <description>detects splice junctions from each subset and compares them to reference gene model</description> + <requirements> + <requirement type="package" version="3.0.1">R</requirement> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="2.3.7">rseqc</requirement> + </requirements> + <command> junction_saturation.py -i $input -o output -r $refgene -m $intronSize -v $minSplice - </command> - <inputs> - <param name="input" type="data" format="bam,sam" label="input bam/sam file" /> - <param name="refgene" type="data" format="bed" label="reference gene model" /> - <param name="intronSize" type="integer" label="Minimum intron size (bp, default=50)" value="50"/> - <param name="minSplice" type="integer" label="Minimum coverage (default=1)" value="1" /> - <conditional name="percentiles"> - <param name="specifyPercentiles" type="boolean" label="Specify sampling bounds and frequency" value="false"/> - <when value="true"> - <param name="lowBound" type="integer" value="5" label="Lower Bound Sampling Frequency (bp, default=5)" /> - <param name="upBound" type="integer" value="100" label="Upper Bound Sampling Frequency (bp, default=100)" /> - <param name="percentileStep" type="integer" value="5" label="Sampling increment (default=5)" /> - </when> - </conditional> - </inputs> - <outputs> - <data format="r" name="outputr" from_work_dir="output.junctionSaturation_plot.r" label="${tool.name} on ${on_string} (R Script)"/> - <data format="pdf" name="outputpdf" from_work_dir="output.junctionSaturation_plot.pdf" label="${tool.name} on ${on_string} (PDF)"/> - </outputs> + #if $percentiles.specifyPercentiles + -l $percentiles.lowBound -u $percentiles.upBound -s $percentiles.percentileStep + #end if + + </command> <stdio> <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> </stdio> - <help> + <inputs> + <param name="input" type="data" format="bam,sam" label="input bam/sam file" /> + <param name="refgene" type="data" format="bed" label="reference gene model" /> + <param name="intronSize" type="integer" label="Minimum intron size (bp, default=50)" value="50"/> + <param name="minSplice" type="integer" label="Minimum coverage (default=1)" value="1" /> + <conditional name="percentiles"> + <param name="specifyPercentiles" type="boolean" label="Specify sampling bounds and frequency" value="false"/> + <when value="true"> + <param name="lowBound" type="integer" value="5" label="Lower Bound Sampling Frequency (bp, default=5)" /> + <param name="upBound" type="integer" value="100" label="Upper Bound Sampling Frequency (bp, default=100)" /> + <param name="percentileStep" type="integer" value="5" label="Sampling increment (default=5)" /> + </when> + </conditional> + </inputs> + <outputs> + <data format="txt" name="outputr" from_work_dir="output.junctionSaturation_plot.r" label="${tool.name} on ${on_string} (R Script)"/> + <data format="pdf" name="outputpdf" from_work_dir="output.junctionSaturation_plot.pdf" label="${tool.name} on ${on_string} (PDF)"/> + </outputs> + <help> junction_saturation.py ++++++++++++++++++++++ @@ -52,19 +52,19 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Reference gene model - Gene model in BED format. + Gene model in BED format. Sampling Percentiles - Upper Bound, Lower Bound, Sampling Increment (defaults= 100, 5, and 5) - Sampling starts from the Lower Bound and increments to the Upper Bound at the rate of the Sampling Increment. + Sampling starts from the Lower Bound and increments to the Upper Bound at the rate of the Sampling Increment. Minimum intron length (default=50) - Minimum intron length (bp). + Minimum intron length (bp). Minimum coverage (default=1) - Minimum number of supportting reads to call a junction. + Minimum number of supportting reads to call a junction. Output ++++++++++++++ @@ -95,5 +95,5 @@ - </help> + </help> </tool>
--- a/read_GC.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/read_GC.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,25 +1,26 @@ -<tool id="read_GC" name="Read GC" version="1.1"> - <description>determines GC% and read count</description> - <requirements> - <requirement type="package" version="2.11.0">R</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.3.7">rseqc</requirement> - </requirements> - <command> read_GC.py -i $input -o output - </command> - <inputs> - <param name="input" type="data" format="bam,sam" label="input bam/sam file" /> - </inputs> - <outputs> - <data format="xls" name="outputxls" from_work_dir="output.GC.xls" label="${tool.name} on ${on_string} (XLS)"/> - <data format="r" name="outputr" from_work_dir="output.GC_plot.r" label="${tool.name} on ${on_string} (R Script)" /> - <data format="pdf" name="outputpdf" from_work_dir="output.GC_plot.pdf" label="${tool.name} on ${on_string} (PDF)" /> - </outputs> +<tool id="rseqc_read_GC" name="Read GC" version="1.1"> + <description>determines GC% and read count</description> + <requirements> + <requirement type="package" version="3.0.1">R</requirement> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="2.3.7">rseqc</requirement> + </requirements> + <command> + read_GC.py -i $input -o output + </command> <stdio> <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> </stdio> - <help> + <inputs> + <param name="input" type="data" format="bam,sam" label="input bam/sam file" /> + </inputs> + <outputs> + <data format="xls" name="outputxls" from_work_dir="output.GC.xls" label="${tool.name} on ${on_string} (XLS)"/> + <data format="txt" name="outputr" from_work_dir="output.GC_plot.r" label="${tool.name} on ${on_string} (R Script)" /> + <data format="pdf" name="outputpdf" from_work_dir="output.GC_plot.pdf" label="${tool.name} on ${on_string} (PDF)" /> + </outputs> + <help> read_GC.py ++++++++++ @@ -28,7 +29,7 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Output ++++++++++++++ @@ -56,5 +57,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - </help> + </help> </tool>
--- a/read_NVC.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/read_NVC.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,30 +1,27 @@ -<tool id="read_NVC" name="Read NVC" version="1.1"> - <description>to check the nucleotide composition bias</description> - <requirements> - <requirement type="package" version="2.11.0">R</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.3.7">rseqc</requirement> - </requirements> - <command> read_NVC.py -i $input -o output - - #if $nx - -x - #end if - </command> - <inputs> - <param name="input" type="data" format="bam,sam" label="input bam/sam file" /> - <param name="nx" type="boolean" label="Include N,X in NVC plot" value="false" /> - </inputs> - <outputs> - <data format="xls" name="outputxls" from_work_dir="output.NVC.xls" label="${tool.name} on ${on_string} (XLS)" /> - <data format="r" name="outputr" from_work_dir="output.NVC_plot.r" label="${tool.name} on ${on_string} (R Script)" /> - <data format="pdf" name="outputpdf" from_work_dir="output.NVC_plot.pdf" label="${tool.name} on ${on_string} (PDF)" /> - </outputs> +<tool id="rseqc_read_NVC" name="Read NVC" version="1.1"> + <description>to check the nucleotide composition bias</description> + <requirements> + <requirement type="package" version="3.0.1">R</requirement> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="2.3.7">rseqc</requirement> + </requirements> + <command> + read_NVC.py -i $input -o output $nx + </command> <stdio> <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> </stdio> - <help> + <inputs> + <param name="input" type="data" format="bam,sam" label="input bam/sam file" /> + <param name="nx" type="boolean" value="false" truevalue="-x" falsevalue="" label="Include N,X in NVC plot"/> + </inputs> + <outputs> + <data format="xls" name="outputxls" from_work_dir="output.NVC.xls" label="${tool.name} on ${on_string} (XLS)" /> + <data format="txt" name="outputr" from_work_dir="output.NVC_plot.r" label="${tool.name} on ${on_string} (R Script)" /> + <data format="pdf" name="outputpdf" from_work_dir="output.NVC_plot.pdf" label="${tool.name} on ${on_string} (PDF)" /> + </outputs> + <help> read_NVC.py +++++++++++ @@ -41,10 +38,10 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Include N,X in NVC plot - Plots N and X alongside A, T, C, and G in plot. + Plots N and X alongside A, T, C, and G in plot. Output ++++++++++++++ @@ -76,5 +73,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - </help> + </help> </tool>
--- a/read_distribution.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/read_distribution.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,23 +1,24 @@ -<tool id="read_distribution" name="Read Distribution" version="1.1"> - <description>calculates how mapped reads were distributed over genome feature</description> - <requirements> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.3.7">rseqc</requirement> - </requirements> - <command> read_distribution.py -i $input -r $refgene > $output - </command> - <inputs> - <param name="input" type="data" format="bam,sam" label="input bam/sam file" /> - <param name="refgene" type="data" format="bed" label="reference gene model" /> - </inputs> - <outputs> - <data format="txt" name="output" /> - </outputs> +<tool id="rseqc_read_distribution" name="Read Distribution" version="1.1"> + <description>calculates how mapped reads were distributed over genome feature</description> + <requirements> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="2.3.7">rseqc</requirement> + </requirements> + <command> + read_distribution.py -i $input -r $refgene > $output + </command> <stdio> <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> </stdio> - <help> + <inputs> + <param name="input" type="data" format="bam,sam" label="input bam/sam file" /> + <param name="refgene" type="data" format="bed" label="reference gene model" /> + </inputs> + <outputs> + <data format="txt" name="output" /> + </outputs> + <help> read_distribution.py ++++++++++++++++++++ @@ -45,10 +46,10 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Reference gene model - Gene model in BED format. + Gene model in BED format. Sample Output ++++++++++++++ @@ -85,5 +86,5 @@ - </help> + </help> </tool>
--- a/read_duplication.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/read_duplication.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,27 +1,28 @@ -<tool id="read_duplication" name="Read Duplication" version="1.1"> - <description>determines reads duplication rate with sequence-based and mapping-based strategies</description> - <requirements> - <requirement type="package" version="2.11.0">R</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.3.7">rseqc</requirement> - </requirements> - <command> read_duplication.py -i $input -o output -u $upLimit - </command> - <inputs> - <param name="input" type="data" format="bam,sam" label="input bam/sam file" /> - <param name="upLimit" type="integer" label="Upper Limit of Plotted Duplicated Times (default=500)" value="500" /> - </inputs> - <outputs> - <data format="xls" name="outputxls" from_work_dir="output.dup.pos.DupRate.xls" label="${tool.name} on ${on_string} (Position XLS)"/> - <data format="xls" name="outputseqxls" from_work_dir="output.dup.seq.DupRate.xls" label="${tool.name} on ${on_string} (Sequence XLS)"/> - <data format="r" name="outputr" from_work_dir="output.DupRate_plot.r" label="${tool.name} on ${on_string} (R Script)" /> - <data format="pdf" name="outputpdf" from_work_dir="output.DupRate_plot.pdf" label="${tool.name} on ${on_string} (PDF)" /> - </outputs> +<tool id="rseqc_read_duplication" name="Read Duplication" version="1.1"> + <description>determines reads duplication rate with sequence-based and mapping-based strategies</description> + <requirements> + <requirement type="package" version="3.0.1">R</requirement> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="2.3.7">rseqc</requirement> + </requirements> + <command> + read_duplication.py -i $input -o output -u $upLimit + </command> <stdio> <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> </stdio> - <help> + <inputs> + <param name="input" type="data" format="bam,sam" label="input bam/sam file" /> + <param name="upLimit" type="integer" label="Upper Limit of Plotted Duplicated Times (default=500)" value="500" /> + </inputs> + <outputs> + <data format="xls" name="outputxls" from_work_dir="output.dup.pos.DupRate.xls" label="${tool.name} on ${on_string} (Position XLS)"/> + <data format="xls" name="outputseqxls" from_work_dir="output.dup.seq.DupRate.xls" label="${tool.name} on ${on_string} (Sequence XLS)"/> + <data format="txt" name="outputr" from_work_dir="output.DupRate_plot.r" label="${tool.name} on ${on_string} (R Script)" /> + <data format="pdf" name="outputpdf" from_work_dir="output.DupRate_plot.pdf" label="${tool.name} on ${on_string} (PDF)" /> + </outputs> + <help> read_duplication.py +++++++++++++++++++ @@ -34,10 +35,10 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Upper Limit of Plotted Duplicated Times (default=500) - Only used for plotting. + Only used for plotting. Output ++++++++++++++ @@ -66,5 +67,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - </help> + </help> </tool>
--- a/read_quality.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/read_quality.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,26 +1,27 @@ -<tool id="read_quality" name="Read Quality" version="1.1"> - <description>determines Phred quality score</description> - <requirements> - <requirement type="package" version="2.11.0">R</requirement> - <requirement type="package" version="1.7.1">numpy</requirement> - <requirement type="package" version="2.3.7">rseqc</requirement> - </requirements> - <command> read_quality.py -i $input -o output -r $reduce - </command> - <inputs> - <param name="input" type="data" format="bam,sam" label="input bam/sam file" /> - <param name="reduce" type="integer" label="Ignore Phred scores less than this amount (only applies to 'boxplot', default=1000)" value="1000" /> - </inputs> - <outputs> - <data format="r" name="outputr" from_work_dir="output.qual.r" label="${tool.name} on ${on_string} (R Script)" /> - <data format="pdf" name="outputpdf" from_work_dir="output.qual.heatmap.pdf" label="${tool.name} on ${on_string} (Heatmap PDF)" /> - <data format="pdf" name="outputpdf" from_work_dir="output.qual.boxplot.pdf" label="${tool.name} on ${on_string} (Boxplot PDF)" /> - </outputs> +<tool id="rseqc_read_quality" name="Read Quality" version="1.1"> + <description>determines Phred quality score</description> + <requirements> + <requirement type="package" version="3.0.1">R</requirement> + <requirement type="package" version="1.7.1">numpy</requirement> + <requirement type="package" version="2.3.7">rseqc</requirement> + </requirements> + <command> + read_quality.py -i $input -o output -r $reduce + </command> <stdio> <exit_code range="1:" level="fatal" description="An error occured during execution, see stderr and stdout for more information" /> <regex match="[Ee]rror" source="both" description="An error occured during execution, see stderr and stdout for more information" /> </stdio> - <help> + <inputs> + <param name="input" type="data" format="bam,sam" label="input bam/sam file" /> + <param name="reduce" type="integer" label="Ignore Phred scores less than this amount (only applies to 'boxplot', default=1000)" value="1000" /> + </inputs> + <outputs> + <data format="txt" name="outputr" from_work_dir="output.qual.r" label="${tool.name} on ${on_string} (R Script)" /> + <data format="pdf" name="outputpdf" from_work_dir="output.qual.heatmap.pdf" label="${tool.name} on ${on_string} (Heatmap PDF)" /> + <data format="pdf" name="outputpdf" from_work_dir="output.qual.boxplot.pdf" label="${tool.name} on ${on_string} (Boxplot PDF)" /> + </outputs> + <help> read_quality.py +++++++++++++++ @@ -37,10 +38,10 @@ ++++++++++++++ Input BAM/SAM file - Alignment file in BAM/SAM format. + Alignment file in BAM/SAM format. Ignore phred scores less than this number (default=1000) - To avoid making huge vector in R, nucleotide with certain phred score represented less than this number will be ignored. Increase this number save more memory while reduce precision. This option only applies to the 'boxplot'. + To avoid making huge vector in R, nucleotide with certain phred score represented less than this number will be ignored. Increase this number save more memory while reduce precision. This option only applies to the 'boxplot'. Output ++++++++++++++ @@ -73,5 +74,5 @@ .. _RSeQC: http://rseqc.sourceforge.net/ - </help> + </help> </tool>
--- a/samtoolshelper.py Wed Oct 02 02:20:04 2013 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ -import sys -import subprocess as sp -import os - -# Creates the sorted and indexed bam/bai files that are requried for both bam2wig and RSEQC_count -def samtools_sorted(bam): - sortedbam = bam + ".sorted" - indexedbam = ".".join([sortedbam,"bam.bai"]) - sp.call(['samtools', 'sort', '-m 1000000000', bam, sortedbam]) - sortedbam = sortedbam + '.bam' - sp.call(['samtools', 'index', sortedbam, indexedbam]) - return sortedbam - -def main(args): - args[2] = samtools_sorted(args[2]) - sp.call(args) - - -if __name__ == "__main__": - main(sys.argv[1:]) \ No newline at end of file
--- a/tool_dependencies.xml Wed Oct 02 02:20:04 2013 -0400 +++ b/tool_dependencies.xml Mon Oct 07 15:01:13 2013 -0400 @@ -1,49 +1,25 @@ <?xml version="1.0"?> <tool_dependency> - <package name="R" version="2.11.0"> + <package name="R" version="3.0.1"> + <repository changeset_revision="7473992d1f38" name="package_r_3_0_1" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> + <package name="numpy" version="1.7.1"> + <repository changeset_revision="028df1ddd7a2" name="package_numpy_1_7" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> + <package name="rseqc" version="2.3.7"> <install version="1.0"> <actions> - <action type="download_by_url">http://cran.rstudio.com/src/base/R-2/R-2.11.0.tar.gz</action> - <action type="shell_command"> - ./configure --enable-R-shlib \ - --with-readline=no \ - --with-x=no \ - --prefix=$INSTALL_DIR \ - --libdir=$INSTALL_DIR/lib \ - --disable-R-framework - </action> - <action type="shell_command">make && make install</action> + <action type="download_by_url">http://sourceforge.net/projects/rseqc/files/RSeQC-2.3.7.tar.gz</action> + <action type="shell_command">python setup.py install --install-lib $INSTALL_DIR/lib/python --install-scripts $INSTALL_DIR/bin</action> <action type="set_environment"> - <environment_variable action="set_to" name="R_HOME">$INSTALL_DIR/lib/R</environment_variable> - <environment_variable action="set_to" name="R_LIBS">$INSTALL_DIR/lib/R/library</environment_variable> - <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/lib/R/bin</environment_variable> + <environment_variable action="prepend_to" name="PYTHONPATH">$INSTALL_DIR/lib/python</environment_variable> + <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> </action> </actions> </install> <readme> - R is a free software environment for statistical computing and graphics. - NOTE: See custom compilation options above - </readme> - </package> - <package name="numpy" version="1.7.1"> - <repository toolshed="http://toolshed.g2.bx.psu.edu/" name="package_numpy_1_7" owner="iuc" changeset_revision="74c21f9bdc39" /> - </package> - <package name="rseqc" version="2.3.7"> - <install version = "1.0"> - <actions> - <action type="download_by_url">http://sourceforge.net/projects/rseqc/files/RSeQC-2.3.7.tar.gz</action> - <action type="shell_command">python setup.py install --root $INSTALL_DIR --prefix . --install-lib lib</action> - <action type="set_environment"> - <environment_variable name="PYTHONPATH" action="prepend_to">$INSTALL_DIR/lib</environment_variable> - </action> - <action type="set_environment"> - <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable> - </action> - </actions> - </install> - <readme> - RSeQC version 2.3.7, documentation available at http://dldcc-web.brc.bcm.edu/lilab/liguow/CGI/rseqc/_build/html/index.html#. - Requires gcc, python, numpy, and R + RSeQC version 2.3.7, documentation available at http://dldcc-web.brc.bcm.edu/lilab/liguow/CGI/rseqc/_build/html/index.html. + Requires gcc and python. </readme> </package>