Mercurial > repos > iuc > bbgbigwig
changeset 0:498748c87252 draft
planemo upload for repository https://www.encodeproject.org/software/bedgraphtobigwig/ commit fa9d44f1ca94a7522f21db6c7771764e1b8d92a5
author | iuc |
---|---|
date | Fri, 14 Jun 2024 21:23:54 +0000 |
parents | |
children | b95435276746 |
files | bam_bed_gff_to_bigwig.xml gff_to_bed_converter.py test-data/dbkeys.loc.test test-data/featureCounts_input1.bam test-data/featureCounts_input1.bigwig test-data/merlin.bed.bigwig test-data/merlin.gff.bigwig test-data/srma_out2.bam test-data/srma_out2.bigwig test-data/test-6.bed test-data/test5.bed test-data/test5.bed.bigwig test-data/test5.gff.bigwig test-data/test5.gff3 test-data/testing.len tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 17 files changed, 246 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- bam_bed_gff_to_bigwig.xml: Galaxy tool wrapper that turns BAM, BED or GFF/GFF3 input into a coverage bigWig.
     Pipeline: (GFF is first converted to BED) then bedtools genomecov, then sort, then bedGraphToBigWig. -->
<tool id="bbgtobigwig" name="BAM BED GFF coverage bigWigs" version="0.1" profile="22.05">
    <xrefs>
        <xref type="bio.tools">UCSC_Genome_Browser_Utilities</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="455">ucsc-bedgraphtobigwig</requirement>
        <requirement type="package" version="2.31.1">bedtools</requirement>
        <requirement type="package" version="9.5">coreutils</requirement>
        <requirement type="package" version="3.12.3">python</requirement>
    </requirements>
    <required_files>
        <include path="gff_to_bed_converter.py"/>
    </required_files>
    <command detect_errors="aggressive"><![CDATA[
## Both data parameters live inside the hist_or_builtin conditional, so they are
## always referenced through their full path.
#set $inp = $hist_or_builtin.input1
## Chromosome lengths come from the __dbkeys__ data table (indexed) or from a history dataset.
#if $hist_or_builtin.genosrc == "indexed":
    ln -s '$hist_or_builtin.chromfile.fields.len_path' ./CHROMFILE &&
#else:
    ln -s '$hist_or_builtin.chromfile' ./CHROMFILE &&
#end if
## GFF/GFF3 is converted to BED first; everything else is used as supplied.
#if $inp.ext in ['gff', 'gff3']:
    python '$__tool_directory__/gff_to_bed_converter.py' < '$inp' > input2 &&
#else:
    ln -s '$inp' input2 &&
#end if
## unsorted.bam is still a BAM file: it must go through -ibam, never the BED/GFF (-i) reader.
#if $inp.ext in ['bam', 'unsorted.bam']:
    bedtools genomecov -bg -split -ibam input2 |
#else:
    bedtools genomecov -bg -i input2 -g ./CHROMFILE |
#end if
## bedGraphToBigWig needs the bedGraph sorted by chromosome, then start, in C collation.
LC_COLLATE=C sort -k1,1 -k2,2n > temp.bg &&
bedGraphToBigWig temp.bg ./CHROMFILE '$output'
    ]]></command>
    <inputs>
        <conditional name="hist_or_builtin">
            <param name="genosrc" type="select" label="Is the input assigned to a built-in or custom reference genome?"
                help="If the input has no dbkey, supply a chromosome lengths file">
                <option selected="True" value="indexed">Input data was made with a built-in genome or already has a custom genome dbkey</option>
                <option value="history">Input data mapped on a genome from the current history. The chromosome lengths file is also in the history</option>
            </param>
            <when value="indexed">
                <param name="input1" type="data" format="bam,unsorted.bam,bed,gff,gff3" label="bam/bed/gff to convert">
                    <validator type="unspecified_build" />
                </param>
                <!-- Options are restricted to the data table row whose dbkey matches the input's dbkey -->
                <param name="chromfile" type="select" label="Source Genome Build">
                    <options from_data_table="__dbkeys__">
                        <filter type="data_meta" column="0" key="dbkey" ref="input1"/>
                    </options>
                    <validator type="no_options" message="The chosen genome build is not available."/>
                </param>
            </when>
            <when value="history">
                <param name="input1" type="data" format="bam,unsorted.bam,bed,gff,gff3" label="bam/bed/gff to convert"/>
                <param name="chromfile" type="data" format="len,txt,tabular" label="Chromosome length file"
                    help="Sequence lengths for the history reference are required to make a bigwig. Compute sequence length tool makes these from fasta files"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="output" format="bigwig"/>
    </outputs>
    <tests>
        <!-- BAM with a built-in dbkey resolved through the test data table -->
        <test expect_num_outputs="1">
            <conditional name="hist_or_builtin">
                <param name="genosrc" value="indexed"/>
                <param name="input1" value="featureCounts_input1.bam" dbkey="hg38"/>
                <param name="chromfile" value="hg38"/>
            </conditional>
            <output name="output" value="featureCounts_input1.bigwig" compare="sim_size"/>
        </test>
        <!-- BAM with a chromosome lengths file from the history -->
        <test expect_num_outputs="1">
            <conditional name="hist_or_builtin">
                <param name="genosrc" value="history"/>
                <param name="input1" value="srma_out2.bam"/>
                <param name="chromfile" value="testing.len"/>
            </conditional>
            <output name="output" value="srma_out2.bigwig" compare="sim_size"/>
        </test>
        <!-- GFF3 input, exercising the GFF to BED conversion step -->
        <test expect_num_outputs="1">
            <conditional name="hist_or_builtin">
                <param name="genosrc" value="history"/>
                <param name="input1" value="test5.gff3"/>
                <param name="chromfile" value="testing.len"/>
            </conditional>
            <output name="output" value="test5.gff.bigwig" compare="sim_size"/>
        </test>
        <!-- BED input -->
        <test expect_num_outputs="1">
            <conditional name="hist_or_builtin">
                <param name="genosrc" value="history"/>
                <param name="input1" value="test5.bed"/>
                <param name="chromfile" value="testing.len"/>
            </conditional>
            <output name="output" value="test5.bed.bigwig" compare="sim_size"/>
        </test>
    </tests>
    <help>

    Estimates coverage of a reference genome for bam, bed or gff as a bigwig, suitable for viewing in JBrowse2 or another genome browser.

    A chromosome lengths file must be provided if the input has a missing dbkey='?' on the pencil (edit attributes) tab.

    The actual reference is not needed. The Compute sequence length tool can generate the lengths file.

    This can be useful in workflows with assemblies in progress before a stable reference is available for a custom or built in reference dbkey.

    GFF and GFF3 inputs are converted to BED intervals before coverage is calculated.

    BAM inputs, including the unsorted.bam datatype, are read directly by bedtools genomecov, which expects them to be sorted by position. BED and GFF inputs must be grouped by chromosome.

    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btq351</citation>
    </citations>
</tool>
#!/usr/bin/env python
"""Convert GFF/GFF3 records read from stdin into minimal BED intervals on stdout.

File: gff_to_bed_converter.py, called by bam_bed_gff_to_bigwig.xml with the GFF
dataset redirected to stdin and stdout redirected to the BED file that is then
passed to ``bedtools genomecov``.

Each output line has four tab-separated fields: chrom, chromStart, chromEnd, 0.
"""

import sys

# f-strings are used below, so Python 3.6 is the real minimum.
assert sys.version_info[:2] >= (3, 6)


def gff_line_to_bed(line):
    """Return the BED line for one GFF record, or None if there is nothing to emit.

    GFF coordinates are 1-based and inclusive; BED coordinates are 0-based and
    half-open. Only the start is therefore shifted down by one, the end is
    copied unchanged so the last base of the feature stays covered.

    :param line: one line of GFF text, with or without its line terminator.
    :returns: ``"chrom\\tstart\\tend\\t0\\n"``, or ``None`` for blank lines,
        ``#`` comment/pragma lines, and records whose start (column 4) or end
        (column 5) is missing or not an integer.
    """
    line = line.rstrip("\r\n")
    if not line or line.startswith("#"):
        return None
    elems = line.split("\t")
    try:
        # GFF columns: seqid, source, type, start, end, score, strand, phase, attributes
        start = int(elems[3]) - 1
        end = int(elems[4])
    except (IndexError, ValueError):
        return None
    return f"{elems[0]}\t{start}\t{end}\t0\n"


def __main__():
    """Stream stdin to stdout, converting every GFF record to a BED interval.

    Blank and comment lines are dropped silently. Malformed data lines are
    dropped as well, but they are counted and summarised on stderr so that lost
    records do not go unnoticed. stdout carries BED data only.
    """
    skipped_lines = 0
    first_skipped_line = None
    # Input arrives on stdin and output leaves on stdout (not file arguments)
    # because bam_bed_gff_to_bigwig.xml drives this script with shell redirection.
    for line_number, line in enumerate(sys.stdin, start=1):
        record = line.rstrip("\r\n")
        if not record or record.startswith("#"):
            continue
        bed_line = gff_line_to_bed(record)
        if bed_line is None:
            skipped_lines += 1
            if first_skipped_line is None:
                first_skipped_line = line_number
            continue
        sys.stdout.write(bed_line)
    if skipped_lines:
        sys.stderr.write(
            f"Skipped {skipped_lines} malformed GFF line(s); first at line {first_skipped_line}\n"
        )


if __name__ == "__main__":
    __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/dbkeys.loc.test Fri Jun 14 21:23:54 2024 +0000 @@ -0,0 +1,2 @@ +#<dbkey> <display_name> <len_file_path> +hg38 hg38 ${__HERE__}/testing.len
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test-6.bed Fri Jun 14 21:23:54 2024 +0000 @@ -0,0 +1,3 @@ +Merlin 49 1452 chromosomal_replication_initiator_protein_DnaA 0 + +Merlin 1457 2557 DNA_polymerase_III_subunit_beta 0 + +Merlin 2557 3630 DNA_replication_and_repair_protein_RecF 0 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test5.bed Fri Jun 14 21:23:54 2024 +0000 @@ -0,0 +1,17 @@ +chr7 115444712 115444739 CCDS5763.1_cds_0_0_chr7_115444713_f 0 + +chr7 115468538 115468624 CCDS5763.1_cds_1_0_chr7_115468539_f 0 + +chr7 115483024 115483277 CCDS5763.1_cds_2_0_chr7_115483025_f 0 + +chr7 115484165 115484501 CCDS5763.1_cds_3_0_chr7_115484166_f 0 + +chr7 115485764 115485980 CCDS5763.1_cds_4_0_chr7_115485765_f 0 + +chr7 115486322 115486481 CCDS5763.1_cds_5_0_chr7_115486323_f 0 + +chr7 115491298 115491487 CCDS5763.1_cds_6_0_chr7_115491299_f 0 + +chr7 115468538 115468624 CCDS5764.1_cds_0_0_chr7_115468539_f 0 + +chr7 115483024 115483277 CCDS5764.1_cds_1_0_chr7_115483025_f 0 + +chr7 115484165 115484501 CCDS5764.1_cds_2_0_chr7_115484166_f 0 + +chr7 115485764 115485980 CCDS5764.1_cds_3_0_chr7_115485765_f 0 + +chr7 115486322 115486481 CCDS5764.1_cds_4_0_chr7_115486323_f 0 + +chr7 115491298 115491487 CCDS5764.1_cds_5_0_chr7_115491299_f 0 + +chr7 115733786 115733936 CCDS5766.1_cds_0_0_chr7_115733787_f 0 + +chr7 115734264 115734452 CCDS5766.1_cds_1_0_chr7_115734265_f 0 + +chr7 115739975 115740126 CCDS5766.1_cds_2_0_chr7_115739976_f 0 + +chr7 115733786 115733936 CCDS5765.1_cds_0_0_chr7_115733787_f 0 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test5.gff3 Fri Jun 14 21:23:54 2024 +0000 @@ -0,0 +1,38 @@ +##gff-version 3 +##date Tue Jun 26 10:48:17 2007 +##sequence-region ctgA 1 50000 +##source gbrowse GFFToGalaxyDumper plugin +##NOTE: All features dumped. +ctgA example my_feature 22132 24633 . + . ID=My_feature:f15 +ctgA example my_feature 46990 48410 . - . ID=My_feature:f11 +ctgA example my_feature 44705 47713 . - . ID=My_feature:f01 +ctgA example my_feature 36649 40440 . - . ID=My_feature:f03 +ctgA example my_feature 23072 23185 . + . ID=My_feature:f14 +ctgA example my_feature 37242 38653 . + . ID=My_feature:f04 +ctgA example motif 37497 40559 . - . ID=Motif:m15;Note=7-transmembrane +ctgA example my_feature 36034 38167 . + . ID=My_feature:f09 +ctgA example motif 28332 30033 . - . ID=Motif:m02;Note=HOX +ctgA example my_feature 4715 5968 . - . ID=My_feature:f05 +ctgA example motif 48253 48366 . + . ID=Motif:m01;Note=WD40 +ctgA example BAC 1000 20000 . . . ID=BAC:b101.2;Note=Fingerprinted+BAC+with+end+reads +ctgA example right_end_read 19500 20000 . - . Parent=BAC:b101.2 +ctgA example left_end_read 1000 1500 . + . Parent=BAC:b101.2 +ctgA example motif 13801 14007 . - . ID=Motif:m05;Note=helix+loop+helix +ctgA example coding 1050 9000 . + . ID=mRNA:EDEN.1;Gene=EDEN +ctgA example CDS 1201 1500 . + 0 Parent=mRNA:EDEN.1 +ctgA example CDS 3000 3902 . + 0 Parent=mRNA:EDEN.1 +ctgA example CDS 5000 5500 . + 0 Parent=mRNA:EDEN.1 +ctgA example CDS 7000 7608 . + 0 Parent=mRNA:EDEN.1 +ctgA example processed_transcript 1050 9000 . + . ID=mRNA:EDEN.1 +ctgA example 5'-UTR 1050 1200 . + . Parent=mRNA:EDEN.1 +ctgA example 3'-UTR 7609 9000 . + . Parent=mRNA:EDEN.1 +ctgA est match 5410 7503 . - . ID=EST:agt830.3;Target=agt830.3+1+595 +ctgA est HSP 7000 7503 . - . Parent=EST:agt830.3;Target=agt830.3+1+504 +ctgA est HSP 5410 5500 . - . Parent=EST:agt830.3;Target=agt830.3+505+595 +ctgA example motif 46012 48851 . + . 
ID=Motif:m09;Note=kinase +ctgA example match 6885 8999 . - . ID=Match:seg03 +ctgA example HSP 8306 8999 . - . Parent=Match:seg03 +ctgA example HSP 8055 8080 . - . Parent=Match:seg03 +ctgA example HSP 7410 7737 . - . Parent=Match:seg03 +ctgA example HSP 6885 7241 . - . Parent=Match:seg03 +ctgA example my_feature 13280 16394 . + . ID=My_feature:f08
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/testing.len Fri Jun 14 21:23:54 2024 +0000 @@ -0,0 +1,30 @@ +dummy_chr 100000000 +chr1 1000000000 +X 1000000000 +16 1000000000 +ctgA 1000000000 +Merlin 10000000 +super_1 1000000000 +chr1 1000000000 +chr7 2000000000 +chrX 2000000000 +phiX174 100000000 +random_phiX_region_1 100000000 +random_phiX_region_2 100000000 +random_phiX_region_3 100000000 +random_phiX_region_4 100000000 +random_phiX_region_5 100000000 +random_phiX_region_6 100000000 +random_phiX_region_7 100000000 +random_phiX_region_8 100000000 +random_phiX_region_9 100000000 +random_phiX_region_10 100000000 +random_phiX_region_11 100000000 +random_phiX_region_12 100000000 +random_phiX_region_13 100000000 +random_phiX_region_14 100000000 +random_phiX_region_15 100000000 +random_phiX_region_16 100000000 +random_phiX_region_17 100000000 +random_phiX_region_18 100000000 +random_phiX_region_19 100000000
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Fri Jun 14 21:23:54 2024 +0000 @@ -0,0 +1,7 @@ +<tables> + <!-- Data table of reference builds: each row gives a dbkey (value), its display name (name) and the path to its chromosome lengths file (len_path) --> + <table name="__dbkeys__" comment_char="#"> + <columns>value, name, len_path</columns> + <file path="tool-data/dbkeys.loc" /> + </table> +</tables> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Fri Jun 14 21:23:54 2024 +0000 @@ -0,0 +1,7 @@ +<tables> + <!-- Test copy of the reference build data table: each row gives a dbkey (value), its display name (name) and the path to its chromosome lengths file (len_path); rows are read from test-data/dbkeys.loc.test --> + <table name="__dbkeys__" comment_char="#" allow_duplicate_entries="False"> + <columns>value, name, len_path</columns> + <file path="${__HERE__}/test-data/dbkeys.loc.test" /> + </table> +</tables>