Mercurial > repos > fubar > jbrowse2
comparison jbrowse2.py @ 46:4181e97c70a7 draft
planemo upload for repository https://github.com/usegalaxy-eu/temporary-tools/tree/master/jbrowse2 commit 3a43e9e0ffce0966101203102e769d1ced28618a
author | fubar |
---|---|
date | Mon, 04 Mar 2024 09:47:19 +0000 |
parents | bea0916e1080 |
children | 3e53204c2419 |
comparison
equal
deleted
inserted
replaced
45:0ec526d2d8c1 | 46:4181e97c70a7 |
---|---|
9 import re | 9 import re |
10 import shutil | 10 import shutil |
11 import struct | 11 import struct |
12 import subprocess | 12 import subprocess |
13 import tempfile | 13 import tempfile |
14 import urllib.request | |
14 import xml.etree.ElementTree as ET | 15 import xml.etree.ElementTree as ET |
15 from collections import defaultdict | 16 from collections import defaultdict |
16 | 17 |
17 logging.basicConfig(level=logging.INFO) | 18 logging.basicConfig(level=logging.INFO) |
18 log = logging.getLogger("jbrowse") | 19 log = logging.getLogger("jbrowse") |
444 ] | 445 ] |
445 } | 446 } |
446 | 447 |
def process_genomes(self):
    """Build a JBrowse2 assembly entry for every configured genome.

    Iterates self.genome_paths, creating one assembly per unique genome
    name via make_assembly().  The first genome encountered becomes
    self.genome_name (used by all non-PAF tracks), and its first contig
    name is recorded in self.genome_firstcontig for the default view.
    Genomes flagged useuri are referenced remotely (their .fai is read
    over HTTP) instead of being resolved on the local filesystem.
    """
    assemblies = []
    useuri = False
    for i, genome_node in enumerate(self.genome_paths):
        if genome_node["useuri"].strip().lower() == "yes":
            useuri = True
        genome_name = genome_node["meta"]["dataset_dname"].strip()
        if len(genome_name.split()) > 1:
            genome_name = genome_name.split()[0]
            # spaces and cruft break scripts when substituted
        if genome_name not in self.genome_names:
            # ignore dupes - can have multiple pafs with same references?
            fapath = genome_node["path"]
            if not useuri:
                fapath = os.path.realpath(fapath)
            assem = self.make_assembly(fapath, genome_name, useuri)
            assemblies.append(assem)
            self.genome_names.append(genome_name)
            if self.genome_name is None:
                self.genome_name = (
                    genome_name  # first one for all tracks - other than paf
                )
                self.genome_firstcontig = None
                if not useuri:
                    fl = open(fapath, "r").readline()
                    fls = fl.strip().split(">")
                    if len(fls) > 1:
                        fl = fls[1]
                        if len(fl.split()) > 1:
                            self.genome_firstcontig = fl.split()[0].strip()
                        else:
                            self.genome_firstcontig = fl
                else:
                    # BUG FIX: previous code read
                    #   urrlib.request.urlopen(faname + ".fai")
                    # `urrlib` is a misspelling of `urllib` and `faname`
                    # is not defined in this scope (it is a local of
                    # make_assembly) -- both raise NameError.  Use the
                    # genome URI (fapath) with the imported urllib.request.
                    fl = urllib.request.urlopen(fapath + ".fai").readline()
                    if fl:  # first row of the text fai, so the first contig name
                        self.genome_firstcontig = fl.decode("utf8").strip().split()[0]
    if self.config_json.get("assemblies", None):
        self.config_json["assemblies"] += assemblies
    else:
        self.config_json["assemblies"] = assemblies
476 | 490 |
477 def make_assembly(self, fapath, gname): | 491 def make_assembly(self, fapath, gname, useuri): |
478 | 492 if useuri: |
479 faname = gname + ".fa.gz" | 493 faname = fapath |
480 fadest = os.path.realpath(os.path.join(self.outdir, faname)) | 494 adapter = { |
481 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % ( | 495 "type": "BgzipFastaAdapter", |
482 fapath, | 496 "fastaLocation": { |
483 fadest, | 497 "uri": faname, |
484 fadest, | 498 "locationType": "UriLocation", |
485 fadest, | 499 }, |
486 ) | 500 "faiLocation": { |
487 self.subprocess_popen(cmd) | 501 "uri": faname + ".fai", |
488 adapter = { | 502 "locationType": "UriLocation", |
489 "type": "BgzipFastaAdapter", | 503 }, |
490 "fastaLocation": { | 504 "gziLocation": { |
491 "uri": faname, | 505 "uri": faname + ".gzi", |
492 }, | 506 "locationType": "UriLocation", |
493 "faiLocation": { | 507 }, |
494 "uri": faname + ".fai", | 508 } |
495 }, | 509 else: |
496 "gziLocation": { | 510 faname = gname + ".fa.gz" |
497 "uri": faname + ".gzi", | 511 fadest = os.path.realpath(os.path.join(self.outdir, faname)) |
498 }, | 512 cmd = "bgzip -i -c %s -I %s.gzi > %s && samtools faidx %s" % ( |
499 } | 513 fapath, |
514 fadest, | |
515 fadest, | |
516 fadest, | |
517 ) | |
518 self.subprocess_popen(cmd) | |
519 | |
520 adapter = { | |
521 "type": "BgzipFastaAdapter", | |
522 "fastaLocation": { | |
523 "uri": faname, | |
524 }, | |
525 "faiLocation": { | |
526 "uri": faname + ".fai", | |
527 }, | |
528 "gziLocation": { | |
529 "uri": faname + ".gzi", | |
530 }, | |
531 } | |
500 self.genome_sequence_adapter = adapter | 532 self.genome_sequence_adapter = adapter |
501 trackDict = { | 533 trackDict = { |
502 "name": gname, | 534 "name": gname, |
503 "sequence": { | 535 "sequence": { |
504 "type": "ReferenceSequenceTrack", | 536 "type": "ReferenceSequenceTrack", |
526 ] | 558 ] |
527 self.subprocess_check_call(cmd) | 559 self.subprocess_check_call(cmd) |
528 | 560 |
def write_config(self):
    """Serialise the accumulated JBrowse2 configuration to disk.

    Pretty-prints self.config_json (indent=2) into
    self.config_json_file so the generated config stays readable
    and diffable.
    """
    serialised = json.dumps(self.config_json, indent=2)
    with open(self.config_json_file, "w") as handle:
        handle.write(serialised)
532 | 564 |
533 def text_index(self): | 565 def text_index(self): |
534 # Index tracks | 566 # Index tracks |
535 args = [ | 567 args = [ |
536 "jbrowse", | 568 "jbrowse", |
565 """ | 597 """ |
566 tId = trackData["label"] | 598 tId = trackData["label"] |
567 # can be served - if public. | 599 # can be served - if public. |
568 # dsId = trackData["metadata"]["dataset_id"] | 600 # dsId = trackData["metadata"]["dataset_id"] |
569 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId) | 601 # url = "%s/api/datasets/%s/display?to_ext=hic " % (self.giURL, dsId) |
570 hname = trackData["hic_url"] | 602 useuri = trackData["useuri"].lower() == "yes" |
571 floc = { | 603 if useuri: |
572 "uri": hname, | 604 uri = data |
573 } | 605 else: |
606 uri = trackData["hic_url"] | |
574 trackDict = { | 607 trackDict = { |
575 "type": "HicTrack", | 608 "type": "HicTrack", |
576 "trackId": tId, | 609 "trackId": tId, |
577 "name": hname, | 610 "name": uri, |
578 "assemblyNames": [self.genome_name], | 611 "assemblyNames": [self.genome_name], |
579 "adapter": { | 612 "adapter": { |
580 "type": "HicAdapter", | 613 "type": "HicAdapter", |
581 "hicLocation": floc, | 614 "hicLocation": uri, |
582 }, | 615 }, |
583 "displays": [ | 616 "displays": [ |
584 { | 617 { |
585 "type": "LinearHicDisplay", | 618 "type": "LinearHicDisplay", |
586 "displayId": "%s-LinearHicDisplay" % tId, | 619 "displayId": "%s-LinearHicDisplay" % tId, |
597 from https://github.com/cmdcolin/maf2bed | 630 from https://github.com/cmdcolin/maf2bed |
598 Note: Both formats start with a MAF as input, and note that your MAF file should contain the species name and chromosome name | 631 Note: Both formats start with a MAF as input, and note that your MAF file should contain the species name and chromosome name |
599 e.g. hg38.chr1 in the sequence identifiers. | 632 e.g. hg38.chr1 in the sequence identifiers. |
600 need the reference id - eg hg18, for maf2bed.pl as the first parameter | 633 need the reference id - eg hg18, for maf2bed.pl as the first parameter |
601 """ | 634 """ |
635 tId = trackData["label"] | |
602 mafPlugin = { | 636 mafPlugin = { |
603 "plugins": [ | 637 "plugins": [ |
604 { | 638 { |
605 "name": "MafViewer", | 639 "name": "MafViewer", |
606 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js", | 640 "url": "https://unpkg.com/jbrowse-plugin-mafviewer/dist/jbrowse-plugin-mafviewer.umd.production.min.js", |
607 } | 641 } |
608 ] | 642 ] |
609 } | 643 } |
610 tId = trackData["label"] | 644 |
611 fname = "%s.bed" % tId | 645 fname = "%s.bed" % tId |
612 dest = "%s/%s" % (self.outdir, fname) | 646 dest = "%s/%s" % (self.outdir, fname) |
613 gname = self.genome_name | 647 gname = self.genome_name |
614 cmd = [ | 648 cmd = [ |
615 "bash", | 649 "bash", |
742 "resolution": 1, | 776 "resolution": 1, |
743 "posColor": "rgb(228, 26, 28)", | 777 "posColor": "rgb(228, 26, 28)", |
744 "negColor": "rgb(255, 255, 51)", | 778 "negColor": "rgb(255, 255, 51)", |
745 "constraints": {} | 779 "constraints": {} |
746 """ | 780 """ |
747 url = "%s.bigwig" % trackData["label"] | 781 useuri = trackData["useuri"].lower() == "yes" |
748 # slashes in names cause path trouble | 782 if useuri: |
749 dest = os.path.join(self.outdir, url) | 783 url = data |
750 cmd = ["cp", data, dest] | 784 else: |
751 self.subprocess_check_call(cmd) | 785 url = "%s.bigwig" % trackData["label"] |
786 # slashes in names cause path trouble | |
787 dest = os.path.join(self.outdir, url) | |
788 cmd = ["cp", data, dest] | |
789 self.subprocess_check_call(cmd) | |
752 bwloc = {"uri": url} | 790 bwloc = {"uri": url} |
753 tId = trackData["label"] | 791 tId = trackData["label"] |
754 trackDict = { | 792 trackDict = { |
755 "type": "QuantitativeTrack", | 793 "type": "QuantitativeTrack", |
756 "trackId": tId, | 794 "trackId": tId, |
772 style_json = self._prepare_track_style(trackDict) | 810 style_json = self._prepare_track_style(trackDict) |
773 trackDict["style"] = style_json | 811 trackDict["style"] = style_json |
774 self.tracksToAdd.append(trackDict) | 812 self.tracksToAdd.append(trackDict) |
775 self.trackIdlist.append(tId) | 813 self.trackIdlist.append(tId) |
776 | 814 |
777 def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs): | 815 def add_bam(self, data, trackData, bam_index=None, **kwargs): |
778 tId = trackData["label"] | 816 tId = trackData["label"] |
779 fname = "%s.bam" % trackData["label"] | 817 useuri = trackData["useuri"].lower() == "yes" |
780 dest = "%s/%s" % (self.outdir, fname) | 818 bindex = bam_index |
781 url = fname | 819 if useuri: |
782 self.subprocess_check_call(["cp", data, dest]) | 820 url = data |
783 bloc = {"uri": url} | |
784 if bam_index is not None and os.path.exists(os.path.realpath(bam_index)): | |
785 # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest | |
786 self.subprocess_check_call( | |
787 ["cp", os.path.realpath(bam_index), dest + ".bai"] | |
788 ) | |
789 else: | 821 else: |
790 # Can happen in exotic condition | 822 fname = "%s.bam" % trackData["label"] |
791 # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam | 823 dest = "%s/%s" % (self.outdir, fname) |
792 # => no index generated by galaxy, but there might be one next to the symlink target | 824 url = fname |
793 # this trick allows to skip the bam sorting made by galaxy if already done outside | 825 bindex = fname + '.bai' |
794 if os.path.exists(os.path.realpath(data) + ".bai"): | 826 self.subprocess_check_call(["cp", data, dest]) |
795 self.symlink_or_copy(os.path.realpath(data) + ".bai", dest + ".bai") | 827 if bam_index is not None and os.path.exists(bam_index): |
796 else: | 828 if not os.path.exists(bindex): |
797 log.warn("Could not find a bam index (.bai file) for %s", data) | 829 # bai most probably made by galaxy and stored in galaxy dirs, need to copy it to dest |
830 self.subprocess_check_call( | |
831 ["cp", bam_index, bindex] | |
832 ) | |
833 else: | |
834 # Can happen in exotic condition | |
835 # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam | |
836 # => no index generated by galaxy, but there might be one next to the symlink target | |
837 # this trick allows to skip the bam sorting made by galaxy if already done outside | |
838 if os.path.exists(os.path.realpath(data) + ".bai"): | |
839 self.symlink_or_copy(os.path.realpath(data) + ".bai", bindex) | |
840 else: | |
841 log.warn("Could not find a bam index (.bai file) for %s", data) | |
798 trackDict = { | 842 trackDict = { |
799 "type": "AlignmentsTrack", | 843 "type": "AlignmentsTrack", |
800 "trackId": tId, | 844 "trackId": tId, |
801 "name": trackData["name"], | 845 "name": trackData["name"], |
802 "assemblyNames": [self.genome_name], | 846 "assemblyNames": [self.genome_name], |
803 "adapter": { | 847 "adapter": { |
804 "type": "BamAdapter", | 848 "type": "BamAdapter", |
805 "bamLocation": bloc, | 849 "bamLocation": {"uri": url}, |
806 "index": { | 850 "index": { |
807 "location": { | 851 "location": { |
808 "uri": fname + ".bai", | 852 "uri": bindex, |
809 } | 853 } |
810 }, | 854 }, |
811 }, | 855 }, |
812 "displays": [ | 856 "displays": [ |
813 { | 857 { |
819 style_json = self._prepare_track_style(trackDict) | 863 style_json = self._prepare_track_style(trackDict) |
820 trackDict["style"] = style_json | 864 trackDict["style"] = style_json |
821 self.tracksToAdd.append(trackDict) | 865 self.tracksToAdd.append(trackDict) |
822 self.trackIdlist.append(tId) | 866 self.trackIdlist.append(tId) |
823 | 867 |
824 def add_cram(self, data, trackData, cramOpts, cram_index=None, **kwargs): | 868 def add_cram(self, data, trackData, cram_index=None, **kwargs): |
825 tId = trackData["label"] | 869 tId = trackData["label"] |
826 fname = "%s.cram" % trackData["label"] | 870 useuri = trackData["useuri"].lower() == "yes" |
827 dest = "%s/%s" % (self.outdir, fname) | 871 bindex = cram_index |
828 url = fname | 872 if useuri: |
829 self.subprocess_check_call(["cp", data, dest]) | 873 url = data |
830 bloc = {"uri": url} | |
831 if cram_index is not None and os.path.exists(os.path.realpath(cram_index)): | |
832 # most probably made by galaxy and stored in galaxy dirs, need to copy it to dest | |
833 self.subprocess_check_call( | |
834 ["cp", os.path.realpath(cram_index), dest + ".crai"] | |
835 ) | |
836 else: | 874 else: |
837 # Can happen in exotic condition | 875 fname = "%s.cram" % trackData["label"] |
838 # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam | 876 dest = "%s/%s" % (self.outdir, fname) |
839 # => no index generated by galaxy, but there might be one next to the symlink target | 877 bindex = fname + '.bai' |
840 # this trick allows to skip the bam sorting made by galaxy if already done outside | 878 url = fname |
841 if os.path.exists(os.path.realpath(data) + ".crai"): | 879 self.subprocess_check_call(["cp", data, dest]) |
842 self.symlink_or_copy(os.path.realpath(data) + ".crai", dest + ".crai") | 880 |
843 else: | 881 if bindex is not None and os.path.exists(bindex): |
844 log.warn("Could not find a cram index (.crai file) for %s", data) | 882 if not os.path.exists(dest+'.crai'): |
883 # most probably made by galaxy and stored in galaxy dirs, need to copy it to dest | |
884 self.subprocess_check_call( | |
885 ["cp", os.path.realpath(cram_index), dest + ".crai"] | |
886 ) | |
887 else: | |
888 # Can happen in exotic condition | |
889 # e.g. if bam imported as symlink with datatype=unsorted.bam, then datatype changed to bam | |
890 # => no index generated by galaxy, but there might be one next to the symlink target | |
891 # this trick allows to skip the bam sorting made by galaxy if already done outside | |
892 if os.path.exists(os.path.realpath(data) + ".crai"): | |
893 self.symlink_or_copy( | |
894 os.path.realpath(data) + ".crai", dest + ".crai" | |
895 ) | |
896 else: | |
897 log.warn("Could not find a cram index (.crai file) for %s", data) | |
845 trackDict = { | 898 trackDict = { |
846 "type": "AlignmentsTrack", | 899 "type": "AlignmentsTrack", |
847 "trackId": tId, | 900 "trackId": tId, |
848 "name": trackData["name"], | 901 "name": trackData["name"], |
849 "assemblyNames": [self.genome_name], | 902 "assemblyNames": [self.genome_name], |
850 "adapter": { | 903 "adapter": { |
851 "type": "CramAdapter", | 904 "type": "CramAdapter", |
852 "cramLocation": bloc, | 905 "cramLocation": {"uri": url}, |
853 "craiLocation": { | 906 "craiLocation": { |
854 "uri": fname + ".crai", | 907 "uri": bindex, |
855 }, | 908 }, |
856 "sequenceAdapter": self.genome_sequence_adapter, | 909 "sequenceAdapter": self.genome_sequence_adapter, |
857 }, | 910 }, |
858 "displays": [ | 911 "displays": [ |
859 { | 912 { |
871 tId = trackData["label"] | 924 tId = trackData["label"] |
872 # url = "%s/api/datasets/%s/display" % ( | 925 # url = "%s/api/datasets/%s/display" % ( |
873 # self.giURL, | 926 # self.giURL, |
874 # trackData["metadata"]["dataset_id"], | 927 # trackData["metadata"]["dataset_id"], |
875 # ) | 928 # ) |
876 url = "%s.vcf.gz" % tId | 929 |
877 dest = "%s/%s" % (self.outdir, url) | 930 useuri = trackData["useuri"].lower() == "yes" |
878 cmd = "bgzip -c %s > %s" % (data, dest) | 931 if useuri: |
879 self.subprocess_popen(cmd) | 932 url = data |
880 cmd = ["tabix", "-f", "-p", "vcf", dest] | 933 else: |
881 self.subprocess_check_call(cmd) | 934 url = "%s.vcf.gz" % tId |
935 dest = "%s/%s" % (self.outdir, url) | |
936 cmd = "bgzip -c %s > %s" % (data, dest) | |
937 self.subprocess_popen(cmd) | |
938 cmd = ["tabix", "-f", "-p", "vcf", dest] | |
939 self.subprocess_check_call(cmd) | |
882 trackDict = { | 940 trackDict = { |
883 "type": "VariantTrack", | 941 "type": "VariantTrack", |
884 "trackId": tId, | 942 "trackId": tId, |
885 "name": trackData["name"], | 943 "name": trackData["name"], |
886 "assemblyNames": [self.genome_name], | 944 "assemblyNames": [self.genome_name], |
887 "adapter": { | 945 "adapter": { |
888 "type": "VcfTabixAdapter", | 946 "type": "VcfTabixAdapter", |
889 "vcfGzLocation": { | 947 "vcfGzLocation": { |
890 "uri": url, | 948 "uri": url |
891 }, | 949 }, |
892 "index": { | 950 "index": { |
893 "location": { | 951 "location": { |
894 "uri": url + ".tbi", | 952 "uri": url + ".tbi", |
895 } | 953 } |
915 self.tracksToAdd.append(trackDict) | 973 self.tracksToAdd.append(trackDict) |
916 self.trackIdlist.append(tId) | 974 self.trackIdlist.append(tId) |
917 | 975 |
918 def _sort_gff(self, data, dest): | 976 def _sort_gff(self, data, dest): |
919 # Only index if not already done | 977 # Only index if not already done |
920 if not os.path.exists(dest + ".gz"): | 978 if not os.path.exists(dest): |
921 cmd = "jbrowse sort-gff '%s' | bgzip -c > '%s.gz'" % ( | 979 cmd = "jbrowse sort-gff '%s' | bgzip -c > '%s'" % ( |
922 data, | 980 data, |
923 dest, | 981 dest, |
924 ) # "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'" | 982 ) # "gff3sort.pl --precise '%s' | grep -v \"^$\" > '%s'" |
925 self.subprocess_popen(cmd) | 983 self.subprocess_popen(cmd) |
926 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest + ".gz"]) | 984 self.subprocess_check_call(["tabix", "-f", "-p", "gff", dest]) |
927 | 985 |
928 def _sort_bed(self, data, dest): | 986 def _sort_bed(self, data, dest): |
929 # Only index if not already done | 987 # Only index if not already done |
930 if not os.path.exists(dest): | 988 if not os.path.exists(dest): |
931 cmd = "sort -k1,1 -k2,2n '%s' | bgzip -c > '%s'" % (data, dest) | 989 cmd = "sort -k1,1 -k2,2n '%s' | bgzip -c > '%s'" % (data, dest) |
932 self.subprocess_popen(cmd) | 990 self.subprocess_popen(cmd) |
933 cmd = ["tabix", "-f", "-p", "bed", dest] | 991 cmd = ["tabix", "-f", "-p", "bed", dest] |
934 self.subprocess_check_call(cmd) | 992 self.subprocess_check_call(cmd) |
935 | 993 |
936 def add_gff(self, data, ext, trackData): | 994 def add_gff(self, data, ext, trackData): |
937 url = "%s.%s" % (trackData["label"], ext) | 995 useuri = trackData["useuri"].lower() == "yes" |
938 dest = "%s/%s" % (self.outdir, url) | 996 if useuri: |
939 self._sort_gff(data, dest) | 997 url = trackData["path"] |
940 url = url + ".gz" | 998 else: |
999 url = "%s.%s.gz" % (trackData["label"], ext) | |
1000 dest = "%s/%s" % (self.outdir, url) | |
1001 self._sort_gff(data, dest) | |
941 tId = trackData["label"] | 1002 tId = trackData["label"] |
942 trackDict = { | 1003 trackDict = { |
943 "type": "FeatureTrack", | 1004 "type": "FeatureTrack", |
944 "trackId": tId, | 1005 "trackId": tId, |
945 "name": trackData["name"], | 1006 "name": trackData["name"], |
970 trackDict["style"] = style_json | 1031 trackDict["style"] = style_json |
971 self.tracksToAdd.append(trackDict) | 1032 self.tracksToAdd.append(trackDict) |
972 self.trackIdlist.append(tId) | 1033 self.trackIdlist.append(tId) |
973 | 1034 |
974 def add_bed(self, data, ext, trackData): | 1035 def add_bed(self, data, ext, trackData): |
975 url = "%s.%s" % (trackData["label"], ext) | |
976 dest = "%s/%s.gz" % (self.outdir, url) | |
977 self._sort_bed(data, dest) | |
978 tId = trackData["label"] | 1036 tId = trackData["label"] |
979 url = url + ".gz" | 1037 useuri = trackData["useuri"].lower() == "yes" |
1038 if useuri: | |
1039 url = data | |
1040 else: | |
1041 url = "%s.%s.gz" % (trackData["label"], ext) | |
1042 dest = "%s/%s" % (self.outdir, url) | |
1043 self._sort_bed(data, dest) | |
980 trackDict = { | 1044 trackDict = { |
981 "type": "FeatureTrack", | 1045 "type": "FeatureTrack", |
982 "trackId": tId, | 1046 "trackId": tId, |
983 "name": trackData["name"], | 1047 "name": trackData["name"], |
984 "assemblyNames": [self.genome_name], | 1048 "assemblyNames": [self.genome_name], |
1066 def process_annotations(self, track): | 1130 def process_annotations(self, track): |
1067 category = track["category"].replace("__pd__date__pd__", TODAY) | 1131 category = track["category"].replace("__pd__date__pd__", TODAY) |
1068 for i, ( | 1132 for i, ( |
1069 dataset_path, | 1133 dataset_path, |
1070 dataset_ext, | 1134 dataset_ext, |
1135 useuri, | |
1071 track_human_label, | 1136 track_human_label, |
1072 extra_metadata, | 1137 extra_metadata, |
1073 ) in enumerate(track["trackfiles"]): | 1138 ) in enumerate(track["trackfiles"]): |
1074 # Unsanitize labels (element_identifiers are always sanitized by Galaxy) | 1139 if not dataset_path.strip().startswith("http"): |
1075 for key, value in mapped_chars.items(): | 1140 # Unsanitize labels (element_identifiers are always sanitized by Galaxy) |
1076 track_human_label = track_human_label.replace(value, key) | 1141 for key, value in mapped_chars.items(): |
1077 track_human_label = track_human_label.replace(" ", "_") | 1142 track_human_label = track_human_label.replace(value, key) |
1143 track_human_label = track_human_label.replace(" ", "_") | |
1078 outputTrackConfig = { | 1144 outputTrackConfig = { |
1079 "category": category, | 1145 "category": category, |
1080 "style": {}, | 1146 "style": {}, |
1081 } | 1147 } |
1082 | 1148 |
1083 outputTrackConfig["key"] = track_human_label | 1149 outputTrackConfig["key"] = track_human_label |
1150 outputTrackConfig["useuri"] = useuri | |
1084 | 1151 |
1085 outputTrackConfig["trackset"] = track.get("trackset", {}) | 1152 outputTrackConfig["trackset"] = track.get("trackset", {}) |
1086 outputTrackConfig["label"] = "%s_%i_%s" % ( | 1153 outputTrackConfig["label"] = "%s_%i_%s" % ( |
1087 dataset_ext, | 1154 dataset_ext, |
1088 i, | 1155 i, |
1137 dataset_path, | 1204 dataset_path, |
1138 outputTrackConfig, | 1205 outputTrackConfig, |
1139 ) | 1206 ) |
1140 elif dataset_ext == "bam": | 1207 elif dataset_ext == "bam": |
1141 real_indexes = track["conf"]["options"]["bam"]["bam_index"] | 1208 real_indexes = track["conf"]["options"]["bam"]["bam_index"] |
1142 if not isinstance(real_indexes, list): | |
1143 real_indexes = [real_indexes] | |
1144 | |
1145 self.add_bam( | 1209 self.add_bam( |
1146 dataset_path, | 1210 dataset_path, |
1147 outputTrackConfig, | 1211 outputTrackConfig, |
1148 track["conf"]["options"]["bam"], | 1212 bam_index=real_indexes, |
1149 bam_index=real_indexes[i], | |
1150 ) | 1213 ) |
1151 elif dataset_ext == "cram": | 1214 elif dataset_ext == "cram": |
1152 real_indexes = track["conf"]["options"]["cram"][ "cram_index"] | 1215 real_indexes = track["conf"]["options"]["cram"]["cram_index"] |
1153 if not isinstance(real_indexes, list): | |
1154 real_indexes = [real_indexes] | |
1155 | |
1156 self.add_cram( | 1216 self.add_cram( |
1157 dataset_path, | 1217 dataset_path, |
1158 outputTrackConfig, | 1218 outputTrackConfig, |
1159 track["conf"]["options"]["cram"], | 1219 cram_index=real_indexes, |
1160 cram_index=real_indexes[i], | |
1161 ) | 1220 ) |
1162 elif dataset_ext == "blastxml": | 1221 elif dataset_ext == "blastxml": |
1163 self.add_blastxml( | 1222 self.add_blastxml( |
1164 dataset_path, | 1223 dataset_path, |
1165 outputTrackConfig, | 1224 outputTrackConfig, |
1219 drdict = { | 1278 drdict = { |
1220 "reversed": False, | 1279 "reversed": False, |
1221 "assemblyName": self.genome_name, | 1280 "assemblyName": self.genome_name, |
1222 "start": 0, | 1281 "start": 0, |
1223 "end": 100000, | 1282 "end": 100000, |
1283 "refName": "x", | |
1224 } | 1284 } |
1225 | 1285 |
1226 if data.get("defaultLocation", ""): | 1286 if data.get("defaultLocation", ""): |
1227 ddl = data["defaultLocation"] | 1287 ddl = data["defaultLocation"] |
1228 loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl) | 1288 loc_match = re.search(r"^([^:]+):([\d,]*)\.*([\d,]*)$", ddl) |
1305 self.config_json.update(config_json) | 1365 self.config_json.update(config_json) |
1306 with open(config_path, "w") as config_file: | 1366 with open(config_path, "w") as config_file: |
1307 json.dump(self.config_json, config_file, indent=2) | 1367 json.dump(self.config_json, config_file, indent=2) |
1308 | 1368 |
1309 def clone_jbrowse(self): | 1369 def clone_jbrowse(self): |
1310 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now """ | 1370 """Clone a JBrowse directory into a destination directory. This also works in Biocontainer testing now""" |
1311 dest = self.outdir | 1371 dest = self.outdir |
1312 #self.subprocess_check_call(['jbrowse', 'create', dest, '--tag', f"{JB_VER}"]) | 1372 # self.subprocess_check_call(['jbrowse', 'create', dest, '--tag', f"{JB_VER}"]) |
1313 shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True) | 1373 shutil.copytree(self.jbrowse2path, dest, dirs_exist_ok=True) |
1314 for fn in [ | 1374 for fn in [ |
1315 "asset-manifest.json", | 1375 "asset-manifest.json", |
1316 "favicon.ico", | 1376 "favicon.ico", |
1317 "robots.txt", | 1377 "robots.txt", |
1339 | 1399 |
1340 | 1400 |
1341 if __name__ == "__main__": | 1401 if __name__ == "__main__": |
1342 parser = argparse.ArgumentParser(description="", epilog="") | 1402 parser = argparse.ArgumentParser(description="", epilog="") |
1343 parser.add_argument("--xml", help="Track Configuration") | 1403 parser.add_argument("--xml", help="Track Configuration") |
1344 parser.add_argument("--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda") | 1404 parser.add_argument( |
1405 "--jbrowse2path", help="Path to JBrowse2 directory in biocontainer or Conda" | |
1406 ) | |
1345 parser.add_argument("--outdir", help="Output directory", default="out") | 1407 parser.add_argument("--outdir", help="Output directory", default="out") |
1346 parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.0.1") | 1408 parser.add_argument("--version", "-V", action="version", version="%(prog)s 2.0.1") |
1347 args = parser.parse_args() | 1409 args = parser.parse_args() |
1348 tree = ET.parse(args.xml) | 1410 tree = ET.parse(args.xml) |
1349 root = tree.getroot() | 1411 root = tree.getroot() |
1358 jc = JbrowseConnector( | 1420 jc = JbrowseConnector( |
1359 outdir=args.outdir, | 1421 outdir=args.outdir, |
1360 jbrowse2path=args.jbrowse2path, | 1422 jbrowse2path=args.jbrowse2path, |
1361 genomes=[ | 1423 genomes=[ |
1362 { | 1424 { |
1363 "path": os.path.realpath(x.attrib["path"]), | 1425 "path": x.attrib["path"], |
1426 "label": x.attrib["label"], | |
1427 "useuri": x.attrib["useuri"], | |
1364 "meta": metadata_from_node(x.find("metadata")), | 1428 "meta": metadata_from_node(x.find("metadata")), |
1365 } | 1429 } |
1366 for x in root.findall("metadata/genomes/genome") | 1430 for x in root.findall("metadata/genomes/genome") |
1367 ], | 1431 ], |
1368 ) | 1432 ) |
1393 pass | 1457 pass |
1394 | 1458 |
1395 trackfiles = track.findall("files/trackFile") | 1459 trackfiles = track.findall("files/trackFile") |
1396 if trackfiles: | 1460 if trackfiles: |
1397 for x in track.findall("files/trackFile"): | 1461 for x in track.findall("files/trackFile"): |
1462 track_conf["useuri"] = x.attrib["useuri"] | |
1398 if is_multi_bigwig: | 1463 if is_multi_bigwig: |
1399 multi_bigwig_paths.append( | 1464 multi_bigwig_paths.append( |
1400 ( | 1465 ( |
1401 x.attrib["label"], | 1466 x.attrib["label"], |
1467 x.attrib["useuri"], | |
1402 os.path.realpath(x.attrib["path"]), | 1468 os.path.realpath(x.attrib["path"]), |
1403 ) | 1469 ) |
1404 ) | 1470 ) |
1405 else: | 1471 else: |
1406 if trackfiles: | 1472 if trackfiles: |
1407 metadata = metadata_from_node(x.find("metadata")) | 1473 metadata = metadata_from_node(x.find("metadata")) |
1408 track_conf["dataset_id"] = metadata["dataset_id"] | 1474 track_conf["dataset_id"] = metadata["dataset_id"] |
1409 track_conf["trackfiles"].append( | 1475 if x.attrib["useuri"].lower() == "yes": |
1410 ( | 1476 tfa = ( |
1411 os.path.realpath(x.attrib["path"]), | 1477 x.attrib["path"], |
1412 x.attrib["ext"], | 1478 x.attrib["ext"], |
1479 x.attrib["useuri"], | |
1413 x.attrib["label"], | 1480 x.attrib["label"], |
1414 metadata, | 1481 metadata, |
1415 ) | 1482 ) |
1416 ) | 1483 else: |
1484 tfa = ( | |
1485 os.path.realpath(x.attrib["path"]), | |
1486 x.attrib["ext"], | |
1487 x.attrib["useuri"], | |
1488 x.attrib["label"], | |
1489 metadata, | |
1490 ) | |
1491 track_conf["trackfiles"].append(tfa) | |
1417 | 1492 |
1418 if is_multi_bigwig: | 1493 if is_multi_bigwig: |
1419 metadata = metadata_from_node(x.find("metadata")) | 1494 metadata = metadata_from_node(x.find("metadata")) |
1420 | 1495 |
1421 track_conf["trackfiles"].append( | 1496 track_conf["trackfiles"].append( |
1445 # Only pertains to gff3 + blastxml. TODO? | 1520 # Only pertains to gff3 + blastxml. TODO? |
1446 track_conf["style"] = {t.tag: t.text for t in track.find("options/style")} | 1521 track_conf["style"] = {t.tag: t.text for t in track.find("options/style")} |
1447 except TypeError: | 1522 except TypeError: |
1448 track_conf["style"] = {} | 1523 track_conf["style"] = {} |
1449 pass | 1524 pass |
1450 track_conf["conf"] = etree_to_dict(track.find("options")) | |
1451 keys = jc.process_annotations(track_conf) | 1525 keys = jc.process_annotations(track_conf) |
1452 | 1526 |
1453 if keys: | 1527 if keys: |
1454 for key in keys: | 1528 for key in keys: |
1455 default_session_data["visibility"][ | 1529 default_session_data["visibility"][ |