Mercurial > repos > devteam > ucsc_custom_track
annotate build_ucsc_custom_track.py @ 2:3d87079756e1 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/main/tools/ucsc_custom_track commit 68ba77da2a1f8d6cd04dd7dc6efc5edcefcfa0c9"
author | devteam |
---|---|
date | Mon, 28 Feb 2022 20:06:04 +0000 |
parents | 618e56c3109b |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 """ | |
3 Build a UCSC genome browser custom track file | |
4 """ | |
5 | |
2
3d87079756e1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/main/tools/ucsc_custom_track commit 68ba77da2a1f8d6cd04dd7dc6efc5edcefcfa0c9"
devteam
parents:
0
diff
changeset
|
6 import sys |
0 | 7 |
2
3d87079756e1
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/main/tools/ucsc_custom_track commit 68ba77da2a1f8d6cd04dd7dc6efc5edcefcfa0c9"
devteam
parents:
0
diff
changeset
|
# File types whose contents are copied into the custom track unchanged.  The
# value is what goes into the track line's "type=" attribute; None means the
# attribute is left out entirely.
FILE_TYPE_TO_TRACK_TYPE = {
    'bed': None,
    'bedstrict': None,
    'bed6': None,
    'bed12': None,
    'bedgraph': 'bedGraph',
    'wig': 'wiggle_0',
}
CHUNK_SIZE = 2**20  # 1048576 (1 MiB)


def get_track_line_is_interval(file_type, name, description, color, visibility):
    """Build the ``track`` header line for one dataset.

    Args:
        file_type: Type label of the input dataset (e.g. ``'bed'``, ``'wig'``).
        name: Track name; emitted inside double quotes, unescaped.
        description: Track description; emitted inside double quotes, unescaped.
        color: Value written verbatim after ``color=``.
        visibility: Value written verbatim after ``visibility=``.

    Returns:
        A ``(track_line, is_interval)`` tuple.  ``track_line`` ends with a
        newline.  ``is_interval`` is True exactly when ``file_type`` is not a
        key of ``FILE_TYPE_TO_TRACK_TYPE``, i.e. the dataset cannot be copied
        as-is and is treated as a generic interval file by the caller.
    """
    is_interval = file_type not in FILE_TYPE_TO_TRACK_TYPE
    # .get() yields None both for unknown types and for known types that need
    # no explicit "type=" attribute, so a single truthiness test covers both.
    track_type = FILE_TYPE_TO_TRACK_TYPE.get(file_type)

    track_line = 'track '
    if track_type:
        track_line += f"type={track_type} "
    track_line += f'name="{name}" description="{description}" color={color} visibility={visibility}\n'
    return track_line, is_interval
24 | |
25 | |
# Each dataset is described on the command line by exactly this many values:
# in_fname, file_type, colspec, name, description, color, visibility.
ARGS_PER_DATASET = 7


def parse_colspec(colspec):
    """Turn a comma-separated, 1-based column spec into 0-based indices.

    The spec is expected to hold four integers; the original script names them
    c, s, e, st (presumably chromosome, start, end, strand).  If it does not
    parse as exactly four integers, the first three are used and the strand
    index is reported as -1, meaning "no strand column".

    Returns:
        A ``(c, s, e, st)`` tuple of ints.

    Raises:
        SystemExit: if not even the first three entries are integers.
    """
    parts = colspec.split(",")
    try:
        c, s, e, st = (int(x) - 1 for x in parts)
    except ValueError:
        # Raised both by int() on a non-numeric entry and by unpacking a
        # spec that does not contain exactly four entries.
        try:
            c, s, e = (int(x) - 1 for x in parts[:3])
        except ValueError:
            sys.exit("Columns in interval file invalid for UCSC custom track.")
        st = -1  # strand column is absent
    return c, s, e, st


def copy_verbatim(in_file, out, chunk_size):
    """Copy everything readable from ``in_file`` to ``out`` in chunks."""
    # read() returns "" at end of file, which is the sentinel that stops iter().
    for chunk in iter(lambda: in_file.read(chunk_size), ""):
        out.write(chunk)


def write_interval_as_bed(in_file, out, columns):
    """Rewrite tab-separated interval lines as 3- or 6-column records.

    Blank lines and lines starting with ``#`` are ignored.  With a strand
    column (``st >= 0``) each record is ``c, s, e, <line index>, 0, strand``;
    without one it is just ``c, s, e``.  The line index is the zero-based
    position of the line in ``in_file``, counting ignored lines too.

    Args:
        in_file: Iterable of text lines.
        out: Object with a ``write(str)`` method.
        columns: ``(c, s, e, st)`` zero-based column indices; ``st`` negative
            means there is no strand column.

    Returns:
        ``(skipped, first_invalid)``: how many lines lacked a required column,
        and the 1-based number of the first such line (0 if there was none).
    """
    c, s, e, st = columns
    # >= 0, not > 0: index 0 is a legitimate position for the strand column.
    has_strand = st >= 0
    skipped = 0
    first_invalid = 0
    for i, line in enumerate(in_file):
        line = line.rstrip('\r\n')
        if not line or line.startswith('#'):
            continue
        fields = line.split("\t")
        try:
            if has_strand:
                record = f"{fields[c]}\t{fields[s]}\t{fields[e]}\t{i}\t0\t{fields[st]}\n"
            else:
                record = f"{fields[c]}\t{fields[s]}\t{fields[e]}\n"
        except IndexError:
            # The line has fewer columns than the spec requires.
            skipped += 1
            if not first_invalid:
                first_invalid = i + 1
            continue
        # Written outside the try so that genuine I/O errors propagate
        # instead of being miscounted as invalid input lines.
        out.write(record)
    return skipped, first_invalid


def main(argv=None):
    """Build the custom track file described by the command-line arguments.

    Expected arguments: the output file name, followed by any number of
    7-value groups (in_fname, file_type, colspec, name, description, color,
    visibility), one group per dataset.

    Args:
        argv: Argument list to use instead of ``sys.argv[1:]``.

    Raises:
        SystemExit: on a malformed argument list or an unusable colspec.
    """
    args = list(sys.argv[1:] if argv is None else argv)
    if not args:
        sys.exit("Missing output file name.")
    out_fname, dataset_args = args[0], args[1:]
    # Validate before opening the output so a bad call cannot leave a
    # truncated, half-written track file behind.
    if len(dataset_args) % ARGS_PER_DATASET:
        sys.exit(
            f"Expected {ARGS_PER_DATASET} arguments per dataset, "
            f"got {len(dataset_args)} arguments in total."
        )

    num_tracks = 0
    skipped_lines = 0
    first_invalid_line = 0
    with open(out_fname, "w") as out:
        for offset in range(0, len(dataset_args), ARGS_PER_DATASET):
            (in_fname, file_type, colspec, name,
             description, color, visibility) = dataset_args[offset:offset + ARGS_PER_DATASET]
            # Color arrives dash-separated and is written comma-separated.
            color = color.replace('-', ',')
            track_line, is_interval = get_track_line_is_interval(file_type, name, description, color, visibility)
            out.write(track_line)
            with open(in_fname) as in_file:
                if not is_interval:
                    copy_verbatim(in_file, out, CHUNK_SIZE)
                else:
                    # Assume type is interval (don't pass this script anything else!)
                    columns = parse_colspec(colspec)
                    skipped, first_invalid = write_interval_as_bed(in_file, out, columns)
                    skipped_lines += skipped
                    # Only the first invalid line seen across all datasets is reported.
                    if not first_invalid_line:
                        first_invalid_line = first_invalid
            out.write("\n")  # separating newline
            num_tracks += 1

    print(f"Generated a custom track containing {num_tracks} subtracks.")
    if skipped_lines:
        print(f"Skipped {skipped_lines} invalid lines starting at #{first_invalid_line}")


if __name__ == "__main__":
    main()