annotate split_imgt_file.py @ 92:cf8ad181628f draft

planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
author rhpvorderman
date Mon, 12 Dec 2022 12:32:44 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
92
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
1 #!/usr/bin/env python3
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
2
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
3 """
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
4 Script to split IMGT file into several archives for each of the genes
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
5
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
6 Rather than creating each new archive individually this script will read
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
7 the input files only once and as such enormously shorten processing time.
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
8 """
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
9
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
10 import argparse
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
11 import io
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
12 import os
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
13 import tarfile
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
14 import tempfile
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
15 from typing import Iterator, List, Tuple
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
16
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
17
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
18 def merged_txt_to_match_dict(merged: str):
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
19 with open(merged, "rt") as f:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
20 header = next(f).strip("\n")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
21 column_names = header.split("\t")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
22 # For the baseline result there is no best_match column
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
23 if "best_match" in column_names:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
24 best_match_index = column_names.index("best_match")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
25 else:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
26 best_match_index = None
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
27 sequence_id_index = column_names.index("Sequence.ID")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
28 match_dict = {}
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
29 for line in f:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
30 values = line.strip().split("\t")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
31 sequence_id = values[sequence_id_index]
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
32 if best_match_index is not None:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
33 best_match = values[best_match_index]
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
34 if "unmatched" in best_match:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
35 # For some reason the table has values such as: unmatched, IGA2
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
36 continue
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
37 else:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
38 best_match = ""
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
39 match_dict[sequence_id] = best_match
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
40 return match_dict
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
41
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
42
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
43 def imgt_to_tables(imgt_file: str) -> Iterator[Tuple[str, io.TextIOWrapper]]:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
44 print(f"opening IMGT file: {imgt_file}")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
45 with tarfile.open(imgt_file, "r") as archive:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
46 while True:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
47 member = archive.next()
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
48 if member is None:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
49 return
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
50 if member.name in {"README.txt"}:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
51 continue
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
52 if member.name.startswith("11_"):
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
53 continue
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
54 f = archive.extractfile(member)
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
55 f_text = io.TextIOWrapper(f)
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
56 yield member.name, f_text
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
57 f_text.close()
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
58
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
59
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
60 def split_imgt(imgt_file: str, merged_file: str, outdir: str, genes: List[str],
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
61 prefix: str):
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
62 """
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
63 This function creates a separate tar file for each of the gene matches
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
64 based on the merged file. Unmatched genes are left out.
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
65 :param imgt_file: The original IMGT file
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
66 :param merged_file: The merged data file generated by SHM&CSR pipeline
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
67 :param outdir: The output directory.
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
68 :param genes: The genes to split out. Use '-' for all identified genes.
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
69 :return:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
70 """
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
71 match_dict = merged_txt_to_match_dict(merged_file)
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
72 gene_tarfiles = []
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
73 os.makedirs(outdir, exist_ok=True)
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
74 for gene in genes:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
75 new_filename = f"{prefix}_{gene}.txz" if gene else f"{prefix}.txz"
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
76 gene_tarfiles.append(
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
77 tarfile.open(os.path.join(outdir, new_filename), mode="w:xz")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
78 )
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
79 for name, table in imgt_to_tables(imgt_file):
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
80 # Read each table one by one and per line select in which output
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
81 # files it should go.
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
82 gene_files = []
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
83 for gene in genes:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
84 fp, fname = tempfile.mkstemp()
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
85 # The file pointer fp will be wrapped in a python file object
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
86 # so we can ensure there remain no open files.
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
87 f = open(fp, mode="wt")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
88 gene_files.append((gene, f, fname))
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
89 header = next(table)
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
90 header_number_of_tabs = header.count('\t')
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
91 column_names = header.strip("\n").split("\t")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
92 fr1_columns = [index for index, column in enumerate(column_names)
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
93 if column.startswith("FR1")]
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
94 sequence_id_index = column_names.index("Sequence ID")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
95 for _, gene_file, _ in gene_files:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
96 gene_file.write(header)
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
97 for line in table:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
98 # IMGT sometimes delivers half-empty rows.
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
99 row_number_of_tabs = line.count("\t")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
100 missing_tabs = header_number_of_tabs - row_number_of_tabs
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
101 if missing_tabs:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
102 line = line.strip("\n") + missing_tabs * "\t" + "\n"
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
103 values = line.strip("\n").split("\t")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
104 sequence_id = values[sequence_id_index]
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
105 match = match_dict.get(sequence_id)
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
106 if match is None:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
107 continue
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
108 if name.startswith("8_"):
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
109 # change the FR1 columns to 0 in the "8_..." file
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
110 for index in fr1_columns:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
111 values[index] = "0"
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
112 line = "\t".join(values) + "\n"
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
113 for gene, gene_file, _ in gene_files:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
114 if gene in match:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
115 gene_file.write(line)
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
116 for gene_tarfile, (_, gene_file, fname) in zip(gene_tarfiles, gene_files):
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
117 gene_file.flush()
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
118 gene_tarfile.add(fname, name)
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
119 gene_file.close()
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
120 os.remove(fname)
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
121 for gene_tarfile in gene_tarfiles:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
122 gene_tarfile.close()
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
123
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
124
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
125 def argument_parser() -> argparse.ArgumentParser:
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
126 parser = argparse.ArgumentParser()
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
127 parser.add_argument("imgt_file", help="The original IMGT FILE")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
128 parser.add_argument("merged", help="merged.txt file")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
129 parser.add_argument("--outdir", help="output directory")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
130 parser.add_argument(
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
131 "genes",
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
132 nargs="+",
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
133 help="The genes to split out. Use '-' for all identified genes.")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
134 parser.add_argument("--prefix", help="Prefix for the archives and "
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
135 "directories")
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
136 return parser
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
137
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
138
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
139 def main():
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
140 args = argument_parser().parse_args()
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
141 genes = ["" if gene == "-" else gene for gene in args.genes]
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
142 split_imgt(args.imgt_file, args.merged, args.outdir, genes,
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
143 args.prefix)
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
144
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
145
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
146 if __name__ == "__main__":
cf8ad181628f planemo upload commit 36be3b053802693392f935e6619ba3f2b1704e3c
rhpvorderman
parents:
diff changeset
147 main()