Mercurial > repos > earlhaminst > replace_chromosome_names
comparison replace_chromosome_names.py @ 0:97c11d04cd4c draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/replace_chromosome_names/ commit 588282bf5cbb9909ad9cd8c316ec33158c858727
| author | earlhaminst |
|---|---|
| date | Thu, 18 May 2017 14:17:48 -0400 |
| parents | |
| children | 6c0373cc070f |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:97c11d04cd4c |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 from __future__ import print_function | |
| 3 | |
| 4 import argparse | |
| 5 import sys | |
| 6 | |
| 7 | |
def main():
    """Replace chromosome names in selected columns of a tabular file.

    Command-line entry point. Reads a 2-column, tab-separated mapping table
    (old name -> new name), then copies the input file to the output line by
    line, replacing the value found in each of the requested columns when it
    appears in the mapping table. Values without a mapping are left as-is.
    Lines starting with the optional comment character are copied verbatim.

    Raises:
        Exception: if a line of the mapping table has fewer than 2
            tab-separated columns.
        IndexError: if a non-comment input line has fewer columns than one
            of the requested column indexes.
        SystemExit: (through argparse) if the command line is invalid,
            including a ``--cols`` entry that is not an integer >= 1.
    """
    parser = argparse.ArgumentParser(description='Replace chromosome names in a tabular (e.g. VCF) file using a mapping table.')
    parser.add_argument('--cols', required=True, help='comma-separated list of column indexes (starting from 1) on which to perform the replacement')
    parser.add_argument('-m', dest='mapping_file', type=argparse.FileType(), required=True, help='mapping table file. Must contain 2 tab-separated columns')
    parser.add_argument('--comment-char', help='lines starting with this character will be directly printed to the output file')
    parser.add_argument('-o', dest='output', type=argparse.FileType('w'), default=sys.stdout, help='output file. If not specified, writes on standard output')
    parser.add_argument('input', metavar='INPUT', type=argparse.FileType(), help='tabular input file')
    args = parser.parse_args()

    # argparse.FileType never closes what it opens, so do it explicitly.
    # The standard streams (default output, or '-' arguments) are left open.
    std_streams = (sys.stdin, sys.stdout)
    files_to_close = [fh for fh in (args.mapping_file, args.input, args.output) if fh not in std_streams]

    try:
        # Convert the 1-based column numbers to 0-based list indexes.
        # Anything below 1 is rejected: it would otherwise turn into a
        # negative index and silently address columns from the end of the line.
        cols_to_map = []
        for col in args.cols.split(','):
            try:
                col_index = int(col) - 1
            except ValueError:
                parser.error("invalid column index '%s' in --cols: must be an integer" % col)
            if col_index < 0:
                parser.error("invalid column index '%s' in --cols: column indexes start from 1" % col)
            cols_to_map.append(col_index)

        map_dict = dict()
        for line in args.mapping_file:
            line = line.rstrip('\r\n')
            line_cols = line.split('\t')
            if len(line_cols) < 2:
                raise Exception("Line '%s' in mapping table file does not contain 2 tab-separated columns" % line)
            map_dict[line_cols[0]] = line_cols[1]

        for line_number, line in enumerate(args.input, 1):
            line = line.rstrip('\r\n')
            if args.comment_char and line.startswith(args.comment_char):
                print(line, file=args.output)
                continue
            line_cols = line.split('\t')
            for col_to_map in cols_to_map:
                if col_to_map >= len(line_cols):
                    raise IndexError("Line %d of input file has only %d column(s), cannot replace column %d" % (line_number, len(line_cols), col_to_map + 1))
                old_value = line_cols[col_to_map]
                line_cols[col_to_map] = map_dict.get(old_value, old_value)
            print('\t'.join(line_cols), file=args.output)
    finally:
        for fh in files_to_close:
            fh.close()
| 38 | |
| 39 | |
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
