Mercurial > repos > iuc > read_it_and_keep
diff trim_reference.py @ 0:554aa2a63f04 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/read-it-and-keep commit 4b41e2742ba5f9c957e13a188ca49e60e16ae13b"
author | iuc |
---|---|
date | Fri, 28 Jan 2022 18:47:34 +0000 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python
"""Remove the trailing run of ``A`` characters from the end of a text file.

The input is scanned from its last line backwards. Lines that consist only of
``A``/``a`` characters and whitespace are dropped; the first line (from the
end) that contains anything else is cut after its last non-``A``, non-space
character, and everything before that line is written out unchanged.
"""

from __future__ import print_function

import argparse
import sys


def _trim_trailing_a(lines):
    """Return a copy of *lines* with the trailing run of As removed.

    :param lines: list of strings, one per input line (line endings included).
    :returns: a new list holding the untouched leading lines followed by the
        trimmed final line.  The trimmed line is upper-cased and terminated
        with a single ``'\\n'``.  An empty list is returned when *lines* is
        empty or when every line is made up solely of ``A``/``a`` characters
        and whitespace.
    """
    # Walk backwards with an explicit lower bound: an unbounded index would
    # raise IndexError on empty input and wrap around through negative
    # indices when no line contains a non-A character.
    for index in range(len(lines) - 1, -1, -1):
        # Upper-case so that 'a' is treated like 'A'; drop the line ending
        # and trailing whitespace, then any trailing mix of As and spaces.
        kept = lines[index].upper().rstrip().rstrip('A ')
        if kept:
            return lines[:index] + [kept + '\n']
        # Nothing left: the whole line was As/whitespace, so it is dropped.
    return []


def main(argv=None):
    """Parse the command line, trim the input and write the result.

    :param argv: list of argument strings; ``None`` (the default) makes
        argparse read ``sys.argv[1:]``.  Positional arguments are the input
        file and an optional output file (standard output when omitted).
    """
    parser = argparse.ArgumentParser(
        description='Remove trailing A characters from the end of a file.')
    parser.add_argument('input_file', type=argparse.FileType())
    parser.add_argument('output_file', type=argparse.FileType('w'),
                        nargs='?', default=sys.stdout)
    args = parser.parse_args(argv)
    try:
        lines = args.input_file.readlines()
    finally:
        # argparse.FileType maps '-' to the standard streams; leave those open.
        if args.input_file is not sys.stdin:
            args.input_file.close()
    try:
        args.output_file.write(''.join(_trim_trailing_a(lines)))
    finally:
        if args.output_file is not sys.stdout:
            args.output_file.close()


if __name__ == '__main__':
    main()