Mercurial > repos > davidvanzessen > sff_extract_demultiplex
diff trim.py @ 0:cb08a27e5fc2 draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 29 Aug 2016 05:44:57 -0400 |
parents | |
children | 423d320bc1ba |
line wrap: on
line diff
"""Trim a fixed number of nucleotides from both ends of every FASTA record.

Usage:
    trim.py --input IN.fasta --output OUT.fasta [--start N] [--end M]

Each record's sequence lines are joined into one string, the first N and
last M characters are removed, and the record is written back as a header
line followed by a single sequence line. Records whose sequence is empty
after trimming (or that have no header line) are left out of the output.
When neither N nor M is positive the input file is copied unchanged.
"""

import argparse
import shutil
import sys


def trim_sequence(sequence, start, end):
    """Return *sequence* without its first *start* and last *end* characters.

    Negative counts are treated as zero. If the two trims together cover the
    whole sequence, the empty string is returned.
    """
    start = max(start, 0)
    end = max(end, 0)
    # Compute an explicit stop index: a plain ``sequence[start:-end]`` would
    # return "" for end == 0, and a negative stop must not wrap around.
    stop = max(len(sequence) - end, 0)
    return sequence[start:stop]


def write_record(target, header, chunks, start, end):
    """Write one trimmed record to *target*; return True if it was written.

    *header* is the full ">" line including its line ending and *chunks* are
    the record's sequence lines with trailing whitespace already stripped.
    Nothing is written when there is no header or nothing is left of the
    sequence after trimming.
    """
    trimmed = trim_sequence("".join(chunks), start, end)
    if not header or not trimmed:
        return False
    target.write(header)
    target.write(trimmed + "\n")
    return True


def trim_fasta(input_path, output_path, start, end):
    """Trim every record of the FASTA file *input_path* into *output_path*.

    start -- number of characters to drop from the front of each sequence
    end   -- number of characters to drop from the back of each sequence
    """
    if start <= 0 and end <= 0:
        # Nothing to trim: keep the file byte-for-byte, including wrapping.
        shutil.copy(input_path, output_path)
        return

    with open(input_path, "r") as source, open(output_path, "w") as target:
        header = ""
        chunks = []
        # Iterate the file lazily instead of loading every line up front.
        for line in source:
            if line.startswith(">"):
                # A new header closes the record collected so far.
                write_record(target, header, chunks, start, end)
                header = line
                chunks = []
            else:
                chunks.append(line.rstrip())
        # The last record has no following header; flush it under the same
        # rules as every other record.
        write_record(target, header, chunks, start, end)


def main(argv=None):
    """Parse the command line (*argv* defaults to sys.argv[1:]) and run."""
    #docs.python.org/dev/library/argparse.html
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", help="Input fasta", required=True)
    parser.add_argument("--output", help="Output fasta", required=True)
    parser.add_argument("--start", help="How many nucleotides to trim from the start", type=int, default=0)
    parser.add_argument("--end", help="How many nucleotides to trim from the end", type=int, default=0)
    args = parser.parse_args(argv)

    # Echo the effective parameters to stdout. The single-argument call form
    # prints the same text under both Python 2 and Python 3.
    print(args.input)
    print(args.output)
    print(args.start)
    print(args.end)

    trim_fasta(args.input, args.output, args.start, args.end)


if __name__ == "__main__":
    sys.exit(main())