Mercurial > repos > triasteran > ribogalaxy_umi_processing
comparison UMI_riboseq_processing/UMI.py @ 0:ef98c6fad2a2 draft
Uploaded
| author | triasteran |
|---|---|
| date | Sun, 19 Jun 2022 11:29:41 +0000 |
| parents | |
| children | 5d0d5933d370 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ef98c6fad2a2 |
|---|---|
| 1 import itertools | |
| 2 from sys import argv, exit | |
| 3 from itertools import zip_longest | |
| 4 | |
def grouper(iterable, n, fillvalue=None):
    """Collect items from *iterable* into consecutive tuples of length *n*.

    The final tuple is padded with *fillvalue* when the input runs out
    before it is complete.  The result is a lazy ``zip_longest`` iterator.
    """
    # Every slot of the tuple pulls from the very same iterator, so each
    # output tuple consumes the next n items of the input in order.
    source = iter(iterable)
    slots = (source,) * n
    return zip_longest(*slots, fillvalue=fillvalue)
| 8 | |
| 9 | |
# Lines per input record (header, sequence, separator, quality), i.e. the
# four-line FASTQ layout.
chunk_size=4
| 11 | |
| 12 | |
def _split_newline(line):
    """Return ``(text, eol)``: *line* without its trailing newline, and that newline (or ``""``)."""
    if line.endswith("\n"):
        return line[:-1], "\n"
    return line, ""


def _read_records(handle):
    """Yield ``(header, seq, sep, qual)`` line tuples; lines missing at end of file are ``""``."""
    while True:
        header = handle.readline()
        # Tolerate blank lines where a header is expected (typically empty
        # lines at the very end of the file) instead of failing on them.
        while header and not header.strip():
            header = handle.readline()
        if not header:
            return
        yield header, handle.readline(), handle.readline(), handle.readline()


def trimandpaste(pathToFastaFile, output):
    """Move the UMI of every read into its header and trim it from the read.

    Despite the parameter name, the input is read as four-line records
    (header, sequence, separator, quality).  Each sequence is laid out as
    2 nt + footprint + 5 nt + 5 nt barcode; the leading 2 nt and the 5 nt in
    front of the barcode together form the 7 nt UMI.

    For every record the UMI is appended to the read identifier (the header
    text before the first space) as ``<id>_<UMI>``, and both the sequence and
    the quality string are reduced to footprint + barcode.  The separator
    line and the rest of the header are copied unchanged.

    Args:
        pathToFastaFile: path of the input file to read.
        output: path of the file to write; it is created or overwritten.
    """
    # Context managers close both files even if a record raises.
    with open(pathToFastaFile) as reads, open(output, "w") as trimmed:
        for header, seq, sep, qual in _read_records(reads):
            # Slice on the newline-free text so that sequence and quality are
            # cut at identical positions whether or not the line (e.g. the
            # last line of the file) ends with a newline.
            bases, seq_eol = _split_newline(seq)
            scores, qual_eol = _split_newline(qual)
            umi = bases[:2] + bases[-10:-5]  # 7 nt in total

            # Split on the first space only: headers without a description
            # stay valid, and descriptions containing spaces are preserved
            # together with the line ending.
            head, head_eol = _split_newline(header)
            read_id, space, description = head.partition(" ")

            trimmed.write(read_id + "_" + umi + space + description + head_eol)
            trimmed.write(bases[2:-10] + bases[-5:] + seq_eol)  # footprint + barcode
            trimmed.write(sep)
            trimmed.write(scores[2:-10] + scores[-5:] + qual_eol)
| 36 | |
def main():
    """Command-line entry point: run ``trimandpaste`` on the two given paths.

    Exits with a usage message unless exactly two arguments follow the
    script name.
    """
    # argv[0] is the script itself, so two user arguments mean three entries.
    if len(argv) == 3:
        _script, source_path, target_path = argv
        trimandpaste(source_path, target_path)
        return

    exit("Usage: 2 arguments required\n1: Path to fasta file \n2: name of output file")
| 46 | |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
