| 
0
 | 
     1 import itertools
 | 
| 
 | 
     2 from sys import argv, exit
 | 
| 
 | 
     3 from itertools import zip_longest
 | 
| 
 | 
     4 
 | 
| 
 | 
     5 def grouper(iterable, n, fillvalue=None):
 | 
| 
 | 
     6     args = [iter(iterable)] * n
 | 
| 
 | 
     7     return zip_longest(*args, fillvalue=fillvalue)
 | 
| 
 | 
     8 
 | 
| 
 | 
     9 
 | 
| 
 | 
# Number of consecutive input lines that trimandpaste groups into one record;
# it reads the first four lines of each group as header, sequence, separator
# and quality.
chunk_size=4
 | 
| 
 | 
    11 
 | 
| 
 | 
    12 
 | 
| 
 | 
    13 def trimandpaste(pathToFastaFile, output):
 | 
| 
 | 
    14     #filename = pathToFastaFile.split('/')[-1]
 | 
| 
 | 
    15     output = open(output,"w")
 | 
| 
 | 
    16     with open(pathToFastaFile) as f:
 | 
| 
 | 
    17         for lines in grouper(f, chunk_size, ""): #for every chunk_sized chunk
 | 
| 
 | 
    18             header = lines[0]
 | 
| 
 | 
    19             seq = lines[1]
 | 
| 
 | 
    20             sep = lines[2]
 | 
| 
 | 
    21             qual = lines[3]
 | 
| 
 | 
    22             trimmed_seq = seq[2:-11]+seq[-6:-1]+"\n" # fooprint + barcode
 | 
| 
 | 
    23             UMI = seq[0:2]+seq[-11:-6] #7nt in total 
 | 
| 
 | 
    24             split_header = header.split(" ")
 | 
| 
 | 
    25             new_header = split_header[0]+"_"+UMI+" "+split_header[1]
 | 
| 
 | 
    26             if qual[-1:] == "\n":
 | 
| 
 | 
    27                 new_qual = qual[2:-11]+qual[-6:-1]+"\n"
 | 
| 
 | 
    28             else:
 | 
| 
 | 
    29                 new_qual = qual[2:-10]+qual[-6:-1]
 | 
| 
 | 
    30             output.write(new_header)
 | 
| 
 | 
    31             output.write(trimmed_seq) 
 | 
| 
 | 
    32             output.write(sep) 
 | 
| 
 | 
    33             output.write(new_qual)
 | 
| 
 | 
    34 
 | 
| 
 | 
    35     output.close() 
 | 
| 
 | 
    36 
 | 
| 
 | 
    37 def main():
 | 
| 
 | 
    38     if len(argv) != 3: 
 | 
| 
 | 
    39         exit("Usage: 2 arguments required\n1: Path to fasta file \n2: name of output file")
 | 
| 
 | 
    40 
 | 
| 
 | 
    41     # Get paths
 | 
| 
 | 
    42     pathToFastaFile = argv[1]
 | 
| 
 | 
    43     output = argv[2]
 | 
| 
 | 
    44         
 | 
| 
 | 
    45     trimandpaste(pathToFastaFile, output)
 | 
| 
 | 
    46 
 | 
| 
 | 
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
 |