Mercurial > repos > bebatut > fasta_add_barcode
view fasta_add_barcode.py @ 0:04699558a38a draft default tip
planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/fasta_add_barcode commit e857f7126443e115f11954085423f8999bc870aa-dirty
author | bebatut |
---|---|
date | Fri, 15 Apr 2016 06:04:56 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python # -*- coding: utf-8 -*- import sys import os import argparse import copy import operator def write_seq_fasta_format(seq, output_file): split_seq = [seq[i:i+60] for i in xrange(0,len(seq),60)] for split in split_seq: output_file.write(split + '\n') def fasta_add_barcode(args): mapping = {} with open(args.input_mapping_file,'r') as input_mapping_file: for line in input_mapping_file: split_line = line[:-1].split('\t') if len(split_line) != 2: string = 'Incorrect number of column in mapping file.' string += '\nTwo tabular separated columns are expected' raise ValueError(string) mapping[split_line[0]] = split_line[1] seq_id = '' seq = '' with open(args.input_sequence_file,'r') as input_sequence_file: with open(args.output_sequence_file, 'w') as output_sequence_file: for line in input_sequence_file: if line.startswith('>'): if seq != '': if not mapping.has_key(seq_id): string = 'A sequence identifier (' + seq_id + ') is' string += ' not found in mapping file' raise ValueError(string) output_sequence_file.write('>' + seq_id + '\n') barcode = mapping[seq_id] seq = barcode + seq write_seq_fasta_format(seq, output_sequence_file) seq_id = line[1:-1].split( )[0] seq = '' else: seq += line[:-1] ######## # Main # ######## if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('--input_sequence_file', required=True) parser.add_argument('--input_mapping_file', required=True) parser.add_argument('--output_sequence_file', required=True) args = parser.parse_args() fasta_add_barcode(args)