Mercurial > repos > earlhaminst > t_coffee
comparison filter_by_fasta_ids.py @ 4:fa59d6fea7f5 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/t_coffee commit 988b1fc1cb8739e45648465adbf099f3fdaf87f8
author | earlhaminst |
---|---|
date | Fri, 03 Mar 2017 07:29:32 -0500 |
parents | 78dd29aa7fc1 |
children | 0a189243186d |
comparison legend (status of each row in the side-by-side table below): equal | deleted | inserted | replaced
3:78dd29aa7fc1 | 4:fa59d6fea7f5 |
---|---|
7 | 7 |
8 Sequence = collections.namedtuple('Sequence', ['header', 'sequence']) | 8 Sequence = collections.namedtuple('Sequence', ['header', 'sequence']) |
9 | 9 |
10 | 10 |
11 def FASTAReader_gen(fasta_filename): | 11 def FASTAReader_gen(fasta_filename): |
12 fasta_file = open(fasta_filename) | 12 with open(fasta_filename) as fasta_file: |
13 line = fasta_file.readline() | |
14 while True: | |
15 if not line: | |
16 return | |
17 assert line.startswith('>'), "FASTA headers must start with >" | |
18 header = line.rstrip() | |
19 sequence_parts = [] | |
20 line = fasta_file.readline() | 13 line = fasta_file.readline() |
21 while line and line[0] != '>': | 14 while True: |
22 sequence_parts.append(line.rstrip()) | 15 if not line: |
16 return | |
17 assert line.startswith('>'), "FASTA headers must start with >" | |
18 header = line.rstrip() | |
19 sequence_parts = [] | |
23 line = fasta_file.readline() | 20 line = fasta_file.readline() |
24 sequence = "".join(sequence_parts) | 21 while line and line[0] != '>': |
25 yield Sequence(header, sequence) | 22 sequence_parts.append(line.rstrip()) |
23 line = fasta_file.readline() | |
24 sequence = "".join(sequence_parts) | |
25 yield Sequence(header, sequence) | |
26 | 26 |
27 | 27 |
28 def target_match(target, search_entry): | 28 def target_match(target, search_entry): |
29 ''' Matches ''' | 29 ''' Matches ''' |
30 search_entry = search_entry.upper() | 30 search_entry = search_entry.upper() |
45 for line in f_target.readlines(): | 45 for line in f_target.readlines(): |
46 targets.append(">%s" % line.strip().upper()) | 46 targets.append(">%s" % line.strip().upper()) |
47 | 47 |
48 work_summary['wanted'] = len(targets) | 48 work_summary['wanted'] = len(targets) |
49 | 49 |
50 # output = open(sys.argv[3], "w") | |
51 for entry in FASTAReader_gen(sys.argv[2]): | 50 for entry in FASTAReader_gen(sys.argv[2]): |
52 target_matched_results = target_match(targets, entry.header) | 51 target_matched_results = target_match(targets, entry.header) |
53 if target_matched_results: | 52 if target_matched_results: |
54 work_summary['found'] += 1 | 53 work_summary['found'] += 1 |
55 targets.remove(target_matched_results) | 54 targets.remove(target_matched_results) |