Mercurial > repos > iuc > tn93_filter
comparison tn93_filter.py @ 0:ba95715078c9 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
author | iuc |
---|---|
date | Fri, 23 Apr 2021 03:05:08 +0000 |
parents | |
children | cf50aeb956f2 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ba95715078c9 |
---|---|
1 import argparse | |
2 import csv | |
3 | |
4 from Bio import SeqIO | |
5 | |
6 arguments = argparse.ArgumentParser(description='Combine alignments into a single file, adding a reference sequence as well') | |
7 | |
8 arguments.add_argument('-f', '--reference', help='Reference sequence', required=True, type=str) | |
9 arguments.add_argument('-d', '--distances', help='Calculated pairwise distances', required=True, type=str) | |
10 arguments.add_argument('-r', '--reads', help='Output file for filtered reads', required=True, type=str) | |
11 arguments.add_argument('-q', '--clusters', help='Compressed clusters', required=True, type=str) | |
12 settings = arguments.parse_args() | |
13 | |
14 reference_name = 'REFERENCE' | |
15 reference_seq = '' | |
16 | |
17 with open(settings.reference) as seq_fh: | |
18 for seq_record in SeqIO.parse(seq_fh, 'fasta'): | |
19 reference_name = seq_record.name | |
20 reference_seq = seq_record.seq | |
21 break | |
22 | |
23 with open(settings.distances) as fh: | |
24 reader = csv.reader(fh, delimiter=',') | |
25 next(reader) | |
26 seqs_to_filter = set() | |
27 for line in reader: | |
28 if line[1] not in seqs_to_filter: | |
29 seqs_to_filter.add(line[1]) | |
30 if reference_name in seqs_to_filter: | |
31 seqs_to_filter.remove(reference_name) | |
32 | |
33 with open(settings.reads, "a+") as fh: | |
34 seqs_filtered = list() | |
35 for seq_record in SeqIO.parse(settings.clusters, "fasta"): | |
36 if seq_record.name not in seqs_to_filter: | |
37 if seq_record.name == reference_name: | |
38 if seq_record.name not in seqs_filtered: | |
39 seqs_filtered.append(seq_record.name) | |
40 else: | |
41 continue | |
42 if reference_name not in seqs_filtered: | |
43 fh.write('\n>REFERENCE\n%s' % reference_seq) |