Mercurial > repos > iuc > tn93_cluster
annotate tn93_filter.py @ 0:af03f3398f03 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
author | iuc |
---|---|
date | Fri, 23 Apr 2021 03:04:15 +0000 |
parents | |
children | 112d80c9ccca |
rev | line source |
---|---|
0
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
1 import argparse |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
2 import csv |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
3 |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
4 from Bio import SeqIO |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
5 |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
6 arguments = argparse.ArgumentParser(description='Combine alignments into a single file, adding a reference sequence as well') |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
7 |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
8 arguments.add_argument('-f', '--reference', help='Reference sequence', required=True, type=str) |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
9 arguments.add_argument('-d', '--distances', help='Calculated pairwise distances', required=True, type=str) |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
10 arguments.add_argument('-r', '--reads', help='Output file for filtered reads', required=True, type=str) |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
11 arguments.add_argument('-q', '--clusters', help='Compressed clusters', required=True, type=str) |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
12 settings = arguments.parse_args() |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
13 |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
14 reference_name = 'REFERENCE' |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
15 reference_seq = '' |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
16 |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
17 with open(settings.reference) as seq_fh: |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
18 for seq_record in SeqIO.parse(seq_fh, 'fasta'): |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
19 reference_name = seq_record.name |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
20 reference_seq = seq_record.seq |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
21 break |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
22 |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
23 with open(settings.distances) as fh: |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
24 reader = csv.reader(fh, delimiter=',') |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
25 next(reader) |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
26 seqs_to_filter = set() |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
27 for line in reader: |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
28 if line[1] not in seqs_to_filter: |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
29 seqs_to_filter.add(line[1]) |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
30 if reference_name in seqs_to_filter: |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
31 seqs_to_filter.remove(reference_name) |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
32 |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
33 with open(settings.reads, "a+") as fh: |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
34 seqs_filtered = list() |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
35 for seq_record in SeqIO.parse(settings.clusters, "fasta"): |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
36 if seq_record.name not in seqs_to_filter: |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
37 if seq_record.name == reference_name: |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
38 if seq_record.name not in seqs_filtered: |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
39 seqs_filtered.append(seq_record.name) |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
40 else: |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
41 continue |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
42 if reference_name not in seqs_filtered: |
af03f3398f03
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff
changeset
|
43 fh.write('\n>REFERENCE\n%s' % reference_seq) |