annotate tn93_filter.py @ 0:af03f3398f03 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
author iuc
date Fri, 23 Apr 2021 03:04:15 +0000
parents
children 112d80c9ccca
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
af03f3398f03 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
1 import argparse
af03f3398f03 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
2 import csv
af03f3398f03 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
3
af03f3398f03 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
4 from Bio import SeqIO
af03f3398f03 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
5
af03f3398f03 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
# Command-line interface: four required string options, parsed at import time
# into ``settings``.
arguments = argparse.ArgumentParser(
    description='Combine alignments into a single file, adding a reference sequence as well',
)

# (short flag, long flag, help text) for each option, in registration order.
for short_flag, long_flag, help_text in (
    ('-f', '--reference', 'Reference sequence'),
    ('-d', '--distances', 'Calculated pairwise distances'),
    ('-r', '--reads', 'Output file for filtered reads'),
    ('-q', '--clusters', 'Compressed clusters'),
):
    arguments.add_argument(short_flag, long_flag, help=help_text, required=True, type=str)

settings = arguments.parse_args()
af03f3398f03 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
13
af03f3398f03 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
# Fallback values, kept when the reference FASTA yields no records.
reference_name = 'REFERENCE'
reference_seq = ''

# Only the first record of the reference file is consulted.
with open(settings.reference) as seq_fh:
    first_record = next(SeqIO.parse(seq_fh, 'fasta'), None)
    if first_record is not None:
        reference_name = first_record.name
        reference_seq = first_record.seq
af03f3398f03 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
22
af03f3398f03 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
# Gather the second column of every row after the first in the distances CSV.
with open(settings.distances) as dist_fh:
    rows = csv.reader(dist_fh, delimiter=',')
    next(rows)  # skip the first row (presumably a header -- confirm)
    seqs_to_filter = {row[1] for row in rows}
    # The reference name is never kept in the filter set.
    seqs_to_filter.discard(reference_name)
af03f3398f03 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
32
af03f3398f03 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tn93/ commit eec640a7c26b728f8175885926fe368b0756d9e5"
iuc
parents:
diff changeset
# Append the reference sequence to the reads file unless the clusters FASTA
# already holds a record named like the reference.
with open(settings.reads, "a+") as out_fh:
    seqs_filtered = []
    for record in SeqIO.parse(settings.clusters, "fasta"):
        name = record.name
        # Only an unfiltered record carrying the reference name is of interest.
        if name != reference_name or name in seqs_to_filter:
            continue
        if name not in seqs_filtered:
            seqs_filtered.append(name)
    # NOTE(review): this check is assumed to run once, after the whole clusters
    # file has been scanned -- confirm the nesting against the upstream script.
    if reference_name not in seqs_filtered:
        out_fh.write('\n>REFERENCE\n%s' % reference_seq)