Mercurial > repos > iuc > khmer_abundance_distribution
annotate filter-below-abund.py @ 9:7624945cacd3 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/khmer commit 7599ef16dc8b83ee49236ed5bb229260c969b0ab
author | iuc |
---|---|
date | Mon, 17 Jun 2024 11:47:51 +0000 |
parents | 5a97c5bbd51e |
children |
rev | line source |
---|---|
0
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
1 #! /usr/bin/env python |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
2 # This file is part of khmer, https://github.com/dib-lab/khmer/, and is |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
3 # Copyright (C) 2011-2015, Michigan State University. |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
4 # Copyright (C) 2015, The Regents of the University of California. |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
5 # |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
6 # Redistribution and use in source and binary forms, with or without |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
7 # modification, are permitted provided that the following conditions are |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
8 # met: |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
9 # |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
10 # * Redistributions of source code must retain the above copyright |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
11 # notice, this list of conditions and the following disclaimer. |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
12 # |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
13 # * Redistributions in binary form must reproduce the above |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
14 # copyright notice, this list of conditions and the following |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
15 # disclaimer in the documentation and/or other materials provided |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
16 # with the distribution. |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
17 # |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
18 # * Neither the name of the Michigan State University nor the names |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
19 # of its contributors may be used to endorse or promote products |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
20 # derived from this software without specific prior written |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
21 # permission. |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
22 # |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
23 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
24 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
25 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
26 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
27 # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
28 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
29 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
30 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
31 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
32 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
33 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
34 # |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
35 # Contact: khmer-project@idyll.org |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
36 from __future__ import print_function |
5
a02efb62565b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit 19ea4feff5ccf3744c549b9a67259947a1cb90ba
iuc
parents:
4
diff
changeset
|
37 |
a02efb62565b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit 19ea4feff5ccf3744c549b9a67259947a1cb90ba
iuc
parents:
4
diff
changeset
|
38 import os |
0
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
39 import sys |
5
a02efb62565b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit 19ea4feff5ccf3744c549b9a67259947a1cb90ba
iuc
parents:
4
diff
changeset
|
40 |
6
5a97c5bbd51e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/khmer commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
5
diff
changeset
|
41 import screed |
5a97c5bbd51e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/khmer commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
5
diff
changeset
|
42 from khmer import Countgraph, ReadParser |
5a97c5bbd51e
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/khmer commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
iuc
parents:
5
diff
changeset
|
43 from khmer.utils import (broken_paired_reader, write_record) |
0
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
44 |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
# Abundance cutoff handed to Countgraph.trim_below_abundance() in main().
# NOTE(review): presumably the k-mer count at/above which a read is
# truncated -- confirm against the khmer documentation.
CUTOFF = 50
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
46 |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
47 |
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
def main():
    """Trim reads from each input file using a saved khmer countgraph.

    Command line::

        filter-below-abund.py <countgraph file> <reads file> [<reads file> ...]

    For every reads file, output is written to ``<basename of input>.below``
    (a relative path, so it lands in the current working directory).  A read
    whose sequence contains ``N`` is skipped.  Every other read is passed to
    ``trim_below_abundance`` together with ``CUTOFF`` and is written out only
    when the returned trim position is at least the countgraph's k-mer size.

    Raises:
        IndexError: if no countgraph path is given on the command line.
    """
    counting_ht = sys.argv[1]
    infiles = sys.argv[2:]

    print('file with ht: %s' % counting_ht)

    print('making hashtable')
    ht = Countgraph.load(counting_ht)
    K = ht.ksize()

    for infile in infiles:
        print('filtering', infile)
        outfile = os.path.basename(infile) + '.below'

        # The context manager makes sure each output file is flushed and
        # closed before the next input is processed, even if reading or
        # trimming raises part-way through.
        with open(outfile, 'w') as outfp:
            paired_iter = broken_paired_reader(ReadParser(infile),
                                               min_length=K,
                                               force_single=True)
            # Only the first read of each yielded tuple is used; the other
            # tuple members are ignored (force_single=True is requested).
            for _n, _is_pair, read1, _read2 in paired_iter:
                name = read1.name
                seq = read1.sequence
                if 'N' in seq:
                    # Skip just this read.  A `return` here would end main()
                    # at the first read containing N and silently drop the
                    # rest of this file and all remaining input files.
                    continue

                trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF)

                if trim_at >= K:
                    write_record(screed.Record(name=name, sequence=trim_seq),
                                 outfp)
0
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
76 |
4
5a2da133262a
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit f30bc6f7e10409dfa0dd65688e60da8b59f12464
iuc
parents:
0
diff
changeset
|
77 |
0
c07433531fd3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
# Run the filter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()