Mercurial > repos > iuc > khmer_extract_partitions
annotate filter-below-abund.py @ 4:7d8138b4a593 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit 19ea4feff5ccf3744c549b9a67259947a1cb90ba
author | iuc |
---|---|
date | Sat, 21 Jan 2017 14:43:04 -0500 |
parents | 18dc7b2d49d9 |
children | 518ba4a77274 |
rev | line source |
---|---|
0
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
1 #! /usr/bin/env python |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
2 # This file is part of khmer, https://github.com/dib-lab/khmer/, and is |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
3 # Copyright (C) 2011-2015, Michigan State University. |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
4 # Copyright (C) 2015, The Regents of the University of California. |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
5 # |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
6 # Redistribution and use in source and binary forms, with or without |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
7 # modification, are permitted provided that the following conditions are |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
8 # met: |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
9 # |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
10 # * Redistributions of source code must retain the above copyright |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
11 # notice, this list of conditions and the following disclaimer. |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
12 # |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
13 # * Redistributions in binary form must reproduce the above |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
14 # copyright notice, this list of conditions and the following |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
15 # disclaimer in the documentation and/or other materials provided |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
16 # with the distribution. |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
17 # |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
18 # * Neither the name of the Michigan State University nor the names |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
19 # of its contributors may be used to endorse or promote products |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
20 # derived from this software without specific prior written |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
21 # permission. |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
22 # |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
23 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
24 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
25 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
26 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
27 # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
28 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
29 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
30 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
31 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
32 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
33 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
34 # |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
35 # Contact: khmer-project@idyll.org |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
36 from __future__ import print_function |
4
7d8138b4a593
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit 19ea4feff5ccf3744c549b9a67259947a1cb90ba
iuc
parents:
3
diff
changeset
|
37 |
7d8138b4a593
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit 19ea4feff5ccf3744c549b9a67259947a1cb90ba
iuc
parents:
3
diff
changeset
|
38 import os |
0
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
39 import sys |
4
7d8138b4a593
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit 19ea4feff5ccf3744c549b9a67259947a1cb90ba
iuc
parents:
3
diff
changeset
|
40 |
0
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
41 import khmer |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
42 from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
43 |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
44 WORKER_THREADS = 8 |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
45 GROUPSIZE = 100 |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
46 CUTOFF = 50 |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
47 |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
48 |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
def main():
    """Trim every input sequence file against a saved khmer countgraph.

    Command line: ``filter-below-abund.py <countgraph> [<seqfile> ...]``

    ``sys.argv[1]`` is the path of the countgraph to load and the remaining
    arguments are the sequence files to filter.  For each input file the
    surviving records are written to ``<basename of input>.below`` in the
    current working directory.

    A record is dropped when its sequence contains an ``N``, or when the
    position returned by ``ht.trim_below_abundance(seq, CUTOFF)`` is smaller
    than the countgraph's k-mer size; otherwise the trimmed sequence is
    emitted under the original record name.

    Exits with status 1 and a usage message if no countgraph is given.
    """
    if len(sys.argv) < 2:
        # Fail with a readable message instead of an IndexError traceback.
        print('usage: %s <countgraph> [<seqfile> ...]' % sys.argv[0],
              file=sys.stderr)
        sys.exit(1)

    counting_ht = sys.argv[1]
    infiles = sys.argv[2:]

    print('file with ht: %s' % counting_ht)
    print('-- settings:')
    print('N THREADS', WORKER_THREADS)
    print('--')

    print('making hashtable')
    ht = khmer.load_countgraph(counting_ht)
    K = ht.ksize()

    # The callback does not depend on the input file, so build it once.
    def process_fn(record, ht=ht):
        """Return ``(name, trimmed_sequence)`` to keep, or ``(None, None)``."""
        name = record['name']
        seq = record['sequence']
        if 'N' in seq:
            return None, None

        trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF)

        # Keep only reads that still hold at least one full k-mer.
        if trim_at >= K:
            return name, trim_seq

        return None, None

    for infile in infiles:
        print('filtering', infile)
        outfile = os.path.basename(infile) + '.below'

        # Close (and flush) each output as soon as its input is processed,
        # instead of leaking one open handle per input file.
        # NOTE(review): assumes tsp.start() returns only after its writer has
        # finished with outfp -- confirm against khmer.thread_utils.
        with open(outfile, 'w') as outfp:
            # A fresh processor is created per input file, as before.
            tsp = ThreadedSequenceProcessor(process_fn, WORKER_THREADS,
                                            GROUPSIZE)
            tsp.start(verbose_fasta_iter(infile), outfp)
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
84 |
3
18dc7b2d49d9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit f30bc6f7e10409dfa0dd65688e60da8b59f12464
iuc
parents:
0
diff
changeset
|
85 |
0
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
86 if __name__ == '__main__': |
d5a18dd63529
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/khmer/ commit be9a20423d1a6ec33d59341e0e61b535127bbce2
iuc
parents:
diff
changeset
|
87 main() |