Mercurial > repos > iuc > khmer_abundance_distribution
changeset 6:5a97c5bbd51e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/khmer commit e0cd7ae10ce97bed51594e7cc0b969a803d698b7
author | iuc |
---|---|
date | Fri, 07 Sep 2018 10:59:06 -0400 |
parents | a02efb62565b |
children | e0de12ae3fc2 |
files | filter-below-abund.py macros.xml tool_dependencies.xml |
diffstat | 3 files changed, 14 insertions(+), 72 deletions(-) [+] |
line wrap: on
line diff
--- a/filter-below-abund.py Sat Jan 21 14:42:20 2017 -0500
+++ b/filter-below-abund.py Fri Sep 07 10:59:06 2018 -0400
@@ -38,11 +38,10 @@
 import os
 import sys
 
-import khmer
-from khmer.thread_utils import ThreadedSequenceProcessor, verbose_fasta_iter
+import screed
+from khmer import Countgraph, ReadParser
+from khmer.utils import (broken_paired_reader, write_record)
 
-WORKER_THREADS = 8
-GROUPSIZE = 100
 CUTOFF = 50
 
 
@@ -51,12 +50,9 @@
     infiles = sys.argv[2:]
 
     print('file with ht: %s' % counting_ht)
-    print('-- settings:')
-    print('N THREADS', WORKER_THREADS)
-    print('--')
 
     print('making hashtable')
-    ht = khmer.load_countgraph(counting_ht)
+    ht = Countgraph.load(counting_ht)
     K = ht.ksize()
 
     for infile in infiles:
@@ -65,22 +61,18 @@
 
         outfp = open(outfile, 'w')
 
-        def process_fn(record, ht=ht):
-            name = record['name']
-            seq = record['sequence']
+        paired_iter = broken_paired_reader(ReadParser(infile), min_length=K,
+                                           force_single=True)
+        for n, is_pair, read1, read2 in paired_iter:
+            name = read1.name
+            seq = read1.sequence
             if 'N' in seq:
                 return None, None
 
             trim_seq, trim_at = ht.trim_below_abundance(seq, CUTOFF)
 
             if trim_at >= K:
-                return name, trim_seq
-
-            return None, None
-
-        tsp = ThreadedSequenceProcessor(process_fn, WORKER_THREADS, GROUPSIZE)
-
-        tsp.start(verbose_fasta_iter(infile), outfp)
+                write_record(screed.Record(name=name, sequence=trim_seq), outfp)
 
 
 if __name__ == '__main__':
--- a/macros.xml Sat Jan 21 14:42:20 2017 -0500 +++ b/macros.xml Fri Sep 07 10:59:06 2018 -0400 @@ -1,9 +1,7 @@ <macros> - <token name="@WRAPPER_VERSION@">2.0</token> + <token name="@WRAPPER_VERSION@">3.0.0a1</token> <xml name="requirements"> <requirements> - <requirement type="package" version="0.98">bz2file</requirement> - <requirement type="package" version="0.9">screed</requirement> <requirement type="package" version="@WRAPPER_VERSION@">khmer</requirement> </requirements> </xml> @@ -47,7 +45,7 @@ <when value="specific"> <param name="ksize" type="integer" value="20" label="k-mer size" help="k-mer size to use" /> <param name="n_tables" type="integer" min="1" value="4" label="n_tables" help="number of tables to use" /> - <param name="tablesize_specific" type="text" value="1000000.0" + <param name="tablesize_specific" type="text" value="1000000.0" label="tablesize" help="(--max-tablesize) upper bound on the tablesize to use" /> </when> </conditional> @@ -93,31 +91,7 @@ <token name="@HELP_FOOTER@"><![CDATA[ (from the khmer project: http://khmer.readthedocs.org/en/v2.0/ )]]></token> <xml name="software-citation"> - <citation type="bibtex">@article{khmer2015, - author = "Crusoe, Michael R. and Alameldin, Hussien F. and Awad, Sherine - and Bucher, Elmar and Caldwell, Adam and Cartwright, Reed and Charbonneau, - Amanda and Constantinides, Bede and Edvenson, Greg and Fay, Scott and Fenton, - Jacob and Fenzl, Thomas and Fish, Jordan and Garcia-Gutierrez, Leonor and - Garland, Phillip and Gluck, Jonathan and González, Iván and Guermond, Sarah - and Guo, Jiarong and Gupta, Aditi and Herr, Joshua R. and Howe, Adina and - Hyer, Alex and Härpfer, Andreas and Irber, Luiz and Kidd, Rhys and Lin, David - and Lippi, Justin and Mansour, Tamer and McA'Nulty, Pamela and McDonald, Eric - and Mizzi, Jessica and Murray, Kevin D. and Nahum, Joshua R. 
and Nanlohy, - Kaben and Nederbragt, Alexander Johan and Ortiz-Zuazaga, Humberto and Ory, - Jeramia and Pell, Jason and Pepe-Ranney, Charles and Russ, Zachary N and - Schwarz, Erich and Scott, Camille and Seaman, Josiah and Sievert, Scott and - Simpson, Jared and Skennerton, Connor T. and Spencer, James and Srinivasan, - Ramakrishnan and Standage, Daniel and Stapleton, James A. and Stein, Joe and - Steinman, Susan R and Taylor, Benjamin and Trimble, Will and Wiencko, Heather - L. and Wright, Michael and Wyss, Brian and Zhang, Qingpeng and zyme, en and - Brown, C. Titus" - title = "The khmer software package: enabling efficient nucleotide - sequence analysis", - year = "2015", - month = "08", - publisher = "F1000", - url = "http://dx.doi.org/10.12688/f1000research.6924.1" - }</citation> + <citation type="doi">10.12688/f1000research.6924.1</citation> </xml> <xml name="diginorm-citation"> <citation type="bibtex">@unpublished{diginorm, @@ -130,19 +104,7 @@ url = "http://arxiv.org/abs/1203.4802", }</citation></xml> <xml name="graph-citation"> - <citation type="bibtex">@article{Pell2012, - doi = {10.1073/pnas.1121464109}, - url = {http://dx.doi.org/10.1073/pnas.1121464109}, - year = {2012}, - month = {jul}, - publisher = {Proceedings of the National Academy of Sciences}, - volume = {109}, - number = {33}, - pages = {13272--13277}, - author = {J. Pell and A. Hintze and R. Canino-Koning and A. Howe and J. M. Tiedje and C. T. Brown}, - title = {Scaling metagenome sequence assembly with probabilistic de Bruijn graphs}, - journal = {Proceedings of the National Academy of Sciences} - }</citation> + <citation type="doi">10.1073/pnas.1121464109</citation> </xml> <xml name="counting-citation"> <citation type="doi">10.1371/journal.pone.0101271</citation>
--- a/tool_dependencies.xml Sat Jan 21 14:42:20 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="bz2file" version="0.98">
-        <repository changeset_revision="73382c8fa8f3" name="package_bz2file_0_98" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="screed" version="0.9">
-        <repository changeset_revision="639e5e37404d" name="package_screed_0_9" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-    <package name="khmer" version="2.0">
-        <repository changeset_revision="c02c1e1abd48" name="package_khmer_2_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>