Mercurial > repos > iuc > stacks2_tsv2bam
changeset 2:bf0e43ab0416 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
line wrap: on
line diff
#!/usr/bin/env python
"""Sanity-check a barcode file (check_bcfile.py).

Usage: check_bcfile.py BCFILE

The file is read as whitespace-separated columns, one record per line.
The script exits with an error message (non-zero status) when

* the file holds at most one record,
* the records do not all have the same number of columns, or
* two records describe the same sample.

It exits silently with status 0 otherwise.
"""

import argparse
import sys


def read_barcodes(path):
    """Return the whitespace-split fields of every record line in *path*.

    Whitespace-only lines and lines starting with ``#`` are skipped.
    """
    barcodes = []
    with open(path, "r") as fh:
        for line in fh:
            # Lines yielded by a file iterator keep their trailing newline,
            # so a blank line is never of length 0; it has to be detected
            # after stripping whitespace. Otherwise it would be recorded as
            # an empty record and break the column checks below.
            if not line.strip():
                continue
            if line.startswith("#"):
                continue
            barcodes.append(line.split())
    return barcodes


def check_barcodes(barcodes):
    """Validate parsed barcode records.

    *barcodes* is a list of non-empty lists of strings (one per record).
    Returns the error message describing the first problem found, or
    ``None`` when the records pass all checks.
    """
    # NOTE(review): a file with exactly one record is also reported as
    # "empty" -- kept as in the original; confirm whether that is intended.
    if len(barcodes) <= 1:
        return "barcode file is empty"

    # check that all lines have the same number of columns
    ncol = len(barcodes[0])
    if any(len(bc) != ncol for bc in barcodes):
        return "barcode file has inconsistent number of columns"

    # If the last column of any record contains a character other than
    # A/T/C/G (either case), the last column is taken to be a sample name.
    isname = any(bc[-1].strip("ATCGatcg") for bc in barcodes)

    names = set()
    for bc in barcodes:
        # Without a name column the sample is identified by all of its
        # columns joined with '-'.
        n = bc[-1] if isname else '-'.join(bc)
        if n in names:
            return "duplicate sample %s in barcode file" % n
        names.add(n)
    return None


def main(argv=None):
    """Parse the command line, check the barcode file, exit on error.

    *argv* defaults to ``sys.argv[1:]`` (argparse behaviour for ``None``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('bcfile', help='barcode file')
    args = parser.parse_args(argv)

    error = check_barcodes(read_barcodes(args.bcfile))
    if error is not None:
        # sys.exit with a string prints it to stderr and exits with status 1
        sys.exit(error)


if __name__ == "__main__":
    main()
--- a/macros.xml Mon Sep 30 14:20:19 2019 -0400 +++ b/macros.xml Wed Jul 15 17:29:50 2020 -0400 @@ -3,12 +3,14 @@ <xml name="requirements"> <requirements> <requirement type="package" version="@STACKS_VERSION@">stacks</requirement> + <requirement type="package" version="3.7">python</requirement> + <requirement type="package" version="4.6.0">findutils</requirement> <yield/> </requirements> </xml> - <token name="@STACKS_VERSION@">2.4</token> - <token name="@WRAPPER_VERSION@">1</token> + <token name="@STACKS_VERSION@">2.53</token> + <token name="@WRAPPER_VERSION@">0</token> <!-- fix to 18.01 since https://github.com/galaxyproject/galaxy/pull/7032 --> <token name="@PROFILE@">18.01</token> @@ -107,16 +109,11 @@ <!-- log file handling --> <token name="@TEE_APPEND_LOG@"><![CDATA[ #if $output_log - 2>> '$output_log' && - #end if - ]]></token> - <token name="@CAT_LOG_TO_STDERR@"><![CDATA[ - #if $output_log - cat '$output_log' 2>&1 + 2> '$output_log' #end if ]]></token> <xml name="in_log"> - <param name="add_log" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Add log output as dataset" /> + <param name="add_log" type="boolean" checked="true" truevalue="yes" falsevalue="no" label="Add log output as dataset"/> </xml> <xml name="out_log"> <data format="txt" name="output_log" label="${tool.name} on ${on_string} log file"> @@ -126,19 +123,19 @@ <!-- inputs from previous pipeline steps --> <xml name="input_stacks_macro"> - <param name="input_stacks" format="tabular,txt" type="data_collection" collection_type="list" label="Loci and polymorphism" help="output from previous Stacks pipeline steps e.g. denovo_map, refmap or ustacks" /> + <param name="input_stacks" format="tabular,txt" type="data_collection" collection_type="list" label="Loci and polymorphism" help="output from previous Stacks pipeline steps e.g. 
denovo_map, refmap or ustacks"/> </xml> <xml name="input_cat_macro"> - <param name="input_cat" format="tabular,txt" type="data_collection" collection_type="list" label="Catalog of loci" help="output from a previous Stacks pipeline steps e.g. denovo_map, refmap or cstacks" /> + <param name="input_cat" format="tabular,txt" type="data_collection" collection_type="list" label="Catalog of loci" help="output from a previous Stacks pipeline steps e.g. denovo_map, refmap or cstacks"/> </xml> <xml name="input_matches_macro"> - <param name="input_matches" format="tabular,txt" type="data_collection" collection_type="list" label="Matches to the catalog" help="output from previous Stacks pipeline steps e.g. denovo_map, refmap or sstacks" /> + <param name="input_matches" format="tabular,txt" type="data_collection" collection_type="list" label="Matches to the catalog" help="output from previous Stacks pipeline steps e.g. denovo_map, refmap or sstacks"/> </xml> <xml name="bam_input_macro"> - <param name="input_bam" format="bam" type="data" multiple="true" optional="false" label="Aligned data" help="either the matches to the catalog (bam), i.e. tsv2bam, or reads aligned to a reference" /> + <param name="input_bam" format="bam" type="data" multiple="true" optional="false" label="Aligned data" help="either the matches to the catalog (bam), i.e. tsv2bam, or reads aligned to a reference"/> </xml> <xml name="input_aln_macro"> - <param name="input_aln" format="vcf,fasta.gz" type="data_collection" collection_type="list" label="Assembled contigs and variant sites" help="output from previous Stacks pipeline steps (e.g. gstacks, denovo_map, or refmap)" argument="-P" /> + <param name="input_aln" format="vcf,fasta.gz" type="data_collection" collection_type="list" label="Assembled contigs and variant sites" help="output from previous Stacks pipeline steps (e.g. 
gstacks, denovo_map, or refmap)" argument="-P"/> </xml> <!-- code for creating links to the data sets from previous pipeline steps @@ -195,15 +192,15 @@ <option value="paired">Paired-end files</option> </param> <when value="single"> - <param name="fqinputs" argument="-f" type="data" format="fastqsanger,fastqsanger.gz" multiple="@MULTIPLE@" label="Singles-end reads" /> + <param name="fqinputs" argument="-f" type="data" format="fastqsanger,fastqsanger.gz" multiple="@MULTIPLE@" label="Singles-end reads"/> <param name="barcode_encoding" type="select" label="Barcode location"> - <expand macro="barcode_encoding_single" type="Barcode" /> + <expand macro="barcode_encoding_single" type="Barcode"/> </param> </when> <when value="paired"> <param name="fqinputs" type="data_collection" collection_type="@LISTTYPE@" label="Paired-end reads" format="fastqsanger,fastqsanger.gz"/> <param name="barcode_encoding" type="select" label="Barcode location"> - <expand macro="barcode_encoding_pair" type="Barcode" /> + <expand macro="barcode_encoding_pair" type="Barcode"/> </param> </when> </conditional> @@ -212,7 +209,7 @@ <xml name="fastq_input_bc_file" token_multiple="false" token_listtype="paired"> <expand macro="fastq_input_bc" multiple="@MULTIPLE@" listtype="@LISTTYPE@"> - <param name="barcode" argument="-b" type="data" format="tabular,txt" label="Barcode file" /> + <param name="barcode" argument="-b" type="data" format="tabular,txt" label="Barcode file"/> </expand> </xml> @@ -392,27 +389,27 @@ <!-- TODO tags, snps, and alleles could go to sub collections; same for other tools --> <xml name="ustacks_outputs_macro" token_tooladd=""> <collection name="tabs" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Loci and polymorphism"> - <discover_datasets pattern="(?P<name>(?!catalog).+\.tags)\.tsv$" ext="tabular" directory="stacks_outputs" /> - <discover_datasets pattern="(?P<name>(?!catalog).+\.snps)\.tsv$" ext="tabular" directory="stacks_outputs" /> - <discover_datasets 
pattern="(?P<name>(?!catalog).+\.alleles)\.tsv$" ext="tabular" directory="stacks_outputs" /> + <discover_datasets pattern="(?P<name>(?!catalog).+\.tags)\.tsv$" ext="tabular" directory="stacks_outputs"/> + <discover_datasets pattern="(?P<name>(?!catalog).+\.snps)\.tsv$" ext="tabular" directory="stacks_outputs"/> + <discover_datasets pattern="(?P<name>(?!catalog).+\.alleles)\.tsv$" ext="tabular" directory="stacks_outputs"/> </collection> </xml> <!-- cstacks outputs collection containing catalog.tags.tsv, catalog.snps.tsv, catalog.alleles.tsv --> <xml name="cstacks_outputs_macro" token_tooladd=""> <collection name="catalog" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Catalog of loci"> - <discover_datasets pattern="(?P<name>catalog\.(tags|snps|alleles))\.tsv$" ext="tabular" directory="stacks_outputs" /> + <discover_datasets pattern="(?P<name>catalog\.(tags|snps|alleles))\.tsv$" ext="tabular" directory="stacks_outputs"/> </collection> </xml> <!-- sstacks outputs collection containing SAMPLE.matches.tsv --> <xml name="sstacks_outputs_macro" token_tooladd=""> <collection name="matches" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Matches to the catalog"> - <discover_datasets pattern="(?P<name>.+\.matches)\.tsv$" ext="tabular" directory="stacks_outputs" /> + <discover_datasets pattern="(?P<name>.+\.matches)\.tsv$" ext="tabular" directory="stacks_outputs"/> </collection> </xml> <!-- tsv2bam outputs collection containing SAMPLE.matches.bam --> <xml name="tsv2bam_outputs_macro" token_tooladd=""> <collection name="bams" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Matches to the catalog (bam)"> - <discover_datasets pattern="(?P<name>.+\.matches)\.bam$" ext="bam" directory="stacks_outputs" /> + <discover_datasets pattern="(?P<name>.+\.matches)\.bam$" ext="bam" directory="stacks_outputs"/> </collection> </xml> <!-- gstacks outputs collection containing catalog.calls.vcf and catalog.fa.gz @@ -423,7 +420,7 @@ 
<filter>add_log_distribs</filter> </data> <collection name="gstacks_alns_out" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Read alignments"> - <discover_datasets pattern="(?P<name>.*).alns.bam$" ext="bam" directory="stacks_outputs" /> + <discover_datasets pattern="(?P<name>.*).alns.bam$" ext="bam" directory="stacks_outputs"/> <filter>mode_cond['mode_select'] == 'denovo' and mode_cond['advanced_cond']['advanced_select'] == "yes" and mode_cond['advanced_cond']['write_alignments'] != "" and popmap!=None</filter> </collection> <data name="gstacks_aln_out" format="bam" label="${tool.name} @TOOLADD@ on ${on_string} Read alignment" from_work_dir="stacks_outputs/alignments.bam"> @@ -432,24 +429,27 @@ </xml> <xml name="gstacks_outputs_macro" token_tooladd=""> <collection name="gstacks_out" type="list" label="${tool.name} @TOOLADD@ on ${on_string} Assembled contigs and variant sites"> - <discover_datasets pattern="(?P<name>catalog\.calls\.vcf)$" ext="vcf" directory="stacks_outputs" /> - <discover_datasets pattern="(?P<name>catalog\.fa\.gz)$" ext="fasta.gz" directory="stacks_outputs" /> + <discover_datasets pattern="(?P<name>catalog\.calls\.vcf)$" ext="vcf" directory="stacks_outputs"/> + <discover_datasets pattern="(?P<name>catalog\.fa\.gz)$" ext="fasta.gz" directory="stacks_outputs"/> </collection> </xml> <!-- default output of populations --> <xml name="populations_output_light" token_tooladd=""> - <data format="tabular" name="out_haplotypes" label="${tool.name} @TOOLADD@ on ${on_string} Raw Genotypes/Haplotypes" from_work_dir="stacks_outputs/populations.haplotypes.tsv" /> - <data format="tabular" name="out_hapstats" label="${tool.name} @TOOLADD@ on ${on_string} Population-level haplotype summary statistics" from_work_dir="stacks_outputs/populations.hapstats.tsv" /> - <data format="txt" name="out_populations_log_distribs" label="${tool.name} @TOOLADD@ on ${on_string} Populations log distributions" from_work_dir="stacks_outputs/populations.log.distribs" /> - 
<data format="tabular" name="out_sumstats_sum" label="${tool.name} @TOOLADD@ on ${on_string} Summary of Population-level summary statistics" from_work_dir="stacks_outputs/populations.sumstats_summary.tsv" /> - <data format="tabular" name="out_sumstats" label="${tool.name} @TOOLADD@ on ${on_string} Population-level summary statistics" from_work_dir="stacks_outputs/populations.sumstats.tsv" /> - <data format="tabular" name="out_sql" label="${tool.name} @TOOLADD@ on ${on_string} Genotyping markers" from_work_dir="stacks_outputs/populations.markers.tsv" /> + <data format="tabular" name="out_haplotypes" label="${tool.name} @TOOLADD@ on ${on_string} Raw Genotypes/Haplotypes" from_work_dir="stacks_outputs/populations.haplotypes.tsv"/> + <data format="tabular" name="out_hapstats" label="${tool.name} @TOOLADD@ on ${on_string} Population-level haplotype summary statistics" from_work_dir="stacks_outputs/populations.hapstats.tsv"/> + <data format="txt" name="out_populations_log_distribs" label="${tool.name} @TOOLADD@ on ${on_string} Populations log distributions" from_work_dir="stacks_outputs/populations.log.distribs"/> + <data format="tabular" name="out_sumstats_sum" label="${tool.name} @TOOLADD@ on ${on_string} Summary of Population-level summary statistics" from_work_dir="stacks_outputs/populations.sumstats_summary.tsv"/> + <data format="tabular" name="out_sumstats" label="${tool.name} @TOOLADD@ on ${on_string} Population-level summary statistics" from_work_dir="stacks_outputs/populations.sumstats.tsv"/> </xml> <xml name="populations_output_full"> <expand macro="populations_output_light"/> + <data format="txt" name="out_sql" label="${tool.name} @TOOLADD@ on ${on_string} Genotyping markers" from_work_dir="stacks_outputs/populations.sql.tsv"> + <filter>genetic_map_options['map_type'] and genetic_map_options['map_format']</filter> + </data> + <!-- log_fst_comp populations.fst_summary.tsv populations.phistats_summary.tsv populations.phistats.tsv--> <data format="tabular" 
name="out_phistats" label="${tool.name} on ${on_string} Phi_st statistics" from_work_dir="stacks_outputs/populations.phistats.tsv"> <filter>advanced_options['log_fst_comp'] and fstats_conditional['fstats']=='yes'</filter> @@ -533,6 +533,26 @@ </data> </xml> + <!-- fastq output for kmer/clone-filter --> + <xml name="fastq_output_filter"> + <data name="clean" format_source="fqinputs" label="${tool.name} on ${on_string}"> + <filter>input_type['input_type_select'] == 'single'</filter> + <yield/> + </data> + <collection name="clean_pair" type="paired" format_source="fqinputs" label="${tool.name} on ${on_string}"> + <filter>input_type['input_type_select'] == 'paired'</filter> + <yield/> + </collection> + <data name="discarded" format_source="fqinputs" label="${tool.name} on ${on_string}: discarded reads"> + <filter>capture and input_type['input_type_select'] == 'single'</filter> + <yield/> + </data> + <collection name="discarded_pair" format_source="fqinputs" type="paired" label="${tool.name} on ${on_string}: discarded reads"> + <filter>capture and input_type['input_type_select'] == 'paired'</filter> + <yield/> + </collection> + </xml> + <xml name="snp_options_alpha"> <param argument="--alpha" type="select" label="Chi square significance level required to call a heterozygote or homozygote" > <option value="0.1">0.1</option> @@ -554,7 +574,7 @@ </when> <when value="bounded"> <param argument="--bound_low" type="float" value="0.0" min="0.0" max="1.0" label="Lower bound for epsilon, the error rate" help="between 0 and 1.0"/> - <param argument="--bound_high" type="float" value="1.0" min="0.0" max="1.0" label="Upper bound for epsilon, the error rate" help="between 0 and 1.0" /> + <param argument="--bound_high" type="float" value="1.0" min="0.0" max="1.0" label="Upper bound for epsilon, the error rate" help="between 0 and 1.0"/> <expand macro="snp_options_alpha"/> </when> <when value="fixed"> @@ -574,8 +594,8 @@ "Error: No value was provided for \-\-var-alpha and there is no 
default for this model)" --> <xml name="variant_calling_options_vg" token_varalpha_default=""> - <param argument="--var-alpha" name="var_alpha" type="float" value="@VARALPHA_DEFAULT@" min="0" label="Alpha threshold for discovering SNPs" help="Default is 0.01 if the marukilow model is used (which is the case in refmap and denovomap), otherwise no default value is available." /> - <param argument="--gt-alpha" name="gt_alpha" type="float" value="0.05" min="0" label="Alpha threshold for calling genotypes" /> + <param argument="--var-alpha" name="var_alpha" type="float" value="@VARALPHA_DEFAULT@" min="0" label="Alpha threshold for discovering SNPs" help="Default is 0.01 if the marukilow model is used (which is the case in refmap and denovomap), otherwise no default value is available."/> + <param argument="--gt-alpha" name="gt_alpha" type="float" value="0.05" min="0" label="Alpha threshold for calling genotypes"/> </xml> <xml name="barcode_encoding_single" token_type="">
--- a/macros_process.xml Mon Sep 30 14:20:19 2019 -0400 +++ b/macros_process.xml Wed Jul 15 17:29:50 2020 -0400 @@ -29,12 +29,12 @@ <xml name="discover_faqgz_output_macro" token_pattern="" token_dir=""> <expand macro="discover_faq_output_macro" pattern="@PATTERN@" dir="@DIR@"/> - <discover_datasets pattern="@PATTERN@\.fq\.gz$" ext="fastqsanger.gz" directory="@DIR@/" /> - <discover_datasets pattern="@PATTERN@\.fa\.gz$" ext="fasta.gz" directory="@DIR@/" /> + <discover_datasets pattern="@PATTERN@\.fq\.gz$" ext="fastqsanger.gz" directory="@DIR@/"/> + <discover_datasets pattern="@PATTERN@\.fa\.gz$" ext="fasta.gz" directory="@DIR@/"/> </xml> <xml name="discover_faq_output_macro" token_pattern="" token_dir=""> - <discover_datasets pattern="@PATTERN@\.fq$" ext="fastqsanger" directory="@DIR@/" /> - <discover_datasets pattern="@PATTERN@\.fa$" ext="fasta" directory="@DIR@/" /> + <discover_datasets pattern="@PATTERN@\.fq$" ext="fastqsanger" directory="@DIR@/"/> + <discover_datasets pattern="@PATTERN@\.fa$" ext="fasta" directory="@DIR@/"/> </xml> <xml name="process_outputs"> @@ -71,17 +71,17 @@ <option value="no" selected="true">No</option> </param> <when value="yes"> - <param name="sliding" type="float" value="0.15" min="0" max="1" argument="-w" label="Set the size of the sliding window as a fraction of the read length, between 0 and 1" /> - <param name="score" type="integer" value="10" min="0" max="40" argument="-s" label="Set the score limit. If the average score within the sliding window drops below this value, the read is discarded" /> - <param name="remove" type="boolean" checked="false" truevalue="-c" falsevalue="" argument="-c" label="Clean data, remove any read with an uncalled base" /> + <param name="sliding" type="float" value="0.15" min="0" max="1" argument="-w" label="Set the size of the sliding window as a fraction of the read length, between 0 and 1"/> + <param name="score" type="integer" value="10" min="0" max="40" argument="-s" label="Set the score limit. 
If the average score within the sliding window drops below this value, the read is discarded"/> + <param name="remove" type="boolean" checked="false" truevalue="-c" falsevalue="" argument="-c" label="Clean data, remove any read with an uncalled base"/> <param name="discard" type="boolean" checked="false" truevalue="-q" falsevalue="" argument="-q" label="Discard reads with low quality scores"/> - <param argument="--filter-illumina" name="filter_illumina" type="boolean" checked="false" truevalue="--filter-illumina" falsevalue="" label="Discard reads that have been marked by Illumina's chastity/purity filter as failing" /> + <param argument="--filter-illumina" name="filter_illumina" type="boolean" checked="false" truevalue="--filter-illumina" falsevalue="" label="Discard reads that have been marked by Illumina's chastity/purity filter as failing"/> </when> <when value="no"> <param argument="--len_limit" type="integer" value="" optional="true" label="Minimum sequence length" help="useful if your data has already been trimmed"/> </when> </conditional> - <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file" /> + <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file"/> </xml> <token name="@PROCESS_FILTER@"><![CDATA[ #if $filter_cond.filter_select == 'yes': @@ -104,30 +104,30 @@ ## fix the _R[12]_0 that was added for preparing the input #if $input_type.input_type_select == 'paired': - && find stacks_outputs/discarded/ -type f | while read file; do mv "\$file" "\$(echo \$file | sed 's/_R1_0/.1/; s/_R2_0/.2/;')"; done + && (find stacks_outputs/discarded/ -type f | while read file; do mv "\$file" "\$(echo \$file | sed 's/_R1_0/.1/; s/_R2_0/.2/;')"; done) #end if ## also remove the gz which is added by procrad (but its uncompressed) - && find stacks_outputs/discarded/ -type f -iname "*.gz.discards" | while read 
file; do mv "\$file" "\$(echo \$file | sed 's/.gz.discards$/.discards/;')"; done + && (find stacks_outputs/discarded/ -type f -iname "*.gz.discards" | while read file; do mv "\$file" "\$(echo \$file | sed 's/.gz.discards$/.discards/;')"; done) ## the discard files are named fastq even if the output is fasta #if str($outype).endswith("fasta"): - && find stacks_outputs/discarded/ -type f | while read file; do mv "\$file" "\$(echo \$file | sed 's/\.fastq.discards/.fa/;')"; done + && (find stacks_outputs/discarded/ -type f | while read file; do mv "\$file" "\$(echo \$file | sed 's/\.fastq.discards/.fa/;')"; done) #else - && find stacks_outputs/discarded/ -type f | while read file; do mv "\$file" "\$(echo \$file | sed 's/\.fastq.discards/.fq/;')"; done + && (find stacks_outputs/discarded/ -type f | while read file; do mv "\$file" "\$(echo \$file | sed 's/\.fastq.discards/.fq/;')"; done) #end if #end if ## prepare paired read output for processing in galaxy #if $input_type.input_type_select == 'paired': && mkdir stacks_outputs/remaining - && find stacks_outputs -iregex ".*\.rem\.[12]\.f[aq]\(\.gz\)?" | while read file; do mv "\$file" stacks_outputs/remaining/; done - && find stacks_outputs/ -iregex ".*.f[aq]\(\.gz\)?" | while read file; do mv "\$file" "\$(echo \$file | sed 's/\.1\./.forward./; s/\.2\./.reverse./')"; done + && (find stacks_outputs -iregex ".*\.rem\.[12]\.f[aq]\(\.gz\)?" | while read file; do mv "\$file" stacks_outputs/remaining/; done) + && (find stacks_outputs/ -iregex ".*.f[aq]\(\.gz\)?" 
| while read file; do mv "\$file" "\$(echo \$file | sed 's/\.1\./.forward./; s/\.2\./.reverse./')"; done) #end if ]]></token> <!-- adapter trimming options --> <xml name="process_adapter"> - <param argument="--adapter_1" type="text" value="" optional="true" label="Adaptor sequence that may occur on the first read" /> - <param argument="--adapter_2" type="text" value="" optional="true" label="Adaptor sequence that may occur on the paired-read" /> + <param argument="--adapter_1" type="text" value="" optional="true" label="Adaptor sequence that may occur on the first read"/> + <param argument="--adapter_2" type="text" value="" optional="true" label="Adaptor sequence that may occur on the paired-read"/> <param argument="--adapter_mm" type="integer" value="" optional="true" label="Number of mismatches allowed in the adapter sequence"/> </xml> <token name="@PROCESS_ADAPTER@"><![CDATA[ @@ -171,8 +171,8 @@ <!-- advanced options that are shared --> <xml name="common_advanced"> - <param name="truncate" type="integer" value="" optional="True" argument="-t" label="Truncate final read length to this value" /> - <param argument="--retain_header" type="boolean" checked="false" truevalue="--retain_header" falsevalue="" label="Retain unmodified FASTQ headers in the output" /> + <param name="truncate" type="integer" value="" optional="True" argument="-t" label="Truncate final read length to this value"/> + <param argument="--retain_header" type="boolean" checked="false" truevalue="--retain_header" falsevalue="" label="Retain unmodified FASTQ headers in the output"/> </xml> <token name="@COMMON_ADVANCED@"><![CDATA[ #if str($options_advanced.truncate) @@ -181,3 +181,4 @@ $options_advanced.retain_header ]]></token> </macros> +
--- a/stacks_tsv2bam.xml Mon Sep 30 14:20:19 2019 -0400 +++ b/stacks_tsv2bam.xml Wed Jul 15 17:29:50 2020 -0400 @@ -56,7 +56,7 @@ <expand macro="input_matches_macro"/> <!-- TODO add BAM? --> <expand macro="fastq_input" fastq_optional="true" se_option="reverse reads" help="Paired end data or reverse reads. If a paired list is provided only the reverse reads are used in tsv2bam. Leave selection empty if you analyse single end data."/> - <param name="popmap" type="data" format="tabular,txt" label="Population map" help="If set, matching will be done only for samples listed in this file" optional="true" argument="-M" /> + <param name="popmap" type="data" format="tabular,txt" label="Population map" help="If set, matching will be done only for samples listed in this file" optional="true" argument="-M"/> <expand macro="in_log"/> </inputs> @@ -67,31 +67,31 @@ <tests> <!-- test wo paired end data --> - <test> + <test expect_num_outputs="2"> <param name="input_cat"> <collection type="list"> - <element name="catalog.alleles" ftype="tabular" value="cstacks/catalog.alleles.tsv" /> - <element name="catalog.snps" ftype="tabular" value="cstacks/catalog.snps.tsv" /> - <element name="catalog.tags" ftype="tabular" value="cstacks/catalog.tags.tsv" /> + <element name="catalog.alleles" ftype="tabular" value="cstacks/catalog.alleles.tsv"/> + <element name="catalog.snps" ftype="tabular" value="cstacks/catalog.snps.tsv"/> + <element name="catalog.tags" ftype="tabular" value="cstacks/catalog.tags.tsv"/> </collection> </param> <param name="input_stacks"> <collection type="list"> - <element name="PopA_01.alleles" ftype="tabular" value="ustacks/PopA_01.alleles.tsv" /> - <element name="PopA_01.snps" ftype="tabular" value="ustacks/PopA_01.snps.tsv" /> - <element name="PopA_01.tags" ftype="tabular" value="ustacks/PopA_01.tags.tsv" /> - <element name="PopA_02.alleles" ftype="tabular" value="ustacks/PopA_02.alleles.tsv" /> - <element name="PopA_02.snps" ftype="tabular" 
value="ustacks/PopA_02.snps.tsv" /> - <element name="PopA_02.tags" ftype="tabular" value="ustacks/PopA_02.tags.tsv" /> + <element name="PopA_01.alleles" ftype="tabular" value="ustacks/PopA_01.alleles.tsv"/> + <element name="PopA_01.snps" ftype="tabular" value="ustacks/PopA_01.snps.tsv"/> + <element name="PopA_01.tags" ftype="tabular" value="ustacks/PopA_01.tags.tsv"/> + <element name="PopA_02.alleles" ftype="tabular" value="ustacks/PopA_02.alleles.tsv"/> + <element name="PopA_02.snps" ftype="tabular" value="ustacks/PopA_02.snps.tsv"/> + <element name="PopA_02.tags" ftype="tabular" value="ustacks/PopA_02.tags.tsv"/> </collection> </param> <param name="input_matches"> <collection type="list"> - <element name="PopA_01.matches" ftype="tabular" value="sstacks/PopA_01.matches.tsv" /> - <element name="PopA_02.matches" ftype="tabular" value="sstacks/PopA_02.matches.tsv" /> + <element name="PopA_01.matches" ftype="tabular" value="sstacks/PopA_01.matches.tsv"/> + <element name="PopA_02.matches" ftype="tabular" value="sstacks/PopA_02.matches.tsv"/> </collection> </param> - <param name="add_log" value="yes" /> + <param name="add_log" value="yes"/> <output name="output_log" ftype="txt" file="tsv2bam/tsv2bam.log" lines_diff="14"/> <output_collection name="bams" type="list" count="2"> <element name="PopA_01.matches" file="tsv2bam/PopA_01.matches.bam" ftype="bam"/> @@ -99,96 +99,96 @@ </output_collection> </test> <!-- test w popmap, w reverse reads as multiple selection --> - <test> + <test expect_num_outputs="2"> <param name="input_cat"> <collection type="list"> - <element name="catalog.alleles" ftype="tabular" value="cstacks/catalog.alleles.tsv" /> - <element name="catalog.snps" ftype="tabular" value="cstacks/catalog.snps.tsv" /> - <element name="catalog.tags" ftype="tabular" value="cstacks/catalog.tags.tsv" /> + <element name="catalog.alleles" ftype="tabular" value="cstacks/catalog.alleles.tsv"/> + <element name="catalog.snps" ftype="tabular" value="cstacks/catalog.snps.tsv"/> 
+ <element name="catalog.tags" ftype="tabular" value="cstacks/catalog.tags.tsv"/> </collection> </param> <param name="input_stacks"> <collection type="list"> - <element name="PopA_01.alleles" ftype="tabular" value="ustacks/PopA_01.alleles.tsv" /> - <element name="PopA_01.snps" ftype="tabular" value="ustacks/PopA_01.snps.tsv" /> - <element name="PopA_01.tags" ftype="tabular" value="ustacks/PopA_01.tags.tsv" /> - <element name="PopA_02.alleles" ftype="tabular" value="ustacks/PopA_02.alleles.tsv" /> - <element name="PopA_02.snps" ftype="tabular" value="ustacks/PopA_02.snps.tsv" /> - <element name="PopA_02.tags" ftype="tabular" value="ustacks/PopA_02.tags.tsv" /> + <element name="PopA_01.alleles" ftype="tabular" value="ustacks/PopA_01.alleles.tsv"/> + <element name="PopA_01.snps" ftype="tabular" value="ustacks/PopA_01.snps.tsv"/> + <element name="PopA_01.tags" ftype="tabular" value="ustacks/PopA_01.tags.tsv"/> + <element name="PopA_02.alleles" ftype="tabular" value="ustacks/PopA_02.alleles.tsv"/> + <element name="PopA_02.snps" ftype="tabular" value="ustacks/PopA_02.snps.tsv"/> + <element name="PopA_02.tags" ftype="tabular" value="ustacks/PopA_02.tags.tsv"/> </collection> </param> <param name="input_matches"> <collection type="list"> - <element name="PopA_01.matches" ftype="tabular" value="sstacks/PopA_01.matches.tsv" /> - <element name="PopA_02.matches" ftype="tabular" value="sstacks/PopA_02.matches.tsv" /> + <element name="PopA_01.matches" ftype="tabular" value="sstacks/PopA_01.matches.tsv"/> + <element name="PopA_02.matches" ftype="tabular" value="sstacks/PopA_02.matches.tsv"/> </collection> </param> - <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" /> + <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv"/> <param name="input_type|input_type_select" value="single"/> - <param name="input_type|fqinputs" value="demultiplexed/PopA_01.2.fq,demultiplexed/PopA_02.2.fq" ftype="fastqsanger" /> - <param name="add_log" 
value="yes" /> - <output name="output_log"><assert_contents><has_text text="done." /></assert_contents></output> + <param name="input_type|fqinputs" value="demultiplexed/PopA_01.2.fq,demultiplexed/PopA_02.2.fq" ftype="fastqsanger"/> + <param name="add_log" value="yes"/> + <output name="output_log"><assert_contents><has_text text="done."/></assert_contents></output> <output_collection name="bams" type="list" count="2"/> </test> <!-- test w reverse reads as list --> - <test> + <test expect_num_outputs="2"> <param name="input_cat"> <collection type="list"> - <element name="catalog.alleles" ftype="tabular" value="cstacks/catalog.alleles.tsv" /> - <element name="catalog.snps" ftype="tabular" value="cstacks/catalog.snps.tsv" /> - <element name="catalog.tags" ftype="tabular" value="cstacks/catalog.tags.tsv" /> + <element name="catalog.alleles" ftype="tabular" value="cstacks/catalog.alleles.tsv"/> + <element name="catalog.snps" ftype="tabular" value="cstacks/catalog.snps.tsv"/> + <element name="catalog.tags" ftype="tabular" value="cstacks/catalog.tags.tsv"/> </collection> </param> <param name="input_stacks"> <collection type="list"> - <element name="PopA_01.alleles" ftype="tabular" value="ustacks/PopA_01.alleles.tsv" /> - <element name="PopA_01.snps" ftype="tabular" value="ustacks/PopA_01.snps.tsv" /> - <element name="PopA_01.tags" ftype="tabular" value="ustacks/PopA_01.tags.tsv" /> - <element name="PopA_02.alleles" ftype="tabular" value="ustacks/PopA_02.alleles.tsv" /> - <element name="PopA_02.snps" ftype="tabular" value="ustacks/PopA_02.snps.tsv" /> - <element name="PopA_02.tags" ftype="tabular" value="ustacks/PopA_02.tags.tsv" /> + <element name="PopA_01.alleles" ftype="tabular" value="ustacks/PopA_01.alleles.tsv"/> + <element name="PopA_01.snps" ftype="tabular" value="ustacks/PopA_01.snps.tsv"/> + <element name="PopA_01.tags" ftype="tabular" value="ustacks/PopA_01.tags.tsv"/> + <element name="PopA_02.alleles" ftype="tabular" value="ustacks/PopA_02.alleles.tsv"/> + 
<element name="PopA_02.snps" ftype="tabular" value="ustacks/PopA_02.snps.tsv"/> + <element name="PopA_02.tags" ftype="tabular" value="ustacks/PopA_02.tags.tsv"/> </collection> </param> <param name="input_matches"> <collection type="list"> - <element name="PopA_01.matches" ftype="tabular" value="sstacks/PopA_01.matches.tsv" /> - <element name="PopA_02.matches" ftype="tabular" value="sstacks/PopA_02.matches.tsv" /> + <element name="PopA_01.matches" ftype="tabular" value="sstacks/PopA_01.matches.tsv"/> + <element name="PopA_02.matches" ftype="tabular" value="sstacks/PopA_02.matches.tsv"/> </collection> </param> <param name="input_type|input_type_select" value="paired"/> <param name="input_type|fqinputs"> <collection type="list"> - <element name="PopA_01" value="demultiplexed/PopA_01.2.fq" ftype="fastqsanger" /> + <element name="PopA_01" value="demultiplexed/PopA_01.2.fq" ftype="fastqsanger"/> <element name="PopA_02" value="demultiplexed/PopA_02.2.fq" ftype="fastqsanger"/> </collection> </param> - <param name="add_log" value="yes" /> - <output name="output_log"><assert_contents><has_text text="done." 
/></assert_contents></output> + <param name="add_log" value="yes"/> + <output name="output_log"><assert_contents><has_text text="done."/></assert_contents></output> <output_collection name="bams" type="list" count="2"/> </test> <!-- test w paired reads as paired dataset list --> - <test> + <test expect_num_outputs="2"> <param name="input_cat"> <collection type="list"> - <element name="catalog.alleles" ftype="tabular" value="cstacks/catalog.alleles.tsv" /> - <element name="catalog.snps" ftype="tabular" value="cstacks/catalog.snps.tsv" /> - <element name="catalog.tags" ftype="tabular" value="cstacks/catalog.tags.tsv" /> + <element name="catalog.alleles" ftype="tabular" value="cstacks/catalog.alleles.tsv"/> + <element name="catalog.snps" ftype="tabular" value="cstacks/catalog.snps.tsv"/> + <element name="catalog.tags" ftype="tabular" value="cstacks/catalog.tags.tsv"/> </collection> </param> <param name="input_stacks"> <collection type="list"> - <element name="PopA_01.alleles" ftype="tabular" value="ustacks/PopA_01.alleles.tsv" /> - <element name="PopA_01.snps" ftype="tabular" value="ustacks/PopA_01.snps.tsv" /> - <element name="PopA_01.tags" ftype="tabular" value="ustacks/PopA_01.tags.tsv" /> - <element name="PopA_02.alleles" ftype="tabular" value="ustacks/PopA_02.alleles.tsv" /> - <element name="PopA_02.snps" ftype="tabular" value="ustacks/PopA_02.snps.tsv" /> - <element name="PopA_02.tags" ftype="tabular" value="ustacks/PopA_02.tags.tsv" /> + <element name="PopA_01.alleles" ftype="tabular" value="ustacks/PopA_01.alleles.tsv"/> + <element name="PopA_01.snps" ftype="tabular" value="ustacks/PopA_01.snps.tsv"/> + <element name="PopA_01.tags" ftype="tabular" value="ustacks/PopA_01.tags.tsv"/> + <element name="PopA_02.alleles" ftype="tabular" value="ustacks/PopA_02.alleles.tsv"/> + <element name="PopA_02.snps" ftype="tabular" value="ustacks/PopA_02.snps.tsv"/> + <element name="PopA_02.tags" ftype="tabular" value="ustacks/PopA_02.tags.tsv"/> </collection> </param> <param 
name="input_matches"> <collection type="list"> - <element name="PopA_01.matches" ftype="tabular" value="sstacks/PopA_01.matches.tsv" /> - <element name="PopA_02.matches" ftype="tabular" value="sstacks/PopA_02.matches.tsv" /> + <element name="PopA_01.matches" ftype="tabular" value="sstacks/PopA_01.matches.tsv"/> + <element name="PopA_02.matches" ftype="tabular" value="sstacks/PopA_02.matches.tsv"/> </collection> </param> <param name="input_type|input_type_select" value="paired"/> @@ -196,20 +196,20 @@ <collection type="list:paired"> <element name="PopA_01"> <collection type="paired"> - <element name="forward" value="demultiplexed/PopA_01.1.fq" ftype="fastqsanger" /> + <element name="forward" value="demultiplexed/PopA_01.1.fq" ftype="fastqsanger"/> <element name="reverse" value="demultiplexed/PopA_01.2.fq" ftype="fastqsanger"/> </collection> </element> <element name="PopA_02"> <collection type="paired"> - <element name="forward" value="demultiplexed/PopA_02.1.fq" ftype="fastqsanger" /> + <element name="forward" value="demultiplexed/PopA_02.1.fq" ftype="fastqsanger"/> <element name="reverse" value="demultiplexed/PopA_02.2.fq" ftype="fastqsanger"/> </collection> </element> </collection> </param> - <param name="add_log" value="yes" /> - <output name="output_log"><assert_contents><has_text text="done." /></assert_contents></output> + <param name="add_log" value="yes"/> + <output name="output_log"><assert_contents><has_text text="done."/></assert_contents></output> <output_collection name="bams" type="list" count="2"/> </test> </tests> @@ -251,5 +251,5 @@ @STACKS_INFOS@ ]]> </help> - <expand macro="citation" /> + <expand macro="citation"/> </tool>
--- a/test-data/cstacks/catalog.alleles.tsv Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/cstacks/catalog.alleles.tsv Wed Jul 15 17:29:50 2020 -0400 @@ -1,3 +1,4 @@ -# cstacks version 2.4; catalog generated on 2019-06-18 10:34:45 +# cstacks version 2.52; catalog generated on 2020-03-16 15:39:40 0 1 AC 0 0 0 1 CA 0 0 +# cstacks completed on 2020-03-16 15:39:40
--- a/test-data/cstacks/catalog.snps.tsv Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/cstacks/catalog.snps.tsv Wed Jul 15 17:29:50 2020 -0400 @@ -1,3 +1,4 @@ -# cstacks version 2.4; catalog generated on 2019-06-18 10:34:45 +# cstacks version 2.52; catalog generated on 2020-03-16 15:39:40 0 1 33 E 0 A C - - 0 1 88 E 0 A C - - +# cstacks completed on 2020-03-16 15:39:40
--- a/test-data/cstacks/catalog.tags.tsv Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/cstacks/catalog.tags.tsv Wed Jul 15 17:29:50 2020 -0400 @@ -1,4 +1,5 @@ -# cstacks version 2.4; catalog generated on 2019-06-18 10:34:45 +# cstacks version 2.52; catalog generated on 2020-03-16 15:39:40 0 1 consensus 0 1_1,2_1 AATTCGTTTGCTGCTTCAGGAATCTCTCGTATAATCTGAGTATGTGCGTACGTACGCTATTTAGATGGATAACCGACGCTGCCAGACGAGAGAC 0 0 0 0 2 consensus 0 1_2,2_2 AATTCGGCTTGCAACGCAAGTGACGATTCCCACGGACATAACTGATCTAAGTAACTTCCAAATCTGGGAATGGGATTTCATAATTAAGGACTAT 0 0 0 0 3 consensus 0 1_3,2_3 AATTCTCTACACCACAGCATCAATTCTAAAAATGACTACCAGAGAGACAACTCCGCAGTTAAACACTCTGACTGCCACGCCAGCTACCTCTAGA 0 0 0 +# cstacks completed on 2020-03-16 15:39:40
--- a/test-data/denovo_map/denovo_map.log Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/denovo_map/denovo_map.log Wed Jul 15 17:29:50 2020 -0400 @@ -1,5 +1,5 @@ -denovo_map.pl version 2.4 started at 2019-06-18 10:34:45 -/home/berntm/miniconda3/envs/mulled-v1-2b57e7596f85ebb3b321e6c9681e8fd9250523a80d97945c46ac7743359454e7/bin/denovo_map.pl --samples demultiplexed --popmap denovo_map/popmap_cstacks.tsv -o stacks_outputs --paired +denovo_map.pl version 2.52 started at 2020-03-16 15:39:40 +/home/berntm/miniconda3/envs/__stacks@2.52/bin/denovo_map.pl --samples demultiplexed --popmap denovo_map/popmap_cstacks.tsv -o stacks_outputs --paired ustacks ========== @@ -110,7 +110,7 @@ cstacks ========== -cstacks -P stacks_outputs -M denovo_map/popmap_cstacks.tsv +cstacks -M denovo_map/popmap_cstacks.tsv -P stacks_outputs cstacks parameters selected: Loci matched based on sequence identity. @@ -279,8 +279,7 @@ Working on 1 group(s) of populations: defaultgrp: 1 -Genotyping markers will be written to 'stacks_outputs/populations.markers.tsv' -Raw Genotypes/Haplotypes will be written to 'stacks_outputs/populations.haplotypes.tsv' +Raw haplotypes will be written to 'stacks_outputs/populations.haplotypes.tsv' Population-level summary statistics will be written to 'stacks_outputs/populations.sumstats.tsv' Population-level haplotype summary statistics will be written to 'stacks_outputs/populations.hapstats.tsv' @@ -306,4 +305,4 @@ Populations is done. denovo_map.pl is done. -denovo_map.pl completed at 2019-06-18 10:34:45 +denovo_map.pl completed at 2020-03-16 15:39:40
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/denovo_map/popmap_cstacks_genotypes.tsv Wed Jul 15 17:29:50 2020 -0400 @@ -0,0 +1,2 @@ +PopA_01 parent +PopA_02 progeny
--- a/test-data/gentest.sh Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/gentest.sh Wed Jul 15 17:29:50 2020 -0400 @@ -1,7 +1,11 @@ #!/usr/bin/env bash +eval "$(conda shell.bash hook)" +conda activate __stacks@2.52 + + + mkdir stacks_outputs - denovo_map.pl --samples demultiplexed --popmap denovo_map/popmap_cstacks.tsv -o stacks_outputs --paired && gunzip -c stacks_outputs/catalog.calls > stacks_outputs/catalog.calls.vcf rm stacks_outputs/catalog.calls
--- a/test-data/gstacks/catalog.calls.vcf Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/gstacks/catalog.calls.vcf Wed Jul 15 17:29:50 2020 -0400 @@ -1,6 +1,6 @@ ##fileformat=VCFv4.2 -##fileDate=20190618 -##source="Stacks v2.4" +##fileDate=20200316 +##source="Stacks v2.52" ##INFO=<ID=AD,Number=R,Type=Integer,Description="Total Depth for Each Allele"> ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency"> ##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
--- a/test-data/gstacks/gstacks.log Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/gstacks/gstacks.log Wed Jul 15 17:29:50 2020 -0400 @@ -1,4 +1,4 @@ -gstacks v2.4, executed 2019-06-18 10:34:45 (zlib-1.2.11) +gstacks v2.52, executed 2020-03-16 15:39:40 (zlib-1.2.11) gstacks -P stacks_outputs -M denovo_map/popmap_cstacks.tsv Locus/sample distributions will be written to 'stacks_outputs/gstacks.log.distribs'.
--- a/test-data/gstacks/gstacks.log.distribs Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/gstacks/gstacks.log.distribs Wed Jul 15 17:29:50 2020 -0400 @@ -19,25 +19,25 @@ Num. threads: 1 Parallel time: 0.0 Average thread time spent: - 0.0 reading (3.1%) - 0.0 processing (95.2%) - 0.0 pre-alignments block (72.2%) - 0.0 reformatting fw-reads (0.1%) - 0.0 assembling (22.2%) - 0.0 initializing alignments (5.4%) - 0.0 aligning (42.9%) - 0.0 merging read pairs (1.5%) - 0.0 post-alignments block (21.2%) + 0.0 reading (3.0%) + 0.0 processing (95.7%) + 0.0 pre-alignments block (74.4%) + 0.0 reformatting fw-reads (0.2%) + 0.0 assembling (27.3%) + 0.0 initializing alignments (8.7%) + 0.0 aligning (36.6%) + 0.0 merging read pairs (1.6%) + 0.0 post-alignments block (19.8%) 0.0 filtering reads (0.0%) - 0.0 counting nucleotides (3.5%) - 0.0 genotyping (1.9%) - 0.0 haplotyping (1.0%) + 0.0 counting nucleotides (3.2%) + 0.0 genotyping (1.5%) + 0.0 haplotyping (0.9%) 0.0 computing consensus (0.1%) 0.0 building_fa (0.1%) - 0.0 building_vcf (14.6%) - 0.0 writing_fa (0.1%) - 0.0 writing_vcf (1.3%) - 0.0 clocking (0.2%) -Total time spent writing vcf: 0.0 (1.3%) + 0.0 building_vcf (14.0%) + 0.0 writing_fa (0.0%) + 0.0 writing_vcf (0.9%) + 0.0 clocking (0.1%) +Total time spent writing vcf: 0.0 (0.8%) VCFwrite block size: mean=1.0(n=3); max=1 END clockings
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/kmerfilter/Removed1_0001.1.1.fq.single.gz Wed Jul 15 17:29:50 2020 -0400 @@ -0,0 +1,20 @@ +@K00392:16:HJ2G2BBXX:7:1102:10825:462401 1:N:0:ATCACG +GGACATTGGCTGCAGTACTCTGACCCTGGCCACCTCAACCTGTCTCTCTCGCACCGGAAACCTCCGATCCGCAGAAACATGAGCAACCCTACAGTTGACACACACAACAGTTTTCGACCGAAACTACACATTCCTCTGTCCCATTTCCTC ++ +--A-FF-F-<A<-FAF7AAF--F--FF-A<-JJFJFFFF-<<-7--<FFF--77<7-77-<---7-7<-7-AA--<<-7<----<-A-FJFF-7A-F--7FF-<7A-A-<7F7---7-7-77-<7<<A--<J7--<-)-7)-7---7F<- +@K00392:16:HJ2G2BBXX:7:1102:10825:46240 1:N:0:ATCACG +GGACATTGGCTGCAGTACTCTGACCCTGGCCACCTCAACCTGTCTCTCTCGCACCGGAAACCTCCGATCCGCAGAAACATGAGCAACCCTACAGTTGACACACACAACAGTTTTCGACCGAAACTACACATTCCTCTGTCCCATTTCCTC ++ +--A-FF-F-<A<-FAF7AAF--F--FF-A<-JJFJFFFF-<<-7--<FFF--77<7-77-<---7-7<-7-AA--<<-7<----<-A-FJFF-7A-F--7FF-<7A-A-<7F7---7-7-77-<7<<A--<J7--<-)-7)-7---7F<- +@K00392:16:HJ2G2BBXX:8:1201:4929:9367 1:N:0:ATCACG +GGATTGAGGATGCAGCAACGTTCTAACATCTAGTGGAAAGCCTTCCCAGAAGAGTGGAGGCTGTTATAGCAGCAAAGGGGGGACCAACTCCATATTATTGCCCATGACTTTTGAATGAGATGTTTGACGAGCAGGGGTCGACATACTTTT ++ +AAAFFJJJJJJJFJFJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJFAJAJFFJFJJFJFJ7FJAJJJAFFJJJJJJAJAFFFFA<JJJAJJFAFJA7<AFAFFA7F7F<-77<F<--7-7-))---)AFF-<A-7FA +@K00392:16:HJ2G2BBXX:7:1104:19268:28727 1:N:0:ATCACG +ATGCCGCGGCCCTTGCAGAGCAAGGGGAACCACTACTTCAACTCAAGGTCTCAAAGCGAGTGACGTAACTGATTGAAACGCTATTAGCGCGCACCACCGCTAACTAGCTATCCATTTCACATCCGTTACATATGTATGTATGTACACACA ++ +AAFFFJJJJJJFJJJJJJJJJJJJJJJJJJJJJJJJJJJJFJJJJJJJJJJJJJJJJJJJFJJJFJJJJJJJJJJJJJFFJJJJJJJJJJJJJJJJJJFJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJFJJJJJFAFJJJJJJJJ +@K00392:16:HJ2G2BBXX:8:2125:3011:15574 1:N:0:ATCACG +TCAGAAGAAAAACCCACAAAATAGAACCGGAGTCCTATTCCATTATTCCTAGCTGCGGTATTCAGGCGACCGGGCCTGCTTTGAACACTCTAATTTTTTCAAAGTAAACGCTTCGGACCCCGCGGGCCACTCAGTTAAGAGCATCGAGGG ++ +AAFFFJJJJJJJJFJFJFJFJJJJFJJJJJ<J-7FJFAJJJJJJJJAJJFJAJJJJJJJJFJJFAAFAJJJJJFJJJJJFAJJJJJFFJJ<J-FFAJJJF-J<7-7<<A<---AF<7JJJJ-A)77-7--)F-7----<--7A7A<--7-
--- a/test-data/kmerfilter/kfreq.tsv Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/kmerfilter/kfreq.tsv Wed Jul 15 17:29:50 2020 -0400 @@ -0,0 +1,3 @@ +# KmerFrequency Count +1 408 +2 136
--- a/test-data/kmerfilter/kfreqdist.tsv Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/kmerfilter/kfreqdist.tsv Wed Jul 15 17:29:50 2020 -0400 @@ -1,3 +1,545 @@ -# KmerFrequency Count -1 408 -2 136 +# Kmer Count +TAAGAGCATCGAGGG 1 +CTCAGTTAAGAGCAT 1 +ACTCAGTTAAGAGCA 1 +CCACTCAGTTAAGAG 1 +GCCACTCAGTTAAGA 1 +GGCCACTCAGTTAAG 1 +GGGCCACTCAGTTAA 1 +CCGCGGGCCACTCAG 1 +CCCGCGGGCCACTCA 1 +CCCCGCGGGCCACTC 1 +TTCGGACCCCGCGGG 1 +GCTTCGGACCCCGCG 1 +CGCTTCGGACCCCGC 1 +AACGCTTCGGACCCC 1 +GTAAACGCTTCGGAC 1 +AGTAAACGCTTCGGA 1 +AAGTAAACGCTTCGG 1 +AAAGTAAACGCTTCG 1 +CAAAGTAAACGCTTC 1 +TCAAAGTAAACGCTT 1 +TTTTCAAAGTAAACG 1 +TTTTTTCAAAGTAAA 1 +ATTTTTTCAAAGTAA 1 +CTAATTTTTTCAAAG 1 +CACTCTAATTTTTTC 1 +AACACTCTAATTTTT 1 +GAACACTCTAATTTT 1 +CTTTGAACACTCTAA 1 +GCTTTGAACACTCTA 1 +TGCTTTGAACACTCT 1 +CTGCTTTGAACACTC 1 +TAAACGCTTCGGACC 1 +CCTGCTTTGAACACT 1 +GCCTGCTTTGAACAC 1 +GGGCCTGCTTTGAAC 1 +ACCGGGCCTGCTTTG 1 +GACCGGGCCTGCTTT 1 +ATTCAGGCGACCGGG 1 +TATTCAGGCGACCGG 1 +GTATTCAGGCGACCG 1 +CGGTATTCAGGCGAC 1 +ACACTCTAATTTTTT 1 +GCGGTATTCAGGCGA 1 +TGCGGTATTCAGGCG 1 +CTGCGGTATTCAGGC 1 +TAGCTGCGGTATTCA 1 +CTAGCTGCGGTATTC 1 +CCTAGCTGCGGTATT 1 +TCCTAGCTGCGGTAT 1 +TTCCTAGCTGCGGTA 1 +CATTATTCCTAGCTG 1 +CCATTATTCCTAGCT 1 +ATTCCATTATTCCTA 1 +TCCTATTCCATTATT 1 +GTCCTATTCCATTAT 1 +GAGTCCTATTCCATT 1 +GGAGTCCTATTCCAT 1 +AACCGGAGTCCTATT 1 +GGACCCCGCGGGCCA 1 +GAACCGGAGTCCTAT 1 +AATAGAACCGGAGTC 1 +AAATAGAACCGGAGT 1 +AAAATAGAACCGGAG 1 +ACAAAATAGAACCGG 1 +CCACAAAATAGAACC 1 +ACCCACAAAATAGAA 1 +AACCCACAAAATAGA 1 +AAACCCACAAAATAG 1 +AAAACCCACAAAATA 1 +AAAAACCCACAAAAT 1 +GAAAAACCCACAAAA 1 +TTAAGAGCATCGAGG 1 +AGAAAAACCCACAAA 1 +AAGAAAAACCCACAA 1 +TGTTATAGCAGCAAA 1 +GTACTCTGACCCTGG 2 +GGTCGACATACTTTT 1 +AGCAAAGGGGGGACC 1 +TAGAACCGGAGTCCT 1 +AGGCTGTTATAGCAG 1 +AGAACCGGAGTCCTA 1 +GAGGCTGTTATAGCA 1 +GGAGGCTGTTATAGC 1 +AGAAGAGTGGAGGCT 1 +CCAGAAGAGTGGAGG 1 +TTCCTCTGTCCCATT 2 +CCTTCCCAGAAGAGT 1 +GCAGCAAAGGGGGGA 1 +TTTTCGACCGAAACT 2 +TCTAACATCTAGTGG 1 +AGTGGAGGCTGTTAT 1 +AGTTGACACACACAA 2 +TCTAGTGGAAAGCCT 1 +CTGATTGAAACGCTA 1 
+CAGTTAAGAGCATCG 1 +ACGCTATTAGCGCGC 1 +TTGACACACACAACA 2 +GAACCACTACTTCAA 1 +CACTCAGTTAAGAGC 1 +CTAACATCTAGTGGA 1 +GCTGCGGTATTCAGG 1 +GAGATGTTTGACGAG 1 +CGGAGTCCTATTCCA 1 +ACCGAAACTACACAT 2 +GAAGAGTGGAGGCTG 1 +AAGCCTTCCCAGAAG 1 +AACCTCCGATCCGCA 2 +TGAGCAACCCTACAG 2 +ACGTTCTAACATCTA 1 +AACGTTCTAACATCT 1 +TATTCCTAGCTGCGG 1 +ATTATTCCTAGCTGC 1 +ATGTTTGACGAGCAG 1 +TCTCTCGCACCGGAA 2 +ATGAGATGTTTGACG 1 +GGCCTGCTTTGAACA 1 +AGCCTTCCCAGAAGA 1 +TTCCATTATTCCTAG 1 +CAACGTTCTAACATC 1 +ACTGATTGAAACGCT 1 +CTAGTGGAAAGCCTT 1 +TGAGATGTTTGACGA 1 +TCTGTCCCATTTCCT 2 +CCTCTGTCCCATTTC 2 +CATTCCTCTGTCCCA 2 +TTGAACACTCTAATT 1 +GAATGAGATGTTTGA 1 +ACACATTCCTCTGTC 2 +ATTCCTAGCTGCGGT 1 +CTCCATATTATTGCC 1 +GACCGAAACTACACA 2 +CGACCGAAACTACAC 2 +ACTACACATTCCTCT 2 +CTGCAGTACTCTGAC 2 +TTCTAACATCTAGTG 1 +CCGAAACTACACATT 2 +ATCCGTTACATATGT 1 +TACATATGTATGTAT 1 +CAGTACTCTGACCCT 2 +GTTCTAACATCTAGT 1 +ACAGTTTTCGACCGA 2 +TTGACGAGCAGGGGT 1 +CTCTGTCCCATTTCC 2 +CACAAAATAGAACCG 1 +TACTCTGACCCTGGC 2 +ATAGCAGCAAAGGGG 1 +TGAATGAGATGTTTG 1 +AACAGTTTTCGACCG 2 +TCGGACCCCGCGGGC 1 +ACCCTACAGTTGACA 2 +TCTCTCTCGCACCGG 2 +GTATGTATGTACACA 1 +ACACAACAGTTTTCG 2 +ACACACAACAGTTTT 2 +TCCCAGAAGAGTGGA 1 +ACCACCGCTAACTAG 1 +GAGTGGAGGCTGTTA 1 +ACACACACAACAGTT 2 +AACGCTATTAGCGCG 1 +GTAACTGATTGAAAC 1 +GTCTCTCTCGCACCG 2 +AACATCTAGTGGAAA 1 +CTTCCCAGAAGAGTG 1 +GTGGAAAGCCTTCCC 1 +CTACTTCAACTCAAG 1 +CGCGGGCCACTCAGT 1 +AACCTGTCTCTCTCG 2 +CTCGCACCGGAAACC 2 +CGCACCGGAAACCTC 2 +TATGTATGTACACAC 1 +ATCCATTTCACATCC 1 +ACTCTAATTTTTTCA 1 +GCTGCAGTACTCTGA 2 +ACATGAGCAACCCTA 2 +GACCCCGCGGGCCAC 1 +AAGAGTGGAGGCTGT 1 +CCGGAAACCTCCGAT 2 +GCAGCAACGTTCTAA 1 +GAAACTACACATTCC 2 +CTGACCCTGGCCACC 2 +ACCTCAACCTGTCTC 2 +AAACTACACATTCCT 2 +AGAGTGGAGGCTGTT 1 +GCGACCGGGCCTGCT 1 +CATCTAGTGGAAAGC 1 +CAGAAACATGAGCAA 2 +ACTTCAACTCAAGGT 1 +CGGGCCACTCAGTTA 1 +AGAAACATGAGCAAC 2 +CTATTCCATTATTCC 1 +TTTTGAATGAGATGT 1 +GGCGACCGGGCCTGC 1 +ACATCTAGTGGAAAG 1 +TAGCAGCAAAGGGGG 1 +GGACCAACTCCATAT 1 +GACATTGGCTGCAGT 2 +CCTATTCCATTATTC 1 +CAGTTTTCGACCGAA 2 +CAACCCTACAGTTGA 2 
+TTTGACGAGCAGGGG 1 +GACGTAACTGATTGA 1 +CAGGCGACCGGGCCT 1 +ACTCTGACCCTGGCC 2 +CCGGGCCTGCTTTGA 1 +AGTGGAAAGCCTTCC 1 +TTGAAACGCTATTAG 1 +CGTTCTAACATCTAG 1 +TTGAGGATGCAGCAA 1 +CTCAAAGCGAGTGAC 1 +GGACATTGGCTGCAG 2 +ACCTGTCTCTCTCGC 2 +TTTGAACACTCTAAT 1 +TGACACACACAACAG 2 +CACATTCCTCTGTCC 2 +GTTTTCGACCGAAAC 2 +GCAGAAACATGAGCA 2 +CTGGCCACCTCAACC 2 +GATCCGCAGAAACAT 2 +TCCGATCCGCAGAAA 2 +AACCACTACTTCAAC 1 +GCTGTTATAGCAGCA 1 +AACTAGCTATCCATT 1 +CATGAGCAACCCTAC 2 +TCGACCGAAACTACA 2 +CCTACAGTTGACACA 2 +AAGGGGAACCACTAC 1 +AAAGCCTTCCCAGAA 1 +CACAACAGTTTTCGA 2 +GGAACCACTACTTCA 1 +CCTCAACCTGTCTCT 2 +AAACCTCCGATCCGC 2 +CTTCGGACCCCGCGG 1 +GGGGAACCACTACTT 1 +GGCTGCAGTACTCTG 2 +TTGGCTGCAGTACTC 2 +GAGCAAGGGGAACCA 1 +AGCAACGTTCTAACA 1 +TGCAGTACTCTGACC 2 +TGGCCACCTCAACCT 2 +GACTTTTGAATGAGA 1 +CAGAAGAAAAACCCA 1 +CACCTCAACCTGTCT 2 +CCGATCCGCAGAAAC 2 +GTTATAGCAGCAAAG 1 +TGGCTGCAGTACTCT 2 +CTGTCCCATTTCCTC 2 +AGATGTTTGACGAGC 1 +TACTTCAACTCAAGG 1 +AGCAGGGGTCGACAT 1 +CTTTTGAATGAGATG 1 +TCAACTCAAGGTCTC 1 +ACCGCTAACTAGCTA 1 +GCCACCTCAACCTGT 2 +TGACTTTTGAATGAG 1 +ACCACTACTTCAACT 1 +CCACCTCAACCTGTC 2 +CCATATTATTGCCCA 1 +CATTGGCTGCAGTAC 2 +CTCTAATTTTTTCAA 1 +ATCTAGTGGAAAGCC 1 +AGTACTCTGACCCTG 2 +TTCGACCGAAACTAC 2 +CGGAAACCTCCGATC 2 +AGCTATCCATTTCAC 1 +TGACCCTGGCCACCT 2 +CCGCGGCCCTTGCAG 1 +CTTCAACTCAAGGTC 1 +TTAGCGCGCACCACC 1 +GTGGAGGCTGTTATA 1 +CACCGGAAACCTCCG 2 +GATTGAGGATGCAGC 1 +AAACGCTTCGGACCC 1 +ACAGTTGACACACAC 2 +CGACCGGGCCTGCTT 1 +AGTCCTATTCCATTA 1 +AAGGTCTCAAAGCGA 1 +ACCCTGGCCACCTCA 2 +ACAACAGTTTTCGAC 2 +CTCTCTCGCACCGGA 2 +TGTCTCTCTCGCACC 2 +CAAAGCGAGTGACGT 1 +TTTTTCAAAGTAAAC 1 +TCTCGCACCGGAAAC 2 +GGCCACCTCAACCTG 2 +ATGCCGCGGCCCTTG 1 +TGTTTGACGAGCAGG 1 +CCCACAAAATAGAAC 1 +GAGGATGCAGCAACG 1 +AACCCTACAGTTGAC 2 +CCTGTCTCTCTCGCA 2 +TGCAGCAACGTTCTA 1 +AGCAAGGGGAACCAC 1 +TCTGACCCTGGCCAC 2 +GGCCCTTGCAGAGCA 1 +TAATTTTTTCAAAGT 1 +CTGTCTCTCTCGCAC 2 +ATAGAACCGGAGTCC 1 +GACCCTGGCCACCTC 2 +TGATTGAAACGCTAT 1 +GCGGGCCACTCAGTT 1 +TCAGAAGAAAAACCC 1 +TGGAGGCTGTTATAG 1 +AGCGCGCACCACCGC 1 +TCGCACCGGAAACCT 2 
+TCAGTTAAGAGCATC 1 +ATTCCTCTGTCCCAT 2 +GACGAGCAGGGGTCG 1 +CCACTACTTCAACTC 1 +TCAACCTGTCTCTCT 2 +ACCGGAAACCTCCGA 2 +CCCTGGCCACCTCAA 2 +GAAACATGAGCAACC 2 +GCTAACTAGCTATCC 1 +AGTTTTCGACCGAAA 2 +CACACACAACAGTTT 2 +ATTTCACATCCGTTA 1 +TATTCCATTATTCCT 1 +CGATCCGCAGAAACA 2 +GGCTGTTATAGCAGC 1 +GGAAACCTCCGATCC 2 +GCAACGTTCTAACAT 1 +AGCAACCCTACAGTT 2 +TTATTCCTAGCTGCG 1 +CTCTGACCCTGGCCA 2 +CAGCAAAGGGGGGAC 1 +CAGAAGAGTGGAGGC 1 +TACAGTTGACACACA 2 +TAGCGCGCACCACCG 1 +AAACATGAGCAACCC 2 +AAACGCTATTAGCGC 1 +GAAACCTCCGATCCG 2 +GCCCATGACTTTTGA 1 +TCCATATTATTGCCC 1 +ACATTGGCTGCAGTA 2 +CAGTTGACACACACA 2 +TCAAGGTCTCAAAGC 1 +ATCCGCAGAAACATG 2 +CACACAACAGTTTTC 2 +GGATGCAGCAACGTT 1 +TTCAAAGTAAACGCT 1 +AACATGAGCAACCCT 2 +GAGTGACGTAACTGA 1 +CTGTTATAGCAGCAA 1 +TAACATCTAGTGGAA 1 +CTCTCGCACCGGAAA 2 +CGCAGAAACATGAGC 2 +GGATTGAGGATGCAG 1 +TCCTCTGTCCCATTT 2 +GGTATTCAGGCGACC 1 +ATGAGCAACCCTACA 2 +TGGAAAGCCTTCCCA 1 +AGGCGACCGGGCCTG 1 +ACCTCCGATCCGCAG 2 +TCCATTATTCCTAGC 1 +ACTACTTCAACTCAA 1 +GAGCAACCCTACAGT 2 +GCAACCCTACAGTTG 2 +AGCTGCGGTATTCAG 1 +ACTAGCTATCCATTT 1 +AGAAGAAAAACCCAC 1 +CCCTACAGTTGACAC 2 +CTACAGTTGACACAC 2 +CAAGGGGAACCACTA 1 +AGGATGCAGCAACGT 1 +TTATAGCAGCAAAGG 1 +GTTAAGAGCATCGAG 1 +TATAGCAGCAAAGGG 1 +GCAAAGGGGGGACCA 1 +CAAAGGGGGGACCAA 1 +ACCCCGCGGGCCACT 1 +GACCAACTCCATATT 1 +ACATATGTATGTATG 1 +AAGGGGGGACCAACT 1 +ACATTCCTCTGTCCC 2 +GCACCACCGCTAACT 1 +CCGCAGAAACATGAG 2 +AGGGGGGACCAACTC 1 +GGGGGGACCAACTCC 1 +GGGGGACCAACTCCA 1 +GCACCGGAAACCTCC 2 +GGGGACCAACTCCAT 1 +GCAGTACTCTGACCC 2 +ACCAACTCCATATTA 1 +CCAACTCCATATTAT 1 +ACATCCGTTACATAT 1 +TATTGCCCATGACTT 1 +CAACTCCATATTATT 1 +CTCAAGGTCTCAAAG 1 +CTATTAGCGCGCACC 1 +AACTCCATATTATTG 1 +ATTGAGGATGCAGCA 1 +CATATTATTGCCCAT 1 +GGGAACCACTACTTC 1 +ATATTATTGCCCATG 1 +TATTATTGCCCATGA 1 +TCTAATTTTTTCAAA 1 +TTTCGACCGAAACTA 2 +ATTATTGCCCATGAC 1 +ACTCCATATTATTGC 1 +CCGCTAACTAGCTAT 1 +TTATTGCCCATGACT 1 +CAACAGTTTTCGACC 2 +ATTGCCCATGACTTT 1 +TTGCCCATGACTTTT 1 +TGAGGATGCAGCAAC 1 +TGCCCATGACTTTTG 1 +CGCACCACCGCTAAC 1 +CCCATGACTTTTGAA 1 +TTCACATCCGTTACA 1 
+CCATGACTTTTGAAT 1 +ATGACTTTTGAATGA 1 +ATGCAGCAACGTTCT 1 +GCGCACCACCGCTAA 1 +ACTTTTGAATGAGAT 1 +CATGACTTTTGAATG 1 +TTTGAATGAGATGTT 1 +AATTTTTTCAAAGTA 1 +ATGTATGTACACACA 1 +TCCGCAGAAACATGA 2 +TTGAATGAGATGTTT 1 +GATGTTTGACGAGCA 1 +TCACATCCGTTACAT 1 +ACGAGCAGGGGTCGA 1 +GAAGAAAAACCCACA 1 +CCCAGAAGAGTGGAG 1 +CGAGCAGGGGTCGAC 1 +GAGCAGGGGTCGACA 1 +GCAGAGCAAGGGGAA 1 +GTTGACACACACAAC 2 +GCAGGGGTCGACATA 1 +CGGACCCCGCGGGCC 1 +GATTGAAACGCTATT 1 +CAGGGGTCGACATAC 1 +ATTGAAACGCTATTA 1 +CAGCAACGTTCTAAC 1 +CAGAGCAAGGGGAAC 1 +AATGAGATGTTTGAC 1 +AGGGGTCGACATACT 1 +CTCCGATCCGCAGAA 2 +GGGGTCGACATACTT 1 +CAACCTGTCTCTCTC 2 +GGGTCGACATACTTT 1 +CAAAATAGAACCGGA 1 +TACACATTCCTCTGT 2 +AACTCAAGGTCTCAA 1 +TGCCGCGGCCCTTGC 1 +GCCGCGGCCCTTGCA 1 +CGCGGCCCTTGCAGA 1 +GCGGCCCTTGCAGAG 1 +CGGCCCTTGCAGAGC 1 +TTTCAAAGTAAACGC 1 +GCCTTCCCAGAAGAG 1 +TTTCACATCCGTTAC 1 +CCCTTGCAGAGCAAG 1 +CGGGCCTGCTTTGAA 1 +CTTGCAGAGCAAGGG 1 +TTGCAGAGCAAGGGG 1 +AAAGGGGGGACCAAC 1 +CCTTGCAGAGCAAGG 1 +TGCAGAGCAAGGGGA 1 +AGAGCAAGGGGAACC 1 +CCGGAGTCCTATTCC 1 +GCAAGGGGAACCACT 1 +TCCGTTACATATGTA 1 +AGGGGAACCACTACT 1 +CCTCCGATCCGCAGA 2 +CCTGGCCACCTCAAC 2 +CACTACTTCAACTCA 1 +TTCCCAGAAGAGTGG 1 +TTCAACTCAAGGTCT 1 +CAACTCAAGGTCTCA 1 +ACTCAAGGTCTCAAA 1 +GGTCTCAAAGCGAGT 1 +ACGCTTCGGACCCCG 1 +ATTGGCTGCAGTACT 2 +CAAGGTCTCAAAGCG 1 +GCTATCCATTTCACA 1 +GGGACCAACTCCATA 1 +TATCCATTTCACATC 1 +AACTACACATTCCTC 2 +AGGTCTCAAAGCGAG 1 +GATGCAGCAACGTTC 1 +GTCTCAAAGCGAGTG 1 +TGAACACTCTAATTT 1 +TGTATGTATGTACAC 1 +TCTCAAAGCGAGTGA 1 +TCAAAGCGAGTGACG 1 +AAGCGAGTGACGTAA 1 +CACCGCTAACTAGCT 1 +AGCGAGTGACGTAAC 1 +TTCAGGCGACCGGGC 1 +CACCACCGCTAACTA 1 +GCGAGTGACGTAACT 1 +AGTGACGTAACTGAT 1 +GTGACGTAACTGATT 1 +TATGTATGTATGTAC 1 +TGACGTAACTGATTG 1 +ACGTAACTGATTGAA 1 +CTCAACCTGTCTCTC 2 +CGTAACTGATTGAAA 1 +TAGTGGAAAGCCTTC 1 +TAACTGATTGAAACG 1 +GGAAAGCCTTCCCAG 1 +AACTGATTGAAACGC 1 +AGCAGCAAAGGGGGG 1 +TGAAACGCTATTAGC 1 +GAAACGCTATTAGCG 1 +CGCTATTAGCGCGCA 1 +GCTATTAGCGCGCAC 1 +AAAGCGAGTGACGTA 1 +TATTAGCGCGCACCA 1 +TCAGGCGACCGGGCC 1 +ATTAGCGCGCACCAC 1 +GCGCGCACCACCGCT 1 
+ACCGGAGTCCTATTC 1 +CGCGCACCACCGCTA 1 +CCACCGCTAACTAGC 1 +GAAAGCCTTCCCAGA 1 +CGCTAACTAGCTATC 1 +CTAACTAGCTATCCA 1 +TAACTAGCTATCCAT 1 +GACACACACAACAGT 2 +CTAGCTATCCATTTC 1 +GCCCTTGCAGAGCAA 1 +TAGCTATCCATTTCA 1 +TGACGAGCAGGGGTC 1 +CTATCCATTTCACAT 1 +TCCATTTCACATCCG 1 +GTTTGACGAGCAGGG 1 +CATTTCACATCCGTT 1 +AGTTAAGAGCATCGA 1 +CACATCCGTTACATA 1 +ATGTATGTATGTACA 1 +CTACACATTCCTCTG 2 +CATCCGTTACATATG 1 +CCGTTACATATGTAT 1 +CGAGTGACGTAACTG 1 +CGTTACATATGTATG 1 +CCATTTCACATCCGT 1 +GTTACATATGTATGT 1 +TTACATATGTATGTA 1 +CGAAACTACACATTC 2 +CATATGTATGTATGT 1 +ATATGTATGTATGTA 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/populations/populations.CP.joinmap.loc Wed Jul 15 17:29:50 2020 -0400 @@ -0,0 +1,13 @@ +# Stacks v2.52; JoinMap; March 17, 2020 +# Parent: PopA_01 +name = populations.20200317 +popt = CP +nloc = 3 +nind = 1 + +1 <lmxll> lm +2 <lmxll> -- +3 <lmxll> lm + +individual names: +PopA_02
--- a/test-data/populations/populations.log Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/populations/populations.log Wed Jul 15 17:29:50 2020 -0400 @@ -1,4 +1,4 @@ -populations v2.4, executed 2019-06-18 10:34:45 (zlib-1.2.11) +populations v2.52, executed 2020-03-16 15:39:40 (zlib-1.2.11) populations -P stacks_outputs -M denovo_map/popmap_cstacks.tsv Locus/sample distributions will be written to 'stacks_outputs/populations.log.distribs'. populations parameters selected: @@ -20,8 +20,7 @@ Working on 1 group(s) of populations: defaultgrp: 1 -Genotyping markers will be written to 'stacks_outputs/populations.markers.tsv' -Raw Genotypes/Haplotypes will be written to 'stacks_outputs/populations.haplotypes.tsv' +Raw haplotypes will be written to 'stacks_outputs/populations.haplotypes.tsv' Population-level summary statistics will be written to 'stacks_outputs/populations.sumstats.tsv' Population-level haplotype summary statistics will be written to 'stacks_outputs/populations.hapstats.tsv'
--- a/test-data/populations/populations.phistats_summary.tsv Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/populations/populations.phistats_summary.tsv Wed Jul 15 17:29:50 2020 -0400 @@ -3,3 +3,6 @@ # Fst' Means 1 + +# Dxy Means + 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/procrad/barcodes-duplicate Wed Jul 15 17:29:50 2020 -0400 @@ -0,0 +1,41 @@ +ATGGGG PopA_01 +GGGTAA PopA_02 +AGGAAA PopA_03 +TTTAAG PopA_04 +GGTGTG PopA_05 +TGATGT PopA_06 +GGTTGT PopA_07 +ATAAGT PopA_08 +AAGATA PopA_09 +TGTGAG PopA_10 +ATAGTT PopA_11 +GGAAGG PopA_12 +TTTGTG PopA_13 +TTAAAT PopA_14 +AATAAG PopA_15 +AAGAGG PopA_16 +TAGTGT PopA_17 +TGGAAG PopA_18 +GGGTTG PopA_19 +CATCAT PopA_20 +GGAGAG PopB_20 +GTTTTA PopB_01 +TGATAA PopB_02 +GTTGAT PopB_03 +AGATTA PopB_04 +GTATAG PopB_05 +TTGGGA PopB_06 +ATATAT PopB_07 +GATGAG PopB_08 +GGGAAT PopB_09 +AGTAAT PopB_10 +GGGATA PopB_11 +GAGAAG PopB_12 +AGTAGA PopB_13 +AAGGAT PopB_14 +AGGGTA PopB_15 +TGTTTT PopB_16 +ATGATG PopB_17 +GAGTTA PopB_18 +ATGTAG PopB_19 +AAAAAA PopA_01
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/procrad/barcodes-duplicate2 Wed Jul 15 17:29:50 2020 -0400 @@ -0,0 +1,41 @@ +ATGGGG +GGGTAA +AGGAAA +TTTAAG +GGTGTG +TGATGT +GGTTGT +ATAAGT +AAGATA +TGTGAG +ATAGTT +GGAAGG +TTTGTG +TTAAAT +AATAAG +AAGAGG +TAGTGT +TGGAAG +GGGTTG +CATCAT +GGAGAG +GTTTTA +TGATAA +GTTGAT +AGATTA +GTATAG +TTGGGA +ATATAT +GATGAG +GGGAAT +AGTAAT +GGGATA +GAGAAG +AGTAGA +AAGGAT +AGGGTA +TGTTTT +ATGATG +GAGTTA +ATGTAG +ATGGGG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/procrad/barcodes-duplicate3 Wed Jul 15 17:29:50 2020 -0400 @@ -0,0 +1,41 @@ +ATGGGG AAAAAA +GGGTAA AAAAAA +AGGAAA AAAAAA +TTTAAG AAAAAA +GGTGTG AAAAAA +TGATGT AAAAAA +GGTTGT AAAAAA +ATAAGT AAAAAA +AAGATA AAAAAA +TGTGAG AAAAAA +ATAGTT AAAAAA +GGAAGG AAAAAA +TTTGTG AAAAAA +TTAAAT AAAAAA +AATAAG AAAAAA +AAGAGG AAAAAA +TAGTGT AAAAAA +TGGAAG AAAAAA +GGGTTG AAAAAA +CATCAT AAAAAA +GGAGAG AAAAAA +GTTTTA AAAAAA +TGATAA AAAAAA +GTTGAT AAAAAA +AGATTA AAAAAA +GTATAG AAAAAA +TTGGGA AAAAAA +ATATAT AAAAAA +GATGAG AAAAAA +GGGAAT AAAAAA +AGTAAT AAAAAA +GGGATA AAAAAA +GAGAAG AAAAAA +AGTAGA AAAAAA +AAGGAT AAAAAA +AGGGTA AAAAAA +TGTTTT AAAAAA +ATGATG AAAAAA +GAGTTA AAAAAA +ATGTAG AAAAAA +ATGGGG AAAAAA
--- a/test-data/refmap/catalog.calls.vcf Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/refmap/catalog.calls.vcf Wed Jul 15 17:29:50 2020 -0400 @@ -1,6 +1,6 @@ ##fileformat=VCFv4.2 -##fileDate=20190617 -##source="Stacks v2.4" +##fileDate=20200709 +##source="Stacks v2.53" ##INFO=<ID=AD,Number=R,Type=Integer,Description="Total Depth for Each Allele"> ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency"> ##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
--- a/test-data/shortreads/process_shortreads.out Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/shortreads/process_shortreads.out Wed Jul 15 17:29:50 2020 -0400 @@ -1,5 +1,5 @@ -process_shortreads v2.2, executed 2018-12-03 21:27:19 -process_shortreads -p stacks_inputs/ -i fastq -b /tmp/tmpiZT6X4/files/000/dataset_2.dat --inline_null -o stacks_outputs +process_shortreads v2.53, executed 2020-07-10 13:51:53 (zlib-1.2.11) +process_shortreads -p stacks_inputs/ -i fastq -b /tmp/tmpidt4mij_/files/7/7/8/dataset_778705df-5f0c-4210-9ad6-4c43e8aacd52.dat --inline_null -o stacks_outputs File Retained Reads Low Quality Ambiguous Barcodes Trimmed Reads Orphaned paired-end reads Total R1.fastq 7000 0 0 0 0 7000
--- a/test-data/sstacks/PopA_01.matches.tsv Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/sstacks/PopA_01.matches.tsv Wed Jul 15 17:29:50 2020 -0400 @@ -1,5 +1,6 @@ -# sstacks version 2.4; generated on 2019-06-18 10:34:45 +# sstacks version 2.52; generated on 2020-03-16 15:39:40 1 1 1 AC 9 94M 1 1 1 CA 9 94M 2 1 2 consensus 28 94M 3 1 3 consensus 20 94M +# sstacks completed on 2020-03-16 15:39:40
--- a/test-data/sstacks/PopA_02.matches.tsv Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/sstacks/PopA_02.matches.tsv Wed Jul 15 17:29:50 2020 -0400 @@ -1,5 +1,6 @@ -# sstacks version 2.4; generated on 2019-06-18 10:34:45 +# sstacks version 2.52; generated on 2020-03-16 15:39:40 1 2 1 AC 6 94M 1 2 1 CA 6 94M 2 2 2 consensus 28 94M 3 2 3 consensus 20 94M +# sstacks completed on 2020-03-16 15:39:40
--- a/test-data/stacks_outputs/tsv2bam.log Mon Sep 30 14:20:19 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ -tsv2bam v2.4, executed 2019-06-18 10:34:45 (zlib-1.2.11) -tsv2bam -P stacks_outputs -M denovo_map/popmap_cstacks.tsv -R demultiplexed/ -Configuration for this run: - Stacks directory: 'stacks_outputs/' - Population map: 'denovo_map/popmap_cstacks.tsv' - Num. samples: 2 - Paired-end reads directory: 'demultiplexed/' - -Paired-end reads files found, e.g. 'demultiplexed/PopA_01.2.fq'. -Loading the catalog... -Processing sample 'PopA_01'... -Processing sample 'PopA_02'... - -Sample 'PopA_01': matched 3 sample loci to 3 catalog loci; found a paired-end read for 66 (100.0%) of the assembled forward reads; wrote 132 records. -Sample 'PopA_02': matched 3 sample loci to 3 catalog loci; found a paired-end read for 60 (100.0%) of the assembled forward reads; wrote 120 records. - -tsv2bam is done.
--- a/test-data/tsv2bam/tsv2bam.log Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/tsv2bam/tsv2bam.log Wed Jul 15 17:29:50 2020 -0400 @@ -1,4 +1,4 @@ -tsv2bam v2.4, executed 2019-06-17 21:22:16 (zlib-1.2.11) +tsv2bam v2.52, executed 2020-03-16 15:39:40 (zlib-1.2.11) tsv2bam -P stacks_outputs -M denovo_map/popmap_cstacks.tsv -R demultiplexed/ Configuration for this run: Stacks directory: 'stacks_outputs/'
--- a/test-data/ustacks/PopA_01.alleles.tsv Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/ustacks/PopA_01.alleles.tsv Wed Jul 15 17:29:50 2020 -0400 @@ -1,3 +1,4 @@ -# ustacks version 2.4; generated on 2019-06-18 10:34:45 +# ustacks version 2.52; generated on 2020-03-16 15:39:40 1 1 AC 50.00 9 1 1 CA 50.00 9 +# ustacks completed on 2020-03-16 15:39:40
--- a/test-data/ustacks/PopA_01.snps.tsv Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/ustacks/PopA_01.snps.tsv Wed Jul 15 17:29:50 2020 -0400 @@ -1,4 +1,4 @@ -# ustacks version 2.4; generated on 2019-06-18 10:34:45 +# ustacks version 2.52; generated on 2020-03-16 15:39:40 1 1 0 O 24.95 A - 1 1 1 O 24.95 A - 1 1 2 O 24.95 T - @@ -281,3 +281,4 @@ 1 3 91 O 27.73 A - 1 3 92 O 27.73 G - 1 3 93 O 27.73 A - +# ustacks completed on 2020-03-16 15:39:40
--- a/test-data/ustacks/PopA_01.tags.tsv Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/ustacks/PopA_01.tags.tsv Wed Jul 15 17:29:50 2020 -0400 @@ -1,4 +1,4 @@ -# ustacks version 2.4; generated on 2019-06-18 10:34:45 +# ustacks version 2.52; generated on 2020-03-16 15:39:40 1 1 consensus AATTCGTTTGCTGCTTCAGGAATCTCTCGTATAATCTGAGTATGTGCGTACGTACGCTATTTAGATGGATAACCGACGCTGCCAGACGAGAGAC 0 0 0 1 1 model OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOEOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOEOOOOO 1 1 primary 0 lane1_fakedata7_0 1:N:0:/1 AATTCGTTTGCTGCTTCAGGAATCTCTCGTATACTCTGAGTATGTGCGTACGTACGCTATTTAGATGGATAACCGACGCTGCCAGACGAGAGAC @@ -71,3 +71,4 @@ 1 3 primary 0 lane1_fakedata2_19 1:N:0:/1 AATTCTCTACACCACAGCATCAATTCTAAAAATGACTACCAGAGAGACAACTCCGCAGTTAAACACTCTGACTGCCACGCCAGCTACCTCTAGA 1 3 secondary lane1_fakedata2_5 1:N:0:/1 AATTCTCTACACCACAGCATCAATTCTAAAAATGACTACCAGAGAGACAACTCCGCAGTAAAACACTCTGACTGCCACGCCAGCTACCTCTAGA 1 3 secondary lane1_fakedata2_17 1:N:0:/1 AATTCTCTACACCACAGCATCAATTCTAAAAATGACTACCAGAGAGACAACTCCGCAGTTAAACACTCTGACTGCCACGCCAGCTACCTCCAGA +# ustacks completed on 2020-03-16 15:39:40
--- a/test-data/ustacks/PopA_02.alleles.tsv Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/ustacks/PopA_02.alleles.tsv Wed Jul 15 17:29:50 2020 -0400 @@ -1,3 +1,4 @@ -# ustacks version 2.4; generated on 2019-06-18 10:34:45 +# ustacks version 2.52; generated on 2020-03-16 15:39:40 2 1 AC 50.00 6 2 1 CA 50.00 6 +# ustacks completed on 2020-03-16 15:39:40
--- a/test-data/ustacks/PopA_02.snps.tsv Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/ustacks/PopA_02.snps.tsv Wed Jul 15 17:29:50 2020 -0400 @@ -1,4 +1,4 @@ -# ustacks version 2.4; generated on 2019-06-18 10:34:45 +# ustacks version 2.52; generated on 2020-03-16 15:39:40 2 1 0 O 16.64 A - 2 1 1 O 16.64 A - 2 1 2 O 16.64 T - @@ -281,3 +281,4 @@ 2 3 91 O 27.73 A - 2 3 92 O 27.73 G - 2 3 93 O 27.73 A - +# ustacks completed on 2020-03-16 15:39:40
--- a/test-data/ustacks/PopA_02.tags.tsv Mon Sep 30 14:20:19 2019 -0400 +++ b/test-data/ustacks/PopA_02.tags.tsv Wed Jul 15 17:29:50 2020 -0400 @@ -1,4 +1,4 @@ -# ustacks version 2.4; generated on 2019-06-18 10:34:45 +# ustacks version 2.52; generated on 2020-03-16 15:39:40 2 1 consensus AATTCGTTTGCTGCTTCAGGAATCTCTCGTATAATCTGAGTATGTGCGTACGTACGCTATTTAGATGGATAACCGACGCTGCCAGACGAGAGAC 0 0 0 2 1 model OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOEOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOEOOOOO 2 1 primary 0 lane1_fakedata7_7 1:N:0:/1 AATTCGTTTGCTGCTTCAGGAATCTCTCGTATACTCTGAGTATGTGCGTACGTACGCTATTTAGATGGATAACCGACGCTGCCAGACGAGAGAC @@ -65,3 +65,4 @@ 2 3 primary 0 lane1_fakedata2_18 1:N:0:/1 AATTCTCTACACCACAGCATCAATTCTAAAAATGACTACCAGAGAGACAACTCCGCAGTTAAACACTCTGACTGCCACGCCAGCTACCTCTAGA 2 3 primary 0 lane1_fakedata2_19 1:N:0:/1 AATTCTCTACACCACAGCATCAATTCTAAAAATGACTACCAGAGAGACAACTCCGCAGTTAAACACTCTGACTGCCACGCCAGCTACCTCTAGA 2 3 secondary lane1_fakedata2_1 1:N:0:/1 AATTCTCTACACCACTGCATCAATTCTAAAAATGACTACCAGAGAGACAACTCCGCAGTTAAACACTCTGACTGCCACGCCAGCTACCTCTAGA +# ustacks completed on 2020-03-16 15:39:40