Mercurial > repos > iuc > instrain_profile
changeset 0:1f3730540302 draft
"planemo upload for repository https://github.com/MrOlm/inStrain commit e6eae71231e551c08aa96afc9f15b8ba87676101"
author | iuc |
---|---|
date | Wed, 11 Aug 2021 21:11:18 +0000 |
parents | |
children | 4b0418b1f58b |
files | instrain_profile.xml macros.xml test-data/N5_271_010G1.maxbin2.stb test-data/N5_271_010G1_scaffold_min1000.fa-vs-N5_271_010G1.IS.zip test-data/N5_271_010G1_scaffold_min1000.fa-vs-N5_271_010G2.IS.zip test-data/SmallScaffold.fa test-data/SmallScaffold.fa.sorted.bam |
diffstat | 7 files changed, 587 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/instrain_profile.xml Wed Aug 11 21:11:18 2021 +0000 @@ -0,0 +1,378 @@
<tool id="instrain_profile" name="InStrain Profile" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Creates an inStrain profile (microdiversity analysis) from a mapping file </description>
    <!-- Version tokens, the help header and the shared requirement/citation blocks come from macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="edam_ontology"/>
    <!-- zip is needed in addition to inStrain because the command packs the result directory into one archive. -->
    <expand macro="requirements">
        <requirement type="package" version="3.0">zip</requirement>
    </expand>
    <version_command>inStrain profile --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
## Stage the inputs under predictable names; the mapping file keeps its
## datatype extension (bam or sam).
#set ext=$mapping_input.datatype.file_ext
ln -s '$mapping_input' 'inputbam.$ext'
#if $gene_profiling.gene_file
    ## Per the --gene_file parameter label, a file is treated as genbank
    ## when its name ends in .gb or .gbk, so the link name has to carry
    ## the matching suffix.
    #if $gene_profiling.gene_file.is_of_type('genbank')
        #set gene_link = 'gene_file.gb'
    #else
        #set gene_link = 'gene_file.fna'
    #end if
&&
ln -s '$gene_profiling.gene_file' '$gene_link'
#end if
#if $stb
&&
ln -s '$stb' 'stb_file.stb'
#end if
&&
inStrain profile
    'inputbam.$ext'
    '$sequence_input'
    --output 'inStrain.IS'
    $use_full_fasta_header
    --processes "\${GALAXY_SLOTS:-6}"
    --min_mapq $read_filtering.min_mapq
    --max_insert_relative $read_filtering.max_insert_relative
    --min_insert $read_filtering.min_insert
    --pairing_filter '$read_filtering.pairing_filter'
## Section parameters are addressed by their fully qualified name.
#if $read_filtering.priority_reads
    --priority_reads '$read_filtering.priority_reads'
#end if
    $output.detailed_mapping_info
    --min_cov $variant_calling.min_cov
    --min_freq $variant_calling.min_freq
    --fdr $variant_calling.fdr
#if $gene_profiling.gene_file
    --gene_file '$gene_link'
#end if
#if $stb
    --stb 'stb_file.stb'
#end if
    $mm_level
## With database mode enabled only the flag itself is passed; the read ANI,
## genome coverage and mm-profiling options are passed only in the other case.
#if $profile.database_mode
    $profile.database_mode
#else
    --min_read_ani $read_filtering.min_read_ani
    --min_genome_coverage $profile.min_genome_coverage
    $skip_mm_profiling
#end if
    --min_scaffold_reads $profile.min_scaffold_reads
    --min_snp $profile.min_snp
    $profile.store_everything
#if $profile.scaffolds_to_profile
    --scaffolds_to_profile '$profile.scaffolds_to_profile'
#end if
    --rarefied_coverage $profile.rarefied_coverage
    --window_length $profile.window_length
    $output.skip_genome_wide
    $output.skip_plot_generation
&&
## Pack the complete IS profile directory into the zip output.
cd ./inStrain.IS && zip -r ../inStrain.IS.zip *
    ]]></command>
    <inputs>
        <param name="mapping_input" type="data" format="bam,sam" label="A file containing metagenomic reads mapped to a DNA sequence" help="Sorted Bam file"/>
        <param name="sequence_input" type="data" format="fasta" label="A file containing a DNA sequence."/>
        <param argument="--use_full_fasta_header" type="boolean" truevalue="--use_full_fasta_header" falsevalue="" checked="false" label="Use full fasta header" help="Instead of using the fasta ID (space in header before space), use the full header. Needed for some mapping tools (including bbMap)"/>
        <section name="read_filtering" title="Read Filtering" expanded="true">
            <param argument="--min_read_ani" type="float" value="0.95" min="0" max="1" label="Minimum percent identity" help=" Minimum percent identity of read pairs to consensus to use the reads. Must be >, not >="/>
            <param argument="--min_mapq" type="integer" value="-1" label="Minimum mapq score" help="Minimum mapq score of EITHER read in a pair to use that pair. Must be >, not >="/>
            <param argument="--max_insert_relative" type="integer" value="3" label="Maximum insert relative" help="Multiplier to determine maximum insert size between two reads - default is to use 3x median insert size. Must be >, not >="/>
            <param argument="--min_insert" type="integer" value="50" label="Minimum insert" help="Minimum insert size between two reads - default is 50 bp. If two reads are 50bp each and overlap completely, their insert will be 50. Must be >, not >="/>
            <param argument="--pairing_filter" type="select" label="How should paired reads be handled?">
                <option value="paired_only" selected="true">Only paired reads are retained</option>
                <option value="non_discordant">Keep all paired reads and singleton reads that map to a single scaffold</option>
                <option value="all_reads">Keep all reads regardless of pairing status (NOT RECOMMENDED; See documentation for details)</option>
            </param>
            <param argument="--priority_reads" type="data" format="fastqsanger,fastqsanger.gz" optional="true" label="The location of a list of reads that should be retained regardless of pairing status" help="For example long reads or merged reads. This can be a .fastq file or text file with list of read names (will assume file is compressed if ends in .gz"/>
        </section>
        <section name="variant_calling" title="Variant Calling" expanded="true">
            <param argument="--min_cov" type="integer" value="5" label="Minimum coverage" help=" Minimum coverage to call a variant"/>
            <param argument="--min_freq" type="float" value="0.05" label="Minimum SNP frequency" help="Minimum SNP frequency to confirm a SNV (both this AND the FDR snp count cutoff must be true to call a SNP)."/>
            <param argument="--fdr" type="float" value="1e-06" min="0" max="1" label="FDR" help="SNP false discovery rate- based on simulation data with a 0.1 percent error rate (Q30)"/>
        </section>
        <section name="gene_profiling" title="Gene Profiling" expanded="true">
            <param argument="--gene_file" type="data" format="fasta,genbank" optional="true" label="Path to prodigal .fna genes file. If file ends in .gb or .gbk, will treat as a genbank file" help="EXPERIMENTAL; the name of the gene must be in the gene qualifier"/>
        </section>
        <param argument="--stb" type="data" format="tabular" optional="true" label="Scaffold to bin" help="This can be a file with each line listing a scaffold and a bin name, tab-separated. This can also be a space-separated list of .fasta files, with one genome per .fasta file. If nothing is provided, all scaffolds will be treated as belonging to the same genome"/>
        <param argument="--mm_level" type="boolean" truevalue="--mm_level" falsevalue="" checked="false" label="Create output files on the mm level"/>
        <param argument="--skip_mm_profiling" type="boolean" truevalue="--skip_mm_profiling" falsevalue="" checked="false" label="Skip mm profiling" help="Dont perform analysis on an mm level; saves RAM and time; impacts plots and raw_data"/>
        <section name="profile" title="Profile" expanded="true">
            <param argument="--database_mode" type="boolean" truevalue="--database_mode" falsevalue="" checked="false" label="Database mode" help="Set a number of parameters to values appropriate for mapping to a large fasta file."/>
            <param argument="--min_scaffold_reads" type="integer" value="1" label="Minimum scaffold reads" help="Minimum number of reads mapping to a scaffold to proceed with profiling it"/>
            <param argument="--min_genome_coverage" type="integer" value="0" label="Minimum genome coverage" help="Minimum number of reads mapping to a genome to proceed with profiling it. MUST profile .stb if this is set"/>
            <param argument="--min_snp" type="integer" value="20" label="Minimum SNP" help="Absolute minimum number of reads connecting two SNPs to calculate LD between them."/>
            <param argument="--store_everything" type="boolean" truevalue="--store_everything" falsevalue="" checked="false" label="Store everything" help="Store intermediate dictionaries in the pickle file; will result in significantly more RAM and disk usage"/>
            <param argument="--scaffolds_to_profile" type="data" format="fasta" optional="true" label="Scaffolds to profile" help="File containing a list of scaffolds to profile- if provided will ONLY profile those scaffolds"/>
            <param argument="--rarefied_coverage" type="integer" value="50" label="Rarefied coverage" help="When calculating nucleotide diversity, also calculate a rarefied version with this much coverage"/>
            <param argument="--window_length" type="integer" value="10000" label="Window length" help="Break scaffolds into windows of this length when profiling"/>
        </section>
        <section name="output" title="Set Output Parameters" expanded="true">
            <param argument="--detailed_mapping_info" type="boolean" truevalue="--detailed_mapping_info" falsevalue="" checked="false" label="Detailed mapping info" help="Make a detailed read report indicating details about each individual mapped read"/>
            <param argument="--skip_genome_wide" type="boolean" truevalue="--skip_genome_wide" falsevalue="" checked="false" label="Skip genome wide" help="Do not generate tables that consider groups of scaffolds belonging to genomes"/>
            <param argument="--skip_plot_generation" type="boolean" truevalue="--skip_plot_generation" falsevalue="" checked="false" label="Skip plot generation" help="Do not make plots"/>
        </section>
    </inputs>
    <outputs>
        <!-- The zipped profile plus the individual tables collected from inStrain.IS/output. -->
        <data format="zip" name="inStrain_zip" from_work_dir="inStrain.IS.zip" label="inStrain Profile IS zip" />
        <data name="scaffold_info" format="tabular" from_work_dir="inStrain.IS/output/inStrain.IS_scaffold_info.tsv" label="Scaffold Info, This gives basic information about the scaffolds in your sample at the highest allowed level of read identity." />
        <data name="mapping_info" format="tabular" from_work_dir="inStrain.IS/output/inStrain.IS_mapping_info.tsv" label="Mapping Info, This provides an overview of the number of reads that map to each scaffold, and some basic metrics about their quality." />
        <data name="SNVs" format="tabular" from_work_dir="inStrain.IS/output/inStrain.IS_SNVs.tsv" label="SNV, This describes the SNVs and SNSs that are detected in this mapping." />
        <data format="tabular" name="linkage" from_work_dir="inStrain.IS/output/inStrain.IS_linkage.tsv" label="Linkage, This describes the linkage between pairs of SNPs in the mapping that are found on the same read pair at least min_snp times." />
        <data format="tabular" name="gene_info" from_work_dir="inStrain.IS/output/inStrain.IS_gene_info.tsv" label="Gene Info, This describes some basic information about the genes being profiled" />
        <!-- Only offered when the genome-wide tables are not skipped. -->
        <data format="tabular" name="genome_info" from_work_dir="inStrain.IS/output/inStrain.IS_genome_info.tsv" label="Genome Info, This Describes many of the above metrics on a genome-by-genome level, rather than a scaffold-by-scaffold level." >
            <filter>(output['skip_genome_wide'] is False)</filter>
        </data>
        <!-- Only offered when plot generation is not skipped; every file in the figures directory becomes one element. -->
        <collection name="figures_pdfs" type="list" label="Figures" >
            <discover_datasets pattern="(?P&lt;designation&gt;.+)" directory="inStrain.IS/figures/" format="pdf"/>
            <filter>(output['skip_plot_generation'] is False)</filter>
        </collection>
    </outputs>
    <tests>
        <!-- Defaults with genome-wide tables and plots skipped: zip plus five tables. -->
        <test expect_num_outputs="6">
            <param name="mapping_input" value="SmallScaffold.fa.sorted.bam"/>
            <param name="sequence_input" value="SmallScaffold.fa"/>
            <param name="use_full_fasta_header" value="false"/>
            <param name="mm_level" value="false"/>
            <param name="skip_mm_profiling" value="false"/>
            <section name="read_filtering">
                <param name="min_read_ani" value="0.95"/>
                <param name="min_mapq" value="-1"/>
                <param name="max_insert_relative" value="3"/>
                <param name="min_insert" value="50"/>
                <param name="pairing_filter" value="paired_only"/>
            </section>
            <section name="variant_calling">
                <param name="min_cov" value="5"/>
                <param name="min_freq" value="0.05"/>
                <param name="fdr" value="1e-06"/>
            </section>
            <section name="profile">
                <param name="database_mode" value="false"/>
                <param name="min_scaffold_reads" value="1"/>
                <param name="min_genome_coverage" value="0"/>
                <param name="min_snp" value="20"/>
                <param name="store_everything" value="false"/>
                <param name="rarefied_coverage" value="50"/>
                <param name="window_length" value="10000"/>
            </section>
            <section name="output">
                <param name="detailed_mapping_info" value="false"/>
                <param name="skip_genome_wide" value="true"/>
                <param name="skip_plot_generation" value="true"/>
            </section>
            <output name="inStrain_zip">
                <assert_contents>
                    <has_size value="21606" delta="1000" />
                </assert_contents>
            </output>
            <output name="scaffold_info">
                <assert_contents>
                    <has_text text="length"/>
                    <has_n_lines n="2"/>
                    <has_n_columns n="21"/>
                </assert_contents>
            </output>
            <output name="mapping_info">
                <assert_contents>
                    <has_text text="scaffold"/>
                    <has_n_lines n="5"/>
                </assert_contents>
            </output>
            <output name="SNVs">
                <assert_contents>
                    <has_text text="position"/>
                    <has_n_lines n="5"/>
                    <has_n_columns n="16"/>
                </assert_contents>
            </output>
            <output name="linkage">
                <assert_contents>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
            <output name="gene_info">
                <assert_contents>
                    <has_n_lines n="0"/>
                </assert_contents>
            </output>
        </test>
        <!-- Same inputs with genome-wide tables and plots enabled: adds genome_info and the figures collection. -->
        <test expect_num_outputs="8">
            <param name="mapping_input" value="SmallScaffold.fa.sorted.bam"/>
            <param name="sequence_input" value="SmallScaffold.fa"/>
            <param name="use_full_fasta_header" value="false"/>
            <param name="mm_level" value="false"/>
            <param name="skip_mm_profiling" value="false"/>
            <section name="read_filtering">
                <param name="min_read_ani" value="0.95"/>
                <param name="min_mapq" value="-1"/>
                <param name="max_insert_relative" value="3"/>
                <param name="min_insert" value="50"/>
                <param name="pairing_filter" value="paired_only"/>
            </section>
            <section name="variant_calling">
                <param name="min_cov" value="5"/>
                <param name="min_freq" value="0.05"/>
                <param name="fdr" value="1e-06"/>
            </section>
            <section name="profile">
                <param name="database_mode" value="false"/>
                <param name="min_scaffold_reads" value="1"/>
                <param name="min_genome_coverage" value="0"/>
                <param name="min_snp" value="20"/>
                <param name="store_everything" value="false"/>
                <param name="rarefied_coverage" value="50"/>
                <param name="window_length" value="10000"/>
            </section>
            <section name="output">
                <param name="detailed_mapping_info" value="false"/>
                <param name="skip_genome_wide" value="false"/>
                <param name="skip_plot_generation" value="false"/>
            </section>
            <output name="inStrain_zip">
                <assert_contents>
                    <has_size value="1468006" delta="100000" />
                </assert_contents>
            </output>
            <output name="scaffold_info">
                <assert_contents>
                    <has_text text="length"/>
                    <has_n_lines n="2"/>
                    <has_n_columns n="21"/>
                </assert_contents>
            </output>
            <output name="mapping_info">
                <assert_contents>
                    <has_text text="scaffold"/>
                    <has_n_lines n="5"/>
                </assert_contents>
            </output>
            <output name="SNVs">
                <assert_contents>
                    <has_text text="position"/>
                    <has_n_lines n="5"/>
                    <has_n_columns n="16"/>
                </assert_contents>
            </output>
            <output name="linkage">
                <assert_contents>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
            <output name="gene_info">
                <assert_contents>
                    <has_n_lines n="0"/>
                </assert_contents>
            </output>
            <output name="genome_info">
                <assert_contents>
                    <has_text text="nucl_diversity"/>
                    <has_n_lines n="2"/>
                    <has_n_columns n="26"/>
                </assert_contents>
            </output>
            <output_collection name="figures_pdfs" type="list">
                <element name="inStrain.IS_CoverageAndBreadth_vs_readMismatch.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="383078" delta="10000" />
                    </assert_contents>
                </element>
                <element name="inStrain.IS_MajorAllele_frequency_plot.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="383590" delta="10000" />
                    </assert_contents>
                </element>
                <element name="inStrain.IS_ReadFiltering_plot.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="383078" delta="10000" />
                    </assert_contents>
                </element>
                <element name="inStrain.IS_ScaffoldInspection_plot.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="208" delta="10" />
                    </assert_contents>
                </element>
                <element name="inStrain.IS_genomeWide_microdiveristy_metrics.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="208" delta="10" />
                    </assert_contents>
                </element>
                <element name="inStrain.IS_readANI_distribution.pdf" ftype="pdf">
                    <assert_contents>
                        <has_size value="382771" delta="10000" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
@HELP_HEADER@

Profile
=======

is the heart of inStrain tool.

The functionality of inStrain profile is broken into several steps:

First, all reads in the .bam file are filtered to only keep those that map with sufficient quality. All non-paired reads will be filtered out by default, and an additional set of filters are applied to each read pair (not the individual reads):

 - Pairs must be mapped in the proper orientation with an expected insert size. The minimum insert distance can be set with the tool's corresponding parameter. The maximum insert distance is a multiple of the median insert distance. So if pairs have a median insert size of 500bp, by default all pairs with insert sizes over 1500bp will be excluded. For the max insert cutoff, the median_insert for all scaffolds is used.
 - Pairs must have a minimum mapQ score. MapQ scores are confusing and how they’re calculated varies based on the mapping algorithm being used, but are meant to represent both the number of mismatches in the mapping and how unique that mapping is. With bowtie2, if the read maps equally well to two positions on the genome (multi-mapped read), its mapQ score will be set to 2. The read in the pair with the higher mapQ is used for the pair.
 - Pairs must be above some minimum nucleotide identity (ANI) value. For example if reads in a pair are 100bp each, and each read has a single mismatch, the ANI of that pair would be 0.99

Next, using only read pairs that pass filters, a number of microdiversity metrics are calculated on a scaffold-by-scaffold basis. This includes:

 - Calculate the coverage at each position along the scaffold
 - Calculate the nucleotide diversity at each position along the scaffold in which the coverage is greater than the min_cov argument.
 - Identify SNSs and SNVs. The criteria for being reported as a divergent site are 1) More than min_cov number of bases at that position, 2) More than min_freq percentage of reads that are a variant base, 3) The number of reads with the variant base is more than the null model for that coverage.
 - Calculate linkage between divergent sites on the same read pair. For each pair harboring a divergent site, calculate the linkage of that site with other divergent sites within that same pair. This is only done for pairs of divergent sites that are both on at least MIN_SNP reads
 - Calculate scaffold-level properties. These include things like the overall coverage, breadth of coverage, average nucleotide identity (ANI) between the reads and the reference genome, and the expected breadth of coverage based on that true coverage.

Finally, this information is stored as an IS_profile object. This includes the locations of divergent sites, the number of read pairs that passed filters (and other information) for each scaffold, the linkage between SNV pairs, etc.

Inputs
======

A fasta file and a bam/sam file.

Output
======

An IS_profile. (Zip file), Containing:

1. scaffold_info.tsv

   This gives basic information about the scaffolds in your sample at the highest allowed level of read identity.

2. mapping_info.tsv

   This provides an overview of the number of reads that map to each scaffold, and some basic metrics about their quality.

3. SNVs.tsv

   This describes the SNVs and SNSs that are detected in this mapping. While we should refer to these mutations as divergent sites, sometimes SNV is used to refer to both SNVs and SNSs.

4. linkage.tsv

   This describes the linkage between pairs of SNPs in the mapping that are found on the same read pair at least min_snp times.

5. gene_info.tsv

   This describes some basic information about the genes being profiled.

6. genome_info.tsv

   Describes many of the above metrics on a genome-by-genome level, rather than a scaffold-by-scaffold level. (To output it, set --skip_genome_wide to false)

7. Figures/Plots (When --skip_plot_generation is set to false):

   - Coverage and breadth vs. read mismatches
   - Genome-wide microdiversity metrics
   - Read-level ANI distribution
   - Major allele frequencies
   - Linkage decay
   - Read filtering plots
   - Scaffold inspection plot (large)
   - Linkage with SNP type (GENES REQUIRED)
   - Gene histograms (GENES REQUIRED)

    ]]></help>
    <!-- The citation is defined once in macros.xml. -->
    <expand macro="citations"/>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Wed Aug 11 21:11:18 2021 +0000 @@ -0,0 +1,36 @@
<?xml version="1.0"?>
<macros>
    <!-- Tokens substituted into the tool element's version and profile attributes. -->
    <token name="@TOOL_VERSION@">1.5.3</token>
    <token name="@VERSION_SUFFIX@">0</token>
    <token name="@PROFILE@">20.01</token>

    <!-- EDAM topic and operation annotations. -->
    <xml name="edam_ontology">
        <edam_topics>
            <edam_topic>topic_0796</edam_topic>
            <edam_topic>topic_3174</edam_topic>
        </edam_topics>
        <edam_operations>
            <edam_operation>operation_0484</edam_operation>
            <edam_operation>operation_3209</edam_operation>
            <edam_operation>operation_3730</edam_operation>
        </edam_operations>
    </xml>

    <!-- The instrain package pinned to @TOOL_VERSION@; the yield slot takes any extra requirement elements supplied where the macro is expanded. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">instrain</requirement>
            <yield/>
        </requirements>
    </xml>

    <!-- Introductory help text; the content is kept flush left because its whitespace is part of the token value. -->
    <token name="@HELP_HEADER@">
What it does
============

inStrain is python program for analysis of co-occurring genome populations from metagenomes that allows highly accurate genome comparisons, analysis of coverage, microdiversity, and linkage, and sensitive SNP detection with gene localization and synonymous non-synonymous identification.

Read more about the tool: https://instrain.readthedocs.io/en/latest/
    </token>

    <!-- Citation block (a single DOI). -->
    <xml name="citations">
        <citations>
            <citation type="doi">10.1101/2020.01.22.915579</citation>
        </citations>
    </xml>
</macros> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/N5_271_010G1.maxbin2.stb Wed Aug 11 21:11:18 2021 +0000 @@ -0,0 +1,167 @@ +N5_271_010G1_scaffold_0 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_1 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_2 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_3 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_4 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_5 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_6 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_7 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_8 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_9 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_10 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_11 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_12 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_13 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_14 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_15 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_16 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_17 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_18 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_19 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_20 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_21 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_22 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_23 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_24 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_25 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_26 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_27 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_28 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_29 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_30 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_31 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_32 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_33 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_34 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_35 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_36 
maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_37 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_39 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_40 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_41 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_42 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_43 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_44 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_45 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_46 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_47 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_48 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_49 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_50 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_51 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_52 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_53 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_54 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_55 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_56 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_57 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_58 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_59 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_60 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_61 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_63 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_64 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_65 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_66 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_67 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_68 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_69 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_70 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_71 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_73 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_74 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_75 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_76 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_77 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_78 
maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_79 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_80 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_81 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_82 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_83 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_84 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_85 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_86 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_87 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_88 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_89 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_90 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_91 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_92 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_94 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_95 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_96 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_97 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_98 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_99 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_100 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_101 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_102 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_103 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_104 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_105 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_106 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_107 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_108 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_109 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_111 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_112 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_113 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_114 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_116 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_117 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_118 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_119 maxbin2.maxbin.001.fasta 
+N5_271_010G1_scaffold_120 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_121 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_122 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_123 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_125 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_126 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_127 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_128 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_129 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_130 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_131 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_132 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_133 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_134 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_135 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_136 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_137 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_138 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_139 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_141 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_142 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_143 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_144 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_145 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_147 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_148 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_149 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_150 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_151 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_152 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_153 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_154 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_155 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_156 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_157 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_158 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_159 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_160 maxbin2.maxbin.001.fasta 
+N5_271_010G1_scaffold_161 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_162 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_163 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_185 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_197 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_341 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_350 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_362 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_376 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_419 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_443 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_484 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_618 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_836 maxbin2.maxbin.001.fasta +N5_271_010G1_scaffold_963 fobin.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/SmallScaffold.fa Wed Aug 11 21:11:18 2021 +0000 @@ -0,0 +1,6 @@ +>WeirdBoi +AAAAAAAAAAAAAAAAAAAAAAA +>N5_271_010G1_scaffold_963 read_length_150 read_count_3782 +TCTCCATTACATTCCATTCCATTCGGGTTGTTCCATTCCATTCCATTCCA +TTCCACTCCATTCCATTGCACTCGGGTTGATTCCATTCCATTCCATTCCG +GATGATTCCATTCCATTGCATTCCGT