changeset 0:2973994fecd6 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vcf2maf commit 30046d5e0df4d80ac687edd03cf44b2afaa04550
author iuc
date Tue, 28 Jun 2022 21:07:04 +0000
parents
children e8510e04a86a
files test-data/fasta_indexes.loc test-data/input_test1.vcf test-data/input_test2.vcf test-data/output_test1.tabular test-data/output_test2.tabular test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/chr_synonyms.txt test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/info.txt test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/mitochondrion_genome/1-1000000.gz test-data/test1.fa test-data/test2.fa test-data/vep_versioned_annotation_cache.loc tool-data/dbkeys.loc.sample tool-data/fasta_indexes.loc.sample tool-data/vep_versioned_annotation_cache.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test vcf2maf.xml
diffstat 17 files changed, 460 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fasta_indexes.loc	Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,30 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools-indexed sequence data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id>     <dbkey> <display_name>  <file_base_path>
+#
+#So, for example, if you had the hg19 Canonical index stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon      hg19    Human (Homo sapiens): hg19 Canonical    /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon      hg18    Human (Homo sapiens): hg18 Canonical    /depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full       hg18    Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon      hg19    Human (Homo sapiens): hg19 Canonical    /depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full       hg19    Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
+hg19test	hg19	Human (Homo sapiens): hg19 Test	${__HERE__}/test1.fa
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_test1.vcf	Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,11 @@
+##fileformat=VCFv4.1
+##contig=<ID=21,assembly=GCF_000001405.26,length=46709983>
+##contig=<ID=22,assembly=GCF_000001405.26,length=50818468>
+##ALT=<ID=CNV,Description="Copy Number Polymorphism">
+##INFO=<ID=SVLEN,Number=.,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=SVTYPE,Number=1,Type=String,Description="Type of structural variant">
+##VEP="v105" time="2022-03-30 19:24:59" ensembl=105.525fbcb ensembl-io=105.2a0a40c ensembl-funcgen=105.660df8f ensembl-variation=105.ac8178e
+##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence annotations from Ensembl VEP. Format: Allele|Consequence|IMPACT|SYMBOL|Gene|Feature_type|Feature|BIOTYPE|EXON|INTRON|HGVSc|HGVSp|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|DISTANCE|STRAND|FLAGS|SYMBOL_SOURCE|HGNC_ID|SOURCE|custom_annotation.gtf.gz">
+##INFO=<ID=custom_annotation.gtf.gz,Number=.,Type=String,Description="custom_annotation.gtf.gz (overlap)">
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HG00096
+21	5733	rs142513484	C	T	.	.	CSQ=T|3_prime_UTR_variant|MODIFIER|MRPL39|ENSG00000154719|Transcript|ENST00000307301|protein_coding|3/3||||159|||||||-1||||custom_annotation.gtf.gz|,T|missense_variant|MODERATE|MRPL39|ENSG00000154719|Transcript|ENST00000352957|protein_coding|2/2||||70|70|24|A/T|Gca/Aca|||-1||||custom_annotation.gtf.gz|,T|upstream_gene_variant|MODIFIER|AP000223.42|ENSG00000260583|Transcript|ENST00000567517|antisense|||||||||||2407|-1||||custom_annotation.gtf.gz|	GT	0|0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_test2.vcf	Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,3 @@
+##fileformat=VCFv4.0
+#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO
+chrM	5701	.	GA	G	.	.	.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_test1.tabular	Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,3 @@
+#version 2.4
+Hugo_Symbol	Entrez_Gene_Id	Center	NCBI_Build	Chromosome	Start_Position	End_Position	Strand	Variant_Classification	Variant_Type	Reference_Allele	Tumor_Seq_Allele1	Tumor_Seq_Allele2	dbSNP_RS	dbSNP_Val_Status	Tumor_Sample_Barcode	Matched_Norm_Sample_Barcode	Match_Norm_Seq_Allele1	Match_Norm_Seq_Allele2	Tumor_Validation_Allele1	Tumor_Validation_Allele2	Match_Norm_Validation_Allele1	Match_Norm_Validation_Allele2	Verification_Status	Validation_Status	Mutation_Status	Sequencing_Phase	Sequence_Source	Validation_Method	Score	BAM_File	Sequencer	Tumor_Sample_UUID	Matched_Norm_Sample_UUID	HGVSc	HGVSp	HGVSp_Short	Transcript_ID	Exon_Number	t_depth	t_ref_count	t_alt_count	n_depth	n_ref_count	n_alt_count	all_effects	Allele	Gene	Feature	Feature_type	Consequence	cDNA_position	CDS_position	Protein_position	Amino_acids	Codons	Existing_variation	ALLELE_NUM	DISTANCE	STRAND_VEP	SYMBOL	SYMBOL_SOURCE	HGNC_ID	BIOTYPE	CANONICAL	CCDS	ENSP	SWISSPROT	TREMBL	UNIPARC	RefSeq	SIFT	PolyPhen	EXON	INTRON	DOMAINS	AF	AFR_AF	AMR_AF	ASN_AF	EAS_AF	EUR_AF	SAS_AF	AA_AF	EA_AF	CLIN_SIG	SOMATIC	PUBMED	MOTIF_NAME	MOTIF_POS	HIGH_INF_POS	MOTIF_SCORE_CHANGE	IMPACT	PICK	VARIANT_CLASS	TSL	HGVS_OFFSET	PHENO	MINIMISED	GENE_PHENO	FILTER	flanking_bps	vcf_id	vcf_qual	gnomAD_AF	gnomAD_AFR_AF	gnomAD_AMR_AF	gnomAD_ASJ_AF	gnomAD_EAS_AF	gnomAD_FIN_AF	gnomAD_NFE_AF	gnomAD_OTH_AF	gnomAD_SAS_AF	vcf_pos
+MRPL39	0	.	GRCh37	21	5733	5733	+	Missense_Mutation	SNP	C	C	T	novel		TUMOR	NORMAL	C	C																			ENST00000352957	2/2							MRPL39,missense_variant,,ENST00000352957,;MRPL39,3_prime_UTR_variant,,ENST00000307301,;AP000223.42,upstream_gene_variant,,ENST00000567517,;	T	ENSG00000154719	ENST00000352957	Transcript	missense_variant	70	70	24	A/T	Gca/Aca				-1	MRPL39			protein_coding										2/2																			MODERATE								.	GCT	rs142513484	.										5733
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_test2.tabular	Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,3 @@
+#version 2.4
+Hugo_Symbol	Entrez_Gene_Id	Center	NCBI_Build	Chromosome	Start_Position	End_Position	Strand	Variant_Classification	Variant_Type	Reference_Allele	Tumor_Seq_Allele1	Tumor_Seq_Allele2	dbSNP_RS	dbSNP_Val_Status	Tumor_Sample_Barcode	Matched_Norm_Sample_Barcode	Match_Norm_Seq_Allele1	Match_Norm_Seq_Allele2	Tumor_Validation_Allele1	Tumor_Validation_Allele2	Match_Norm_Validation_Allele1	Match_Norm_Validation_Allele2	Verification_Status	Validation_Status	Mutation_Status	Sequencing_Phase	Sequence_Source	Validation_Method	Score	BAM_File	Sequencer	Tumor_Sample_UUID	Matched_Norm_Sample_UUID	HGVSc	HGVSp	HGVSp_Short	Transcript_ID	Exon_Number	t_depth	t_ref_count	t_alt_count	n_depth	n_ref_count	n_alt_count	all_effects	Allele	Gene	Feature	Feature_type	Consequence	cDNA_position	CDS_position	Protein_position	Amino_acids	Codons	Existing_variation	ALLELE_NUM	DISTANCE	STRAND_VEP	SYMBOL	SYMBOL_SOURCE	HGNC_ID	BIOTYPE	CANONICAL	CCDS	ENSP	SWISSPROT	TREMBL	UNIPARC	RefSeq	SIFT	PolyPhen	EXON	INTRON	DOMAINS	AF	AFR_AF	AMR_AF	ASN_AF	EAS_AF	EUR_AF	SAS_AF	AA_AF	EA_AF	CLIN_SIG	SOMATIC	PUBMED	MOTIF_NAME	MOTIF_POS	HIGH_INF_POS	MOTIF_SCORE_CHANGE	IMPACT	PICK	VARIANT_CLASS	TSL	HGVS_OFFSET	PHENO	MINIMISED	GENE_PHENO	FILTER	flanking_bps	vcf_id	vcf_qual	gnomAD_AF	gnomAD_AFR_AF	gnomAD_AMR_AF	gnomAD_ASJ_AF	gnomAD_EAS_AF	gnomAD_FIN_AF	gnomAD_NFE_AF	gnomAD_OTH_AF	gnomAD_SAS_AF	vcf_pos
+mt:ND3	0	.	BDGP6.32	chrM	5702	5702	+	Frame_Shift_Del	DEL	A	A	-	novel		TUMOR	NORMAL	A	A																c.95del	p.Asp32AlafsTer28	p.D32Afs*28	FBtr0100870	1/1							mt:ND3,frameshift_variant,p.Asp32AlafsTer28,FBtr0100870,;mt:ND2,downstream_gene_variant,,FBtr0100857,;mt:CoI,downstream_gene_variant,,FBtr0100861,;mt:CoII,downstream_gene_variant,,FBtr0100863,;mt:ATPase8,downstream_gene_variant,,FBtr0100866,;mt:CoIII,downstream_gene_variant,,FBtr0100868,;mt:ND4L,downstream_gene_variant,,FBtr0100880,;mt:ND6,upstream_gene_variant,,FBtr0100883,;mt:ATPase6,downstream_gene_variant,,FBtr0433498,;mt:ND4,downstream_gene_variant,,FBtr0433500,;mt:ND5,downstream_gene_variant,,FBtr0433501,;mt:Cyt-b,upstream_gene_variant,,FBtr0433502,;mt:tRNA:Trp-TCA,downstream_gene_variant,,FBtr0100858,;mt:tRNA:Cys-GCA,upstream_gene_variant,,FBtr0100859,;mt:tRNA:Tyr-GTA,upstream_gene_variant,,FBtr0100860,;mt:tRNA:Leu-TAA,downstream_gene_variant,,FBtr0100862,;mt:tRNA:Lys-CTT,downstream_gene_variant,,FBtr0100864,;mt:tRNA:Asp-GTC,downstream_gene_variant,,FBtr0100865,;mt:tRNA:Gly-TCC,downstream_gene_variant,,FBtr0100869,;mt:tRNA:Ala-TGC,upstream_gene_variant,,FBtr0100871,;mt:tRNA:Arg-TCG,upstream_gene_variant,,FBtr0100872,;mt:tRNA:Asn-GTT,upstream_gene_variant,,FBtr0100873,;mt:tRNA:Ser-GCT,upstream_gene_variant,,FBtr0100874,;mt:tRNA:Glu-TTC,upstream_gene_variant,,FBtr0100875,;mt:tRNA:Phe-GAA,downstream_gene_variant,,FBtr0100876,;mt:tRNA:His-GTG,downstream_gene_variant,,FBtr0100878,;mt:tRNA:Thr-TGT,upstream_gene_variant,,FBtr0100881,;mt:tRNA:Pro-TGG,downstream_gene_variant,,FBtr0100882,;	-	FBgn0013681	FBtr0100870	Transcript	frameshift_variant	95/354	95/354	32/117	D/X	gAc/gc		1		1	mt:ND3	FlyBaseName_gene		protein_coding	YES		FBpp0100181	P18930,P18930	B6E0P8,J7FKZ6	UPI0000000AE1				1/1		Gene3D:1.20.58.1610,Pfam:PF00507,PANTHER:PTHR11058,PANTHER:PTHR11058:SF9																	HIGH	1	deletion						.	CGAC	.	.										5701
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/chr_synonyms.txt	Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,7 @@
+KJ947872	chrM
+NC_024511.2	chrM
+KJ947872	mitochondrion_genome
+KJ947872.2	chrM
+KJ947872.2	mitochondrion_genome
+NC_024511.2	mitochondrion_genome
+chrM	mitochondrion_genome
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/info.txt	Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,7 @@
+species	drosophila_melanogaster
+assembly	BDGP6.32
+sift	b
+polyphen	
+source_sift	sift
+source_genebuild	dmel_r6.32_FB2020_01
+source_assembly	BDGP6.32
Binary file test-data/test-cache/drosophila_melanogaster/106_BDGP6.32/mitochondrion_genome/1-1000000.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1.fa	Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,168 @@
+>21 dna:chromosome chromosome:GRCh38:21:1:10000:1 REF
+CCACAATCATTTTAGGAGAATGGGTTTAAGAAAGGAAAAAAAAAAAAAAGATTTCTGTAT
+GCTCTTAAGAGAAAATCTAAAAAATAATGACATGAAAAAGTTGAAAGGAATGGAAAAATA
+TGTACCATTAAAAGGAAACCCGACGTATGAATGCCATTATCAGACAAAACAGATTTTTTT
+CTTTTTGAGATGGAGTCTCACTCTGTGGCCCAGGCTGGAGTGCAGTGGCACAATCTCTGC
+TCACAGCAAGCTCCGCCTCCCAGGTTCATGCCATTCTCCTGCCTCAGCCTCCCAAGTAGC
+TGGGACTACAGGCACCCACCACCACACCAGGCTAGTATTTGTATTTTTAGTAGAGACGGG
+GTTTCATCGTGTTAGCCAGGATGGTCTCAATCTTCTGACCTCGTGATCTGCCCACCTCAG
+CCTCCCAAAGTGCTGGGATTACAGGCATGAGCCACCGCGCCCAGCCAGACAAAACAGATT
+TTAAGACAACTAAGAAGTTAACAAGCTGACCCTACAATAAGCATGAAAATTTTGAAAAAG
+AATAGGAAAGGAGAACTCACCATAAGAGAAATTGAAACTTGTTATAAAGCTATAGTTGTT
+AAAACGGTGTTACTACAGTGGTACATGGACAGATAAATGGACCAATGAAGCAGACCCAGG
+CACTGAAAGGAACCTTTTATATGACAGCATGGCACAATCAGTAAGAATAGAGAGGAAATA
+GGCCAGGCACGGTGGCTCACGCCTGTAATCCCAGTACTTTGGGAGGCCAAGGCAGGCAGA
+TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGTCAACATGGTGAAACCGCGTCTCTACT
+AAAAATACAAAAATTAGCTGGTCGTGGTGGCAGGCACCTGTAATCCCAGCTACTCGGGAG
+GCTGAGGCAGGAGAATCACTTGAACCCGGCTAGCGGAGGCTGCAGTGAGCCGAGATCACG
+CCATTGCACTCCAGCCTAGGCAACAAGAGTGAAACTCCATCACAAAAAAAAAAGAAAAAG
+AGTAGACAGGAAATAAATGGTCCAGAATAACTGCCTATCCTTGTGGAGGAGAGGGTGATT
+CAAAATTAGGTCCCTTTCCTCACTCTATATGCAAAAAACAAACTTCAAATAAATTATACA
+ATTAAATGTGAAAATCAAGACTTTAAAATAAACAATGCAGTAGGCTGCTTTATAATATCA
+AGTTAGGGAAGGCTTTCTTAAATTTCATAAACATAAATCATAGAGGAAAAGATGAACTGT
+CTACCTTAAAATTAAAGACGATATAAACAAAATTAAAAGGTAAGCCAGACAAAAGAAATA
+TTTGTAGTGACAACGGTTTAACTTTCTTTCTTTCTTTTTTTTTTTTTGAGACGGAATCTC
+ACTCTGTCACCCAGGCTGGAGTACACTGGTGCAATCTCAGCTCACTGCAACCTCCACCTC
+CCAGGTTCAAGCGATTCTTGTGCCTCAGCCTCCCAAGTAGCTGGGATGACAGGTACGCAC
+CACCACACCCAGCTAATTTTTTGTATTTTTAGCAGAGACGGGGTTTCACCATGTTGGCCA
+GGCTGGTCTCGAATTCCTGACCTCAGGTGATCCATCTGCCTCAGCCTCCCAAAGTGCTGG
+GATTACAGGAGTGAGCCACTGCACCTGGCCCACAAGGGTTTCACTTTCTAAAAATATAAA
+GAACTGGCCAGGTGCAATGGCTCACACTTGTAATCCTAGCACTTTGGGAGGGCAAGGAGA
+GCAGATCGCTAGAGGCCAGAAGCTGGAGAACAGCCTGGCCAACATGGTGAAACCCCATCT
+CTACTTTAAAAATATAAAAATTAGTTGGGTGTGGTGGCACTTGCCTATAATCCCAGCTAC
+TTGGCTACTCAGGAGGCCGAGGCAGGGGAATCGCTTGAATCCAGTAGGCAGAGGCTACAG
+TGAGCCAAGATCATGCCACTGCACTACAGCTTGGGCAACAGAGTGAGACTTGGTCTCAAA
+AAAAAAAAAAATTATATATATATATCTTATATATATACACTATTATATATATACACACAC
+ACACACACACACACACACACACACACACACACAATTAATATGAGATGCCCAAAAATCCAA
+TTGTAAAAAGGGGCAAAGGTTGTAAACTGGTAATTCATAAAAACAAATGAAGAGATGCTT
+ATTGGTACTATATGCTCAGTATTAAGCAAATTAAATGAGATAGGATCGTGCATATTCAAC
+CAACAAAATATCTGAATGTCTGAAAATAATAAATGTTAATGAGGGAGTGGAGAAAATGGG
+AATGCTCATACTGCTGATAGAGAGTAAACTGGTACAACTATTGTGGCAGCCAATTAATAT
+TTAGTAAAGCTGAAGATGCATGGTCCACTGTGGTACAGGCCCTGGAGATATTATCAAATG
+TGTACACAAAGAAACACGCACAAGGATATTTTCTGCGATACTGTAATACTCAAAAGCCAA
+TGACATCCTCAGTGGTCATCAATAAGAAAATGAATTAATGATGGGATTAATCATATAATG
+AAATACTATATAGCAGTTGAAATGAATGTACTCTTTACATGTATCAACATGCTATACATA
+AAAAACAATGATGAGCAATAAAAGCAAATTGCAAAAGGATATATATTATGAAACCAATTA
+TGTTTAGTTTTAAAACACAGAGAATACTATGGATTGTAGTAAAAAAAAATATAAAATCAT
+GAAGAGTAAGGACAGGTACAAACAGGATAGTGGTTCTTCTTGAGAGGAAGGAAATGACAT
+AACAAGACCTTCAATGGATGTGCAGCTTTTCCTTTATTTAAACACAAAAGGATCTGAAGG
+AAATAAGGAAAAAGGTTGACAGTGGTTACAATTAAATAGTGGCTGTATGTCAACACTCTT
+GGTTACAAACAACAGGATCTACACTAGCTAGTTTAAACAAACAGAATAAAATGGCTCACA
+AGTCACAGGCGTGGAGGTCAGGACTACGGAGCCCCCAAAAATGCCCATTTTTATACCTTG
+GAGCAGCTGCGGGGGAAAAACTGCTAAGCAAAGCCTCCACACCTTGCACCATTACATGGG
+ACCTCTGCCACTGCTGCCTTGAAAACCACATCACTGCTCCGTTCAACAAAATGTATCTCA
+TACTACTCTTGTCTGCAAAGTACTTGCTTCCAGATTTCACACAGTTATGTCTAATTGGTG
+AGCCCATGCTACCTGTCTGAGCCACAGCTGCAAGGAAGGGCAGGAGATTAAATTTCATTC
+TTCTACTGGGTAAGGCGAGATCCACAGAGTGGGAAGTTGCCAAAAAGCAGGTGTTCAAAC
+AGTGCTAGCTGCCCAAAAAGCACGAAAAGTGCCCACTCAAACAAGAGTTGGTGAAAATAT
+TCTCTCTACTTTTCTGTATGCTCAAATATTTCACAATTTTTTTAAAGAAAAAATGTCGAA
+GTATGTAAATTCACAAACAACAAAGGGAATGGAAAAAAAATCAATAAACAAGAGACGTCA
+ACCAAATTCTAAAAGACAAAAAGCTAAGTGACTAATAAAACACTAGAGAATGTCACCACC
+TAGAAAACATGTGGAGGAAGTTCCATCAGAGGCAGCCAACCGGCCCAGCTGGGCCTTAGC
+TCAGAGGCAGCAAAAGTTCATAGATGATAGAAGACAGTAGAGGGATTAGTTAAATCTAAA
+CTAATTAAAAAGAAAGGAATTAATTAAAGGTCTGTATACAAACTGGTTGAACACTACCCC
+ATCTTCAAATATAGAAGGGCCTAAATCCAGGAAGTGCCCCATGCAAAATATTGAAGGGAT
+CTTTATTATAAAGTAAGTGAAAGAACAGTCTGGGTAAAACTAGGATAGTCAATTTAGAAA
+TCGGTAGCCAAGACAAGACCTCTTCATTCTAGCATTTTAAAAACCCTCATCCTACCAGCC
+AGATTTACCTACTTATCCTTAGTGATGCTTATGAGAAGCCAAACCTTCCTCAAACCACAG
+AGCTGCCACTCAACCTCCCTACATCCACATCTTCAACTACAAACTGACAGCCAACAAACG
+TAAGACATTTGAAAAAAGCCTGAAACATGTAGATGAAAGACTAAGAAAAAAATCTGTCAC
+TGGAGAAAAGAGATGATTCAGGGACTAGAAAGTGAATGAAGAAATGAATAAAAATTATAT
+TGATATCTTCAGAGAGCTTCATGAAAATACTTCACATCCAGAAAACATGAAAAGGATTCT
+ATGAAACAGAAGCAAAAGAGAGCTTTCAGAAATTAAAATATGCTTGACATAATGTGAGTA
+AAATATAAATGCAGGAAAAATAATAGAGAAAGGGAAAAAAATACAGAGGACCAATACAGA
+CGATGCAAATACAGCTGAAAGGTATTAGAGAAACAAAGAACCTCCAGAGAAAAGAAAGAA
+CAAATAAAGTAAAAAAACATAAAAGAGAGATGTCAAAAATTGAAAGGATCGCCTACGTAC
+CAAGCAGAATAACTGACTGAAGACCTAGATACAGGTACAGGATTGTCGAATTTTACAACA
+CCAAGAATAATAAGATCCTAAGTGTCCAGAGTGTGTTGAGAGAAGGGGATGCAGAGAACA
+GGTTATATAAAAAAGGAACAAGAATCAGACTGGCATCAGACCTTTCATTAGCAAAATCAG
+ATGCCAGAAAATAACTGAGCTGCTTTCAAAGTTCTAAGAGAAAACTACTTTTCAATCTAG
+AATTCTGTATCTAGCTGAATAGTTCATCAAATGTAAGCGCATAACCCACTTTAGACATGC
+AATGGCTCACCTCTTAGACATTTTTTCCTAGAAAGGAGCTTAGAGACAAATGCTAGCAAA
+ATGAGAAAGTTAATCAAGAAACAGGAAGATATGGACTCTAAGAAAAAGTTGACCTCACCC
+AGAAAAAAAAAAAAAAAAATACGGCCTTCAGAGGAAGCCTGTGCACAAGATGCAGAAAAC
+TTGCTGACAAAGAAGCAGAAACTTTTTGGCAAAATATTTTTGTAAGACTGATAACCAAGG
+CTTACTCAAGGTGTGGGGAAACAGGCACTCTTAGACAGTTATTGGGAGTAACACTTGGCA
+AAACCTTTGGGAAGCCAGCATCTGGTAGCACCTGACAGTAATTCCACTGCCAGAAATCTA
+TTTCTACAGAATTATTTATACGTGAGGCAACATAGAGTAGAACATAAGAGCATGTGAGTG
+AAACTACCTCCGTTCAAATTCCCACTTTACCACTTTCAAGCTCTATGCTGTTTCCTCATT
+TAAAAAATAGAAACATCTTCTCAACAGGTCACTATGAGGATTATAGAAAAGGTTCCACAC
+GTGATGGGCTTCGCACAAGACCCAGCACATGATAAATACTCAGTTAATATTACTTATCTA
+CAAACATTTGCCATTTTTGCACAAGGATGTTTACTGTGCCACTGTTCGGCAAAAAACGCA
+AGCAAGTATAGTATGGTCCTATTTATGTTTTTTAGAAATTACATTTATATTTGTATGTAT
+CTGCCTAGATCTGTAGGAAAAAATAGAGAAAACATGCTAAAGTGTTGCATCCATGGAGCA
+GGAAGGGAGTGACCTGTTTTTGTACAGTTTTTACTTAAGCAACATGAAGTGTTTCATACT
+TTAGAGAAAAGAAAAAGTAAAGATCACTATCAATCTTGGGGAGAAAGGGTTAATCTGTAT
+AAGCACACACAAACATTATATTTAAAACATTTATTTTATTATACATATTTAAATTTTAGA
+AAGTTATTAGGTAGATGTACATTCCTCTGTTGCTTTACTTTGATCTTCAGTTACCTGTAA
+AAACATGAATAGCTTTACTTTCCTAATAAACACTTTCAGTTTTACCCTTCACCCACCATT
+TTAAATCATTCCACAGCCCTCTTATAATGACCCTTTATCTTTCCCCTTAAATAGTAGAAT
+AGAAAGTTAGGTAAGAAGAAAGATTTAAAGGCCAAGTAACCAATATAAATTATCAGGTAA
+AGAACAATAATTCCATTCAAAAATATAATCTAAGGCATATTATACCTAGGCTTCTCAAGT
+TGGCCATTTTACTTTTCAGCCGTAAAACAAAATCATTAGTGTAAGAAAAAACACCTGAAG
+TGCTGTAACAAGCTATTTTAAATCAGTTTATTTTCTTTTGGAGTATAATTCCATTTAGAA
+TAGAACATATTACAGATTTAGTCTAAATAGATATCTCTATGTCCCTATTTCTATTCTGTG
+TCACTCAGACTCATCAAACAGACATGGCTAAATCTGAATTCAACACCCCAAAACCTGTCC
+TACCTGCATCATCTATCTTGGTAAAGGGCTCCACCCAGATACCTAAGGACAGCATTCAAG
+AATTCACTCTCTTCTTCCTACCTGCATATTCCATCAACAGTCCTACTAATTCAGCCTAAG
+TTAACTCTCACAGTCATTCTCCCTATCGCGGCTGAACTTTAGGACCTCATTCTCTTCTAT
+CCCCCTAACAGAACTCTTGGCTTAAGTCACAGCTATATCCCATGCTACCCCTAGAATGAG
+CTTTTAAACAAGAATAGCTGATCAACAGGTCACTTCCCTACTCAAAATTCTCCAGTTACT
+CTCTAGCATCTCAGTGGGGTGGAGGAGGAAAGTCAGGAATGCAAGTTCCACAACACGGGA
+CACAAGCCCCTCATATTCTGTTTTTGGACTGACTCTTTACCTATCCTACATGTGACCTGT
+ACATTCAAATTCATCTTACATTCATCATGTTCTCTATGTAGGCAAGTGACTTCCTCTGAG
+ATAACCTCCAACCCATTTTCCAACAGATTATCTGCATGGTCCACTTTTATTCACCCTTTA
+AAATCAGCTTAAGAGACTCCAATTCCTGGAAGCCTTCCTTAAGCAACTCCCCAGCTTGGT
+CTGAGTCCTCTTCTTTATGCCAGAGCCCCACATGTACACCTGAACTGTCTGCTTATAAGT
+CTAATTATCCAATCAACATTACTTCCCTGAGGCAGCAAGTAAATTTTGTGCATTTTTTTG
+TACCTCCAGACCATAACAGTACCTGGCACACAATAAACAATCAGTAAGTCTGTGTTGCAT
+CAATGAACCACGTTTAATCTAATCCCTTCAGAAAAGCAAAATGGACAGTAGAAAAAAGCA
+ACTAAACCAGAATAAATGTTTGAAAAAATATGTATTGACTAACATCACTAGATATTCTAG
+GTAAGACACTGTCTCTGCCGTTTTTCTACATTTTTCATTCCTTTAAAGTGTTCAGTTACT
+ACTGCTTCTTTAATTTTCCAAACTATTTCTTTAATATTCTTTAGCTTTTTAAATTTTTTT
+TAATTTTACTTAACTCCAAGTAAACATGCTGAACATTTTTTCTTTAGTTTCTTCATCTAA
+AATGATCACTGTGACCAACTAAAAAGAGAAAATTACTCCATCCATTCAAAGGTAATTTCT
+TCTCATGAGAAAAAAAATTATATAACCCTTACTCAAGAACATTATGGTGAAGTCCCTAAA
+CAATAATTCCTATCTACATAGTCATTAATAAAGACTAGGTTTATATATCATTATGAAAAA
+TTGTCAATAAATATTTCTTGGGTAACCCCTTCATGTGCTAATGTTGTGTTCAAAATTTGG
+GCTTACACAAATATAACAAATCCTGAAAGGCTTAACAGACAGAAAATAAATGTACTGGCA
+AAGAAAACAATCAAAAAATAGTATGTGGACATATACAATCAAGCACAAACTTATATGAAT
+AATTATTTGTTAGAGATTACTCTGAGACTGTTCCATGGAGGAGGTACGACTCAGGCGAGG
+TAGTGAAGAATGACTGCGTAGTAAATAGAAGGAGAATGGGAAATGGAGTCTAGAAAGAAA
+AACTATCATGAAGAAATAAGCAAACTTGAAAATAAGTTTAATGTGTGATGCTTAATGTGC
+CTGCAAGCTTAACCAGTGTTTGTGGCATTGTGGGAAATTAGGATAAGGAGGAAGTCCTAA
+TGAGGACAGATTACACAGGACAGCTATCAGATAACCTAAGTAAAAGAGGCACACTTAGCC
+TATGAAATAGGAAACAGGAATAGTTGTGGTCAAGAAATACAATCACCCGGGCCGGGCGCG
+GTGGCTCACGCCTGTAATCCCAGCACCTTGGGAGGCTGAGGGGGGTGGATCACGAGGTCA
+GGAGATCGAGACCATCCTGGCTAACACGGTAAAACCCTGTCTCTACTAAAAATACAAAAA
+AATTAGTTGGACGTAGTGGTGGGCACCCATAGTCCCAGCTACTTGGGAGGCTGAGGCACG
+AGAATCGCTTGAGCCCGGGAGGTGGAGGTTGCAGTGAGCTGAGATCGCGCCACTGCACTC
+CAGCCTGGGAGACAGAACAAGACTCTGTCTCAAAAAAAAAAAGAAAAGAAATACTATCAT
+TCCTACTCATTAAGTCTAAATGCTAATGACAAAGAATAAACTACAAAAACACAAACATTC
+AAATCTGGTTTTTCAGGACATTAGGATATTTGCACATTTACTGTTAATGGCTAAAGGCGG
+CTTTAAAATACACCTGACAAATCTTAATCCTCTAAATATTCATTAAGTTTACAAAATAAA
+TCCTAAGACCTAACTGATACCATTACAACTATTAAGAACTATTACTATTACATAGTAATA
+TTCTTATTTCTCTTCAAGTTCTACAATATCATTTTCACGATATTAACAAATAAAATCAAA
+AGAGAACTGCCAAGTTACAACTTTACCCTTACCAACACACTAGTGAACAATCACAACACT
+GCTTTGACTATTGAGACAACTTACTTTTTTCCTTTCTCTTTGTTTCTTTCATTTTTCCCC
+CCTTTGGTTGTTATTGGTTAATTTTGATGAATTTCTTTATAGAAGAGTACTCAATAGCTG
+TCAAAAACACTTACCATTTTCCGAGATCTTTCCAATAGCTTATCCCATATTGTAAAATGT
+GCCTTGAAAAGAAAAGATTTGCGATGAACTAAATGAAGCAGTAATGTCAAAGTAAAAAGG
+ACAGAGTCTTTATATAGCTTTTAGAACAAAATAAATAAAAAGCTAGGCAGTACTAATTTA
+GATAAGTGGTTATAAAAGTGGATCATAGTAGCAAAAAAAGGTGCTACATGGGGATAGCTG
+CTTTTTTGCTTTCTTCAATCGCATGAACCCGGGAGGCAGAGGTTGCAGTGAGCATTTAAG
+AAAAAAACGCTTAAGGATTGAACGTGCCCTTCTTTCACTTTTGCCCATCTTAATCATTTC
+CATCATAGAAAGCATTTCTTCTAATAGGAAGCAAAAATCTGCCCTACTTTCCCATAGACT
+GTGGTTCAATCCTTAAACAGCCAGTTTCAACATTCTAGAAGTCATTCTTCAAATCATAAG
+CAATCACAAGAAGACAGTCATTAAAAACCAACTGTGACATCTTCACACTGAAATGGCAGA
+AGCAGAGGATTCATCAAGTTACCCTAAAGAATAAGTAAAACCTAACAAAGACAATAGTAA
+ATTTTTTTTTTTTTTGAAATGGAGTTTCACTCTGTCGCCCAGGCTGGAGTGCAGTGGTGC
+GATCTCAGCTCACTGCAACCTCCGCCTCCCAGGTTCAAGCAATTCTCCTGCCTCAGCCTC
+CCAAGACAATAGTAAAATTTAAACTCAATTTCCTTAGTCCATAACAACCTCCTGTAGCAG
+AGGATTATCAGAATGCATTAAAGAAAACTGTGCAAAGTGTATCATGACCACATCAATTTT
+ATTAGGTCAACTGAAAGTGGTAACAAGACATATGGGCAGGCCAGTGACTACTCCACACTG
+AATGAGCTCATAAAATCTATAATAAAAGGTAAAATTAATAAATATCAACATACAAACCCT
+TCCAGGGAAAGAGCTGACTGGTATGTTTAAAGGGAAAACCATGCCTGACTCAGGCGGAAT
+GAACTGCTGGTGCAGAGACCTTAAGCTGTGGCTGGAATATAGTGAGTGAGGAAAGGAGTG
+GTGTTAGATAAAGTCAGAGAAGCAGGCAGGGACCAGATAATGCAGGGCTTTGTGAGACAG
+GGTAAAGAGTTGGGATTTGTTCAAGGGAAGCCATTGGAAA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test2.fa	Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,2 @@
+>chrM
+AATGAATTGCCTGATAAAAAGGATTACCTTGATAGGGTAAATCATGCAGTTTTCTGCATTCATTGACTGATTTATATATTATTTATAAAGATGATTTTATATTTAATAGAATTAAACTATTTCTAAAAGTATCAAAAACTTTTGTGCATCATACACCAAAATATATTTACAAAAAGATAAGCTAATTAAGCTACTGGGTTCATACCCCATTTATAAAGGTTATAATCCTTTTCTTTTTAATTTTTAATAATTCGTCAAAAATTTTATTTATTACAATTATAATTATTGGGACATTAATTACAGTTACATCTAATTCTTGGTTAGGAGCTTGAATAGGTTTAGAAATTAATTTATTATCTTTTATCCCCCTATTAAGAGATAATAATAATTTAATATCTACAGAAGCTTCTTTAAAATATTTTTTAACCCAAGTTTTAGCTTCAACTGTTTTATTATTTTCTTCAATTTTATTAATATTAAAAAATAATATAAATAATGAAATTAATGAATCTTTTACATCCATAATTATTATATCAGCTTTATTATTAAAAAGTGGAGCCGCTCCTTTCCATTTTTGATTTCCTAATATAATAGAAGGTTTAACATGAATAAATGCTTTAATATTAATAACTTGACAAAAAATTGCACCTTTAATATTAATTTCTTATCTTAATATTAAATATTTATTATTAATTAGAGTAATTTTATCAGTTATTATTGGAGCTATTGGAGGATTAAATCAAACTTCTTTACGAAAATTAATAGCATTTTCTTCAATTAATCATTTAGGGTGAATATTAAGATCTTTAATAATTAGAGAATCAATTTGATTAATTTATTTTTTTTTTTATTCATTTTTATCATTTGTATTAACATTTATATTTAATATTTTTAAATTATTTCATTTAAATCAATTATTTTCTTGATTTGTTAATAGAAAAATTTTGAAATTTACATTATTTATAAATTTTTTATCATTAGGAGGATTACCTCCATTTTTAGGATTTTTACCAAAATGACTTGTAATTCAACAATTAACATTATGTAATCAATATTTTATATTAACACTTATAATAATATCAACTTTAATTACATTATTTTTTTATTTACGAATTTGTTATTCCGCTTTTATAATAAATTATTTTGAAAATAACTGAATCATAAAGATAAATATAAATAGTATTAATTATAATATATATATAATTATAACttttttttcaatttttggattatttttaatttctttattttattttatattTTAAGGCTTTAAGTTAATAAAACTAATAACCTTCAAAGCTATAAATAAAGAAATTTCTTTAAGCCTTAGTAAAACTTACTCCTTCAAAATTGCAGTTTGATATCATTATTGACTATAAGACCTAATTAATTTGTCCTTATTTGATTAAGAAGAATAAATCTTATATATAGATTTACAATCTATCGCCTAAACTTCAGCCACTTAATCAATAATCGCGACAATGATTATTTTCTACAAATCATAAAGATATCGGAACTTTATATTTTATTTTTGGAGCTTGAGCTGGAATAGTTGGAACATCTTTAAGAATTTTAATTCGAGCTGAATTAGGACATCCTGGAGCATTAATTGGAGATGATCAAATTTATAATGTAATTGTAACTGCACATGCTTTTATTATAATTTTTTTTATAGTTATACCTATTATAATTGGTGGATTTGGAAATTGATTAGTGCCTTTAATATTAGGTGCTCCTGATATAGCATTCCCACGAATAAATAATATAAGATTTTGACTTCTACCTCCTGCTCTTTCTTTACTATTAGTAAGTAGAATAGTTGAAAATGGAGCTGGGACAGGATGAACTGTTTATCCACCTCTATCCGCTGGAATTGCTCATGGTGGAGCTTCAGTTGATTTAGCTATTTTTTCTCTACATTTAGCAGGAATTTCTTCAATTTTAGGAGCTGTAAATTTTATTACAACTGTAATTAATATACGATCAACAGGAATTTCAT
TAGATCGTATACCTttatttgtttgatcagtagttattactgctttattattattattatCACTTCCAGTACTAGCAGGAGCTATTACTATATTATTAACAGATCGAAATTTAAATACATCATTTTTTGACCCAGCGGGAGGAGGAGATCCTATTTTATACCAACATTTATTTTGATTTTTTGGTCATCCTGAAGTTTATATTTTAATTTTACCTGGATTTGGAATAATTTCTCATATTATTAGACAAGAATCAGGAAAAAAGGAAACTTTTGGTTCTCTAGGAATAATTTATGCTATATTAGCTATTGGATTATTAGGATTTATTGTATGAGCTCATCATATATTTACCGTTGGAATAGATGTAGATACTCGAGCTTATTTTACCTCAGCTACTATAATTATTGCAGTTCCTACTGGAATTAAAATTTTTAGTTGATTAGCTACTTTACATGGAACTCAACTTTCTTATTCTCCAGCTATTTTATGAGCTTTAGGATTTGTTTTTTTATTTACAGTAGGAGGATTAACAGGAGTTGTTTTAGCTAATTCATCAGTAGATATTATTTTACATGATACTTATTATGTAGTAGCTCATTTTCATTATGTTTTATCTATAGGAGCTGTATTTGCTATTATAGCAGGTTTTATTCACTGATACCCCTTATTTACTGGATTAACGTTAAATAATAAATGATTAAAAAGTCATTTCATTATTATATTTATTGGAGTTAATTTAACATTTTTTCCTCAACATTTTTTAGGATTGGCTGGAATACCTCGACGTTATTCAGATTACCCAGATGCTTACACAACATGAAATATTGTATCAACTATTGGATCAACTATTTCATTATTAGGAATTTTATTCTTTTTTTTTATTATTTGAGAAAGTTTAGTATCACAACGACAAGTAATTTACCCAATTCAACTAAATTCATCAATTGAATGATACCAAAATACTCCACCAGCTGAACATAGATATTCTGAATTACCACTTTTAACAAATTAATTTCTAATATGGCAGATTAGTGCAATAGATTTAAGCTCTATATATAAAGTATTTTACTTTTATTAGAAAATAAATGTCTACATGAGCTAATTTAGGTTTACAAGATAGAGCTTCTCCTTTAATAGAACAATTAATTTTTTTTCATGATCATGCATTATTAATTTTAGTAATAATTACAGTATTGGTGGGATATTTAATATTTATATTATTTTTTAATAATTATGTAAATCGATTTCTTTTACATGGACAACTTATTGAAATAATTTGAACTATTTTACCAGCAATTATTTTACTATTTATTGCTCTTCCTTCTTTACGTTTACTTTATTTATTAGATGAAATTAATGAACCATCTGTAACTTTAAAAAGAATCGGCCATCAATGATATTGAAGTTACGAATATTCAGATTTTAATAATATTGAATTTGATTCATATATAATTCCAACAAATGAATTAATAACTGATGGATTTCGATTATTAGATGTTGATAACCGAGTAGTTTTACCCATAAACTCACAAATTCGAATTTTAGTAACAGCTGCTGATGTTATTCATTCTTGAACAGTACCTGCTTTAGGAGTAAAAGTTGACGGTACACCTGGACGATTAAATCAAACTAATTTTTTTATTAATCGACCGGGTTTATTTTATGGTCAATGTTCAGAAATCTGTGGAGCTAATCATAGATTTATACCGATTGTAATTGAAAGTGTTCCTGTAAATTACTTTATTAAATGAATTTCTAGAAATAACTCTTCATTAGATGACTGAAAGCAAGTACTGGTCTCTTAAACCATTTAATAGTAAATTAGCACTTACTTCTAATGATAAAAAATTAGTTAAAATCATAACATTAGTATGTCAAACTAAAATTATTAAATAATTAATATTTTTTAATTCCACAAATAGCACCtattagatgattattattatttattattttttctattacatttattttattttgttctattaactattattcttat
ATACCAAATTCACCTAAATCTAATGAATTAAAAAATATCAACTTAAATTCAATAAATTGAAAATGATAACAAATTTATTTTCTGTATTCGACCCCTCAGCTATTTTTAATTTTTCACTTAATTGATTAAGAACATTTTTAGGACTTTTAATAATTCCGTCAATTTATTGATTAATACCTTCTCGTTACAATATTATATGAAATTCAATTTTATTAACTCTTCATAAAGAATTTAAAACTTTATTAGGCCCATCAGGTCATAATGGATCTACTTTTATTTTTATTTCTTTATTTTCATTAATTTTATTTAATAATTTCATAGGATTATTTCCATATATTTTTACAAGAACAAGACATTTAACTTTAACTTTATCTTTAGCTTTACCTTTATGATTATGTTTTATATTATATGGATGAATTAATCATACACAACATATATTTGCTCATTTAGTTCCTCAAGGAACACCCGCTATTCTTATACCTTTTATAGTATGTATTGAAACTATTAGAAATATTATTCGACCTGGAACATTAGCTGTTCGATTAACTGCTAATATAATTGCTGGACATTTATTATTAACTCTTTTAGGAAATACAGGACCTTCTATATCTTATATTTTAGTAACATTTTTATTAATAGCTCAAATTGCTTTATTAGTATTAGAATCAGCTGTAGCTATAATTCAATCTTATGTGTTTGCTGTATTAAGAACTTTATATTCTAGAGAAGTAAATTAATGTCTACACACTCAAATCACCCTTTTCATTTAGTGGATTATAGTCCATGACCATTAACAGGAGCTATCGGAGCTATAACAACTGTATCAGGTATAGTAAAATGATTTCATCAATATGATATTTCATTATTTGTATTAGGTAATATTATTACTATTTTAACTGTATATCAATGATGACGAGATGTATCACGAGAAGGAACATACCAAGGATTACATACTTATGCAGTAACTATTGGTTTACGATGAGGAATAATTTTATTTATTTTATCAGAAGTTTTATTTTTTGTGAGATTTTTTTGAGCTTTTTTTCACAGAAGTTTATCACCCGCTATTGAATTAGGAGCATCATGACCTCCTATAGGAATTATCTCATTTAATCCATTTCAAATTCCTTTATTAAATACAGCTATTTTATTAGCTTCAGGAGTTACTGTAACTTGAGCCCACCATAGACTTATAGAAAATAATCATTCACAGACTACTCAAGGATTATTTTTTACAGTTTTACTAGGAATCTATTTTACAATTCTTCAAGCTTATGAATATATTGAAGCTCCATTTACTATTGCAGACTCAATTTATGGATCAACATTTTTTATAGCAACAGGATTTCACGGAATTCATGTATTAATCGGAACAACTTTTTTATTAGTATGTTTACTACGACATTTAAATAATCACTTCTCAAAAAATCATCATTTTGGTTTTGAAGCAGCTGCATGATATTGACATTTTGTCGATGTAGTTTGATTATTTTTATATATCACAATTTACTGATGAGGAGGATAATTATATTATTAATTAAATATCTATATAGTATAAAAGTATATTTGACTTCCAATCATAAGGTCTATTAATTAATAGTATAGATAATTTTTTCTATTATTTTTATTGCTTTATTAATTTTACTAATTACAACTATTGTTATATTTTTAGCTTCAATTTTATCAAAAAAAGCTTTAATCGACCGAGAAAAAAGATCCCCATTTGAATGTGGATTTGATCCAAAATCTTCATCTCGATTACCATTTTCTTTACGTTTTTTTTTAATTACTATTATTTTTTTAATTTTTGATGTAGAGATTGCATTAATTCTACCTATAATTATTATTATAAAATATTCTAATATTATAATTTGAACAATTACTTCAATTATTTTTATTTTAATTTTATTAATTGGATTATACCATGAATGAAATCAAGGAATGTTAAATTGATCAAACTAatatatttatatatatatatataGGGTTGTAGTTAAATA
TAACATTTGATTTGCATTCAAAAAGTATTGAATATTCAATCTACCTTATTAATTTAATAACTGAATATGAAGCGATTGATTGCAATTAGTTTCGACCTAATCTTAGGTAATTATACCCTTATTCTTTAATTGAAGCCAAAAAGAGGCATATCACTGTTAATGATATAATTGAATTTTAAATTCCAATTAAGGAAATATGATGATCAAGTAAAAGCTGCTAACTTTTTTCTTTTAATGGTTAAATTCCatttatatttctatttatatagtttaaataaaaccttacattttcattgtaataataaaatcttatatttttatAAATTACTAAAATTAATTCACTATATCCAAAGATTTAATAATCTCCATAACATCTTCAATGTCAAACTCTAGTATAAGCTATTTGGATATAAAAATAATAAAATTAATAAAATTAAAATTCAAAATACAAATAATAATAAATAAATTTTCAAAGAATTATTATGTATTAAAAATAAAGTTTTAGAATATATAGATAATTTTTGATATAAATGTTGACCTCCAAAATATTCTGATCAACCTTGATCAAAACTTTTTACAACTAATTGACCATAATTTAAAGGATAAAAAATTATACCATAAGTTCTAATATAAGGTATAAATCATATAGACCCTAAAAAAGTTCTTAAATTATATATAAATAAAGATTTATTTAAAAAAAATAAATTTCTTAAAGAAATTAAATATCCAAATAAACCCCCTACAATACATACAAATAATGTTAACAATTTTATATAAATAGGTAAACAAATTATATAAGGAAAAGGAAAAATCAATCAATTTAATATTCTACCTCCAATAATTCTTATAATTAATAATCCTATTATACCACGGAGTATAATTCAACTTTCATCATTTAATATATTCAATCTACCGCAATTTAAATCACCGGTTATTGAATAATAAACTAATCGAAATGAATAACTAACAGTTAAACCCGTAGAAAAATAGTATAAAAAAAATGAAAACATATTAACATTTCTAATTCTAACAATTTCTAAAATTATATCCTTAGAATAGAATCCAGCTAAAAAAGGTATTCCACATAAAGCTAAATTAGATACGTTAAAACAAGCTGAAGTTAAAGGTATATGAATTCTTAACCCCCCTATTAAACGAATATCTTGAGAATTATTTATATTATGAATAATAGCCCCAGCACATATAAACAATAATGCTTTAAATAAAGCATGAGTTAATAAATGAAATATAGCTAATTTTAAAAATCCTATAGACAAAATTCTTATTATTAAACCTAATTGACTTAAAGTAGATAAAGCAATAATTTTTTTTAAATCAAATTCAAAATTAGCTCCTAATCCAGCTATAAATATTGTTAATCCAGATAATAATAATATTAATTGTCCTAACCAAGAAGTTCTTAAGATAATATTAAATCGAATTAATAAATATACACCAGCTGTAACTAATGTAGAAGAATGAACTAAAGCAGAAACAGGTGTAGGAGCAGCTATAGCTGCAGGTAACCAAGAAGAAAAAGGAATCTGAGCTCTTTTAGTTATAGCAGCTAATATTACTAATCTTCCAATTATTAACATTTCAAATTCATTTTGTATAATTTCTAAATAAAAAATATAATTTCATCTTCCATAATTTAATATTCAAGCAATAGAAAGAAGTAAAGCTACATCCCCAATTCGATTAGATAACGCAGTTAATATACCAGCATTATAAGATTTAATATTTTGAAAATAAATTACTAAACAATAAGAAACAAGTCCTAAACCATCTCACCCTAATAAAATTCTAATTAAATTTGGTCTAATAATTAACAATATTATTGATAAAACAAATATTAATACTAATATAATGAATCGATTAATATGATTATCATTTATTATGTATTCTTTTCTATAAAAAATCACTAAAGAAGAAATTATAAGAACAAAAGATATAAATAATAAACTTATTCAATCAAAAAGAAAAGTTATAACAATTCT
TATAGAATTTAAAGAAACTAATTCTCACTCAATAAAATAAATCATATCATTTAACAAAAAATATAAACTTAATAAAAAACATGATAAACTTATAGAAATTAAATTAACAAATCTAATTCTACAAATAGATAAATATTTCATGATTTAAAATGAATATTTTCATATCACTAACACCACAAATTAGTATTTTTTTTAAACTATTTAAATATAATCATAATATAAATGATTCTCTTTTTAAAATTAATAAATTTAAAGGCAATCAATGTAACAATATTAATAAATATTCTCGAATTTTACCTCTTCTAAATGAATATACTCCAGAAAATAATTTACCATGCTGACTAAAAGAATATAAATATAAAGTATAAGCAGCTCTAAAAAAAGATAAAAAAGATAATAAAATTATAGAAATTCAAGATCAAGAAACAATTCTATTTAATAAATAAATTTCTCCTAATAAATTTAATGTTGGAGGAGCTGCTATATTAGCTGATCTTAATAAAAATCATCATAAAGTTATCGAAGGTATAAAATTTAATAAACCTTTATTAATTAATATTCTTCGACTTCCAAGACGTTCATAAGATACATTAGCTAAACAAAATAACCCAGAAGAACATAAACCATGAGCAATTATTAATGTATAAGAACCACATAAACCTCAATAAGTTATAGTTAAAAGTCCTGATAGAACAATTCCTATATGAGCAACAGATGAATAAGCAATTAAAGCCTTTAAATCAGTTTGACGTAAACAAACTAATCTAACTAATACACCTCCTActaatctaattctaattcaaacaaatctatacttcaaattTATTAACTGTAAAAAACTAATAACTCGTAATATTCCATAACCTCCTAATTTTAATATAATACCTGCTAAAATTATAGACCCAGAAACTGGAGCTTCAACATGAGCTTTAGGTAATCATAAATGAACTAAAAATATTGGTATTTTTACTAAAAAGGCACACAATAAACAAAAATATAATAAATCGTAATTAAACATAAAATTATTTATTAAATAAAAATTTATAGAACCAATTTTATTTATTaaataaaaaataccaattaatataggtaaagaaactaataaagtataaaataataaatataaaCCAGCTTGTAAACGTTCTGGCTGATAACCTCAACCTAAAATTAAAAATAATGTAGGAATTAGTCTTCTTTCAAAAAATAAATAAAATATAAATAATCTTATTCTTGAAAAAGTTAAAATCAACAATAATAATAAAATAATAATATTTAATAAAAATAAATTTTTATAATTATTATGTTTATTAATTATTTCTCTAGCTAATAATATTAATGAACAAATTCATAAACTTAATAAAATTAATCCATAAGATAATATATCACAACCTAAAAAATAAGAAATTTCTGATCAATAATTTATAAAATTATTTATTAATAAAAAAATAAATCTAATAAAAAATATTATAATTTGTACCATTCAATATATATTATTAATAAAACAAAAAGGAATTAAAAATAATAAAAAAAAAATAATTTTTAACATTATATAATTCTAAAAGATTGAAAATAATCATTACCATGAGTACGAATTATAGAAACTAAAATTGATAAACCTAAGGCCCCTTCACATACTCTAAATGTCAAAAATATTATTCTAAAATAACTTTCATAATTTAATATATTTAAATAAATAAATAATATAAAAAATAATATTAAAACAATAAATTCTAAACTTAAAAGTATTGAAAGTAAATGTTTCCGATTAGAAACAAAACAAAATAACCCTAAAATAAATAAAATTATAGGTAAACTTCAATATAAAATTATAATCATTAGTTTTAATAGTTTAATAAAAACATTGGTCTTGTAAATCAAAAATAAGATTATTTCTTTTAAAACTTCAAGAGAAAAGAAATTTCTTTTTCATTAATCCCCAAAATTAATATTTTAAATAAACTACCTCTTGAAATTATTCAATTAATATTATATTCATTAAT
TATTACTACTTCCATTATTTTTCTAAATATAATTCATCCATTAGCTTTAGGATTAACTTTATTAATTCAAACAATTTTTGTATGTTTACTAACTGGATTAATAACTAAAAGTTTTTGATATTCATATATTTTATTTTTAATTTTTTTAGGAGGAATACTTGTATTATTTATTTACGTAACATCTTTAGCCTCTAATGAAATATTTAATTTATCAATAAAATTAACTCTATTTTCTTCATTAATTTTAATTTTTATATTAATTTTATCATTTATTATAGATAAAACTTCTTCTTCTTTATTTTTAATAAATAATGATATACAATCTATTATTAATATAAATTCTTATTTTATAGAAAATTCTTTATCTTTAAATAAATTATATAATTTTCCTACAAATTTTATTACAATTTTATTAATAAATTATTTATTAATTACTTTAATTGTTATTGTAAAAATTACAAAATTATTTAAAGGACCTATTCGAATAATATCTTAATTAATGAATAAACCTTTACGAAATTCCCATCCTCTATTTAAAATTGCCAATAATGCTTTAGTAGATTTACCAGCTCCAATTAATATTTCAAGATGATGAAATTTTGGATCATTACTTGGATTATGTTTAATTATTCAAATTTTAACCGGATTATTTTTAGCTATACATTACACAGCTGATATTAATCTAGCTTTCTATAGTGTTAATCATATTTGTCGAGACGTTAATTATGGTTGATTATTACGAACTTTACATGCTAACGGTGCATCATTTTTTTTTATTTGTATTTACTTACATGTAGGACGAGGAATTTATTACGGTTCATATAAATTTACTCCAACTTGATTAATTGGAGTAATTATTTTATTTTTAGTAATAGGAACAGCTTTTATAGGATACGTATTACCTTGAGGACAAATATCATTTTGAGGAGCTACTGTAATTACTAATTTATTATCAGCTATCCCTTACTTAGGTATAGATTTAGTTCAATGATTATGAGGTGGATTTGCTGTTGATAATGCCACTTTAACTCGATTTTTTACATTCCATTTTATTTTACCTTTTATTGTTCTTGCTATAACTATAATTCATTTATTATTCCTTCATCAAACAGGATCTAATAATCCTATCGGATTAAATTCTAATATTGATAAAATTCCTTTTCATCCTTATTTTACATTTAAAGATATTGTAGGATTTATTGTAATAATTTTTATTTTAATTTCATTAGTATTAATTAGACCAAATTTATTGGGAGACCCTGATAATTTTATTCCAGCAAATCCTTTAGTAACACCTGCCCATATTCAACCAGAATGATATTTTTTATTTGCTTATGCTATTTTACGATCTATTCCAAATAAATTAGGAGGAGTTATTGCATTAGTTTTATCAATTGCAATTTTAATAATCCTTCCTTTTTATAATTTAAGAAAATTCCGAGGGATTCAATTTTATCCTATTAATCAAGTAATATTCTGATCTATATTAGTAACAGTAATTTTATTAACTTGAATTGGAGCTCGACCAGTTGAAGAACCTTATGTATTAATTGGACAAATTCTAACTGTTGTATATTTCTTATATTATTTAGTAAACCCATTAATTACAAAATGATGAGATAATTTATTAAATTAAATAGTTAATGAGCTTGAATAAGCATATGTTTTGAAAACATAAGATAGAATTTAATTTTCTATTAACTTTTACTAAAAAAAATTCACTataataaagaaaataataaaattttaaacccaataaaaaataataaataatTTAAAGAAAAAGATAAAAAACATTTTCAAGCTAAATATATTAATTTATCATAACGAAATCGAGGTAAAGTTCCTCGAACTCAAATAAAAACAAAAGAAATAAAAGTTAATTTTATATAAAATAATAAATTAAACACATCACAACCTAAAAAAATAACGCAAAATAATATTCTTATAAATAAAATTCTCGCATATTCAGCTATAAAAATTAAAGCAAAACCCCCTCT
TCTATATTCTACATTAAATCCTGAAACTAATTCTGATTCTCCTTCAGCAAAATCAAAAGGAGTCCGATTAGTTTCAGCTAATGAAATAGATATTCAAACTAAAGCTATAGGAAATAAAATAATTAAAAATCACATATAAACTTGATAAAAAAAAAAATAAATTATATTATAACTTCCAATTAAAAAAATAAAAGATAATAAAATTAAAGCTAAACTAACTTCATAAGAAATAGTCTGAGCCACAGCTCGCAAACCTCCTAATAAAGCATAATTAGAATTAGACGACCAACCAGCTACTATAACAGTATAAACCCCCAATCTAGTACAACATAAAAAAAATAAACCCCCCAAATTAAAAGAATATAATTTTACAAAAAAAGGTATACATATTCAAACAAATAATGATAAAAATAAAGAAAAAATTGGAGAAATATAATATCTTAAATAATTAGATAATAAAGGATAAGTTTGTTCTTTTGTAAATAATTTAATCGCATCACAAAAAGGTTGAGGAATTCCTATTAAACCAACTTTATTAGGACCTTTACGAATTTGAATATATCCTAAAACTTTTCGTTCTAATAAAGTTAAAAAAGCTACACTTACTAATACACAAATAATTAATAACAAACTACCAATTAATGACAAAATAAATTCTATATAAAACAAGTACTATTTGTAATAAAAATCACATATATAAATTCTAAATTTATTGCACTAATCTGCCAAAATAGTTTTATATTAATAATATTCTTATAAAAAATATAATTATTTTGATATTTGGTCCTTTCGTACTAAAATATCATAATTTTTTAAAGATAGAAACCAACCTGGCTTACACCGGTTTGAACTCAGATCATGTAAGAATTTAAAAGTCGAACAGACTTAAAATTTGAACGGCTACACCCAAAATTATATCTTAATCCAACATCGAGGTCGCAATCTTTTTTATCGATATGAACTCTCCAAAAAAATTACGCTGTTATCCCTAAAGTAACTTAATTTTTTAATCATTATTAATGGATCAAATATTCATAAATTTATGTTTTTAAAAAATTAAAAGTTTTTTAAATTTTAATATCACCCCAATAAAATATTTTTATTTATTAAAATTTAATTAATCTATATAATTAAAATAAAAAAAAATATAAAGATTTATAGGGTCTTCTCGTCTTTTAAATAAATTTTAGCTTTTTGACTAAAAAATAAAATTCTATAAAAATTTTAAATGAAACAGTTAATATTTCGTCCAACCATTCATTCCAGCCTTCAATTAAAAGACTAATGATTATGCTACCTTTGCACAGTCAAAATACTGCGGCCATTTAAAATTTTCAGTGGGCAGGTTAGACTTTATATATAATTCAAAAAGACATGTTTTTGTTAAACAGGCGAATATTATTTTTGCCGAATTCTTTATTTAAACTTTTCATATAAATTAATTTTAACATTATTATATACTAATTTTATCATTATTACTTAATTTTAATAATTAAAACTAACATTTTAATAAATAATTAAAATTTAATAAATAATTTAATTTATAAAATAAATTATAACATATTTTTTAATAATTGCTAATTCTAAGCATATATTTATTAAATCTATTTAATATTTTTAAAAATTTATTTTATAGCTTATCCCATAAAACATTAAAATTATAAATTAATTAATTAAATAAATAATTAAGTAAATTTATAATTTCTAAATTAAATTTATTTCTTAAAAAACTAGATACCTTTAAAAACGAATAACATTTCATTTCTAATATAATATTATAAATAATTTTATCACATTAACTTAAATATTATATTAACTCTTTTAAAATCGAGAAAAATAAATATTTATTTTTTATTTAATAAACACTGATACACAAGGTACAATAAATTAAATTTTCTTTTAAAATAAAATTTTTTCAAATTATTTCAATTTTCTTTTACAATACTAATAAACTATTATTAAAATTATTTTTTCTTTAAACAATACTAAAACTTTAAATT
TTATAGTTATTTCTAATAATTTTTTAAAAAATAATAAAAATTAATAAATAAAAACTAACTCAATTTATATTGATTTGCACAAAAATCTTTTCAATGTAAATGAAATACTTTACTTAATAAGCTTTAAATTGTCATTCTAGATACACTTTCCAGTACATCTACTATGTTACGACTTATCTTACCTTAATAATAAGAGCGACGGGCGATGTGTACATATTTTAGAGCTAAAATCAAATTATTAATCTTTATAATTTTACTACTAAATCCACTTTCAAAAATTTTTTCATAATTTTATTCATATAAATAAATTTATTGTAACCCATTATTACTTAAATATAAGCTACACCTTGATCTGATATAAatttttattaaaattattgaatattattattcttatAAAATATTCTGATAACGACGGTATATAAACTGATTACAAATTTAAGTAAGGTCCATCGTGGATTATCGATTAAAAAACAGGTTCCTCTAGATAGACTAAAATACCGCCAAATTTTTTAAGTTTCAAGAACATAACTATTACTACTTTAGCAATTTATTTACATTTTAAATAATAGGGTATCTAATCCTAGTTTTTTATTAAAATTTTTTAACCTCAATTACATTTTTATATAATAATTTAAATATAAAATTTCACTTAATATATTTAATTTTATTATTATTAATAAATTTAATTTAATTAATACTAAAAAAATTTATTTGTATTAATGGTATAACCGCGACTGCTGGCACCAATTTAGTCAATACTTTTTTATATTGCTATTTCTAAATTTCTTTAATTAATAATATTAATTACTGCGAATAAATTTTCATATTTATTTTTTAAATAAATATAAAATCACACAAAAATTTACATATAAATCAAATTAATAACAAATTTTTAAGCCAAAATAAAACTTTAAATTTTTATTTTTGATTTTTTATTATTAATTAAATATTAATAATTTTTATTAAAATAATTTTTTAAAGAAAAATTAAAATTAATTTTAATTAAATATTAAAATAATTTAATTTTATAATAAAATTTTTATCATATTATAATAATATAAAAATTTTATAAATTTATTTTTTAAATTTTACAAAATTTTTAAAATTTTTATTTTTTTTAAAAAAAATAATTTTTAACAAAAAAAATTTTTATCAAAAATTAATATAAAATAAATTTTAATTTAAAAATTAAAAATTTTAATTTTACACTTTTTTAAAAATATTTTTTTTTAAAAAAAAAATTTTTTTTTAAAAAAATTTTTTTTTAAAAAAAATTTAAAAAATTATAGATTAATTTCTTTTAAATGACTAAAAAAAATTTTTTTTTTTAAGTATTTTAAAACTTTTTTTTTACAATTTTTAAAAAAATATATAAATATAAATTTTAAAAAAAATTTTTTTTTTAAAAAAAATGAAAATTATATTATAAAAATATTTTTTTTACAAAAATGAAAATTTAATCTATTAAAAAAAATTATTAAAATTTTTATAAATAAATAAAAAAAGTAATAAATTTATTAAAAATCAATATATATATAATAATAAATAATTTGATTATTAATTAAATTATACGAATAATAAATATAATAAATAATTTATTTTAATCAATAAATCTGAAATAATTAATTATATACATATATATATATATGTAAATAAATAAAAATAAATTTATTCCCCCTATTTATAAATTTATTATATAATTAAAACTTAAAAAATATTTTTTTTAAAAAAATAGTTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATAAATAAAATTATTTAAAATAATTAATAAAAATATTTTTATTGTAATAAAAATTAAAAATAATTTTAAAAAAATTAAATTTATATATTTATATATATATATATATAATTTTTAATTTTCAATTAAATTATATAAATATAATAAAATAATTTTATTTAATCACTAAATCTGAAATAATTAATtataaatatatatatatatatatatatatatata
tatatataAATGAAAATAAATTTATTCCCCCTATTCATAAATTTATTGTATAATTAAAACTTAAAAAATATTTTTTTTTAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATAAATAAAATTATTTTAAATAATTAATAAAAATATTTTTAATATAATAAAAATTTAAAATGATTTTTTATAAAAATTAAATTCATATTTATATATATATATATATAATTTAATTTTCAATTAAATTATATAAGTATAATAAAATAATTTATTTTAATCACTAAATCTGAATTAATTAATTGTATATATATATATATATATAAAAAAAATGAAAATAAATTTATTCCCCCTATTCATAAATTTATTATATAATTAAATCTTAAAAAGTATTTTTTTTTTAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATGAATAAAATTATTTAAAATAATTTATAAAAATATTTTTAATATAATGAAAATTTAAAATGATTTTTTATTATTAATTAAATTCatatatttacatatatatatatatatatatatatatataGATAATTTAATTTTCAATTAAATTATATAAGTATAATAAAATAATTTATTTTAATCACTAAATCTGAATTAATTAATTGTATGTATATATATATATATATATATAAAAAAATGAAAATAAATTTATTCCCCCTATTCATAAATTTATTATATAATTAAATCTTAAAAAGTATTTTTTTTTAAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATGAATAAAATTATTTAAAATAATTAATAAAAATATTTTTAATATAATGAAAATTTAAAATGATTTTTTATTATTAATTAAATTCatatatttatatatatatatatatatatatatataGATAATTTAATTTTCAATTAAATTATATAAGTATAATAAAATAATTTATTTTAATCACTAAATCTGAATTAATTAATTGTATGtatatatatatatatatatatatatatatataAAAAATGAAAATAAATTTATTCCCCCTATTCATAAATTTATTATATAATTAAATCTTAAAAAGTATTTTTTTTTAAAAAAAAATTATTTATTAAATTATACTTAATAAACTATTTTTATAATAAATTATTTTATGAATAAAATTATTTAAAATAATTAATAAAAATTATATATATATATAAATGAAAATAATTTTTAAATTTTAATAATAAATAAATTTAATAATTAATAATTAAATAAAATCTATTCATTATTAATATTTAATTAATAATAAATAAATTTAATAACTAATAATTAAATAAAATTTATTTATTATTAATATTTAATTAATAATAAAAAATCATCAttttttttttttttttttttatttAATTAATTATtatatatttataaatttatatattattcaatatttataatatatatatatatatatatatataAAAATTAAATTATTTAAATAATTTAATATAAATTTTTAAAAAATTTCTTAAATGTATTATTTTTATAAAAAATATTTATATAATAAAATTATTTTTTTTTAAAAATAAACAAAAAATTTTTAATAAATAAATTTTATAATGAAATATAATTTATTTATTTTTTATTTTTTTAAAAAAAATTTAAAAAAAAATAATTTTTTTTTAAAAAAAAACTATATACTAATTATAAATTAATAGATATTTATATATATATAAATATTTAATATATTATTATATATCTAATAATTTAAATAAAAAATTTTAAAATTTAAAAATGTAGATATAATTTATAAAAATTTATATTCTCATATTTATTTATTATTAATTTAATTTATATAAATAATATAATAATTTAATTAATTATTATATATTTATAAATTTATATATTATTGAATATTTATATAATATATATATATATATAGAAAAATTAAATTATTTAAATAATTT
AATATAAATTTTTAAAAAATTTCTTAAATGTATTATTTTTATAAAAAATATTTATATAATAAAATCATTTTTTTTTAAAAATAAACAAAAAATTTTTAATAAATAAATTTTATAATGAAATATAATTTATTTATTTTTTATTTTTTTAAAAAAAAATTTTTTAAAAAAAAATAATTTTTTTTTTAAAAAAACTATATACTAATTATAAATTAATAGATATTTATATATATATAAATATTTAATATATTATTATATATCTAATAATTTAAATAAAAAATTTTAAAATTTAAAAATGTAGATATAATTTATAAAAATTTATATTCTCATATTTATTTATTATTAATTTAATTTATATAAATAATATAATAATTTAATTAATTATTATATATTTATAAATTTATATATTATTGAATATTTATATAATATATATATATATATAGAAAAATTAAATTATTTAAATAATTTAATATAAATTTTTAAAAAATTTCTTAAATGTATTATTTTTATAAAAAATATTTATATAATAAAATCATTTTTTTTAAAAATAAACAAAAAATTTTTAATAAATAAATTTTATAATGAAATATAATTTATTTATTTTTTATTTTTTTAAAAAAAATTTTTTAAAAAAAAATAATTTTTTTTTAAAAAAACTATATACTAAATATAAATTAATAGATATTTATATATATATAAATATTTAATATATTATTATATATCTAATAATTTAAATAAAAAATTTTAAAATTTAAAAATGTAGATATAATTTATAAAAATTTATATTCTCATATTTATTTATTATTAATTTAATTTATATAAATAATATAATAATTTAATTAATTATTATATATTTATAAATTTATATATTATTGAATATTTATATAATATATATATATATATAGAAAAATTAAATTATTTAAATAATTTAATATAAATTTTTAAAAAATTTCTTAAATGTATTATTTTTATAAAAAATATTTATATAATAAAATCATTTTTTTTTAAAAATAAACAAAAAATTTTTAATAAATAAATTTTATAATGAAATATAATTTATTTATTTTTTATTTTTTTTAAAAAAAATTTTTTAAAAAAAATAATTTTTTTTTAAAAAAACTATATACTAAATATAAATTAATAGATATTTATATATATATAAATATTTAATATATTATTATATATCTAATAATTTAAATAAAAAATTTTAAAATTTAAAAATGTAGATATAATTTATAAAAATTTATATTCTCATATTTATTTATTATTAATTTAATTTATATAAATAATATAATAATTTAATTAATTATTATATATTTATAAATTTATATATTATTGAATATTTATATATAATATATATATATATAGAAAAATAAAATTATTTAAATAATTTTACATAAAATTTTAAAAAATTTCTTAAATGTATTATTTAATAAAAAATTACTTTTTAAAAAAAATAATTTTAATTTTTTaaaaaaaatagtaaataataaaaaaaaaaaaaaaaaaaaatgaaaaTTATATTATT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/vep_versioned_annotation_cache.loc	Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,3 @@
+#<value>									<dbkey>			<version>		<cachetype>		<name>		<species>						<path>
+#
+drosophila_melanogaster_vep_106_BDGP6.32	dm6	106	default	Drosophila melanogaster dm6 (V106)	drosophila_melanogaster	${__HERE__}/test-cache
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/dbkeys.loc.sample	Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,1 @@
+#<dbkey>		<display_name>	<len_file_path>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/fasta_indexes.loc.sample	Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,29 @@
+#This is a sample file distributed with Galaxy that enables tools
+#to use a directory of Samtools indexed sequences data files.  You will need
+#to create these data files and then create a fasta_indexes.loc file
+#similar to this one (store it in this directory) that points to
+#the directories in which those files are stored. The fasta_indexes.loc
+#file has this format (white space characters are TAB characters):
+#
+# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+#
+#So, for example, if you had hg19 Canonical indexed stored in
+#
+# /depot/data2/galaxy/hg19/sam/,
+#
+#then the fasta_indexes.loc entry would look like this:
+#
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#
+#and your /depot/data2/galaxy/hg19/sam/ directory
+#would contain hg19canon.fa and hg19canon.fa.fai files.
+#
+#Your fasta_indexes.loc file should include an entry per line for
+#each index set you have stored.  The file in the path does actually
+#exist, but it should never be directly used. Instead, the name serves
+#as a prefix for the index file.  For example:
+#
+#hg18canon	hg18	Human (Homo sapiens): hg18 Canonical	/depot/data2/galaxy/hg18/sam/hg18canon.fa
+#hg18full	hg18	Human (Homo sapiens): hg18 Full	/depot/data2/galaxy/hg18/sam/hg18full.fa
+#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#hg19full	hg19	Human (Homo sapiens): hg19 Full	/depot/data2/galaxy/hg19/sam/hg19full.fa
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/vep_versioned_annotation_cache.loc.sample	Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,11 @@
+#This file describes vep cache data and its metadata available on the server.
+#The data table has the format (white space characters are TAB characters):
+#
+#<value>									<dbkey>			<version>		<cachetype>		<name>								<species>			<path>
+#
+#So, vep_versioned_annotation_cache.loc tables could look like this:
+#
+#homo_sapiens_vep_105_GRCh38				hg38			105				default			Homo sapiens hg38 (V105)			homo_sapiens		/path/to/vep_versioned_annotation_cache/105/hg38/default
+#homo_sapiens_refseq_vep_105_GRCh38			hg38			105				refseq			Homo sapiens hg38 (V105, Refseq)	homo_sapiens		/path/to/vep_versioned_annotation_cache/105/hg38/refseq
+#homo_sapiens_merged_vep_105_GRCh38			hg38			105				merged			Homo sapiens hg38 (V105, Merged)	homo_sapiens		/path/to/vep_versioned_annotation_cache/105/hg38/merged
+#
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,17 @@
+<tables>
+    <!-- Versioned VEP annotation caches available on the server: one tab-separated row per cache, lines starting with # are ignored -->
+    <table name="vep_versioned_annotation_cache" comment_char="#">
+        <columns>value, dbkey, version, cachetype, name, species, path</columns>
+        <file path="tool-data/vep_versioned_annotation_cache.loc" />
+    </table>
+    <!-- Known dbkeys with their display name and the path to the matching len file -->
+    <table name="__dbkeys__" comment_char="#">
+        <columns>value, name, len_path</columns>
+        <file path="tool-data/dbkeys.loc" />
+    </table>
+    <!-- Samtools-indexed FASTA references: path names the .fa file, whose .fa.fai index is expected in the same directory -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,12 @@
+<tables>
+    <!-- Test-only table of versioned VEP caches; its loc file is shipped in test-data -->
+    <table name="vep_versioned_annotation_cache" comment_char="#">
+        <columns>value, dbkey, version, cachetype, name, species, path</columns>
+        <file path="${__HERE__}/test-data/vep_versioned_annotation_cache.loc" />
+    </table>
+    <!-- Test-only table of samtools-indexed FASTA references; its loc file is shipped in test-data -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/fasta_indexes.loc" />
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vcf2maf.xml	Tue Jun 28 21:07:04 2022 +0000
@@ -0,0 +1,153 @@
+<tool id="vcf2maf" name="Convert VCF to MAF" version="@TOOL_VERSION@">
+	<description>with vcf2maf</description>
+	<macros><!-- Version tokens reused by the tool version, the requirements, the command and the cache filter -->
+		<token name="@TOOL_VERSION@">1.6.21</token>
+		<token name="@DB_VERSION@">106</token><!-- cache version treated as matching; also the prefix of the ensembl-vep package version below -->
+	</macros>
+	<requirements>
+		<requirement type="package" version="@TOOL_VERSION@">vcf2maf</requirement>
+		<requirement type="package" version="@DB_VERSION@.1">ensembl-vep</requirement>
+	</requirements>
+	<command detect_errors="exit_code"><![CDATA[
+		ln -s '${input1}' MainInput.vcf &&
+		#if $ref_seq.ref_source == "cached":
+			ln -s '${ref_seq.ref.fields.path}' reference.fa &&
+		#elif $ref_seq.ref_source == "history":
+			ln -s '${ref_seq.ref}' reference.fa &&
+		#end if
+		vcf2maf.pl --input-vcf MainInput.vcf --output-maf MainOutput.maf --ref-fasta reference.fa
+		#if $annotation_cache.source == "no_vep":
+			--inhibit-vep
+		#else:
+			--vep-path \$(dirname \$(which vep))
+			--vep-data '${annotation_cache.cache_file.fields.path}'
+			--species '${annotation_cache.cache_file.fields.species}'
+			--ncbi-build '${annotation_cache.cache_file.fields.value.split($annotation_cache.cache_file.fields.version + "_")[-1]}'
+			#if $annotation_cache.cache_file.fields.version != "@DB_VERSION@": --cache-version $annotation_cache.cache_file.fields.version
+		#end if
+		## Sample identifiers are optional; each flag is emitted only when the user supplied a value.
+		#if $tumor_id:
+			--tumor-id '${tumor_id}'
+		#end if
+		#if $normal_id:
+			--normal-id '${normal_id}'
+		#end if
+		#if $vcf_tumor_id:
+			--vcf-tumor-id '${vcf_tumor_id}'
+		#end if
+		#if $vcf_normal_id:
+			--vcf-normal-id '${vcf_normal_id}'
+		#end if
+		## Advanced options. min_hom_vaf is tested as a string so that an explicit 0 is still passed on (a bare truth test treats 0 as unset).
+		#if $adv_opt.any_allele:
+			--any-allele
+		#end if
+		#if str($adv_opt.min_hom_vaf) != "":
+			--min-hom-vaf $adv_opt.min_hom_vaf
+		#end if
+		#if $adv_opt.maf_center:
+			--maf-center '${adv_opt.maf_center}'
+		#end if
+		#if $adv_opt.retain_info:
+			--retain-info '${adv_opt.retain_info}'
+		#end if
+		#if $adv_opt.retain_fmt:
+			--retain-fmt '${adv_opt.retain_fmt}'
+		#end if
+		#if $adv_opt.retain_ann:
+			--retain-ann '${adv_opt.retain_ann}'
+		#end if
+	]]></command>
+	<inputs>
+		<param type="data" name="input1" label="VCF input file" format="vcf">
+			<validator type="unspecified_build" />
+		</param>
+		<conditional name="ref_seq"><!-- Reference FASTA: either an entry of the fasta_indexes data table or a FASTA dataset from the history -->
+			<param name="ref_source" type="select" label="Select FASTA file as reference sequence">
+				<option value="cached">Locally cached</option>
+				<option value="history">History</option>
+			</param>
+			<when value="cached">
+				<param name="ref" type="select" label="Select reference sequence">
+					<options from_data_table="fasta_indexes">
+						<validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" />
+					</options>
+				</param>
+			</when>
+			<when value="history">
+				<param name="ref" type="data" format="fasta" label="Select reference sequence" />
+			</when>
+		</conditional>
+		<conditional name="annotation_cache"><!-- VEP usage: off, or with a cache from vep_versioned_annotation_cache; "restricted" keeps only rows whose column 2 (version) equals @DB_VERSION@ -->
+			<param name="source" type="select" label="Select the source of annotation data if you want to use VEP" help="vcf2maf can utilize Ensembl's VEP to select a single effect per variant. VEP can only be used if SIFT is available for the selected genome assembly. Ensembl strongly recommends to only use annotation cache files with a version number matching the VEP version. You can disable the corresponding filtering of available cache files at your own risk.">
+				<option value="no_vep" selected="true">Do not use VEP</option>
+				<option value="restricted">Use VEP with a cache file with matching version number</option>
+				<option value="unrestricted">Use VEP with any cache file</option>
+			</param>
+			<when value="no_vep"/>
+			<when value="restricted">
+				<param name="cache_file" type="select" label="Select annotation cache file" help="If the annotation data of interest is not listed, have a look at all available cache files regardless of their version number or contact your Galaxy admin.">
+					<options from_data_table="vep_versioned_annotation_cache">
+						<filter type="static_value" value="@DB_VERSION@" column="2" />
+						<filter type="sort_by" column="4"/>
+					</options>
+					<validator type="no_options" message="No annotation caches are available"/>
+				</param>
+			</when>
+			<when value="unrestricted">
+				<param name="cache_file" type="select" label="Select annotation cache file" help="If the annotation data of interest is not listed, contact your Galaxy admin.">
+					<options from_data_table="vep_versioned_annotation_cache">
+						<filter type="sort_by" column="4"/>
+					</options>
+					<validator type="no_options" message="No annotation caches are available"/>
+				</param>
+			</when>
+		</conditional>
+		<!-- Optional sample identifiers; the command adds the matching flag only when a value is entered -->
+		<param argument="--tumor-id" type="text" optional="true" label="Enter tumor sample ID (optional)" help="Used to fill the Tumor_Sample_Barcode column of the output MAF with the tumor sample ID."/>
+		<param argument="--normal-id" type="text" optional="true" label="Enter normal sample ID (optional)" help="Used to fill the Matched_Norm_Sample_Barcode column of the output MAF with the normal sample ID."/>
+		<param argument="--vcf-tumor-id" type="text" optional="true" label="Enter name of tumor genotype column (optional)" help="VCFs from variant callers like VarScan use hardcoded sample IDs TUMOR/NORMAL to name genotype columns. Use this parameter to have vcf2maf correctly locate these columns to parse genotypes, while still printing proper sample IDs in the output MAF."/>
+		<param argument="--vcf-normal-id" type="text" optional="true" label="Enter name of normal genotype column (optional)" help="VCFs from variant callers like VarScan use hardcoded sample IDs TUMOR/NORMAL to name genotype columns. Use this parameter to have vcf2maf correctly locate these columns to parse genotypes, while still printing proper sample IDs in the output MAF."/>
+		<!-- Advanced options; the command template reads them through the adv_opt section -->
+		<section name="adv_opt" title="Advanced options">
+			<param argument="--any-allele" type="boolean" optional="true" checked="false" label="Allow also mismatched variant alleles when reporting co-located variants"/>
+			<param argument="--min-hom-vaf" type="float" optional="true" min="0" max="1" label="Enter minimum allele fraction to call a variant homozygous if GT is undefined in VCF" help="Default value is 0.7"/>
+			<param argument="--maf-center" type="text" optional="true" label="Enter variant calling center to report in MAF"/>
+			<param argument="--retain-info" type="text" optional="true" label="Enter comma-delimited names of INFO fields to retain as extra columns in MAF"/>
+			<param argument="--retain-fmt" type="text" optional="true" label="Enter comma-delimited names of FORMAT fields to retain as extra columns in MAF"/>
+			<param argument="--retain-ann" type="text" optional="true" label="Enter comma-delimited names of VEP annotations (within the VEP CSQ/ANN) to retain as extra columns in MAF"/>
+		</section>
+	</inputs>
+	<outputs><!-- The command writes MainOutput.maf into the job working directory; it is collected here as a tabular dataset -->
+		<data name="output1" format="tabular" from_work_dir="MainOutput.maf" />
+	</outputs>
+	<tests>
+		<test expect_num_outputs="1"><!-- Reference FASTA from the history, VEP disabled -->
+			<param name="input1" dbkey="hg19" value="input_test1.vcf" ftype="vcf" />
+			<param name="ref_seq|ref_source" value="history" />
+			<param name="ref_seq|ref" dbkey="hg19" value="test1.fa" ftype="fasta" />
+			<param name="annotation_cache|source" value="no_vep" />
+			<output name="output1" file="output_test1.tabular" ftype="tabular" />
+		</test>
+		<test expect_num_outputs="1"><!-- Reference FASTA taken from the fasta_indexes data table, VEP disabled -->
+			<param name="input1" dbkey="hg19" value="input_test1.vcf" ftype="vcf" />
+			<param name="ref_seq|ref_source" value="cached" />
+			<param name="ref_seq|ref" value="hg19test" />
+			<param name="annotation_cache|source" value="no_vep" />
+			<output name="output1" file="output_test1.tabular" ftype="tabular" />
+		</test>
+		<test expect_num_outputs="1"><!-- Reference FASTA from the history, VEP run with the version-matched test cache -->
+			<param name="input1" dbkey="dm6" value="input_test2.vcf" ftype="vcf" />
+			<param name="ref_seq|ref_source" value="history" />
+			<param name="ref_seq|ref" dbkey="dm6" value="test2.fa" ftype="fasta" />
+			<param name="annotation_cache|source" value="restricted" />
+			<param name="annotation_cache|cache_file" value="drosophila_melanogaster_vep_106_BDGP6.32" />
+			<output name="output1" file="output_test2.tabular" ftype="tabular" />
+		</test>
+	</tests>
+	<help><![CDATA[
+		The tool vcf2maf can parse a wide range of VCF-like formats and convert them into the `Mutation Annotation Format (MAF) <https://docs.gdc.cancer.gov/Data/File_Formats/MAF_Format/>`__. A central part of the conversion process is the selection of a single effect per variant. While this is often a subjective decision, vcf2maf offers a standardized way to achieve this by optionally utilizing Ensembl's `Variant Effect Predictor (VEP) <https://www.ensembl.org/info/docs/tools/vep/index.html>`__.]]></help>
+	<citations><!-- NOTE(review): Zenodo DOI, presumably the vcf2maf software record; confirm it resolves to the release wrapped here -->
+		<citation type="doi">10.5281/zenodo.593251</citation>
+	</citations>
+</tool>
\ No newline at end of file