Mercurial > repos > iuc > arriba_draw_fusions
changeset 0:2d4e3aff9dc7 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/arriba commit b12158e6cc9b1b2bd6e7522dfc183e9055575823
author | iuc |
---|---|
date | Wed, 27 Jul 2022 11:25:43 +0000 |
parents | |
children | cf18a5993aa2 |
files | arriba_draw_fusions.xml macros.xml static/images/draw-fusions-example.png test-data/Aligned.out.bam test-data/Aligned.out.bam.bai test-data/Aligned.out.sam test-data/cytobands.tsv test-data/fusions.tsv test-data/genome.fasta.gz test-data/genome.gtf.gz test-data/protein_domains.gff3 tool-data/all_fasta.loc.sample tool_data_table_conf.xml.sample |
diffstat | 13 files changed, 663 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- arriba_draw_fusions.xml: Galaxy wrapper around Arriba's draw_fusions.R.
     Requirements, the version command, the shared parameter macros and the
     @DRAW_FUSIONS@ command token are all imported from macros.xml. -->
<tool id="arriba_draw_fusions" name="Arriba Draw Fusions" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT">
    <description></description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="version_command" />
    <command detect_errors="exit_code"><![CDATA[
    ## draw_fusions.R is always pointed at Aligned.sortedByCoord.out.bam in the
    ## working directory, so both branches below have to produce that file.
    #if $alignments.extension == 'sam'
        ## SAM input: convert to BAM, coordinate-sort and index it with samtools.
        ## The assembly is linked and indexed so that its .fai can be handed to
        ## "samtools view -t".
        ## NOTE(review): this branch reads genome.assembly, which only exists when
        ## the genome source is "history" and a dataset was selected - confirm how
        ## SAM input combined with a "cached" genome is meant to behave.
        ln -sf '$genome.assembly' genome.fa &&
        samtools faidx genome.fa &&
        samtools view -b -@ \${GALAXY_SLOTS:-1} -t genome.fa.fai '$alignments' |
        samtools sort -O bam -@ \${GALAXY_SLOTS:-1} -T "\${TMPDIR:-.}" -o Aligned.sortedByCoord.out.bam &&
        samtools index Aligned.sortedByCoord.out.bam &&
    #else
        ## BAM input: link the dataset and its index metadata under the expected names.
        ln -sf '${alignments}' 'Aligned.sortedByCoord.out.bam' &&
        ln -sf '$alignments.metadata.bam_index' 'Aligned.sortedByCoord.out.bam.bai' &&
    #end if
    @DRAW_FUSIONS@
    ]]></command>
    <inputs>
        <param argument="--fusions" type="data" format="tabular" label="Arriba fusions.tsv"/>
        <param argument="--alignments" type="data" format="sam,bam" label="STAR Aligned.out.bam"/>
        <expand macro="genome_source" assembly_optional="true"/>
        <expand macro="gtf_source"/>
        <param name="protein_domains" argument="-p" type="data" format="gff3" optional="true" label="File containing protein domains"/>
        <!-- The visualization_options macro contributes the cytobands input and a nested "options" section. -->
        <section name="visualization" expanded="true" title="Visualization Options">
            <expand macro="visualization_options" />
        </section>
    </inputs>
    <outputs>
        <data name="fusions_pdf" format="pdf" label="${tool.name} on ${on_string}: fusions.pdf" from_work_dir="fusions.pdf"/>
    </outputs>
    <tests>
        <!-- Test 1 - From SAM -->
        <test>
            <param name="fusions" ftype="tabular" value="fusions.tsv"/>
            <param name="alignments" ftype="sam" value="Aligned.out.sam"/>
            <conditional name="genome">
                <param name="genome_source" value="history"/>
                <param name="assembly" ftype="fasta" value="genome.fasta.gz"/>
            </conditional>
            <conditional name="genome_gtf">
                <param name="gtf_source" value="history"/>
                <param name="annotation" ftype="gtf" value="genome.gtf.gz"/>
            </conditional>
            <param name="protein_domains" ftype="gff3" value="protein_domains.gff3"/>
            <section name="visualization">
                <param name="cytobands" ftype="tabular" value="cytobands.tsv"/>
            </section>
            <output name="fusions_pdf">
                <assert_contents>
                    <has_size value="64000" delta="5000" />
                </assert_contents>
            </output>
        </test>
        <!-- Test 2 - From BAM -->
        <test>
            <param name="fusions" ftype="tabular" value="fusions.tsv"/>
            <param name="alignments" ftype="bam" value="Aligned.out.bam"/>
            <conditional name="genome">
                <param name="genome_source" value="history"/>
                <param name="assembly" ftype="fasta" value="genome.fasta.gz"/>
            </conditional>
            <conditional name="genome_gtf">
                <param name="gtf_source" value="history"/>
                <param name="annotation" ftype="gtf" value="genome.gtf.gz"/>
            </conditional>
            <param name="protein_domains" ftype="gff3" value="protein_domains.gff3"/>
            <!-- "options" is declared inside "visualization" in the tool inputs, so it is nested the same way here. -->
            <section name="visualization">
                <param name="cytobands" ftype="tabular" value="cytobands.tsv"/>
                <section name="options">
                    <param name="sampleName" value="My Test"/>
                </section>
            </section>
            <output name="fusions_pdf">
                <assert_contents>
                    <has_size value="64000" delta="5000" />
                </assert_contents>
            </output>
        </test>

    </tests>
    <help><![CDATA[
**Arriba Draw Fusions**

Arriba_Draw_Fusions_ (draw_fusions.R) renders publication-quality visualizations of the transcripts involved in predicted fusions. It generates a PDF file with one page for each predicted fusion. Each page depicts the fusion partners, their orientation, the retained exons in the fusion transcript, statistics about the number of supporting reads, and - if the column fusion_transcript has a value - an excerpt of the sequence around the breakpoint.


**INPUTS**

See: https://arriba.readthedocs.io/en/latest/command-line-options/#draw_fusionsr

 - Fusions

   File containing fusion predictions from Arriba_ (fusions.tsv) or STAR-Fusion (star-fusion.fusion_predictions.tsv or star-fusion.fusion_predictions.abridged.coding_effect.tsv).

 - Annotation

   Gene annotation in GTF format that was used by the STAR aligner (parameter -g).

 - Alignments

   SAM or BAM file containing normal alignments from STAR. SAM input is converted to a sorted, indexed BAM file with samtools before drawing.

 - Assembly (Optional)

   Only required when the alignments are provided in SAM format rather than as a sorted BAM file. The genome assembly will be used by samtools to produce a sorted BAM file.

 - Protein domains (Optional)

   GFF3 file containing the genomic coordinates of protein domains. Distributions of Arriba offer protein domain annotations for all supported assemblies in the database directory. When this file is given, a plot is generated, which shows the protein domains retained in the fusion transcript.

 - Cytobands (Optional)

   Coordinates of the Giemsa staining bands. This information is used to draw ideograms. If the argument is omitted, then no ideograms are rendered. The file must have the following columns: contig, start, end, name, giemsa. Recognized values for the Giemsa staining intensity are: gneg, gpos followed by a percentage, acen, stalk. Cytobands for human and mouse references can be retrieved from the Arriba distribution with the **Arriba Get Filters** tool.


**OPTIONS**

See: https://arriba.readthedocs.io/en/latest/command-line-options/#draw_fusionsr


**OUTPUTS**

See: https://arriba.readthedocs.io/en/latest/visualization/

 - fusions.pdf

   A PDF file with one page for each predicted fusion. Each page depicts the fusion partners, their orientation, the retained exons in the fusion transcript, statistics about the number of supporting reads, and - if the column fusion_transcript has a value - an excerpt of the sequence around the breakpoint.

.. image:: draw-fusions-example.png
   :width: 800
   :height: 467




.. _Arriba_Draw_Fusions: https://arriba.readthedocs.io/en/latest/visualization/
.. _Arriba: https://arriba.readthedocs.io/en/latest/

    ]]></help>
    <expand macro="citations" />
</tool>
<!-- macros.xml: shared XML macros and Cheetah tokens imported by arriba_draw_fusions.xml. -->
<macros>
    <!-- Version tokens; the tool version is assembled from both in the tool element. -->
    <token name="@TOOL_VERSION@">2.3.0</token>
    <token name="@VERSION_SUFFIX@">0</token>
    <!-- Package requirements; the yield lets an expanding tool append further requirements. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">arriba</requirement>
            <yield/>
        </requirements>
    </xml>
    <xml name="citations">
        <citations>
            <citation type="doi">10.1101/gr.257246.119</citation>
            <yield />
        </citations>
    </xml>
    <!-- Extracts the last whitespace-separated field of the "Version" line printed by "arriba -h". -->
    <xml name="version_command">
        <version_command>arriba -h | grep Version | sed 's/^.* //'</version_command>
    </xml>
    <!-- Genome assembly selector: a FASTA dataset from the history or an entry of the
         all_fasta data table. The assembly_optional token controls whether the history
         dataset may be left empty. -->
    <xml name="genome_source" token_assembly_optional="false" >
        <conditional name="genome">
            <param name="genome_source" type="select" label="Genome assembly fasta (that was used for STAR alignment)">
                <option value="history">From your history</option>
                <option value="cached">Use built-in Genome reference</option>
            </param>
            <when value="history">
                <param name="assembly" argument="-a" type="data" format="fasta" optional="@ASSEMBLY_OPTIONAL@" label="Genome assembly fasta"/>
            </when>
            <when value="cached">
                <param name="ref_file" type="select" label="Using reference genome" help="Select genome from the list">
                    <options from_data_table="all_fasta">
                        <validator type="no_options" message="No reference genomes are available" />
                    </options>
                </param>
            </when>
        </conditional>
    </xml>
    <!-- GTF annotation selector; only the history source is currently enabled. -->
    <xml name="gtf_source" token_assembly_optional="false" >
        <conditional name="genome_gtf">
            <param name="gtf_source" type="select" label="Genome GTF annotation source">
                <option value="history">From your history</option>
                <!-- <option value="cached">Use built-in Gtf annotation</option> -->
            </param>
            <when value="history">
                <param name="annotation" argument="-g" type="data" format="gtf" label="Gene annotation in GTF format"/>
            </when>
        </conditional>
    </xml>

    <!-- Sets the Cheetah variable genome_assembly. A history dataset is symlinked to
         genome.fa; for a cached genome the variable is taken from the data table entry.
         The variable stays undefined when the history source is chosen without a dataset.
         NOTE(review): the cached branch reads fields.fasta; the all_fasta table definition
         is not visible here and that table conventionally exposes a "path" column.
         Confirm the column name against tool_data_table_conf.xml.sample. -->
    <token name="@GENOME_SOURCE@"><![CDATA[
#if str($genome.genome_source) == "history"
    #if $genome.assembly
        #set $genome_assembly = 'genome.fa'
        ln -sf '$genome.assembly' $genome_assembly &&
    #end if
#elif str($genome.genome_source) == "cached"
    #set $genome_assembly = $genome.ref_file.fields.fasta
#end if
    ]]></token>
    <!-- Symlinks the history annotation to genome.gtf or genome.gtf.gz (depending on its
         datatype) and stores the chosen name in the Cheetah variable genome_annotation. -->
    <token name="@GTF_SOURCE@"><![CDATA[
#if str($genome_gtf.gtf_source) == "history"
    #if $genome_gtf.annotation.is_of_type('gtf.gz')
        #set $genome_annotation = 'genome.gtf.gz'
    #else
        #set $genome_annotation = 'genome.gtf'
    #end if
    ln -sf '$genome_gtf.annotation' $genome_annotation &&
#end if
    ]]></token>

    <!-- Inputs for the plot appearance: the cytobands file plus an "options" section
         whose parameters correspond to draw_fusions.R command-line options. -->
    <xml name="visualization_options">
        <param name="cytobands" argument="--cytobands" type="data" format="tabular" optional="true" label="Cytobands"/>
        <section name="options" expanded="false" title="Draw Fusion Options">
            <param argument="--sampleName" type="text" value="" optional="true" label="Sample Name printed as the title on every page"/>
            <param argument="--transcriptSelection" type="select" optional="true" label="Transcript selection">
                <help>By default the transcript isoform with the highest coverage is drawn.
                    Alternatively, the transcript isoform that is provided in the columns
                    transcript_id1 and transcript_id2 in the given fusions file can be drawn.
                    Selecting the isoform with the highest coverage usually produces nicer plots,
                    in the sense that the coverage track is smooth and shows a visible increase in coverage after the fusion breakpoint.
                    However, the isoform with the highest coverage may not be the one that is involved in the fusion.
                    Often, genomic rearrangements lead to non-canonical isoforms being transcribed.
                    For this reason, it can make sense to rely on the transcript selection provided by the columns transcript_id1/2,
                    which reflect the actual isoforms involved in a fusion.
                    As a third option, the transcripts that are annotated as canonical can be drawn.
                    Transcript isoforms tagged with appris_principal, appris_candidate, or CCDS are considered canonical.
                </help>
                <option value="coverage">coverage</option>
                <option value="provided">provided</option>
                <option value="canonical">canonical</option>
            </param>
            <param argument="--minConfidenceForCircosPlot" type="select" optional="true" label="Minimum confidence for circos plot">
                <help>The fusion of interest is drawn as a solid line in the circos plot.
                    To give an impression of the overall degree of rearrangement,
                    all other fusions are drawn as semi-transparent lines in the background.
                    This option determines which other fusions should be included in the circos plot.
                    Values specify the minimum confidence a fusion must have to be included.
                    It usually makes no sense to include low-confidence fusions in circos plots,
                    because they are abundant and unreliable, and would clutter up the circos plot.
                    Default: medium
                </help>
                <option value="none">none - only the fusion of interest is drawn</option>
                <option value="low">low</option>
                <option value="medium">medium</option>
                <option value="high">high</option>
            </param>
            <param argument="--squishIntrons" type="select" optional="true" label="Squish introns">
                <help>Exons usually make up only a small fraction of a gene.
                    They may be hard to see in the plot.
                    Since introns are in most situations of no interest in the context of gene fusions,
                    this switch can be used to shrink the size of introns to a fixed, negligible size.
                    It makes sense to disable this feature, if breakpoints in introns are of importance.
                    Default: TRUE
                </help>
                <option value="TRUE">True</option>
                <option value="FALSE">False</option>
            </param>
            <param argument="--showIntergenicVicinity" type="text" value="" optional="true" label="Intergenic Vicinity">
                <help>This option only applies to intergenic breakpoints.
                    If it is set to a value greater than 0, then the script draws the genes
                    which are no more than the given distance away from an intergenic breakpoint.
                    The keywords closestGene and closestProteinCodingGene instruct the script
                    to dynamically determine the distance to the next (protein-coding) gene for each breakpoint.
                    Alternatively, instead of specifying a single distance
                    that is applied upstream and downstream of both breakpoints alike,
                    more fine-grained control over the region to be shown is possible by specifying four comma-separated values.
                    The first two values determine the region to the left and to the right of breakpoint 1;
                    the third and fourth values determine the region to the left and to the right of breakpoint 2.
                    Note that this option is incompatible with squishIntrons.
                    Default: 0
                </help>
                <option value="closestGene">closestGene</option>
                <option value="closestProteinCodingGene">closestProteinCodingGene</option>
                <validator type="regex" message="Enter closestGene, closestProteinCodingGene, a single distance, or four comma-separated distances">^(closestGene|closestProteinCodingGene|\d+|\d+,\d+,\d+,\d+)$</validator>
            </param>
            <param argument="--mergeDomainsOverlappingBy" type="float" value="" min="0." max="1.0" optional="true" label="Merge Domains Overlapping By">
                <help>Occasionally, domains are annotated redundantly.
                    For example, tyrosine kinase domains are frequently annotated as
                    Protein tyrosine kinase and Protein kinase domain.
                    In order to simplify the visualization, such domains can be merged into one,
                    given that they overlap by the given fraction.
                    The description of the larger domain is used.
                    Default: 0.9
                </help>
            </param>
            <param argument="--printExonLabels" type="select" optional="true" label="Print Exon Labels">
                <help>By default the number of an exon is printed inside each exon,
                    which is taken from the attribute exon_number of the GTF annotation.
                    When a gene has many exons, the boxes may be too narrow to contain the labels,
                    resulting in unreadable exon labels. In these situations,
                    it may be better to turn off exon labels.
                    Default: TRUE
                </help>
                <option value="TRUE">True</option>
                <option value="FALSE">False</option>
            </param>
            <param argument="--render3dEffect" type="select" optional="true" label="Render 3D effect">
                <help>Whether light and shadow should be rendered to give objects a 3D effect.
                    Default: TRUE
                </help>
                <option value="TRUE">True</option>
                <option value="FALSE">False</option>
            </param>
            <param argument="--optimizeDomainColors" type="select" optional="true" label="Optimize Domain Colors">
                <help>By default, the script colorizes domains according to the colors
                    specified in the file given in --annotation.
                    This way, coloring of domains is consistent across all proteins.
                    But since there are more distinct domains than colors,
                    this can lead to different domains having the same color.
                    If this option is set to TRUE, the colors are recomputed for each fusion separately.
                    This ensures that the colors have the maximum distance for each individual fusion,
                    but they are no longer consistent across different fusions.
                    Default: FALSE
                </help>
                <option value="TRUE">True</option>
                <option value="FALSE">False</option>
            </param>
            <param argument="--color1" type="color" value="" optional="true" label="Color of the 5' end of the fusion."/>
            <param argument="--color2" type="color" value="" optional="true" label="Color of the 3' end of the fusion."/>
            <param argument="--pdfWidth" type="float" value="" min="1." optional="true" label="Width of PDF output file in inches"
                   help="Default: 11.692"/>
            <param argument="--pdfHeight" type="float" value="" min="1." optional="true" label="Height of PDF output file in inches"
                   help="Default: 8.267"/>
            <param argument="--fontSize" type="float" value="" min="0." optional="true" label="Scale the size of text"
                   help="Default: 1.0"/>
            <param argument="--fontFamily" type="text" value="" optional="true" label="Font to use for all labels in the plots.">
                <help>Default: Helvetica
                </help>
                <option value="serif">serif</option>
                <option value="sans">sans</option>
                <option value="mono">mono</option>
                <option value="AvantGarde">AvantGarde</option>
                <option value="Bookman">Bookman</option>
                <option value="Courier">Courier</option>
                <option value="Helvetica">Helvetica</option>
                <option value="Helvetica-Narrow">Helvetica-Narrow</option>
                <option value="NewCenturySchoolbook">NewCenturySchoolbook</option>
                <option value="Palatino">Palatino</option>
                <option value="Times">Times</option>
                <option value="URWGothic">URWGothic</option>
                <option value="URWBookman">URWBookman</option>
                <option value="NimbusMon">NimbusMon</option>
                <option value="NimbusSan">NimbusSan</option>
                <option value="URWHelvetica">URWHelvetica</option>
                <option value="NimbusSanCond">NimbusSanCond</option>
                <option value="CenturySch">CenturySch</option>
                <option value="URWPalladio">URWPalladio</option>
                <option value="NimbusRom">NimbusRom</option>
                <option value="URWTimes">URWTimes</option>
                <option value="ArialMT">ArialMT</option>
                <option value="Japan1">Japan1</option>
                <option value="Japan1HeiMin">Japan1HeiMin</option>
                <option value="Japan1GothicBBB">Japan1GothicBBB</option>
                <option value="Japan1Ryumin">Japan1Ryumin</option>
                <option value="Korea1">Korea1</option>
                <option value="Korea1deb">Korea1deb</option>
                <option value="CNS1">CNS1</option>
                <option value="GB1">GB1</option>
            </param>
            <param argument="--fixedScale" type="integer" value="" min="0" optional="true" label="Apply a fixed scale to all fusions">
                <help>By default, transcripts are scaled automatically to fill the entire page.
                    This parameter enforces a fixed scale to be applied to all fusions,
                    which is useful when a collection of fusions should be visualized and the sizes of all transcripts should be comparable.
                    A common use case is the visualization of a gene that is found to be fused to multiple partners.
                    By forcing all fusion plots to use the same scale, the fusions can be summarized as a collage
                    in a single plot one above the other with matching scales.
                    Note: The scale must be bigger than the sum of the biggest pair of transcripts to be drawn,
                    or else dynamic scaling is applied, because display errors would occur otherwise.
                    The default value is 0, which means that no fixed scale should be used
                    and that the scale should be adapted dynamically for each fusion. Default: 0
                </help>
            </param>
            <param argument="--coverageRange" type="text" value="" optional="true" label="Maximum coverage for plot">
                <help>When the parameter --alignments is used, coverage plots are drawn above the transcripts of the fused genes.
                    The plots can be cropped at a fixed level by passing a non-zero value to this parameter.
                    When only a single value is given, both coverage plots (for gene1 and gene2) are cropped at the same level.
                    When two comma-separated values are given, the cutoffs can be specified independently for the two plots.
                    A value of 0 indicates that no cropping should be applied (i.e., the cutoff is set to the peak coverage)
                    and that the coverage plots of both genes should be on the same scale. This is the default behavior.
                    A value of 0,0 also indicates that no cropping should be applied,
                    but the coverage plots of the two genes have different scales:
                    each one is scaled individually to the peak coverage of the respective gene.
                    Default: 0
                </help>
                <validator type="regex" message="Enter one non-negative integer or two comma-separated non-negative integers">^\d+(,\d+)?$</validator>
            </param>
        </section>
    </xml>
    <!-- Command-line fragment that calls draw_fusions.R. It expects the alignments as
         Aligned.sortedByCoord.out.bam in the working directory and writes fusions.pdf.
         Optional parameters are only appended when a value was supplied. -->
    <token name="@DRAW_FUSIONS@"><![CDATA[
draw_fusions.R
    --fusions='$fusions'
    --alignments='Aligned.sortedByCoord.out.bam'
    --annotation='$genome_gtf.annotation'
    --output=fusions.pdf
    #if $visualization.cytobands
    --cytobands='$visualization.cytobands'
    #end if
    #if $protein_domains
    --proteinDomains='$protein_domains'
    #end if
    ## Visualization Options
    #if $visualization.options.transcriptSelection
    --transcriptSelection=$visualization.options.transcriptSelection
    #end if
    #if $visualization.options.minConfidenceForCircosPlot
    --minConfidenceForCircosPlot=$visualization.options.minConfidenceForCircosPlot
    #end if
    #if $visualization.options.squishIntrons
    --squishIntrons=$visualization.options.squishIntrons
        ## showIntergenicVicinity is documented as incompatible with squishIntrons,
        ## so it is only passed when squishing was explicitly switched off.
        #if $visualization.options.squishIntrons == 'FALSE' and $visualization.options.showIntergenicVicinity
    --showIntergenicVicinity='$visualization.options.showIntergenicVicinity'
        #end if
    #end if
    ## Numeric options are tested through str() so that an explicit 0 is still passed on.
    #if str($visualization.options.mergeDomainsOverlappingBy)
    --mergeDomainsOverlappingBy=$visualization.options.mergeDomainsOverlappingBy
    #end if
    #if $visualization.options.sampleName
    --sampleName='$visualization.options.sampleName'
    #end if
    #if $visualization.options.printExonLabels
    --printExonLabels=$visualization.options.printExonLabels
    #end if
    #if $visualization.options.coverageRange
    --coverageRange='$visualization.options.coverageRange'
    #end if
    #if str($visualization.options.fixedScale)
    --fixedScale=$visualization.options.fixedScale
    #end if
    #if $visualization.options.render3dEffect
    --render3dEffect=$visualization.options.render3dEffect
    #end if
    #if $visualization.options.optimizeDomainColors
    --optimizeDomainColors=$visualization.options.optimizeDomainColors
    #end if
    #if $visualization.options.color1
    --color1='$visualization.options.color1'
    #end if
    #if $visualization.options.color2
    --color2='$visualization.options.color2'
    #end if
    #if str($visualization.options.pdfWidth)
    --pdfWidth=$visualization.options.pdfWidth
    #end if
    #if str($visualization.options.pdfHeight)
    --pdfHeight=$visualization.options.pdfHeight
    #end if
    ## fontFamily is free text, so it is quoted. This heading must stay a "##" comment:
    ## a single "#" would be written into the command, where the shell would treat the
    ## remaining options as a comment.
    #if $visualization.options.fontFamily
    --fontFamily='$visualization.options.fontFamily'
    #end if
    #if str($visualization.options.fontSize)
    --fontSize=$visualization.options.fontSize
    #end if
    ]]></token>
</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Aligned.out.sam Wed Jul 27 11:25:43 2022 +0000 @@ -0,0 +1,89 @@ +@HD VN:1.4 SO:coordinate +@SQ SN:22 LN:269079 +@SQ SN:9 LN:515509 +@PG ID:STAR PN:STAR VN:2.7.8a CL:STAR --runThreadN 12 --genomeDir tempstargenomedir --genomeLoad NoSharedMemory --readFilesIn /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368710.dat /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368711.dat --readFilesCommand zcat --limitBAMsortRAM 122880000000 --outSAMtype BAM SortedByCoordinate --outSAMstrandField intronMotif --outSAMattributes NH HI AS nM ch --outSAMunmapped Within --outSAMprimaryFlag OneBestScore --outSAMmapqUnique 60 --outBAMsortingThreadN 12 --outBAMsortingBinsN 50 --outSAMattrIHstart 1 --winAnchorMultimapNmax 50 --chimSegmentMin 12 --chimOutType WithinBAM Junctions --chimOutJunctionFormat 1 --quantMode TranscriptomeSAM GeneCounts --quantTranscriptomeBan Singleend --twopassMode Basic +@CO user command line: STAR --runThreadN 12 --genomeLoad NoSharedMemory --genomeDir tempstargenomedir --readFilesIn /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368710.dat /panfs/roc/galaxy/PRODUCTION/database/files/001/368/dataset_1368711.dat --readFilesCommand zcat --outSAMtype BAM SortedByCoordinate --twopassMode Basic --quantMode TranscriptomeSAM GeneCounts --quantTranscriptomeBan Singleend --outSAMstrandField intronMotif --outSAMattrIHstart 1 --outSAMattributes NH HI AS nM ch --outSAMprimaryFlag OneBestScore --outSAMmapqUnique 60 --outSAMunmapped Within --chimSegmentMin 12 --outBAMsortingThreadN 12 --outBAMsortingBinsN 50 --winAnchorMultimapNmax 50 --limitBAMsortRAM 122880000000 --chimOutType WithinBAM Junctions --chimOutJunctionFormat 1 +BCR-ABL1-46 163 22 225687 60 71M2994N7M1344N72M = 225737 5255 AACTGGAGGCAGTGCCCAACATCCCCCTGGTGCCCGATGAGGAGCTGGACGCTTTGAACATCAAGATCTCCAAGAAGTGTTTCAGAAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTG 
CCCGGGGGG=GGGJJJGGJJJGGJJJJCJJGGJJGCJGCGGGC8J8JGGJJJJJGJJC(JGCCG=GGJJGCCCGC8GCCGGGGGG=GGCGGG1GG=GC1G=CJCJJCCCGGCGG1CGG1GGGGGGGG=GGGGGCCGCGGG8GGGCGG=GG NH:i:1 HI:i:1 AS:i:285 nM:i:1 XS:A:+ NM:i:1 +BCR-ABL1-72 163 22 225696 60 62M2994N7M1344N81M = 228752 5264 CAGTGCCCAACATCCCCCTGGTGCCCGATGAGGAGCTGCACGCTTTGAAGATCAAGATCTCCAAGAAGTGTTTCAGAAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCA CCCCGGGGGGGGGGJGJCCCJ1GJJJJGCGGGCJJJ=C1JJGGJGG8JGC=CCGJ1JGG8GGGGGJCGJCCGGGCG=CGGGGGGCGG=GGCGGG=8CCGCGGJJJ=JGGGCGGGGGCCGCCGGGGGGGGC=CCGCG8GGGGGC1GGGGCC NH:i:1 HI:i:1 AS:i:290 nM:i:1 XS:A:+ NM:i:1 +BCR-ABL1-46 83 22 225737 60 21M2994N7M1344N105M717N17M = 225687 -5255 GCTTTGAAGATCAAGATCTCCAAGAAGTGTTTCAGAAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGG =GGCGGGGGGG=GGGCCCGCCCGGGGGGGGGGCCGGGGCGG8CGCGGG1JGGCCGG(C=GCCCGGGGGGCGGGGGCGCGGCGGJCGGGJJGJGGGJJCGGGJJJGJJJJJJJGJJJJGGGJJJJJGGJJJJJGCJJJCGGGGGGGGGCCC NH:i:1 HI:i:1 AS:i:285 nM:i:1 XS:A:+ NM:i:0 +BCR-ABL1-72 83 22 228752 60 3S7M1344N105M717N35M = 225696 -5264 TCCAAGAAGTGTTTCAGAAGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAAT =GGGGGG==GGGGCCCC=GGGGG=GGGGCGGGCGGGGGGG=CGGCCGCCJGGCGGGGG=GGG8GGGCGGC=G=CCJGGGGGGCGJJGJJCGGGGGGJJJGCJCCGJG=JJJGJGJJCJJJJGJJJJJJJ=GCJGJGCGGG=GGGGGGCC= NH:i:1 HI:i:1 AS:i:290 nM:i:1 XS:A:+ NM:i:0 +BCR-ABL1-4 99 22 230111 60 97M717N53M = 230176 889 AGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCC C==GGGGGGGGGGJJJJ1JJJGGJJGGJGGJJGJJCJGJGJJCGGCJGCJJJJCGJGGGGJGGGGGGCCGG8JGGCGCGG=GGGGGGGGGGGGGG=GCCGJGGGCCGGGGGG1GGGGGGCGCGGCGGGGGG=GGGGGGGGGCCGCGGGCC NH:i:1 HI:i:1 AS:i:259 nM:i:0 ch:A:1 XS:A:+ NM:i:0 +BCR-ABL1-18 99 22 230118 60 90M717N60M = 230165 882 
CCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGTAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCAATCAGCCACTGGAT CCCGGGCGGGCGGJGJJJJJJJJJ=GCJJCJJJJJGJJJGJJGJJJCGGJJGGJCGJC=GG8GCGJGCGCG==GGGCGGGGG1CCCGCGGGGGCGGCCGGC=GCGGG=GGGGCGGGGCGCGGGGGGG=GGGCGGGG(GGGCGGGCGCCGG NH:i:1 HI:i:1 AS:i:266 nM:i:2 XS:A:+ NM:i:2 +BCR-ABL1-12 163 22 230132 60 76M717N74M = 230923 868 GAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCA CCCGGGGGGGGGGJGGJJJJJCCJJCJJGJJGJJGGCJJJCJCGGGJJ=CGJGJJJJGGCGGGJJJ==GG(GGC=GGGGGGCGCGG(GGGGC1C8GCC=GG=C=CCJGGGGGG8CGGCCCGCGGGGGGGGCGGGG=GGGGGCGGGG=GGC NH:i:1 HI:i:1 AS:i:227 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-18 147 22 230165 60 43M717N75M32S = 230118 -882 CTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTT =GCGGGGG(GGGGGGGGGGGCC=GGC=GGCGGCCGGGGCGGG8JJJJ=GCGGGGG1GGGCCGGGCCGGGCGGCGGGJGC8GCCGCGGCG=GJCGJJGC8GC1JGG=GJJCJC1JGJGGJJJGJGCJJJ=1JJJJ=JGGCG=GGCGGGCCC NH:i:1 HI:i:1 AS:i:266 nM:i:2 XS:A:+ NM:i:0 +BCR-ABL1-4 147 22 230176 60 32M717N75M43S = 230111 -889 CCACAGCATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGG CGGCGGGGGCGGCGCCG=GCGCGGGG8GCG881CGG=C=GCCGJJCJCCCGGG8GGCGG=GGGCCCGGCGGCCCCGGCGGGG=GGCGCJJGCGGJG1JGJJJ8JGJJCJJJ(JJGJGJJJGGJJGJCC1JJCGGJGG=GGGGGGGGGCCC NH:i:1 HI:i:1 AS:i:259 nM:i:0 ch:A:1 XS:A:+ NM:i:0 SA:Z:9,275100,-,107H43M,60,0; +BCR-ABL1-12 83 22 230923 60 19S77M54S = 230132 -868 CGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAG 8CCCGGCCGGCCGGGGGCGG1CCG=GGCGGGGGC1GGGGCCGCGGGGCCJGG=CGGCGGGGCGCGCGGGCGGCGGJG==GGCGCJGCGGGCJGGGGGGGCJGJGGJJJGJGGGGCJJJGJJJGGJGJJJGJJCCJJGGG1GGGGGGG=CC NH:i:1 HI:i:1 AS:i:227 nM:i:0 XS:A:+ NM:i:0 
+BCR-ABL1-60 2145 22 230961 60 39M111H 9 275110 0 TCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAA =CCGGCGGGGG=GJJGJGGGCJJCJJGJCGJG(J(JJJG NH:i:1 HI:i:1 AS:i:38 nM:i:0 ch:A:1 NM:i:0 SA:Z:9,275100,+,39S111M,60,0; +BCR-ABL1-76 2145 22 230973 60 27M123H 9 275139 0 CAGCCACTGGATTTAAGCAGAGTTCAA CCCGGGGCGGGCGJJJJJGJJJGJJJJ NH:i:1 HI:i:1 AS:i:26 nM:i:0 ch:A:1 NM:i:0 SA:Z:9,275100,+,27S123M,60,0; +BCR-ABL1-28 2209 22 230978 60 22M128H 9 275128 0 ACTGGATTTAAGCAGAGTTCAA CCCGCGGGGGGGGGJ=GJCGJJ NH:i:1 HI:i:1 AS:i:21 nM:i:0 ch:A:1 NM:i:0 SA:Z:9,275100,+,22S128M,60,1; +BCR-ABL1-64 99 9 275097 60 6S144M = 275140 756 AGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGG CCCGGGGGCGGGGGJJJGJJJGGJJJJJCJJJJGGJJJGJJGJGJG=GGJG=JJJJCGCCC==JGGCGGGCJG1CCCCGG8CGGGGGGGGCCGC=CGCGGJGGGGCGCGGGGGGGGCCGCGGGG=GCGGGGGGG=GGGGCGGGGGGCCGG NH:i:1 HI:i:1 AS:i:290 nM:i:2 XS:A:+ NM:i:1 +BCR-ABL1-54 99 9 275097 60 61S89M = 275097 140 CCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTGAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTC CCCGGGGGGGGGGGJJJJJGJ=JJJJJJGJJJGGJJJJJJJCJJG8JJJGJJGJ=GG=JJJGGCGGCGGJGC(GGGGGCGC8CGGCGCCGGC=GGGCGGGJG1GGGGGG1CG=GGGGC=1G1CGGGGGCCGGGGCGG=CC=C=CGGGGG8 NH:i:1 HI:i:1 AS:i:219 nM:i:4 NM:i:2 +BCR-ABL1-54 147 9 275097 60 10S140M = 275097 -140 AAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACGTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTT =GGGGGGGCCCCGCCGGG=G(GGGG=CGCGGCGCCGG=GGGGGCJJJ=GC8C1GGGGGCG8GCCGC=GGG1GCCGGJC8GCGGCGCGJGJJJG1CGJGG=CJJJGGGGJG=CJGJJJJCJCJJGGJJJJJGGJGGJJCGGGGGGGGG=CC NH:i:1 HI:i:1 AS:i:219 nM:i:4 NM:i:2 +BCR-ABL1-48 163 9 275097 60 3S147M = 275137 753 GTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCA 
CCCGGGCGGGGGGGJ1JJJJJJJCJJJCGJJCGGJCJJGJGGJJJGGGCGJJ=GJJJG=JCG=GJGGGC8=GCG=G=GCCGGG1CG1GC=GGG8GGGGG1GCJJCJJCCGGCGCCG=CGCGGGCGG=GCGGG1CGC1CGC=CGGGCGGGG NH:i:1 HI:i:1 AS:i:295 nM:i:1 XS:A:+ NM:i:1 +BCR-ABL1-2 99 9 275097 60 62S88M = 275097 134 TCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCCTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCT CCCGGGGGGGGGGGJ1JJJJJJ=JGGGGJJJCGCJJJCJJJJGGCJGGGJCJGGJJGJCGJGG1GCG=CGG(G=CGGG1GGCCGGGGGGGCGGGG=GCCGJGGCGCGGGGCCCG1GGGCCGGG8GGCGGCGG=CC(G=GC1GGCCGGGCG NH:i:1 HI:i:1 AS:i:214 nM:i:3 NM:i:2 +BCR-ABL1-2 147 9 275097 60 16S134M = 275097 -134 GGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTAT CGGGC=GGGGG=GGGGGGCCCCG=GG8=8CCGCGGGGGCGCGCJJCJ=CCCCC81GGGGC=GGGC8C8GGCGCGJCCGG8JCCGCC1GGCGGCJGGGJJJGGJJJJJCGGJGCJJJJJJG=JGJGJJJJGJJJGGJGCGCGGGGGGGCCC NH:i:1 HI:i:1 AS:i:214 nM:i:3 NM:i:1 +BCR-ABL1-76 99 9 275100 60 27S123M = 275139 752 CAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTG CCCGGGGCGGGCGJJJJJGJJJGJJJJJJJJJGJ1JCJJGJGGJJJGJJGGJJJ8GGJJGGGJJ=GGCGGGGGG=GGCCGGG8GC=GGGG=GCGGCGGGGJGG=GGGG=GGGGGGGGCGGGGCCGGGCG=GG(G=GCGCCG1CCGGCGGG NH:i:1 HI:i:1 AS:i:273 nM:i:0 ch:A:1 XS:A:+ NM:i:0 SA:Z:22,230973,+,27M123H,60,0; +BCR-ABL1-68 99 9 275100 60 1S149M = 275125 175 AAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGA CC1=GC=GGGGGGJJJJGJJJJJGGJJJJJJJGJJJJJ=(GJGGG8CCGJJ=GJGGGGJGJGJ=GGGCCGCGGCG1CGCGGGGGGGCGCCGCGGCGGGGGJGCC8GCGGGGCGGC=GGGGG=GGGCCC=GCGGGGGGCGCGGGCGGCGCG NH:i:1 HI:i:1 AS:i:291 nM:i:3 NM:i:0 +BCR-ABL1-60 99 9 275100 60 39S111M = 275110 160 TCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCA 
=CCGGCGGGGG=GJJGJGGGCJJCJJGJCGJG(J(JJJGGCGGGJJJGCJGGG1G=JGGJJGCJCCGGJ(JJCCGCC=GCGGGCGGGGG1GGGGCGCGG(JCGCGGGGGGGGGGGGCCGGCGCGCGGGGGGGCGGGGCGG1GGGGGGCGC NH:i:1 HI:i:1 AS:i:259 nM:i:0 ch:A:1 NM:i:0 SA:Z:22,230961,+,39M111H,60,0; +BCR-ABL1-28 163 9 275100 60 22S128M = 275128 741 ACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCCCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTG CCCGCGGGGGGGGGJ=GJCGJJJ1JCJJJJGJJJJJCCJJJJG8JJCC=CGGCGGJGGC(JGGG=GCCGCGCJ8CGGG=GGGCGGGGGCGGCCGGCCGGGCCJ=JC=CGCGCGC1G8GCCGGGGGC=GCGGGCGGGGGGGGGGGGGCGCC NH:i:1 HI:i:1 AS:i:274 nM:i:2 ch:A:1 XS:A:+ NM:i:1 SA:Z:22,230978,+,22M128H,60,0; +BCR-ABL1-4 2193 9 275100 60 107H43M 22 230111 0 AAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGG CJJJ(JJGJGJJJGGJJGJCC1JJCGGJGG=GGGGGGGGGCCC NH:i:1 HI:i:1 AS:i:42 nM:i:0 ch:A:1 NM:i:0 SA:Z:22,230176,-,32M717N75M43S,60,0; +BCR-ABL1-60 147 9 275110 60 150M = 275100 -160 GCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCT GCGGCCCGCGG=GG8GGCGGGGCCGCC=GCGCGGCG1GGCGG1JCJJ8CCCG=GGGGGGCG=GCGGGG18CCGCG=GGGG1CGG8C=GGGGCGGCJGJGJGJGJJGGJGJJJGJGJJJGGJC(JJJGJJJGJCJJGCGGGGGCGGGGCC= NH:i:1 HI:i:1 AS:i:259 nM:i:0 ch:A:1 NM:i:0 +BCR-ABL1-68 147 9 275125 60 150M = 275100 -175 TGACTTTGAGCCTCAGGGTCTAAGTGAAGCCGCTCGTAGGAACTCCAAGGAAAACCTTCTCGGTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGG G=CGGCGGGCCGCGCGGCCGG8GCCGCC(GGC=8=GGG=GGGCJ1=JJCCGGGCGGGGGGGG=GCGGGGCGCG==GGCGGGGGJGCCJJGGCCG=GCCCGGJCGGJJJJJ=GJJJJGJCJ=GCJGJGJJJC1GGJJJGGG=GG1GGGCCC NH:i:1 HI:i:1 AS:i:291 nM:i:3 NM:i:3 +BCR-ABL1-50 99 9 275125 60 150M = 275169 757 TGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGG 
CC1GGCGCGGGG=JGCJJJJJGJJJGJJGGJJGJJ8JGCGJJJJJJ8CJJGJJCGJGGGGJJCCG=CGGGGGCCCG=CGGCCGCGGGGGCGG=GGGGGGGCCGGG==GGGGGCCGG=GGGGCCG=GGGGGCCC=GGCGGGGCCGGCCGGG NH:i:1 HI:i:1 AS:i:300 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-28 83 9 275128 60 146M563N4M = 275100 -741 CTTTGAGCCTCAAGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGA GGGGGCGGGG=C1=8GGCGCGGGCGCGGGGG=GC=GGGGCG1CCCGCGGCCGGGC=GG=GGGGGCCGGGGCGGGCGJJGGGGCGGJ1JGGGGCGGJGJGGJJJCGGCJJCGJ=GJGCGGJJJJGGJJG1JJJGG1JJ=GGCGCGGG1CCC NH:i:1 HI:i:1 AS:i:274 nM:i:2 ch:A:1 XS:A:+ NM:i:1 +BCR-ABL1-48 83 9 275137 60 137M563N13M = 275097 -753 TCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCG GGGCGGCGCGGGG8GGGGGCGGCGGGCG1GCGCGG8GCGGCGC1G8CCGCGCGGCCGGGGGCGCCGCC1=CCCCGCCJCGGGGGGJJGJC=CCJ8JJC=JJCGCJJJGJJJJJJJJJJGJJGGGCJJJJJJJGJGJGCGGCGGGCGC=C1 NH:i:1 HI:i:1 AS:i:295 nM:i:1 XS:A:+ NM:i:0 +BCR-ABL1-76 147 9 275139 60 135M563N15M = 275100 -752 AGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGG CGGCGGGGCGGGGGGCGCCGCGCGGGGC=GGCGCCGCGCGGGCJJCJC1GGGGG=GCGGGGG=GGGGGGGGGGGGGCGGCGGGGGGJGJCGJGGJGJCJGJJJJJG8JJCJGG1JJJJJJJG8(JJJJJGJJJGJJJGGGGGGGG1GCCC NH:i:1 HI:i:1 AS:i:273 nM:i:0 ch:A:1 XS:A:+ NM:i:0 +BCR-ABL1-64 147 9 275140 60 134M563N16M = 275097 -756 GGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTGTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGT C=GGGGGCGGCGGGGGGC=GGGGC1CG=1=GGGCCC=GGGCGCCCCJJGCGCGGGGCGGCGCGCCGCCGCGGGGGCGC1GGGGG=GG1CGJGJJJ(CCGGJJGCGJGJGJJJGGGCGJGJJJJJJJJJGJGJGJJJCGCGGGGGGGGCCC NH:i:1 HI:i:1 AS:i:290 nM:i:2 XS:A:+ NM:i:1 +BCR-ABL1-14 99 9 275146 60 128M563N22M = 275170 737 
GAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGG CC=GGGGGGGGGGJGJJCJGJJJGG1JJGJ=JGGGJJJJGGJJJGCJJCGJJGC=GCJGGJGGCGGGCCGGGCGGCGCGGGGGGGGGGGGCC8GGGGCGCJCGGGCCCGCG8GGGGCGGGCGGCGGGGGGCGGGGGCGGG=GGGGGGCCG NH:i:1 HI:i:1 AS:i:302 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-78 99 9 275151 60 123M563N27M = 275200 762 AAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATA CC=1G=GGGGCGGJJJJGJJGG8JJCJGJGJJ8JJJJGJCGJGJJ=JGGGCGJCJCGGG=JJJJGGG=JGC=GGGCGGGGGGCGGCG=GCCGGGGGGCGGJCGCCGCGGGGGGGCCGGGGCGCCGGG=GGGCGCGGGGCGGGCCGGGGGG NH:i:1 HI:i:1 AS:i:302 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-62 163 9 275157 60 117M563N33M = 275215 771 CTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACA CCCCGGGGGGGGCJGGGJJGJJJJJJJ=GJJCJJGJCJGJCGCGCGGGCCJGJCGJ81JC1GGGGGCG8GGCGGGGCG1C1GGGGCGGCGCCCGGG=GC=CGCJJJJGGGGGCGGCGC=8GCCGGGGGGGG=GCG=1GGGCGGGCGG1CG NH:i:1 HI:i:1 AS:i:300 nM:i:1 XS:A:+ NM:i:0 +BCR-ABL1-50 147 9 275169 60 105M563N45M = 275125 -757 CCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGT CCGCCCGGGGGCGGGG1GCGGGGGGC8CGG=CGGCCC=CGGGCCJ(JJ=GGCGCGGGGCGGGGGCC8GCCCGGCGCGGGJ8GGGGCC1JJGJCGGJJJGJG8JJGJJJJCJJJGGJGGGCJGGJJJJGGGJJJ=CJCGG=GCGGGGG=CC NH:i:1 HI:i:1 AS:i:300 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-14 147 9 275170 60 104M563N46M = 275146 -737 CAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTG 1CGGGGGCGGCCGG1CG=GGGGGGGCGGGGGCCGGG1CGCGCCJJJJJCG1CGGGCCGCGGGGGGGGGGGGCGGGGGGGCCGGGCGJJGG=JJ(J18GJCJGJ8JJGGJ=JJGJJGGGJJJ=JJJJCJJJJJJJJJGGGGGGG1GGGCCC NH:i:1 HI:i:1 AS:i:302 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-20 163 9 275172 60 102M563N48M = 275219 
760 AGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTG CC=GGGGGGGGGGJJGJJJJJJCJJ=JJJJGGGGJCJJGCGCGJGGCCCJJJGJJJJCGGJG=GGJGGGGGJGJGCCGGGGCCG=GG=C=G=GGCCGGGCGCCCC=JG11GGCCGCCCGCGCC8CGGGGCC1CGCGGG=GG=CCC1GGCG NH:i:1 HI:i:1 AS:i:302 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-26 163 9 275174 60 100M563N50M = 275216 755 GAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGCAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAA C1CGGGCGGGGGGJJJ=JJGGCGGGJJGJJGGJGGJJGJCJGJJ=GGCJJGJJGGCGCGCGGG=JGGG8GGGGCGGGGGCGCCCGGGCGGGCCCCCG=GGGCJ(JCJ=GGCCGGGGGGGGGGGGGCCGCGGGCCGGGGGCGCGGGGGGGG NH:i:1 HI:i:1 AS:i:298 nM:i:2 XS:A:+ NM:i:1 +BCR-ABL1-34 99 9 275183 60 91M563N59M = 275223 753 CTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCCCTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACC CCCGGGGGGGGGGJJJJJGJGJJGG=JJJJJJGJJJGJGJC1JGGGJGGJJGGGJJGG=GCGJGGJCGGGGGGCG=GGCGG1CGGGG=CGGCCGGGCGGGJ88=CGCG=GGGGGC=GGCGGGGG1GGCCGGGG1GGGCGGGGCCCGCGGC NH:i:1 HI:i:1 AS:i:300 nM:i:1 XS:A:+ NM:i:1 +BCR-ABL1-80 99 9 275199 60 75M563N75M = 275250 764 AAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAACTGGCCAAGGCT CC=GGGGGGGGGGJJJJCJCJJJJJJGGGJJJCJGCCJJJJJ=JCGJJJ=J8JGJGJ=J=JGG=CJCCGG1GG=CGGG=8GG=GCGCCGGCGCGGGG8GGJG1CGCGCCGCCGCGG=GGGGCGCGGGGGG=G==GGCC(GGCGGGGCGCC NH:i:1 HI:i:1 AS:i:300 nM:i:1 XS:A:+ NM:i:1 +BCR-ABL1-8 163 9 275199 60 75M563N75M = 275246 760 AAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCT =C=GG(1GGGGGGJGGJGCCJJCJJJJJJ=GJGGJJGJ=JJGCGCGJJJJJGGJ1GGJJG8C8GGG=GCCG8GGGG1=CGG88CG=GG8GG=GGGCGG8GGCJ18CJ=CGGCGGGG1CC=GCCGCGGG=GGGCGGCGCC8GCCGCGCCGG NH:i:1 HI:i:1 AS:i:302 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-78 147 9 275200 60 74M563N76M = 275151 -762 
AAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTG GGGGGCG8GGGGGGGCCGCGG=GCCGCGCGCGGGGGGGC=GCGJ88JJ=CGCGGGG=CGGGGG(GG=G(CCGCGGCJGJGGGCCGGCCJJGGJJJGJJGG1G(JJJGCGJJG=J=GJJJGJJJJJ=CGJJJGJGGJJGGCGGCGGGGCCC NH:i:1 HI:i:1 AS:i:302 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-42 163 9 275204 60 70M563N80M = 275245 754 GACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTC CC=GGGGGGGGGGGGJJJGGJJJ=JJJGGGJJGJGCJJJJ(GJGJJCCJGGGJCGJGGJJJGG=G1C8GCGGGG18GCC=GGGGGCCCGC1GGGGCGGCGG=CCJCCGGG==GGGGGGGG1G(GGG=C=GGGG88=CC=GGCGGGGCGGG NH:i:1 HI:i:1 AS:i:302 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-62 83 9 275215 60 59M563N91M = 275157 -771 TTTCGTTGCACTGTATGATTTTGTGGCCGGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTA GCCC1CCCCCGCGCCC=GGGGGGCCGGC8CGGC1GGC=CGGGGCCCGGC8GCC=GCGGGCGGGGGGGCCGJGGGGGGGGGJGG=GJGCGJGGJ=JJGJGJCG=JJJJGJJJJGJJJGJJGCGJCJ=JJJJJGJGGJJGGGGCGGGGGCCC NH:i:1 HI:i:1 AS:i:300 nM:i:1 XS:A:+ NM:i:1 +BCR-ABL1-26 83 9 275216 60 58M563N92M = 275174 -755 TTCGTTGTACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTAC CGGGGGC(GGGCGGCGCCGGGCGGCGCCGGGCGCGGGGCGGC==GGCCG=1GCCGGCGGCGGGCCCGGG=GGGCGCCCCJJGCCC1GJJCJGGGJJJG8JGG=GJJ=GGJJJJCJGCCGJJJJJJGJJJJJGGJGJJGGGGGGGCGCCCC NH:i:1 HI:i:1 AS:i:298 nM:i:2 XS:A:+ NM:i:1 +BCR-ABL1-44 163 9 275217 60 57M563N93M = 275260 756 TCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACA C=CGGGGGGCGGGJJJJJGJGJJJJJ1JJJJJJ1JJJGCGGG=JGJGJGGJGGCGGCGJGCJG=JJGGCJCGCCCGGGGCCCGGGC=GG=CGGGGGGGGGGGCJJJJGC=GGGGCGGCGGC=CCGG8GGGCGCGGCCCGGCG=GGCGGGC NH:i:1 HI:i:1 AS:i:302 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-40 99 9 275219 60 55M563N94M1S = 275838 769 
GTTGCACTGTATGATTTTGTGGCCCGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATG CCCCGGGGGGCGGJJJJGGJCJJJ(GJJ=JJJJJCJJJGGGGJJJGGGJJCJJJGJCJCCJJ(CJ=CGGGJ=GGGCJCGGGGCGGGCGGGGGCGGCGGGGCCGGCGCCGGGGGGCCGCGGGGGGGG=GGGCGG=GGGGGCGGG8G=CGG1 NH:i:1 HI:i:1 AS:i:297 nM:i:1 XS:A:+ NM:i:1 +BCR-ABL1-20 83 9 275219 60 55M563N95M = 275172 -760 GTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATC GCG=GGGCC8GCCGCGGGGGGG=GGCCGGCGGGGCCGGGCGG8CGGCGGJGCGGCGGGGGGCGG=GGG1GCGGGGGGGGGC==JJGG=GGGCCJGGGGGGJJJJJGCCGJJGGJGJCJJCJJJJJGGJJJJGJJJJ1GGGGGGGGGGCCC NH:i:1 HI:i:1 AS:i:302 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-34 147 9 275223 60 51M563N99M = 275183 -753 CACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGC CGGG8GCC=GGGCGGGGGGCGGGGG=GGGGGGGCGGCGCGGGGJJJJCCCGC1GGGGG8GGCC=GGGGGGGGGGGCCGGCCCG=GCJJJGGGGGG81J8=CGJGGGGJGJJCJGGJGGJGCCJJGJJJJJCJJJJGJGGGCGGGG=GCCC NH:i:1 HI:i:1 AS:i:300 nM:i:1 XS:A:+ NM:i:0 +BCR-ABL1-22 99 9 275237 60 37M563N113M = 275864 777 GTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTG CC1GGGGGCGGGGGGCJJGCGJCJJJJJJJJGJJJJGJJGGG1JGCJJCJJGJJJJJGGGGJGJJCGJJGCC8CGGCGCGGCGCCG1GGCCCCGC=GG=GJCGGGGCGCG=GGGGGCGGC=CGGGGGGGGGCGGCCCGGG(G8GGGGGGG NH:i:1 HI:i:1 AS:i:300 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-42 83 9 275245 60 29M563N121M = 275204 -754 TGGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACA GGGCGGGCGCCGGGGGCGGGGGG=CGGGGC1CGCGGGCG=GGCG=GGCCCGGGGCGCGGCGGCCGGGGGGG(GG=GGG=GGGCCGGJJGJJ=GGGGGJJGCGJGGJJJJJJGCJGJGG(GGJJJGJJJJJJJ8GJJJCGGGGCGG1G1CC NH:i:1 HI:i:1 AS:i:302 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-8 83 9 275246 60 28M563N122M = 275199 -760 
GGAGATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACAC G=GGCGGGGG=GGGGGGGCCGCCGCGCGGCCGGCGGGCCGGCGGCGGGGJGG=GGGGGGGC=GCGGGCCCG=JGCG=GGGJGGGJGJGGGJCGCGCGGJGGJJJJJJGJGJGJJJG1JJJJJJJJGJCGGJJJJJJCGGGGGGGGCGCCC NH:i:1 HI:i:1 AS:i:302 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-80 147 9 275250 60 24M563N126M = 275199 -764 ATAACACTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCT CCCCCGGCC8GCG=CGCGCGGG(GGGGGGGGC=GGG=GG=CGCJJJJJGGGGC==G8CGGG=GCGGC1GGGC(GG=G=GGCGJC8GCGJGJCGCJGJGCGJGJJJGGJJGJGJCJJJJGJGJJJGGJJJJJJGJJJJ8GGGGGGGGGCCC NH:i:1 HI:i:1 AS:i:300 nM:i:1 XS:A:+ NM:i:0 +BCR-ABL1-30 163 9 275254 60 20M563N130M = 275867 763 CACTCTAAGCATAACTAAAGGTGAAAAGCACCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTA CCCGG1GGGGGGGJGJCJGJGJJJ(JGJJ(JGJGC=GJJJGJJGGJGGJJJJGGJGGCCCCJG8CG=GCJCGGGJ=GCGJ1CGGGGCCGCG8CG=GGCGGC(CJJC=CGGCGG(CGGGGGGCGCCGG=GC1GCGG=G1CGGCCCCG===8 NH:i:1 HI:i:1 AS:i:298 nM:i:1 XS:A:+ NM:i:1 +BCR-ABL1-52 99 9 275256 60 18M563N132M = 275877 771 CTCTAAGCATAACTAAAGGTGAAAAGCTCCGGGTCCTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACC CCCGGGGGGGGGGGJJGCGJGG8GJJJGJJGJG=G=JJGGCJGGGGGJGGJJGJJGJGGGCJ=GJGGGCGJCGGGGGGG8CGGGGGGC=GGCGGGGCGCGJCGCGCCCCGCG11GGGGGGGGGGGGGGCCCGGGGCCGGCGGGGGCGGCG NH:i:1 HI:i:1 AS:i:298 nM:i:1 XS:A:+ NM:i:1 +BCR-ABL1-44 83 9 275260 60 14M563N136M = 275217 -756 AAGCATAACTAAAGGTGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGG CGGGCGCGGGGCGCGGGGGCGGGGGGGGGCGGGGGGGG=G1GGGCGCGGCGCGGGGGGGCGGCGCCGCGGC==GGG=1GGG=CJJGCGJGJJJCGCJJ8GJJGGGJJGGJJJGJJGJJJGJJJJJJCJGJ=GGJJ=JGGGGGCGGCGCCC NH:i:1 HI:i:1 AS:i:302 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-40 147 9 275838 60 150M = 275219 -769 
TGAAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAA =GCCGGGGCGGGG=GG1G8G=G(GGCGGGGGGGGC1C=GCGG=J8JJ1GGGGGGGGGGGGGGGGGC=1G8CGGCJ8GC1GGCGCCGJG1GGJGGJGJ8JJCJJJGJ8GGJJJ=JGJJG=CGG=JJJJJJJ8JJ=JJGGGGGGGGGCGC=C NH:i:1 HI:i:1 AS:i:297 nM:i:1 XS:A:+ NM:i:0 +BCR-ABL1-16 163 9 275840 60 150M = 275888 198 AAAAGCTCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCACCAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATG CC8GGG1CCGGGCCJJGJG(JJJJJ(JGJCJGJJJJJJJJGGJGJGJJCCJJJGJGGGGCGGG=C=JGJJG=C8GGGCGGGGGC(GGCGCGCGGCG(CGG=1(C=JCGGGCCGGGGGCGCGGGCGGCGGGCGGCCGGGGGGGG8GGGCGC NH:i:1 HI:i:1 AS:i:296 nM:i:1 NM:i:1 +BCR-ABL1-6 99 9 275846 60 150M = 275903 207 TCCGGGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTG CCCGGGGGGCGGGGJJGJGJ=JJJJJJJJJJGJGGJJJGGJC=GGGGJ=GGCJJJGCGJGJGJJGJGCGJGGJGJCGCGGGGGGGGGG8GC8GC8G=GGGJ=GCGGGGGCCGCCGGGGGGGG1G1=GCGC81CGGGGGCCCGGGGGCGGG NH:i:1 HI:i:1 AS:i:298 nM:i:0 NM:i:0 +BCR-ABL1-32 99 9 275850 60 150M = 275894 194 GGTCTTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTA CCCGGGGGGGGGGJJGGJJJ=JJGJ8JGJ=JGJG=GGGJGCCJJJC=J8JJ=GCCJC8JC=GGGJGG(GGCJGGGG1GC=GJ=GCCGGCCGGG=GGGGGGJGCGGCGG=CGGCGCG=GGC=CGGGGCCGGGGGGCCCCGCCGGGCCGGGG NH:i:1 HI:i:1 AS:i:298 nM:i:0 NM:i:0 +BCR-ABL1-38 163 9 275854 60 150M = 275900 196 TTAGGCTATAATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTG CCCGCGGGGGGGG=GGJGJGJJJ1=GGCJJJCGJ=GJJJGGCJGGCGJGGJJCGJCGJGGG=GGGGJCGGGGGGJGGGGGGGGG8C=G=GCGG==CG8GCG=JCJJCCGG8CGGCGGCCGGGCCGCGGCGGG=GCGGCGG=8CGGCGCG= NH:i:1 HI:i:1 AS:i:298 nM:i:0 NM:i:0 +BCR-ABL1-22 147 9 275864 60 150M = 275237 -777 
ATCACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCG GGG1C8GGG=C=GGGCGCGG=GG=GCGGGGCGGGCGCGGGCGGJ1CCJCCC8GCC=GGCGGGGGGGCGCGCGGGCCGJGCGJGJGGJGCJGGGGGCGGGJ(JCGGGJJJJJJG=1GGJJJGJJJGGJJJJJJGJ1JJGG=GGGGGCGCCC NH:i:1 HI:i:1 AS:i:300 nM:i:0 XS:A:+ NM:i:0 +BCR-ABL1-30 83 9 275867 60 150M = 275254 -763 ACAATGGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGA CG8GC==GCGCCGGGGGGG8=CGGGGGC=CGGCGGGGGCG=GGGC=GCCJGGGGCGGCGCGGGGCGCGGGCCCGJGGG8(8GC8GCGCGCJGGGGGGJGGGJGJJJJJGJJJJCGGJJGJJJ=JJJJGGJ=JJJJGGGGGGGGGGGGCCC NH:i:1 HI:i:1 AS:i:298 nM:i:1 XS:A:+ NM:i:0 +BCR-ABL1-36 99 9 275872 60 150M = 275919 197 GGGGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAAT CCCGGGGGGGGGGJJJGGJJ=JJJJGJJJJJ=GGJJJCJ=JGGGJGGJGJJGGGGCGCG=JGCCGJGCJGCGGGJGGCGCG1GJGC=8GGCCCCGG8CCGJCCGGGCGGGCGGGGGCG1GCCGGCGGGGGCCCGGCCCG(G8GC8=CCGC NH:i:1 HI:i:1 AS:i:298 nM:i:0 NM:i:0 +BCR-ABL1-70 163 9 275874 60 150M = 275911 187 GGAATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGG CCCGGG==GGGGGJJJJJJJJJJJ=GJ8J=GJGJGJGGJCCJJJCGG1=GJGJG18JG=GGGC1GGGJ8GCCC1=CCGCGGG(GC1GCG1GCCGC8GG1GG=JJCJ1GGCGCCGCG(CGGCGGGGGG(GGGGGGCGCCGGGGCGCGCCG= NH:i:1 HI:i:1 AS:i:298 nM:i:0 NM:i:0 +BCR-ABL1-52 147 9 275877 60 150M = 275256 -771 ATGGTGTGAAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAG CG=GGGGC(CGGGGGGGCCGCGCCGGGGG=GGCC8CGGCCCCCCJ=CJ=GGGGGGGGCG=GGGGCG(GC1GGGC=GJCCCGJ=88GJGGJGJGCJGGJJGGJJJGJJJJGCGJJGGJ(GJGGJCJJJCJJJJJ=(JJGGGGGGGGGCCCC NH:i:1 HI:i:1 AS:i:298 nM:i:1 XS:A:+ NM:i:0 +BCR-ABL1-56 99 9 275885 60 150M = 275928 193 
AAGCCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGG =CCGC=GGGGGG1JJJGJGJJJCJJCGJJJJJJJJJJCGJJJJJJGGCCJ1JGCGGCGJ8GG(CJGJGJCGGCGG1CC=CG=GCC=GGGGG=GGCGCCG1CGGGGGGG1GGGGGCGGGCGCGGCGG811G8CCGGGGCGGGGGCCG=CGC NH:i:1 HI:i:1 AS:i:298 nM:i:0 NM:i:0 +BCR-ABL1-16 83 9 275888 60 150M = 275840 -198 CCCAAACCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGC CGGGGGG=GGCGGGCGGGGGGGCGGGGCGGGGCCCG=CGGGGGCGGGCCCGG8GCCCGG8GCGGC=GCCCGGGGGGCGGGCGGJGGGCCGGJCGGJJGJGGGGGJJJJGJJGGJJGG=JGGJJGGJ=JGJJJJJJGJC1GGGG=GGG1C1 NH:i:1 HI:i:1 AS:i:296 nM:i:1 NM:i:0 +BCR-ABL1-32 147 9 275894 60 150M = 275850 -194 CCAAAAATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGCGTGAGA 8GCCCCGGCGCGC1GGGGCGGCCGGGGCG1(GG=GG=GCGGCGJCJJCGG8=GGGCCGCGGGCGGGGGC=GC1=GGGGGJGGCCGJJCGGGJJJCGGG8CCGGGCJGJGJJGCCGCJJJJJJJJJJGCGJJJGJJGJGGGGGCGGGG=CC NH:i:1 HI:i:1 AS:i:298 nM:i:0 NM:i:0 +BCR-ABL1-38 83 9 275900 60 150M = 275854 -196 ATGGCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGCGTGAGAGTGAGA GCGCCGCGGGCGGGGGGGCGGCGCG8CGCGGGG8GGCGGGCCCCG8CC=JGCGGGGGGCGGGGCGCCGC=GCCCGGJGGGCGGGCJGCCJJGJG=GGCJJJGGJCGJCGCGJJJC=JJGJCJGJGJJGJJJ=JJJ1GGGGGGGGGGGCCC NH:i:1 HI:i:1 AS:i:298 nM:i:0 NM:i:0 +BCR-ABL1-6 147 9 275903 60 150M = 275846 -207 GCCAAGGCTGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGCGTGAGAGTGAGAGCA CG8GCGGGCCCCGGCGCGGGGCCGGGCGG8CG=G=GCCGCGG=1CJ8JCCGCGGGGGCGGGGGGCGG=G=8GCGJG=GGGGGGJGCCGGJJGG=G=CJ8=JJJJGG=JJJJJGJJGJGJJGJJJJJCCCJJJGGJJJG1GGG1GGGGCCC NH:i:1 HI:i:1 AS:i:298 nM:i:0 NM:i:0 +BCR-ABL1-70 83 9 275911 60 150M = 275874 -187 
TGGGTCCCAAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGCGTGAGAGTGAGAGCAGTCCTGGC CGGGCGGGCGGGCCGGGG=GGG8GCGGG8GGCGGGCGGCGGGCGGGGG=JGGGGGCGCGGGGGCGGGGCGCG1GGGGCJC8GG=JGGJJCCCJJGGGGJGJGJGGJJGGJJJJJGCJJJJGGJJGJJGJJJGGGJJJGGGGCGGGGG=CC NH:i:1 HI:i:1 AS:i:298 nM:i:0 NM:i:0 +BCR-ABL1-36 147 9 275919 60 150M = 275872 -197 AAGCAACTACATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGCGTGAGAGTGAGAGCAGTCCTGGCCAGAGGTC C=GGGGGGCCGGGGGGGGG=GGCCGCGGGC1GCGC1GCGGCCGJJ(CCG8GCCGGGCCGGGC=CC1CG=CGCCGGG=CGGGGGGGGCGGGGGJ==JJJJJ1CJJJGJGGJCGCJGJGJGJJJJ=GG1CJJCGJG1GC=GGGCGCGGGCCC NH:i:1 HI:i:1 AS:i:298 nM:i:0 NM:i:0 +BCR-ABL1-56 147 9 275928 60 150M = 275885 -193 CATCACGCCAGTCAACAGTCTGGAGAAACACTCCTGGTACCATGGGCCTGTGTCCCGCAATGCCGCTGAGTATCTGCTGAGCAGCGGGATCAATGGCAGCTTCTTGGTGCGTGAGAGTGAGAGCAGTCCTGGCCAGAGGTCCATCTCGCT CCGGGCGCGCGGGCG=CCCGGCGCGGGGC=CGGCGGCCGCGGGJJJJCCGCCG(GCCCCCGGCCGGG=G8GGGGGGCC=C=CGGJGJJJGC=JGGJJJGJGJ1JJJGC=JJJG=JCJJJJJJJ=JJGGGJJJCGJJJGGGGGCGG=GCCC NH:i:1 HI:i:1 AS:i:298 nM:i:0 NM:i:0 +BCR-ABL1-74 77 * 0 0 * * 0 0 TCATTTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATGACATTC CC11GGGGGGGGGGCCJJJGCGJJGJJJJJGGGGGGJJJGGJG==GCJCJ=GGJJGGJJGGCJGG=GGGGGJGGJGC=GC=GGGCGGGCGGGGCCGCGGGJCGC=GGC8CGCGCGGGGGGCGCC1GGCGCC=GCCGCGGC8GCGGGCCCG NH:i:0 HI:i:0 AS:i:155 nM:i:2 uT:A:1 +BCR-ABL1-74 141 * 0 0 * * 0 0 CATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAGGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAG CCCGGGGGGCGCGJGGJJGGJGJJJGJGGJJGGJGJJ1=JCJJGGGJJJJGGGJGCCJGGJGG=J1JG8JGCGGGJG=GC1CGCCGGCG(GGCGGCGGGGGCJC1CCGC==CCGGGGCGGCGGGCCGGCGCGC8CCCCGGG=GGGC=GGG NH:i:0 HI:i:0 AS:i:155 nM:i:2 uT:A:1 +BCR-ABL1-66 77 * 0 0 * * 0 0 
TCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATGACATTCAGAAACCCATAGAG CCC=GGGGCGGGGJJJJJGJJJJ=JJJGJJ1GJJGJJJJJGJJJJJGGGGCGJJGGGJJJGGCGGGGJGCGG1JCGGG=GCCGCG=GC=G=GCCGGGGG8JGGGGGGGGGGGG=GGCGGC8GGCCGGGC=GGGGGGGGG=CGG=8GGCCG NH:i:0 HI:i:0 AS:i:159 nM:i:0 uT:A:1 +BCR-ABL1-66 141 * 0 0 * * 0 0 CATTCCGCTGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAG CCCGGGGGGGGGGGGJ=JGJJJJJJJGGJJCCCJGJJ1JJJGCJGGGGJJJJ=GGGJGJGC(GGGGJGGGJG1=GGGGGGGG=G=C=GG8CC8GGGGGCCCCJCCCJGCG=GGCCGGCGGCGGCG==1GCCGGC1GGGGGCGGGGGGCGG NH:i:0 HI:i:0 AS:i:159 nM:i:0 uT:A:1 +BCR-ABL1-58 77 * 0 0 * * 0 0 ATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGG CCCGGCGGGGGGGGJJJJJGJJGJGJGJGJJJJJJJJJCJGJJJJGCG=8GGGJGJGGCGGJGCGJJJCJGGG=CGCCGGCCGGGCGCGGGCGCG1GGGCCCGGGGCG8GCCC=C8CGCGG=CCCGCCCCGGG=CCGGCGGGCGGGGGCG NH:i:0 HI:i:0 AS:i:185 nM:i:3 uT:A:1 +BCR-ABL1-58 141 * 0 0 * * 0 0 TTGGGGTCATTTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATTCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATG CCCGGGGGGGGGGJJJJJJGJGJJJGGJ=JJJJJJJJGC=GJJGGJJGJJGG1GCJGGGG=JGGG8C=GCCGC==GGGCGGGGGG=GGG=(G=CCGCCGGGGCJJJJGGGC8GCGCGCG8CGGCCGGGCGCGCGG8CCGG8CGGGGGGGG NH:i:0 HI:i:0 AS:i:185 nM:i:3 uT:A:1 +BCR-ABL1-24 77 * 0 0 * * 0 0 CGCAGACCATCAATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGGCTGAGTGAAG CC11GCGGGGGGGJCGJGJJCCJJJJGJJJJGJJGGJJJCJJJG8JJJ1GJ=JGGGGJJJCG=8GGCGCCGGGCCGGGCGGGGCGGGGCCGCGGCCGGG=J1GCCC1(CCGGCGGGCCGCGGGCGGGGC=GGCGCCGCC1GCGGGGGCGG NH:i:0 HI:i:0 AS:i:154 nM:i:3 uT:A:1 +BCR-ABL1-24 141 * 0 0 * * 0 0 TTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACGATGACATTCAGAA 
C=CCGGGGGGGGCJ1GGJJJJ1JJJJJGJJ=GJJG8GGJ=GJGJJGJJGGGCGJGCGGGCGGG8GG=GJJGCG1GCGGJGCCGGCGGGCCGGGCG8GGGGG8C1==CGGCCCGCGGGGC8GCGGG8GGGCGCCGCCGCGGGCGGGGGGCG NH:i:0 HI:i:0 AS:i:154 nM:i:3 uT:A:1 +BCR-ABL1-10 77 * 0 0 * * 0 0 AGGTTGGGGTCATTTTCACTGGGTCCAGCGAGAAGGTTTTCCTTGGAGTTCCAACGAGCGGCTTCACTCAGACCCTGAGGCTCAAAGTCAGATGCTACTGGCCGCTGAAGGGCTTTTGAACTCTGCTTAAATCCAGTGGCTGAGTGGACG CC=GGGGGGGGGG1GJJJJJCJJJJJJJJJJJGJ=GJJJGCJJJJCJGJGCJGJJJGGJJJGGCCGGJGC=GGJ1C8GGGGGGCGCCGGGGGGCGGGCGCCCG1GGCGCGCGGGCC8GCGCGCGC8CCCGCGCGGGGGCGGGGGCGGCGG NH:i:0 HI:i:0 AS:i:181 nM:i:2 uT:A:1 +BCR-ABL1-10 141 * 0 0 * * 0 0 ATAAGGAAGATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAAAAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGA 1CCGGCGGGGGG1GGJJJGCC1JJJJCCG=JGGJJGJJJ=GGGGGJJGGGGGGC1J=CJGCGGGGCGC(CGGGGG=GGGGG(G=CGGCGGGGCCCGC=CCCCJJCC8G1GGGGCGGGGGGCGCGGGGGGGCG=GGCCGCCGCC1G=GGGG NH:i:0 HI:i:0 AS:i:181 nM:i:2 uT:A:1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/cytobands.tsv Wed Jul 27 11:25:43 2022 +0000 @@ -0,0 +1,5 @@ +contig start end name giemsa +22 1 40586 q11.22 gpos25 +22 40586 269079 q11.23 gneg +9 1 21036 q34.11 gneg +9 21036 515509 q34.12 gpos25
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fusions.tsv Wed Jul 27 11:25:43 2022 +0000 @@ -0,0 +1,2 @@ +#gene1 gene2 strand1(gene/fusion) strand2(gene/fusion) breakpoint1 breakpoint2 site1 site2 type split_reads1 split_reads2 discordant_mates coverage1 coverage2 confidence reading_frame tags retained_protein_domains closest_genomic_breakpoint1 closest_genomic_breakpoint2 gene_id1 gene_id2 transcript_id1 transcript_id2 direction1 direction2 filters fusion_transcript peptide_sequence read_identifiers +BCR ABL1 +/+ +/+ 22:230999 9:275100 CDS/splice-site CDS/splice-site translocation 1 3 0 3 8 low in-frame . Bcr-Abl_oncoprotein_oligomerisation_domain(100%),C2_domain(100%),RhoGEF_domain(100%)|F-actin_binding(100%),Protein_kinase_domain(100%),SH2_domain(100%),SH3_domain(100%) . . ENSG00000186716 ENSG00000097007 ENST00000305877 ENST00000372348 downstream upstream . AGCTTCTCCCTGACATCCGTGGAGCTGCAGATGCTGACCAACTCGTGTGTGAAACTCCAGACTGTCCACAGCATTCCGCTGACCATCAATAAGGAAG___ATGATGAGTCTCCGGGGCTCTATGGGTTTCTGAATGTCATCGTCCACTCAGCCACTGGATTTAAGCAGAGTTCAA|AAGCCCTTCAGCGGCCAGTAGCATCTGACTTTGAGCCTCAGGGTCTGAGTGAAGCCGCTCGTTGGAACTCCAAGGAAAACCTTCTCGCTGGACCCAGTGAAAATGACCCCAACCTTTTCGTTGCACTGTATGATTTTGTGGCCAGTGGAGATAACACTCTAAGCATAACTAAAG___GTGAAAAGCTCCGGG SFSLTSVELQMLTNSCVKLQTVHSIPLTINKEDDESPGLYGFLNVIVHSATGFKQSS|kALQRPVASDFEPQGLSEAARWNSKENLLAGPSENDPNLFVALYDFVASGDNTLSITKGEKLR BCR-ABL1-4,BCR-ABL1-28,BCR-ABL1-60,BCR-ABL1-76
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/protein_domains.gff3 Wed Jul 27 11:25:43 2022 +0000 @@ -0,0 +1,83 @@ +9 pfam protein_domain 33502 33541 0 + . Name=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013 +9 pfam protein_domain 33992 34063 0 + . Name=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013 +9 pfam protein_domain 35324 35381 0 + . Name=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013 +9 pfam protein_domain 37391 37409 0 + . Name=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013 +9 pfam protein_domain 37479 37553 0 + . Name=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013 +9 pfam protein_domain 38833 38931 0 + . Name=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013 +9 pfam protein_domain 41390 41413 0 + . Name=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013 +9 pfam protein_domain 41489 41494 0 + . Name=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013 +9 pfam protein_domain 43744 43846 0 + . Name=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013 +9 pfam protein_domain 44647 44729 0 + . Name=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013 +9 pfam protein_domain 47496 47541 0 + . Name=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013 +9 pfam protein_domain 51664 51812 0 + . Name=KH domain;color=#808080;gene_id=ENSG00000107164;gene_name=FUBP3;protein_domain_id=PF00013 +9 pfam protein_domain 102331 102396 0 + . Name=Zinc finger%2C C2H2 type;color=#80FF00;gene_id=ENSG00000130711;gene_name=PRDM12;protein_domain_id=PF00096 +9 pfam protein_domain 102412 102480 0 + . 
Name=C2H2-type zinc finger;color=#80FF80;gene_id=ENSG00000130711;gene_name=PRDM12;protein_domain_id=PF13894 +9 pfam protein_domain 114903 114949 0 + . Name=Exosome complex exonuclease RRP4 N-terminal region;color=#FF0000;gene_id=ENSG00000130713;gene_name=EXOSC2;protein_domain_id=PF14382 +9 pfam protein_domain 116528 116596 0 + . Name=Exosome complex exonuclease RRP4 N-terminal region;color=#FF0000;gene_id=ENSG00000130713;gene_name=EXOSC2;protein_domain_id=PF14382 +9 pfam protein_domain 121951 121971 0 + . Name=KH domain;color=#000080;gene_id=ENSG00000130713;gene_name=EXOSC2;protein_domain_id=PF15985 +9 pfam protein_domain 123179 123300 0 + . Name=KH domain;color=#000080;gene_id=ENSG00000130713;gene_name=EXOSC2;protein_domain_id=PF15985 +9 pfam protein_domain 275219 275273 0 + . Name=SH3 domain;color=#FF0000;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00018 +9 pfam protein_domain 275837 275922 0 + . Name=SH3 domain;color=#FF0000;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00018 +9 pfam protein_domain 275962 276132 0 + . Name=SH2 domain;color=#80FFFF;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00017 +9 pfam protein_domain 283799 283855 0 + . Name=SH2 domain;color=#80FFFF;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00017 +9 pfam protein_domain 283973 284071 0 + . Name=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069 +9 pfam protein_domain 293165 293249 0 + . Name=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069 +9 pfam protein_domain 293896 294073 0 + . Name=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069 +9 pfam protein_domain 295904 296088 0 + . Name=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069 +9 pfam protein_domain 299451 299603 0 + . 
Name=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069 +9 pfam protein_domain 301104 301156 0 + . Name=Protein kinase domain;color=#80FF00;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF00069 +9 pfam protein_domain 306405 306716 0 + . Name=F-actin binding;color=#800000;gene_id=ENSG00000097007;gene_name=ABL1;protein_domain_id=PF08919 +9 pfam protein_domain 314470 314862 0 - . Name=Orexigenic neuropeptide Qrfp/P518 ;color=#808000;gene_id=ENSG00000188710;gene_name=QRFP;protein_domain_id=PF11109 +9 pfam protein_domain 325106 325108 0 - . Name=Fibrinogen beta and gamma chains%2C C-terminal globular domain;color=#808000;gene_id=ENSG00000130720;gene_name=FIBCD1;protein_domain_id=PF00147 +9 pfam protein_domain 325118 325359 0 - . Name=Fibrinogen beta and gamma chains%2C C-terminal globular domain;color=#808000;gene_id=ENSG00000130720;gene_name=FIBCD1;protein_domain_id=PF00147 +9 pfam protein_domain 326270 326449 0 - . Name=Fibrinogen beta and gamma chains%2C C-terminal globular domain;color=#808000;gene_id=ENSG00000130720;gene_name=FIBCD1;protein_domain_id=PF00147 +9 pfam protein_domain 332828 332924 0 - . Name=Fibrinogen beta and gamma chains%2C C-terminal globular domain;color=#808000;gene_id=ENSG00000130720;gene_name=FIBCD1;protein_domain_id=PF00147 +9 pfam protein_domain 344780 344908 0 - . Name=Fibrinogen beta and gamma chains%2C C-terminal globular domain;color=#808000;gene_id=ENSG00000130720;gene_name=FIBCD1;protein_domain_id=PF00147 +9 pfam protein_domain 430353 430623 0 + . Name=Laminin N-terminal (Domain VI);color=#000080;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00055 +9 pfam protein_domain 447321 447625 0 + . Name=Laminin N-terminal (Domain VI);color=#000080;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00055 +9 pfam protein_domain 453081 453209 0 + . 
Name=Laminin N-terminal (Domain VI);color=#000080;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00055 +9 pfam protein_domain 457203 457334 0 + . Name=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053 +9 pfam protein_domain 459902 460054 0 + . Name=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053 +9 pfam protein_domain 460070 460088 0 + . Name=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053 +9 pfam protein_domain 460167 460282 0 + . Name=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053 +9 pfam protein_domain 462677 462771 0 + . Name=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053 +9 pfam protein_domain 466560 466608 0 + . Name=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053 +9 pfam protein_domain 470157 470166 0 + . Name=Laminin B (Domain IV);color=#FFFF80;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00052 +9 pfam protein_domain 473527 473719 0 + . Name=Laminin B (Domain IV);color=#FFFF80;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00052 +9 pfam protein_domain 473886 474001 0 + . Name=Laminin B (Domain IV);color=#FFFF80;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00052 +9 pfam protein_domain 477965 478038 0 + . Name=Laminin B (Domain IV);color=#FFFF80;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00052 +9 pfam protein_domain 478042 478107 0 + . Name=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053 +9 pfam protein_domain 478144 478183 0 + . Name=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053 +9 pfam protein_domain 482071 482168 0 + . 
Name=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053 +9 pfam protein_domain 482175 482259 0 + . Name=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053 +9 pfam protein_domain 487996 488060 0 + . Name=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053 +9 pfam protein_domain 488076 488222 0 + . Name=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053 +9 pfam protein_domain 489116 489262 0 + . Name=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053 +9 pfam protein_domain 489945 490067 0 + . Name=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053 +9 pfam protein_domain 490710 490856 0 + . Name=Laminin EGF domain;color=#FFFFFF;gene_id=ENSG00000050555;gene_name=LAMC3;protein_domain_id=PF00053 +22 pfam protein_domain 2420 2524 0 - . Name=Armadillo/beta-catenin-like repeat;color=#000080;gene_id=ENSG00000100218;gene_name=RSPH14;protein_domain_id=PF00514 +22 pfam protein_domain 36321 37004 0 + . Name=G-protein alpha subunit;color=#80FFFF;gene_id=ENSG00000128266;gene_name=GNAZ;protein_domain_id=PF00503 +22 pfam protein_domain 63673 63981 0 + . Name=G-protein alpha subunit;color=#80FFFF;gene_id=ENSG00000128266;gene_name=GNAZ;protein_domain_id=PF00503 +22 pfam protein_domain 90736 90740 0 + . Name=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071 +22 pfam protein_domain 93060 93112 0 + . Name=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071 +22 pfam protein_domain 93619 93720 0 + . Name=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071 +22 pfam protein_domain 96554 96622 0 + . 
Name=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071 +22 pfam protein_domain 98578 98629 0 + . Name=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071 +22 pfam protein_domain 99484 99565 0 + . Name=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071 +22 pfam protein_domain 99749 99839 0 + . Name=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071 +22 pfam protein_domain 101465 101502 0 + . Name=Ras family;color=#80FFFF;gene_id=ENSG00000100228;gene_name=RAB36;protein_domain_id=PF00071 +22 pfam protein_domain 121553 121771 0 + . Name=Bcr-Abl oncoprotein oligomerisation domain;color=#FF0000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF09036 +22 pfam protein_domain 201581 201640 0 + . Name=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621 +22 pfam protein_domain 201941 202126 0 + . Name=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621 +22 pfam protein_domain 208994 209101 0 + . Name=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621 +22 pfam protein_domain 212118 212178 0 + . Name=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621 +22 pfam protein_domain 213667 213719 0 + . Name=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621 +22 pfam protein_domain 214220 214312 0 + . Name=RhoGEF domain;color=#000000;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00621 +22 pfam protein_domain 230954 230999 0 + . Name=C2 domain;color=#00FF00;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00168 +22 pfam protein_domain 233127 233224 0 + . Name=C2 domain;color=#00FF00;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00168 +22 pfam protein_domain 235610 235741 0 + . 
Name=C2 domain;color=#00FF00;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00168 +22 pfam protein_domain 250010 250018 0 + . Name=C2 domain;color=#00FF00;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00168 +22 pfam protein_domain 252302 252422 0 + . Name=RhoGAP domain;color=#FFFFFF;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00620 +22 pfam protein_domain 253473 253607 0 + . Name=RhoGAP domain;color=#FFFFFF;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00620 +22 pfam protein_domain 254554 254659 0 + . Name=RhoGAP domain;color=#FFFFFF;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00620 +22 pfam protein_domain 255138 255228 0 + . Name=RhoGAP domain;color=#FFFFFF;gene_id=ENSG00000186716;gene_name=BCR;protein_domain_id=PF00620
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Wed Jul 27 11:25:43 2022 +0000 @@ -0,0 +1,18 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Jul 27 11:25:43 2022 +0000 @@ -0,0 +1,7 @@ +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#" allow_duplicate_entries="False"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/all_fasta.loc" /> + </table> +</tables>