Mercurial > repos > iuc > control_freec
changeset 0:e46944a59b31 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/freec commit bec4fb59dc4776d33c2ce8c0bd614c90e5d4ecb2"
author | iuc |
---|---|
date | Thu, 13 Aug 2020 09:50:35 -0400 |
parents | |
children | 2c6349fb175c |
files | control_freec.xml macros.xml ratio2circos.py test-data/capture.bed test-data/fasta_indexes.loc test-data/genome.fasta test-data/genome.fasta.fai test-data/normal.bam test-data/output/GC_profile.targetedRegions.cnp test-data/output/control.bam_control.cpn test-data/output/sample.bam_CNVs test-data/output/sample.bam_info.txt test-data/output/sample.bam_ratio.txt test-data/output/sample.bam_sample.cpn test-data/tumor.bam tool-data/fasta_indexes.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 17 files changed, 995 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/control_freec.xml Thu Aug 13 09:50:35 2020 -0400 @@ -0,0 +1,278 @@ +<tool id="control_freec" name="Control-FREEC" version="@WRAPPER_VERSION@"> + <description>detects copy-number changes and allelic imbalances</description> + <macros> + <import>macros.xml</import> + </macros> + <requirements> + <requirement type="package" version="5.0.1">gawk</requirement> + <requirement type="package" version="@TOOL_VERSION@">control-freec</requirement> + </requirements> + <!-- Links the reference, BAM files and capture regions under fixed names, splits the reference into one FASTA per sequence under ./chromosomes, runs freec with the generated config, then optionally runs makeGraph.R and ratio2circos.py. Parameters inside the WGS_WES conditional are always referenced by their fully qualified name. --> + <command detect_errors="exit_code"><![CDATA[ + #if str($reference_source.ref_selector) == 'history': + ln -s '$reference_source.ref' ./genome.fa && + samtools faidx ./genome.fa 2>&1 || { echo 'Error running samtools faidx for indexing fasta reference for control-freec' >&2; exit 1; } && + #else + ln -s '$reference_source.ref.fields.path' ./genome.fa && + ln -s '${reference_source.ref.fields.path}.fai' ./genome.fa.fai && + #end if + + #if int($WGS_WES.advanced_settings.window_section.window) == 0 + ln -s '$WGS_WES.input_capture_file' ./capture.bed && + #end if + + mkdir ./chromosomes && + mkdir ./output && + + awk -F " " '/^>/ {close(F); F="chromosomes/"substr($1,2)".fa"} {print >> F}' ./genome.fa && + + ln -s '$WGS_WES.input_sample_file' ./sample.bam && + #if $WGS_WES.input_control_file + ln -s '$WGS_WES.input_control_file' ./control.bam && + #end if + + freec + -conf '$script_file' + -sample ./sample.bam + #if $WGS_WES.input_control_file + -control ./control.bam + #end if + + #if $output_section.makeGraph + && R -f `which makeGraph.R` '$WGS_WES.advanced_settings.ploidy' 'output/sample.bam_ratio.txt' + #end if + + #if $output_section.circos_data + && python '$__tool_directory__/ratio2circos.py' + -i ./output/sample.bam_ratio.BedGraph + -p '$WGS_WES.advanced_settings.ploidy' + -o sample.bam_ratio_log2_circos.txt + #end if + ]]></command> + <configfiles> + <configfile name="script_file"><![CDATA[ +#import os +#set galaxy_slots = os.getenv("GALAXY_SLOTS", 4) +[general] +chrFiles = 
chromosomes +outputDir = output +chrLenFile = genome.fa.fai +maxThreads = $galaxy_slots + +#if $WGS_WES.advanced_settings.degree == "34" + #set $degree_val = "3&4" +degree = $degree_val +#else +degree = $WGS_WES.advanced_settings.degree +#end if +forceGCcontentNormalization = $WGS_WES.advanced_settings.forceGCcontentNormalization +minCNAlength = $WGS_WES.advanced_settings.minCNAlength +minimalSubclonePresence = $WGS_WES.advanced_settings.minimalSubclonePresence +readCountThreshold = $WGS_WES.advanced_settings.readCountThreshold +window = $WGS_WES.advanced_settings.window_section.window +step = $WGS_WES.advanced_settings.window_section.step +coefficientOfVariation = $WGS_WES.advanced_settings.coefficientOfVariation +#if $output_section.circos_data +BedGraphOutput = TRUE +#else +BedGraphOutput = $output_section.BedGraphOutput +#end if +breakPointThreshold = $WGS_WES.advanced_settings.breakPointThreshold +breakPointType = $WGS_WES.advanced_settings.breakPointType +contaminationAdjustment = $WGS_WES.advanced_settings.contaminationAdjustment +contamination = $WGS_WES.advanced_settings.contamination +intercept = $WGS_WES.advanced_settings.intercept +minMappabilityPerWindow = $WGS_WES.advanced_settings.minMappabilityPerWindow +minExpectedGC = $WGS_WES.advanced_settings.minExpectedGC +maxExpectedGC = $WGS_WES.advanced_settings.maxExpectedGC +noisyData = $WGS_WES.advanced_settings.noisyData +ploidy = $WGS_WES.advanced_settings.ploidy +printNA = $WGS_WES.advanced_settings.printNA +sex = $WGS_WES.advanced_settings.sex +telocentromeric = $WGS_WES.advanced_settings.telocentromeric + +[sample] +mateFile = sample.bam +inputFormat = BAM +mateOrientation = $WGS_WES.mateOrientation_selector + +[control] +mateFile = control.bam +inputFormat = BAM +mateOrientation = $WGS_WES.mateOrientation_selector + +#if int($WGS_WES.advanced_settings.window_section.window) == 0 +[target] +captureRegions = capture.bed +#end if + ]]></configfile> + </configfiles> + <inputs> + <conditional 
name="WGS_WES"> + <param name="WGS_WES_selector" type="select" label="Select the sequencing method of the input file(s)" help=""> + <option value="WGS" selected="True">whole-genome sequencing (WGS)</option> + <option value="WES">whole-exome sequencing (WES)</option> + <option value="other">other method</option> + </param> + <when value="WGS"> + <expand macro="def_input_files" /> + <section name="advanced_settings" title="Advanced WGS settings" expanded="false"> + <expand macro="WGS" /> + <expand macro="shared" /> + </section> + </when> + <when value="WES"> + <expand macro="wes_input_files" /> + <section name="advanced_settings" title="Advanced WES settings" expanded="false"> + <expand macro="WES" /> + <expand macro="shared" /> + </section> + </when> + <when value="other"> + <expand macro="wes_input_files" /> + <section name="advanced_settings" title="Advanced settings" expanded="true"> + <expand macro="other" /> + <expand macro="shared" /> + </section> + </when> + </conditional> + <expand macro="reference_interface" /> + <expand macro="output_section" /> + </inputs> + <outputs> + <data name="out_sample_raw" format="tabular" label="${tool.name} on ${on_string}: Raw copy number profiles (sample)" from_work_dir="output/sample.bam_sample.cpn" /> + <data name="out_control_raw" format="tabular" label="${tool.name} on ${on_string}: Raw copy number profiles (control)" from_work_dir="output/control.bam_control.cpn" /> + <data name="out_sample_coord" format="tabular" label="${tool.name} on ${on_string}: Coordinates of predicted CN alterations" from_work_dir="output/sample.bam_CNVs" /> + <data name="out_sample_info" format="txt" label="${tool.name} on ${on_string}: Information about FREEC run" from_work_dir="output/sample.bam_info.txt" /> + <data name="out_sample_ratio" format="tabular" label="${tool.name} on ${on_string}: Ratios and predicted CN alterations for each window" from_work_dir="output/sample.bam_ratio.txt" /> + <data name="out_sample_subclones" format="txt" 
label="${tool.name} on ${on_string}: Subclones" from_work_dir="output/sample.bam_subclones.txt" > + <filter>int(WGS_WES['advanced_settings']['minimalSubclonePresence']) != 100</filter> + </data> + <data name="out_sample_bedgraph" format="bed" label="${tool.name} on ${on_string}: Ratios in BedGraph format" from_work_dir="output/sample.bam_ratio.BedGraph"> + <filter>output_section['BedGraphOutput']</filter> + </data> + <data name="out_mg_png" format="png" label="${tool.name} on ${on_string}: Normalized CN profile" from_work_dir="output/sample.bam_ratio.txt.png"> + <filter>output_section['makeGraph']</filter> + </data> + <data name="out_mg_log2_png" format="png" label="${tool.name} on ${on_string}: Normalized CN profile (log2)" from_work_dir="output/sample.bam_ratio.txt.log2.png"> + <filter>output_section['makeGraph']</filter> + </data> + <data name="out_gc_profile" format="tabular" label="${tool.name} on ${on_string}: GC-content profile" from_work_dir="output/GC_profile.targetedRegions.cnp"> + <filter>int(WGS_WES['advanced_settings']['window_section']['window']) == 0</filter> + </data> + <data name="out_ratio_log2_circos" format="tabular" label="${tool.name} on ${on_string}: Circos 2D-track data" from_work_dir="output/sample.bam_ratio_log2_circos.txt"> + <filter>output_section['circos_data']</filter> + </data> + </outputs> + <tests> + <test expect_num_outputs="5"> + <conditional name="reference_source"> + <param name="ref_selector" value="history"/> + <param name="ref" ftype="fasta" value="genome.fasta" /> + </conditional> + <param name="WGS_WES_selector" value="other" /> + <param name="input_sample_file" ftype="bam" value="tumor.bam" /> + <param name="input_control_file" ftype="bam" value="normal.bam" /> + <param name="mateOrientation_selector" value="0" /> + <param name="window" value="5000" /> + <param name="step" value="1000" /> + <output name="out_sample_raw" file="output/sample.bam_sample.cpn" /> + <output name="out_control_raw" 
file="output/control.bam_control.cpn" /> + <output name="out_sample_coord" file="output/sample.bam_CNVs" /> + <output name="out_sample_info" file="output/sample.bam_info.txt" /> + <output name="out_sample_ratio" file="output/sample.bam_ratio.txt" /> + </test> + <test expect_num_outputs="5"> + <conditional name="reference_source"> + <param name="ref_selector" value="cached"/> + <param name="ref" value="test_buildid"/> + </conditional> + <param name="WGS_WES_selector" value="other" /> + <param name="input_sample_file" ftype="bam" value="tumor.bam" /> + <param name="input_control_file" ftype="bam" value="normal.bam" /> + <param name="mateOrientation_selector" value="0" /> + <param name="window" value="5000" /> + <param name="step" value="1000" /> + <output name="out_sample_raw" file="output/sample.bam_sample.cpn" /> + <output name="out_control_raw" file="output/control.bam_control.cpn" /> + <output name="out_sample_coord" file="output/sample.bam_CNVs" /> + <output name="out_sample_info" file="output/sample.bam_info.txt" /> + <output name="out_sample_ratio" file="output/sample.bam_ratio.txt" /> + </test> + <test expect_num_outputs="5"> + <conditional name="reference_source"> + <param name="ref_selector" value="history"/> + <param name="ref" ftype="fasta" value="genome.fasta" /> + </conditional> + <param name="WGS_WES_selector" value="WGS" /> + <param name="input_sample_file" ftype="bam" value="tumor.bam" /> + <param name="input_control_file" ftype="bam" value="normal.bam" /> + <param name="mateOrientation_selector" value="0" /> + <param name="minimalSubclonePresence" value="100" /> + <param name="window" value="5000" /> + <param name="step" value="1000" /> + <output name="out_sample_raw" file="output/sample.bam_sample.cpn" /> + <output name="out_control_raw" file="output/control.bam_control.cpn" /> + <output name="out_sample_coord" file="output/sample.bam_CNVs" /> + <output name="out_sample_info" file="output/sample.bam_info.txt" /> + <output name="out_sample_ratio" 
file="output/sample.bam_ratio.txt" /> + </test> + <test expect_num_outputs="6"> + <conditional name="reference_source"> + <param name="ref_selector" value="history"/> + <param name="ref" ftype="fasta" value="genome.fasta" /> + </conditional> + <param name="WGS_WES_selector" value="WES" /> + <param name="input_sample_file" ftype="bam" value="tumor.bam" /> + <param name="input_control_file" ftype="bam" value="normal.bam" /> + <param name="input_capture_file" ftype="bed" value="capture.bed" /> + <param name="mateOrientation_selector" value="0" /> + <param name="minimalSubclonePresence" value="100" /> + <param name="forceGCcontentNormalization" value="2" /> + <param name="window" value="0" /> + <param name="step" value="0" /> + <output name="out_sample_raw"> + <assert_contents> + <has_text_matching expression="M\t\d+\t\d+\t\d+\tM:\d+-\d+"/> + </assert_contents> + </output> + <output name="out_control_raw"> + <assert_contents> + <has_text_matching expression="M\t\d+\t\d+\t\d+\tM:\d+-\d+"/> + </assert_contents> + </output> + <output name="out_sample_coord" file="output/sample.bam_CNVs" /> + <output name="out_sample_info"> + <assert_contents> + <has_text text="Program_Version"/> + </assert_contents> + </output> + <output name="out_sample_ratio"> + <assert_contents> + <has_text text="Chromosome"/> + </assert_contents> + </output> + <output name="out_gc_profile" file="output/GC_profile.targetedRegions.cnp" /> + </test> + </tests> + <help><![CDATA[ +Control-FREEC +============= + +Control-FREEC is a tool for detection of copy-number changes and allelic imbalances (including loss of heterozygosity/LOH) using deep-sequencing data, originally developed by the Bioinformatics Laboratory of Institut Curie (Paris). Nowadays, Control-FREEC is supported by the team of Valentina Boeva at Institut Cochin, Inserm (Paris). + +Control-FREEC automatically computes, normalizes, segments copy number and beta allele frequency (BAF) profiles, then calls copy number alterations and LOH. 
The control (matched normal) sample is optional for whole genome sequencing data but mandatory for whole exome or targeted sequencing data. For whole genome sequencing data analysis, the program can also use mappability data (files created by GEM). + +**Input for CNA detection:** Aligned single-end, paired-end or mate-pair data in BAM format. + +**Output:** Regions of gain, loss and LOH and normalized copy number. + +For more information (e.g. to run Control-FREEC on non-human data) see the Control-FREEC documentation_. + +.. _documentation: http://boevalab.inf.ethz.ch/FREEC/index.html#documentation + + ]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/btr670</citation> + <citation type="doi">10.1093/bioinformatics/btq635</citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Thu Aug 13 09:50:35 2020 -0400 @@ -0,0 +1,154 @@ +<macros> + <token name="@WRAPPER_VERSION@">@TOOL_VERSION@+galaxy0</token> + <token name="@TOOL_VERSION@">11.6</token> + <xml name="reference_interface"> + <conditional name="reference_source"> + <param name="ref_selector" type="select" label="Choose the source for the reference genome"> + <option value="cached">Locally cached</option> + <option value="history">History</option> + </param> + <when value="cached"> + <param argument="--ref" type="select" label="Reference genome"> + <options from_data_table="fasta_indexes"> + <!-- <filter type="data_meta" column="dbkey" key="dbkey" ref="input_sample_file" /> --> + <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" /> + </options> + </param> + </when> + <when value="history"> + <param argument="--ref" type="data" format="fasta" label="Reference" help="Reference sequence" /> + </when> + </conditional> + </xml> + <xml name="WGS"> + <param name="degree" type="select" label="Degree of polynomial" help=""> + <option value="34" selected="True">GC-content based normalization, WGS (3&4)</option> + <option value="1">control-read-count-based normalization, WES (1)</option> + </param> + <param name="forceGCcontentNormalization" type="select" label="Read Count (RC) correction for GC-content bias and low mappability" help="Set to 1 or 2 to correct the Read Count (RC) for GC-content bias and low mappability even when you have a control sample. 
- Default (WGS): 0 - Default (WES): 1"> + <option value="0" selected="True">simply model "sample RC ~ Control RC" (0)</option> + <option value="1">normalize the sample and the control RC using GC-content and then calculate the ratio "Sample RC/contol RC" (1)</option> + <option value="2">model "sample RC ~ Control RC" bias, and then normalize for GC-content (2)</option> + </param> + <param name="minCNAlength" type="select" label="Minimal number of consecutive windows to call a CNA" help=""> + <option value="1" selected="True">WGS (1)</option> + <option value="3">WES (3)</option> + </param> + <param name="minimalSubclonePresence" type="integer" label="Detects subclones present in x% of cell population" value="20" help="Default: 100 (meaning "do not look for subclones") Suggested: 20 for WGS and 30 for WES." /> + <param name="readCountThreshold" type="integer" label="Threshold on the minimal number of reads per window" value="10" help="Threshold on the minimal number of reads per window in the control sample. Useful for exome-seq or targeted sequencing data. Default: 10 recommended value >=50 for for exome data." /> + <section name="window_section" title="Select window size" expanded="true"> + <param name="window" type="integer" value="50000" label="Explicit window size" help="Higher priority than coefficientOfVariation. Ex: for whole genome sequencing: "50000"; for whole exome sequencing: "0"" /> + <param name="step" type="integer" value="10000" label="Step" help="Used only when "window" is specified. Do not use for exome sequencing (instead set "0"). 
Ex: 10000" /> + </section> + </xml> + <xml name="WES"> + <param name="degree" type="select" label="Degree of polynomial" help=""> + <option value="34">GC-content based normalization, WGS (3&4)</option> + <option value="1" selected="True">control-read-count-based normalization, WES (1)</option> + </param> + <param name="forceGCcontentNormalization" type="select" label="Read Count (RC) correction for GC-content bias and low mappability" help="Set to 1 or 2 to correct the Read Count (RC) for GC-content bias and low mappability even when you have a control sample. - Default (WGS): 0 - Default (WES): 1"> + <option value="0">simply model "sample RC ~ Control RC" (0)</option> + <option value="1" selected="True">normalize the sample and the control RC using GC-content and then calculate the ratio "Sample RC/contol RC" (1)</option> + <option value="2">model "sample RC ~ Control RC" bias, and then normalize for GC-content (2)</option> + </param> + <param name="minCNAlength" type="select" label="Minimal number of consecutive windows to call a CNA" help=""> + <option value="1">WGS (1)</option> + <option value="3" selected="True">WES (3)</option> + </param> + <param name="minimalSubclonePresence" type="integer" label="Detects subclones present in x% of cell population" value="30" help="Default: 100 (meaning "do not look for subclones") Suggested: 20 for WGS and 30 for WES." /> + <param name="readCountThreshold" type="integer" label="Threshold on the minimal number of reads per window" value="50" help="Threshold on the minimal number of reads per window in the control sample. Useful for exome-seq or targeted sequencing data. Default: 10 recommended value >=50 for for exome data." /> + <section name="window_section" title="Select window size" expanded="false"> + <param name="window" type="integer" value="0" label="Explicit window size" help="Higher priority than coefficientOfVariation. 
Ex: for whole genome sequencing: "50000"; for whole exome sequencing: "0"" /> + <param name="step" type="integer" value="0" label="Step" help="Used only when "window" is specified. Do not use for exome sequencing (instead set "0"). Ex: 10000" /> + </section> + </xml> + <xml name="other"> + <param name="degree" type="select" label="Degree of polynomial" help=""> + <option value="34" selected="True">GC-content based normalization, WGS (3&4)</option> + <option value="1">control-read-count-based normalization, WES (1)</option> + </param> + <param name="forceGCcontentNormalization" type="select" label="Read Count (RC) correction for GC-content bias and low mappability" help="Set to 1 or 2 to correct the Read Count (RC) for GC-content bias and low mappability even when you have a control sample. - Default (WGS): 0 - Default (WES): 1"> + <option value="0" selected="True">simply model "sample RC ~ Control RC" (0)</option> + <option value="1">normalize the sample and the control RC using GC-content and then calculate the ratio "Sample RC/contol RC" (1)</option> + <option value="2">model "sample RC ~ Control RC" bias, and then normalize for GC-content (2)</option> + </param> + <param name="minCNAlength" type="select" label="Minimal number of consecutive windows to call a CNA" help=""> + <option value="1" selected="True">WGS (1)</option> + <option value="3">WES (3)</option> + </param> + <param name="minimalSubclonePresence" type="integer" label="Detects subclones present in x% of cell population" value="100" help="Default: 100 (meaning "do not look for subclones") Suggested: 20 for WGS and 30 for WES." /> + <param name="readCountThreshold" type="integer" label="Threshold on the minimal number of reads per window" value="10" help="Threshold on the minimal number of reads per window in the control sample. Useful for exome-seq or targeted sequencing data. Default: 10 recommended value >=50 for for exome data." 
/> + <section name="window_section" title="Select window size" expanded="true"> + <param name="window" type="integer" value="50000" label="Explicit window size" help="Higher priority than coefficientOfVariation. Ex: for whole genome sequencing: "50000"; for whole exome sequencing: "0"" /> + <param name="step" type="integer" value="10000" label="Step" help="Used only when "window" is specified. Do not use for exome sequencing (instead set "0"). Ex: 10000" /> + </section> + </xml> + <xml name="shared"> + <!-- general parameters --> + <param name="breakPointThreshold" type="float" label="Segmentation of normalized profiles (break point)" value="0.8" help="Positive value of threshold for segmentation of normalized profiles. Use something like 0.6 to get more segments (and thus more predicted CNVs)." /> + <param name="breakPointType" type="select" label="Desired behavior in the ambiguous regions"> + <option value="0" selected="true">the "unknown" region is attached to the "known" region on the right (0)</option> + <option value="1">make a separate fragment of this "unknown" region and then attaches it to the left or to the right region choosing the longer one (1)</option> + <option value="2">make a separate fragment of this "unknown" region and then attaches it to the left or to the right region but the "ploidy" copy number has a priority (2)</option> + <option value="3">make a separate fragment of this "unknown" region and then attaches it to the left or to the right region choosing the longer one but this "known" region should make at least half-size of the "unknown" region (3)</option> + <option value="4">make a separate fragment of this "unknown" region and do not assign any copy number to this region at all (4)</option> + </param> + <param name="coefficientOfVariation" type="float" label="Coefficient of variation to evaluate necessary window size" value="0.05" /> + <param name="contaminationAdjustment" type="boolean" checked="false" truevalue="TRUE" 
falsevalue="FALSE" label="Adjust sample contamination?" help="a priori known value of tumor sample contamination by normal cells. Set "Yes" to correct for contamination by normal cells. If "contamination" is not provided, it will automatically evaluate the level of contamination." /> + <param name="contamination" type="float" label="Sample contamination by normal cells" value="0" help="Ex: contamination=0.25" /> + <!-- GCcontentProfile --> + <!-- gemMappabilityFile --> + <param name="intercept" type="select" label="Intercept of polynomial" help=""> + <option value="0">with a control dataset (0)</option> + <option value="1" selected="True">with GC-content (1)</option> + </param> + <param name="minMappabilityPerWindow" type="float" label="Minimal mappability per window" value="0.85" min="0" max="1" help="Only windows with fraction of mappable positions higher than or equal to this threshold will be considered (if "gemMappabilityFile" is not provided, one uses the percentage of non-N letters per window)" /> + <param name="minExpectedGC" type="float" label="Minimal expected value of the GC-content" value="0.35" min="0" max="1" help="Minimal expected value of the GC-content for the prior evaluation of "Read Count ~ GC-content" dependency. Change only if you run Control-FREEC on a bacterial genome." /> + <param name="maxExpectedGC" type="float" label="Maximal expected value of the GC-content" value="0.55" min="0" max="1" help="Maximal expected value of the GC-content for the prior evaluation of "Read Count ~ GC-content" dependency. Change only if you run Control-FREEC on a bacterial genome." 
/> + <param name="noisyData" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Noisy Data" help="Set "Yes" for target resequencing data (e.g., exome-seq) to avoid false positive predictions due to non-uniform capture" /> + <param name="ploidy" type="text" value="2" label="Genome ploidy" help="In case of doubt, you can set different values and Control-FREEC will select the one that explains most observed CNAs. Ex: 2 or 2,3,4" /> + <param name="printNA" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Print NA to avoid "-1"" help="Set "No" to avoid printing "-1" to the _ratio.txt files. Useful for exome-seq or targeted sequencing data." /> + <param name="sex" type="select" label="Sample sex" help=""XX" will exclude chr Y from the analysis. "XY" will not annotate one copy of chr X and Y as a loss."> + <option value="XY" selected="True">XY</option> + <option value="XX">XX</option> + <option value="no value"></option> + </param> + <param name="telocentromeric" type="integer" value="50000" label="Length of pre-telomeric and pre-centromeric regions" help="Control-FREEC will not output small CNAs and LOH found within these regions (they are likely to be false because of mappability/genome assembly issues). 50000 is OK for human/mouse genomes. Use smaller values for yeasts and flies. Do not set anything for exome-seq data." 
/> + <!-- uniqueMatch if true -> gemMappabilityFile is needed--> + <!-- BAF parameters --> + <!-- makePileup: path to a BED or VCF file with SNP positions --> + <!-- fastaFile --> + <!-- minimalCoveragePerPosition --> + <!-- minimalQualityPerPosition --> + <!-- shiftInQuality --> + <!-- SNPfile --> + </xml> + <xml name="mateOrientation_selector"> + <param name="mateOrientation_selector" type="select" display="radio" label="Format of reads" help="If you specify orientation of your reads then only reads mapping in the corresponding orientation will be used for calculation of copy number profiles."> + <option value="0" selected="True">single-end (0)</option> + <option value="RF">Illumina mate-pair (RF)</option> + <option value="FR">Illumina paired-end (FR)</option> + <option value="FF">SOLiD mate-pair (FF)</option> + </param> + </xml> + <xml name="input_control_sample"> + <param name="input_sample_file" type="data" format="bam" multiple="false" label="Sample file" help="Sample file in .BAM format." /> + <param name="input_control_file" type="data" format="bam" multiple="false" label="Control file" help="Control file in .BAM format." /> + </xml> + <xml name="wes_input_files"> + <expand macro="input_control_sample" /> + <param name="input_capture_file" optional="true" type="data" format="bed" multiple="false" label="BED file with capture regions" help="Capture regions in .BED format." /> + <expand macro="mateOrientation_selector" /> + </xml> + <xml name="def_input_files"> + <expand macro="input_control_sample" /> + <expand macro="mateOrientation_selector" /> + </xml> + <xml name="output_section"> + <section name="output_section" title="Outputs" expanded="false"> + <param name="BedGraphOutput" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="BedGraph Output for UCSC genome browser" help="Set "Yes" if you want an additional output in BedGraph format for the UCSC genome browser." 
/> + <param name="makeGraph" type="boolean" checked="false" label="Visualize normalized copy number profile with predicted CNAs" help="Currently only human genomes are supported!" /> + <param name="circos_data" type="boolean" checked="false" label="2D data track file for Circos" help="Output: chrName startPos endPos log2Ratio" /> + </section> + </xml> +</macros> \ No newline at end of file
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ratio2circos.py Thu Aug 13 09:50:35 2020 -0400 @@ -0,0 +1,20 @@
"""Convert a ratio BedGraph file into a tab-separated log2-ratio track.

Each data record of the input is expected to carry four whitespace-separated
columns: sequence name, start, end and a numeric value.  For every record
whose value is greater than zero, one line is written to the output:

    <name> TAB <start> TAB <end> TAB log2(value / ploidy)

Usage:
    python ratio2circos.py -i INPUT.BedGraph -o OUTPUT_NAME -p PLOIDY

The output file is created in the directory of the input file; OUTPUT_NAME is
joined onto that directory.
"""
import argparse
import math
import os


def convert_lines(lines, ploidy):
    """Yield formatted output lines for every usable record in ``lines``.

    Parameters:
        lines: iterable of text lines (for example an open file).
        ploidy: positive number the record value is divided by before log2.

    Yields:
        ``"{name}\\t{start}\\t{end}\\t{log2_ratio}\\n"`` for each record whose
        fourth column is greater than zero.

    Raises:
        ValueError: if the fourth column of a data record is not numeric.

    Lines with fewer than four columns (including blank lines), ``track`` and
    ``browser`` header lines, and ``#`` comment lines are skipped.  Records
    with a value of zero or below are skipped because log2 is undefined there.
    """
    for line in lines:
        fields = line.split()
        if len(fields) < 4:
            continue
        if fields[0] in ("track", "browser") or fields[0].startswith("#"):
            continue
        value = float(fields[3])
        if value > 0:
            log2_ratio = math.log2(value / ploidy)
            yield "{}\t{}\t{}\t{}\n".format(fields[0], fields[1], fields[2], log2_ratio)


def main(argv=None):
    """Parse command-line arguments and write the converted track.

    Parameters:
        argv: argument list to parse; ``None`` means ``sys.argv[1:]``.

    The output file is opened once in write mode, so it always exists after a
    successful run (possibly empty) and never accumulates rows from an earlier
    run.
    """
    parser = argparse.ArgumentParser(
        description="Convert a ratio BedGraph into a log2-ratio 2D data track."
    )
    parser.add_argument('-i', '--input', required=True, type=str,
                        help="input BedGraph file")
    parser.add_argument('-o', '--output', required=True, type=str,
                        help="output file name, created next to the input file")
    # NOTE(review): the calling wrapper documents ploidy values such as "2,3,4";
    # those are rejected here because a single integer is required. Confirm
    # which ploidy should be used in that case before relaxing the type.
    parser.add_argument('-p', '--ploidy', required=True, type=int,
                        help="ploidy used to normalise the ratio (positive integer)")
    args = parser.parse_args(argv)

    if args.ploidy <= 0:
        parser.error("--ploidy must be a positive integer")

    # The output name is resolved relative to the input file's directory.
    output = os.path.join(os.path.dirname(args.input), args.output)

    with open(args.input) as source, open(output, "w") as out:
        out.writelines(convert_lines(source, args.ploidy))


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/capture.bed Thu Aug 13 09:50:35 2020 -0400 @@ -0,0 +1,37 @@ +chrM 576 647 +chrM 647 1601 +chrM 1601 1670 +chrM 1670 3229 +chrM 3229 3304 +chrM 3306 4262 +chrM 4262 4331 +chrM 4328 4400 +chrM 4401 4469 +chrM 4469 5511 +chrM 5511 5579 +chrM 5586 5655 +chrM 5656 5729 +chrM 5760 5826 +chrM 5825 5891 +chrM 5903 7445 +chrM 7445 7514 +chrM 7517 7585 +chrM 7585 8269 +chrM 8294 8364 +chrM 8365 8572 +chrM 8526 9207 +chrM 9206 9990 +chrM 9990 10058 +chrM 10058 10404 +chrM 10404 10469 +chrM 10469 10766 +chrM 10759 12137 +chrM 12137 12206 +chrM 12206 12265 +chrM 12265 12336 +chrM 12336 14148 +chrM 14148 14673 +chrM 14673 14742 +chrM 14746 15887 +chrM 15887 15953 +chrM 15955 16023
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fasta_indexes.loc Thu Aug 13 09:50:35 2020 -0400 @@ -0,0 +1,26 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +# <unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg19 Canonical indexed stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +test_buildid hg17 test_displayname ${__HERE__}/genome.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.fasta Thu Aug 13 09:50:35 2020 -0400 @@ -0,0 +1,333 @@ +>chrM +NNNCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCAT +TTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTG +GAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATT +CTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACCTACTA +AAGTGTGTTAATTAATTAATGCTTGTAGGACATAATAATAACAATTGAAT +GTCTGCACAGCCGCTTTCCACACAGACATCATAACAAAAAATTTCCACCA +AACCCCCCCCTCCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGC +CAAACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAAT +TTTATCTTTAGGCGGTATGCACTTTTAACAGTCACCCCCCAACTAACACA +TTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATACAACCCCC +GCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAAC +CAACCAAACCCCAAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCA +AAGCAATACACTGAAAATGTTTAGACGGGCTCACATCACCCCATAAACAA +ATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGC +AAGCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAG +GGACAAGCATCAAGCACGCAGCAATGCAGCTCAAAACGCTTAGCCTAGCC +ACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAATAAACGAAAGT +TTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACC +GCGGTCACACGATTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTT +TAGATCACCCCCTCCCCAATAAAGCTAAAACTCACCTGAGTTGTAAAAAA +CTCCAGTTGACACAAAATAGACTACGAAAGTGGCTTTAACATATCTGAAC +ACACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGC +CCTAAACCTCAACAGTTAAATCAACAAAACTGCTCGCCAGAACACTACGA +GCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATCCCTCTAGA +GGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGC +TCAGCCTATATACCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGT +AAGCGCAAGTACCCACGTAAAGACGTTAGGTCAAGGTGTAGCCCATGAGG +TGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTT +ATGAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTGAGAGTAGAGT +GCTTAGTTGAACAGGGCCCTGAAGCGCGTACACACCGCCCGTCACCCTCC +TCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCATTTATATA +GAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACG +AACCAGAGTGTAGCTTAACACAAAGCACCCAACTTACACTTAGGAGATTT +CAACTTAACTTGACCGCTCTGAGCTAAACCTAGCCCCAAACCCACTCCAC +CTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGG 
+CGATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGA +TGAAAAATTATAACCAAGCATAATATAGCAAGGACTAACCCCTATACCTT +CTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCCAAAGCTAA +GACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCC +GTCTATGTAGCAAAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCT +ACCGAGCCTGGTGATAGCTGGTTGTCCAAGATAGAATCTTAGTTCAACTT +TAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAG +TCCAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGA +GTAAAAAATTTAACACCCATAGTAGGCCTAAAAGCAGCCACCAATTAAGA +AAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAACATATAACT +GAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAA +TGTTAGTATAAGTAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCA +GATCAAAACACTGAACTGACAATTAACAGCCCAATATCTACAATCAACCA +ACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAG +GAAAGGTTAAAAAAAGTAAAAGGAACTCGGCAAACCTTACCCCGCCTGTT +TACCAAAAACATCACCTCTAGCATCACCAGTATTAGAGGCACCGCCTGCC +CAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAaaggtagca +taatcacttgttccttaaatagggacctgtatgaatggctccacgagggt +tcagctgtctcttacttttaaccagtgaaattgacctgcccgtgaagagg +cgggcatgacacagcaagacgagaagaccctatggagctttaatttaTTA +ATGCAAACAGTACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCA +TTAAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCA +GTACATGCTAAGACTTCACCAGTCAAAGCGAACTACTATACTCAATTGAT +CCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACAGCGCAATC +CTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGAT +CAGGACATCCCGATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGA +TTAAAGTCCTACGTGATCTGAGTTCAGACCGGAGTAATCCAGGTCGGTTT +CTATCTACTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCC +TACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTAT +TATACCCACACCCACCCAAGAACAGGGTTTgttaagatggcagagcccgg +taatcgcataaaacttaaaactttacagtcagaggttcaattcctcttct +taacaacaTACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAA +TCGCAATGGCATTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATA +CAACTACGCAAAGGCCCCAACGTTGTAGGCCCCTACGGGCTACTACAACC +CTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCA +CATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATC +GCTCTTCTACTATGAACCCCCCTCCCCATACCCAACCCCCTGGTCAACCT +CAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAGCCGTTTACT 
+CAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGC +GCACTGCGAGCAGTAGCCCAAACAATCTCATATGAAGTCACCCTAGCCAT +CATTCTACTATCAACATTACTAATAAGTGGCTCCTTTAACCTCTCCACCC +TTATCACAACACAAGAACACCTCTGATTACTCCTGCCATCATGACCCTTG +GCCATAATATGATTTATCTCCACACTAGCAGAGACCAACCGAACCCCCTT +CGACCTTGCCGAAGGGGAGTCCGAACTAGTCTCAGGCTTCAACATCGAAT +ACGCCGCAGGCCCCTTCGCCCTATTCTTCATAGCCGAATACACAAACATT +ATTATAATAAACACCCTCACCACTACAATCTTCCTAGGAACAACATATGA +CGCACTCTCCCCTGAACTCTACACAACATATTTTGTCACCAAGACCCTAC +TTCTAACCTCCCTGTTCTTATGAATTCGAACAGCATACCCCCGATTCCGC +TACGACCAACTCATACACCTCCTATGAAAAAACTTCCTACCACTCACCCT +AGCATTACTTATATGATATGTCTCCATACCCATTACAATCTCCAGCATTC +CCCCTCAAACCTAAGAAATATGTCTGATAAAAGAGTTACTTTGATAGAGT +AAATAATAGGAGCTTAAACCCCCTTATTTctaggactatgagaatcgaac +ccatccctgagaatccaaaattctccgtgccacctatcacaccccatcct +aAAGTAAGGTCAGCTAAATAAGCTATCGGGCCCATACCCCGAAAATGTTG +GTTATACCCTTCCCGTACTAATTAATCCCCTGGCCCAACCCGTCATCTAC +TCTACCATCTTTGCAGGCACACTCATCACAGCGCTAAGCTCGCACTGATT +TTTTACCTGAGTAGGCCTAGAAATAAACATGCTAGCTTTTATTCCAGTTC +TAACCAAAAAAATAAACCCTCGTTCCACAGAAGCTGCCATCAAGTATTTC +CTCACGCAAGCAACCGCATCCATAATCCTTCTAATAGCTATCCTCTTCAA +CAATATACTCTCCGGACAATGAACCATAACCAATACTACCAATCAATACT +CATCATTAATAATCATAATGGCTATAGCAATAAAACTAGGAATAGCCCCC +TTTCACTTCTGAGTCCCAGAGGTTACCCAAGGCACCCCTCTGACATCCGG +CCTGCTTCTTCTCACATGACAAAAACTAGCCCCCATCTCAATCATATACC +AAATCTCTCCCTCACTAAACGTAAGCCTTCTCCTCACTCTCTCAATCTTA +TCCATCATAGCAGGCAGTTGAGGTGGATTAAACCAAACCCAGCTACGCAA +AATCTTAGCATACTCCTCAATTACCCACATAGGATGAATAATAGCAGTTC +TACCGTACAACCCTAACATAACCATTCTTAATTTAACTATTTATATTATC +CTAACTACTACCGCATTCCTACTACTCAACTTAAACTCCAGCACCACGAC +CCTACTACTATCTCGCACCTGAAACAAGCTAACATGACTAACACCCTTAA +TTCCATCCACCCTCCTCTCCCTAGGAGGCCTGCCCCCGCTAACCGGCTTT +TTGCCCAAATGGGCCATTATCGAAGAATTCACAAAAAACAATAGCCTCAT +CATCCCCACCATCATAGCCACCATCACCCTCCTTAACCTCTACTTCTACC +TACGCCTAATCTACTCCACCTCAATCACACTACTCCCCATATCTAACAAC +GTAAAAATAAAATGACAGTTTGAACATACAAAACCCACCCCATTCCTCCC +CACACTCATCGCCCTTACCACGCTACTCCTACCTATCTCCCCTTTTATAC +TAATAATCTTATAGAAATTTAGGTTAAATACAGACCAAGAGCCTTCAAAG 
+CCCTCAGTAAGTTGCAATACTTAATTTCTGCAACAGCTAAGGACTGCAAA +ACCCCACTCTGCATCAACTGAACGCAAATCAGCCACTTTAATTAAGCTAA +GCCCTTACTAGACCAATGGGACTTAAACCCACAAACACTTAGTTAACAGC +TAAGCACCCTAATCAACTGGCTTCAATCTACTTCTCCCGCCGCCGGGAAA +AAAGGCGGGAGAAGCCCCGGCAGGTTTGAAGCTGCTTCTTCGAATTTGCA +ATTCAATATGAAAATCACCTCGGAGCTGGTAAAAAGAGGCCTAACCCCTG +TCTTTAGATTTACAGTCCAATGCTTCACTCAGCCATTTTACCTCACCCCC +ACTGATGTTCGCCGACCGTTGACTATTCTCTACAAACCACAAAGACATTG +GAACACTATACCTATTATTCGGCGCATGAGCTGGAGTCCTAGGCACAGCT +CTAAGCCTCCTTATTCGAGCCGAGCTGGGCCAGCCAGGCAACCTTCTAGG +TAACGACCACATCTACAACGTTATCGTCACAGCCCATGCATTTGTAATAA +TCTTCTTCATAGTAATACCCATCATAATCGGAGGCTTTGGCAACTGACTA +GTTCCCCTAATAATCGGTGCCCCCGATATGGCGTTTCCCCGCATAAACAA +CATAAGCTTCTGACTCTTACCTCCCTCTCTCCTACTCCTGCTCGCATCTG +CTATAGTGGAGGCCGGAGCAGGAACAGGTTGAACAGTCTACCCTCCCTTA +GCAGGGAACTACTCCCACCCTGGAGCCTCCGTAGACCTAACCATCTTCTC +CTTACACCTAGCAGGTGTCTCCTCTATCTTAGGGGCCATCAATTTCATCA +CAACAATTATCAATATAAAACCCCCTGCCATAACCCAATACCAAACGCCC +CTCTTCGTCTGATCCGTCCTAATCACAGCAGTCCTACTTCTCCTATCTCT +CCCAGTCCTAGCTGCTGGCATCACTATACTACTAACAGACCGCAACCTCA +ACACCACCTTCTTCGACCCCGCCGGAGGAGGAGACCCCATTCTATACCAA +CACCTATTCTGATTTTTCGGTCACCCTGAAGTTTATATTCTTATCCTACC +AGGCTTCGGAATAATCTCCCATATTGTAACTTACTACTCCGGAAAAAAAG +AACCATTTGGATACATAGGTATGGTCTGAGCTATGATATCAATTGGCTTC +CTAGGGTTTATCGTGTGAGCACACCATATATTTACAGTAGGAATAGACGT +AGACACACGAGCATATTTCACCTCCGCTACCATAATCATCGCTATCCCCA +CCGGCGTCAAAGTATTTAGCTGACTCGCCACACTCCACGGAAGCAATATG +AAATGATCTGCTGCAGTGCTCTGAGCCCTAGGATTCATCTTTCTTTTCAC +CGTAGGTGGCCTGACTGGCATTGTATTAGCAAACTCATCACTAGACATCG +TACTACACGACACGTACTACGTTGTAGCTCACTTCCACTATGTCCTATCA +ATAGGAGCTGTATTTGCCATCATAGGAGGCTTCATTCACTGATTTCCCCT +ATTCTCAGGCTACACCCTAGACCAAACCTACGCCAAAATCCATTTCACTA +TCATATTCATCGGCGTAAATCTAACTTTCTTCCCACAACACTTTCTCGGC +CTATCCGGAATGCCCCGACGTTACTCGGACTACCCCGATGCATACACCAC +ATGAAACATCCTATCATCTGTAGGCTCATTCATTTCTCTAACAGCAGTAA +TATTAATAATTTTCATGATTTGAGAAGCCTTCGCTTCGAAGCGAAAAGTC +CTAATAGTAGAAGAACCCTCCATAAACCTGGAGTGACTATATGGATGCCC +CCCACCCTACCACACATTCGAAGAACCCGTATACATAAAATCTAGACAaa 
+aaaggaaggaatcgaaccccccaaagctggtttcaagccaaccccatggc +ctccatgactttttcAAAAAGGTATTAGAAAAACCATTTCATAACTTTGT +CAAAGTTAAATTATAGGCTAAATCCTATATATCTTAATGGCACATGCAGC +GCAAGTAGGTCTACAAGACGCTACTTCCCCTATCATAGAAGAGCTTATCA +CCTTTCATGATCACGCCCTCATAATCATTTTCCTTATCTGCTTCCTAGTC +CTGTATGCCCTTTTCCTAACACTCACAACAAAACTAACTAATACTAACAT +CTCAGACGCTCAGGAAATAGAAACCGTCTGAACTATCCTGCCCGCCATCA +TCCTAGTCCTCATCGCCCTCCCATCCCTACGCATCCTTTACATAACAGAC +GAGGTCAACGATCCCTCCCTTACCATCAAATCAATTGGCCACCAATGGTA +CTGAACCTACGAGTACACCGACTACGGCGGACTAATCTTCAACTCCTACA +TACTTCCCCCATTATTCCTAGAACCAGGCGACCTGCGACTCCTTGACGTT +GACAATCGAGTAGTACTCCCGATTGAAGCCCCCATTCGTATAATAATTAC +ATCACAAGACGTCTTGCACTCATGAGCTGTCCCCACATTAGGCTTAAAAA +CAGATGCAATTCCCGGACGTCTAAACCAAACCACTTTCACCGCTACACGA +CCGGGGGTATACTACGGTCAATGCTCTGAAATCTGTGGAGCAAACCACAG +TTTCATGCCCATCGTCCTAGAATTAATTCCCCTAAAAATCTTTGAAATAG +GGCCCGTATTTACCCTATAGCACCCCCTCTACCCCCTCTAGAGCCCACTG +TAAAGCTAACTTAGCATTAACCTTTTAAGTTAAAGATTAAGAGAACCAAC +ACCTCTTTACAGTGAAATGCCCCAACTAAATACTACCGTATGGCCCACCA +TAATTACCCCCATACTCCTTACACTATTCCTCATCACCCAACTAAAAATA +TTAAACACAAACTACCACCTACCTCCCTCACCAAAGCCCATAAAAATAAA +AAATTATAACAAACCCTGAGAACCAAAATGAACGAAAATCTGTTCGCTTC +ATTCATTGCCCCCACAATCCTAGGCCTACCCGCCGCAGTACTGATCATTC +TATTTCCCCCTCTATTGATCCCCACCTCCAAATATCTCATCAACAACCGA +CTAATCACCACCCAACAATGACTAATCAAACTAACCTCAAAACAAATGAT +AGCCATACACAACACTAAAGGACGAACCTGATCTCTTATACTAGTATCCT +TAATCATTTTTATTGCCACAACTAACCTCCTCGGACTCCTGCCTCACTCA +TTTACACCAACCACCCAACTATCTATAAACCTAGCCATGGCCATCCCCTT +ATGAGCGGGCGCAGTGATTATAGGCTTTCGCTCTAAGATTAAAAATGCCC +TAGCCCACTTCTTACCACAAGGCACACCTACACCCCTTATCCCCATACTA +GTTATTATCGAAACCATCAGCCTACTCATTCAACCAATAGCCCTGGCCGT +ACGCCTAACCGCTAACATTACTGCAGGCCACCTACTCATGCACCTAATTG +GAAGCGCCACCCTAGCAATATCAACCATTAACCTTCCCTCTACACTTATC +ATCTTCACAATTCTAATTCTACTGACTATCCTAGAAATCGCTGTCGCCTT +AATCCAAGCCTACGTTTTCACACTTCTAGTAAGCCTCTACCTGCACGACA +ACACATAATGACCCACCAATCACATGCCTATCATATAGTAAAACCCAGCC +CATGACCCCTAACAGGGGCCCTCTCAGCCCTCCTAATGACCTCCGGCCTA +GCCATGTGATTTCACTTCCACTCCATAACGCTCCTCATACTAGGCCTACT 
+AACCAACACACTAACCATATACCAATGGTGGCGCGATGTAACACGAGAAA +GCACATACCAAGGCCACCACACACCACCTGTCCAAAAAGGCCTTCGATAC +GGGATAATCCTATTTATTACCTCAGAAGTTTTTTTCTTCGCAGGATTTTT +CTGAGCCTTTTACCACTCCAGCCTAGCCCCTACCCCCCAACTAGGAGGGC +ACTGGCCCCCAACAGGCATCACCCCGCTAAATCCCCTAGAAGTCCCACTC +CTAAACACATCCGTATTACTCGCATCAGGAGTATCAATCACCTGAGCTCA +CCATAGTCTAATAGAAAACAACCGAAACCAAATAATTCAAGCACTGCTTA +TTACAATTTTACTGGGTCTCTATTTTACCCTCCTACAAGCCTCAGAGTAC +TTCGAGTCTCCCTTCACCATTTCCGACGGCATCTACGGCTCAACATTTTT +TGTAGCCACAGGCTTCCACGGACTTCACGTCATTATTGGCTCAACTTTCC +TCACTATCTGCTTCATCCGCCAACTAATATTTCACTTTACATCCAAACAT +CACTTTGGCTTCGAAGCCGCCGCCTGATACTGGCATTTTGTAGATGTGGT +TTGACTATTTCTGTATGTCTCCATCTATTGATGAGGGTCTTACTCTTTTA +GTATAAATAGTACCGTTAACTTCCAATTAACTAGTTTTGACAACATTCAA +AAAAGAGTAATAAACTTCGCCTTAATTTTAATAATCAACACCCTCCTAGC +CTTACTACTAATAATTATTACATTTTGACTACCACAACTCAACGGCTACA +TAGAAAAATCCACCCCTTACGAGTGCGGCTTCGACCCTATATCCCCCGCC +CGCGTCCCTTTCTCCATAAAATTCTTCTTAGTAGCTATTACCTTCTTATT +ATTTGATCTAGAAATTGCCCTCCTTTTACCCCTACCATGAGCCCTACAAA +CAACTAACCTGCCACTAATAGTTATGTCATCCCTCTTATTAATCATCATC +CTAGCCCTAAGTCTGGCCTATGAGTGACTACAAAAAGGATTAGACTGAGC +CGAATTGGTATATAGTTTAAACAAAACGAATGATTTCGACTCATTAAATT +ATGATAATCATATTTACCAAATGCCCCTCATTTACATAAATATTATACTA +GCATTTACCATCTCACTTCTAGGAATACTAGTATATCGCTCACACCTCAT +ATCCTCCCTACTATGCCTAGAAGGAATAATACTATCGCTGTTCATTATAG +CTACTCTCATAACCCTCAACACCCACTCCCTCTTAGCCAATATTGTGCCT +ATTGCCATACTAGTCTTTGCCGCCTGCGAAGCAGCGGTGGGCCTAGCCCT +ACTAGTCTCAATCTCCAACACATATGGCCTAGACTACGTACATAACCTAA +ACCTACTCCAATGCTAAAACTAATCGTCCCAACAATTATATTACTACCAC +TGACATGACTTTCCAAAAAGCACATAATTTGAATCAACACAACCACCCAC +AGCCTAATTATTAGCATCATCCCCCTACTATTTTTTAACCAAATCAACAA +CAACCTATTTAGCTGTTCCCCAACCTTTTCCTCCGACCCCCTAACAACCC +CCCTCCTAATACTAACTACCTGACTCCTACCCCTCACAATCATGGCAAGC +CAACGCCACTTATCCAGCGAACCACTATCACGAAAAAAACTCTACCTCTC +TATACTAATCTCCCTACAAATCTCCTTAATTATAACATTCACAGCCACAG +AACTAATCATATTTTATATCTTCTTCGAAACCACACTTATCCCCACCTTG +GCTATCATCACCCGATGAGGCAACCAGCCAGAACGCCTGAACGCAGGCAC +ATACTTCCTATTCTACACCCTAGTAGGCTCCCTTCCCCTACTCATCGCAC 
+TAATTTACACTCACAACACCCTAGGCTCACTAAACATTCTACTACTCACT +CTCACTGCCCAAGAACTATCAAACTCCTGAGCCAACAACTTAATATGACT +AGCTTACACAATAGCTTTTATAGTAAAGATACCTCTTTACGGACTCCACT +TATGACTCCCTAAAGCCCATGTCGAAGCCCCCATCGCTGGGTCAATAGTA +CTTGCCGCAGTACTCTTAAAACTAGGCGGCTATGGTATAATACGCCTCAC +ACTCATTCTCAACCCCCTGACAAAACACATAGCCTACCCCTTCCTTGTAC +TATCCCTATGAGGCATAATTATAACAAGCTCCATCTGCCTACGACAAACA +GACCTAAAATCGCTCATTGCATACTCTTCAATCAGCCACATAGCCCTCGT +AGTAACAGCCATTCTCATCCAAACCCCCTGAAGCTTCACCGGCGCAGTCA +TTCTCATAATCGCCCACGGACTCACATCCTCATTACTATTCTGCCTAGCA +AACTCAAACTACGAACGCACTCACAGTCGCATCATAATCCTCTCTCAAGG +ACTTCAAACTCTACTCCCACTAATAGCTTTTTGATGACTTCTAGCAAGCC +TCGCTAACCTCGCCTTACCCCCCACTATTAACCTACTGGGAGAACTCTCT +GTGCTAGTAACCACGTTCTCCTGATCAAATATCACTCTCCTACTTACAGG +ACTCAACATACTAGTCACAGCCCTATACTCCCTCTACATATTTACCACAA +CACAATGGGGCTCACTCACCCACCACATTAACAACATAAAACCCTCATTC +ACACGAGAAAACACCCTCATGTTCATACACCTATCCCCCATTCTCCTCCT +ATCCCTCAACCCCGACATCATTACCGGGTTTTCCTCTTGTAAATATAGTT +TAACCAAAACATCAGATTGTGAATCTGACAACAGAGGCTTACGACCCCTT +ATTTACCGAGAAAGCTCACAAGAACTGCTAACTCATGCCCCCATGTCTAA +CAACATGGCTTTCTCAACTTTTAAAGGATAACAGCTATCCATTGGTCTTA +GGCCCCAAAAATTTTGGTGCAACTCCAAATAAAAGTAATAACCATGCACA +CTACTATAACCACCCTAACCCTGACTTCCCTAATTCCCCCCATCCTTACC +ACCCTCGTTAACCCTAACAAAAAAAACTCATACCCCCATTATGTAAAATC +CATTGTCGCATCCACCTTTATTATCAGTCTCTTCCCCACAACAATATTCA +TGTGCCTAGACCAAGAAGTTATTATCTCGAACTGACACTGAGCCACAACC +CAAACAACCCAGCTCTCCCTAAGCTTCAAACTAGACTACTTCTCCATAAT +ATTCATCCCTGTAGCATTGTTCGTTACATGGTCCATCATAGAATTCTCAC +TGTGATATATAAACTCAGACCCAAACATTAATCAGTTCTTCAAATATCTA +CTCATTTTCCTAATTACCATACTAATCTTAGTTACCGCTAACAACCTATT +CCAACTGTTCATCGGCTGAGAGGGCGTAGGAATTATATCCTTCTTGCTCA +TCAGTTGATGATACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCA +GTCCTATACAACCGTATCGGCGATATCGGTTTCATCCTCGCCTTAGCATG +ATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAA +ACGCTAATCCAAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCA +GGCAAATCAGCCCAATTAGGTCTCCACCCCTGACTCCCCTCAGCCATAGA +AGGCCCCACCCCAGTCTCAGCCCTACTCCACTCAAGCACTATAGTTGTAG +CAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAAAATAGCCCA 
+CTAATCCAAACTCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGC +AGCAGTCTGCGCCCTTACACAAAATGACATCAAAAAAATCGTAGCCTTCT +CCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAA +CCACACCTAGCATTCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCAT +ACTATTTATGTGCTCCGGGTCCATCATCCACAACCTTAACAATGAACAAG +ATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTCACTTCAACC +TCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGG +TTTCTACTCCAAAGACCACATCATCGAAACCGCAAACATATCATACACAA +ACGCCTGAGCCCTATCTATTACTCTCATCGCTACCTCCCTGACAAGCGCC +TATAGCACTCGAATAATTCTTCTCACCCTAACAGGTCAACCTCGCTTCCC +CACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTA +AACGCCTGGCAGCCGGAAGCCTATTCGCAGGATTTCTCATTACTAACAAC +ATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTACCTAAAACT +CACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCA +ACTACCTAACCAACAAACTTAAAATAAAATCCCCACTATGCACATTTTAT +TTCTCCAACATACTCGGATTCTACCCTAGCATCACACACCGCACAATCCC +CTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACC +TAACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATC +TCCACCTCCATCATCACCTCAACCCAAAAAGGCATAATTAAACTTTACTT +CCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAATCACATAAC +CTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGT +TCAACCAGTAACCACTACTAATCAACGCCCATAATCATACAAAGCCCCCG +CACCAATAGGATCCTCCCGAATCAACCCTGACCCCTCTCCTTCATAAATT +ATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCACCCCATCATA +CTCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAA +CACTCACCAAGACCTCAACCCCTGACCCCCATGCCTCAGGATACTCCTCA +ATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCCCCCTAAATA +AATTAAAAAAACTATTAAACCCATATAACCTCCCCCAAAATTCAGAATAA +TAACACACCCGACCACACCGCTAACAATCAGTACTAAACCCCCATAAATA +GGAGAAGGCTTAGAAGAAAACCCCACAAACCCCATTACTAAACCCACACT +CAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGA +CCAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATG +ACCCCAATACGCAAAATTAACCCCCTAATAAAATTAATTAACCACTCATT +CATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAAACTTCGGCT +CACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTA +GCCATACACTACTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCA +CATCACTCGAGACGTAAATTATGGCTGAATCATCCGCTACCTTCACGCCA +ATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGC 
+CTATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTAT +CCTCCTGCTTGCAACTATAGCAACAGCCTTCATAGGCTATGTCCTCCCGT +GAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAACTTACTATCC +GCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTA +CTCAGTAGACAGTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCT +TACCCTTCATTATTGCAGCCCTAGCAGCACTCCACCTCCTATTCTTGCAC +GAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAAT +CACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCT +TCCTTCTCTCCTTAATGACATTAACACTATTCTCACCAGACCTCCTAGGC +GACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCCTCCCCACAT +CAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCC +CTAACAAACTAGGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTA +GCAATAATCCCCATCCTCCATATATCCAAACAACAAAGCATAATATTTCG +CCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTC +TAACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGA +CAAGTAGCATCCGTACTATACTTCACAACAATCCTAATCCTAATACCAAC +TATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTCCTTGTAGTA +TAAACTAATACACCAGTCTTGTAAACCGGAGACGAAAACCTTTTTCCAAG +GACAAATCAGAGAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAA +GATTCTAATTTAAACTATTCTCTGTTCTTTCATGGGGAAGCAGATTTGGG +TACCACCCAAGTATTGACTCACCCATCAACAACCGCTATGTATTTCGTAC +ATTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCA +CCTGTAGTACATAAAAACCCAACCCACATCAAACCCCCCCCCCCCATGCT +TACAAGCAAGTACAGCAATCAACCTTCAACTATCACACATCAACTGCAAC +TCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTT +AACAGTACATAGTACATAAAGTCATTTACCGTACATAGCACATTACAGTC +AAATCCCTTCTCGTCCCCATGGATGACCCCCCTCAGATAGGGGTCCCTTG +ACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCT +CGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGAC +ATCTGGTTCCTACTTCAGGGCCATAAAGCCTAAATAGCCCACACGTTCCC +CTTAAATAAGACATCACGATG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genome.fasta.fai Thu Aug 13 09:50:35 2020 -0400 @@ -0,0 +1,1 @@ +chrM 16571 6 50 51
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output/GC_profile.targetedRegions.cnp Thu Aug 13 09:50:35 2020 -0400 @@ -0,0 +1,37 @@ +M 576 0.416667 1 +M 647 0.456544 1 +M 1601 0.428571 1 +M 1670 0.430128 1 +M 3229 0.381579 1 +M 3306 0.477534 1 +M 4262 0.285714 1 +M 4328 0.479452 1 +M 4401 0.449275 1 +M 4469 0.430489 1 +M 5511 0.347826 1 +M 5586 0.442857 1 +M 5656 0.405405 1 +M 5760 0.462687 1 +M 5825 0.432836 1 +M 5903 0.462087 1 +M 7445 0.485714 1 +M 7517 0.231884 1 +M 7585 0.461314 1 +M 8294 0.338028 1 +M 8365 0.394231 1 +M 8526 0.444282 1 +M 9206 0.467516 1 +M 9990 0.26087 1 +M 10058 0.403458 1 +M 10404 0.227273 1 +M 10469 0.42953 1 +M 10759 0.444525 1 +M 12137 0.357143 1 +M 12206 0.466667 1 +M 12265 0.347222 1 +M 12336 0.44898 1 +M 14148 0.429658 1 +M 14673 0.385714 1 +M 14746 0.460595 1 +M 15887 0.38806 1 +M 15955 0.333333 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output/control.bam_control.cpn Thu Aug 13 09:50:35 2020 -0400 @@ -0,0 +1,17 @@ +M 0 4999 2091 +M 1000 5999 535 +M 2000 6999 586 +M 3000 7999 552 +M 4000 8999 515 +M 5000 9999 506 +M 6000 10999 414 +M 7000 11999 402 +M 8000 12999 407 +M 9000 13999 445 +M 10000 14999 589 +M 11000 15999 640 +M 12000 16999 594 +M 13000 17999 482 +M 14000 18999 378 +M 15000 19999 162 +M 16000 20999 66
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output/sample.bam_info.txt Thu Aug 13 09:50:35 2020 -0400 @@ -0,0 +1,13 @@ +Program_Version v11.6 +Sample_Name sample.bam +Control_Used True +CGcontent_Used False +Mappability_Used False +Looking_For_Subclones False +Breakpoint_Threshold 0.8 +Window 5000 +Number_Of_Reads|Pairs_In_Sample 657 +Number_Of_Reads|Pairs_In_Control 3348 +Output_Ploidy 2 +Sample_Purity 1 +Good_Polynomial_Fit True
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output/sample.bam_ratio.txt Thu Aug 13 09:50:35 2020 -0400 @@ -0,0 +1,18 @@ +Chromosome Start Ratio MedianRatio CopyNumber +M 1 0.0141987 1.0003 2 +M 1001 1.0003 1.0003 2 +M 2001 0.946506 1.0003 2 +M 3001 1.00877 1.0003 2 +M 4001 1.01629 1.0003 2 +M 5001 1.00938 1.0003 2 +M 6001 0.911101 1.0003 2 +M 7001 1.03405 1.0003 2 +M 8001 1.04266 1.0003 2 +M 9001 0.997997 1.0003 2 +M 10001 1.08042 1.0003 2 +M 11001 1.01 1.0003 2 +M 12001 0.942906 1.0003 2 +M 13001 0.803137 1.0003 2 +M 14001 0.73252 1.0003 2 +M 15001 -1 1.0003 2 +M 16001 -1 1.0003 2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output/sample.bam_sample.cpn Thu Aug 13 09:50:35 2020 -0400 @@ -0,0 +1,17 @@ +M 0 4999 395 +M 1000 5999 110 +M 2000 6999 109 +M 3000 7999 112 +M 4000 8999 111 +M 5000 9999 110 +M 6000 10999 93 +M 7000 11999 103 +M 8000 12999 105 +M 9000 13999 106 +M 10000 14999 125 +M 11000 15999 132 +M 12000 16999 110 +M 13000 17999 87 +M 14000 18999 68 +M 15000 19999 27 +M 16000 20999 8
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/fasta_indexes.loc.sample Thu Aug 13 09:50:35 2020 -0400 @@ -0,0 +1,29 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Samtools indexed sequences data files. You will need +#to create these data files and then create a fasta_indexes.loc file +#similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The fasta_indexes.loc +#file has this format (white space characters are TAB characters): +# +# <unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg19 Canonical indexed stored in +# +# /depot/data2/galaxy/hg19/sam/, +# +#then the fasta_indexes.loc entry would look like this: +# +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +# +#and your /depot/data2/galaxy/hg19/sam/ directory +#would contain hg19canon.fa and hg19canon.fa.fai files. +# +#Your fasta_indexes.loc file should include an entry per line for +#each index set you have stored. The file in the path does actually +#exist, but it should never be directly used. Instead, the name serves +#as a prefix for the index file. For example: +# +#hg18canon hg18 Human (Homo sapiens): hg18 Canonical /depot/data2/galaxy/hg18/sam/hg18canon.fa +#hg18full hg18 Human (Homo sapiens): hg18 Full /depot/data2/galaxy/hg18/sam/hg18full.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /depot/data2/galaxy/hg19/sam/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /depot/data2/galaxy/hg19/sam/hg19full.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Aug 13 09:50:35 2020 -0400 @@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Location of SAMTools indexes for FASTA files --> + <table name="fasta_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/fasta_indexes.loc" /> + </table> +</tables>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Thu Aug 13 09:50:35 2020 -0400 @@ -0,0 +1,7 @@ +<tables> + <!-- Location of SAMTools indexed FASTA files --> + <table name="fasta_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/fasta_indexes.loc" /> + </table> +</tables>