# HG changeset patch
# User padge
# Date 1652377158 0
# Node ID cc0957c46408f0af1f06f6ad73b192001188202a
"planemo upload for repository https://github.com/kirstvh/MultiplexCrisprDOE commit b6c1b1860eee82b06ed4a592d1f9eee6886be318-dirty"
diff -r 000000000000 -r cc0957c46408 MultiplexCrisprDOE.jl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/MultiplexCrisprDOE.jl Thu May 12 17:39:18 2022 +0000
@@ -0,0 +1,1048 @@
+"""
+ gRNA_frequency_distribution(m,
+ sd,
+ l,
+ u,
+ n_gRNA_total;
+ normalize = true,
+ visualize = false)
+
+Generates vector with frequencies in the combinatorial gRNA/Cas9 construct library for all gRNAs
+
+***INPUT***
+m: the average abundance of the gRNAs (in terms of absolute or relative frequency)
+sd: the standard deviation on the gRNA abundances (in terms of absolute or relative frequency)
+l: minimal gRNA abundance (in terms of absolute or relative frequency)
+u: maximal gRNA abundance (in terms of absolute or relative frequency)
+n_gRNA_total: the total number of gRNAs in the experiment
+normalize: if set to "true", the gRNA abundances (absolute frequencies) are converted into relative frequencies
+visualize: if set to "true", a histogram of all gRNA abundances is plotted
+
+***OUTPUT***
+p_gRNA_freq: vector with frequencies for all gRNAs in the construct library
+"""
+function gRNA_frequency_distribution(m,
+ sd,
+ l,
+ u,
+ n_gRNA_total;
+ normalize = true,
+ visualize = false)
+
+ d_gRNA_freq = truncated(Normal(m, sd), l, u) # gRNA frequency distribution
+ p_gRNA_freq = collect(rand(d_gRNA_freq, n_gRNA_total)) # sample gRNA frequencies from distribution
+
+ if normalize # convert into relative frequencies
+ p_gRNA_freq /= sum(p_gRNA_freq)
+ end
+
+ if visualize
+ return histogram(p_gRNA_freq, label="",
+ xlabel="Number of reads per gRNA",
+ linecolor="white",
+ normalize=:probability,
+ xtickfontsize=10,ytickfontsize=10,
+ color=:mediumturquoise, size=(600,350), bins = 25,
+ ylabel="Relative frequency",
+ title="gRNA frequency distribution")
+ else
+ return p_gRNA_freq
+ end
+end
+
+"""
+ gRNA_edit_distribution(f_act,
+ ϵ_edit_act,
+ ϵ_edit_inact,
+ sd_act,
+ n_gRNA_total;
+ visualize = false)
+
+Generates vector with genome editing efficiencies for all the gRNAs in the experiment.
+
+***INPUT***
+f_act: fraction of all gRNAs that is active
+ϵ_edit_act: Average genome editing efficiency for active gRNAs - mean of the genome editing efficiency distribution for active gRNAs
+ϵ_edit_inact: Average genome editing efficiency for inactive gRNAs - mean of the genome editing efficiency distribution for inactive gRNAs
+sd_act: standard deviation of the genome editing efficiency distributions for active and inactive gRNAs
+n_gRNA_total: the total number of gRNAs in the experiment
+visualize: if set to "true", a histogram of all genome editing efficiency is plotted
+
+***OUTPUT***
+p_gRNA_edit: vector with genome editing efficiencies for all gRNAs
+"""
+function gRNA_edit_distribution(f_act, ϵ_edit_act, ϵ_edit_inact, sd_act, n_gRNA_total; visualize=false)
+ d_act = Binomial(1, f_act) # there is a probability f_act that a gRNA is active
+ d_high_act = truncated(Normal(ϵ_edit_act, sd_act), 0.01, 1) # average genome editing efficiency for active gRNAs is equal to ϵ_edit_act
+ d_low_act = truncated(Normal(ϵ_edit_inact, sd_act), 0.01, 1) # average genome editing efficiency for inactive gRNAs is equal to ϵ_edit_inact
+ p_gRNA_edit = zeros(n_gRNA_total) # initialize vector with genome editing efficiencies for gRNAs
+
+ for i in 1:n_gRNA_total
+ if rand(d_act, 1) == [1] # gRNA is active
+ p_gRNA_edit[i] = rand(d_high_act, 1)[1]
+ else # gRNA is inactive
+ p_gRNA_edit[i] = rand(d_low_act, 1)[1]
+ end
+ end
+
+ if visualize
+ return histogram(p_gRNA_edit,
+ normalize = :probability,
+ linecolor = "white",
+ label="",
+ color=:turquoise4,
+ xtickfontsize=10,ytickfontsize=10, xlim = (0, 1),
+ xticks=(0:0.1:1),
+ bins = 150,
+ xlabel="gRNA editing efficiency",
+ ylabel="Relative frequency",
+ title="gRNA genome editing effiency distribution")
+
+ else
+ return p_gRNA_edit
+ end
+end
+
+"""
+ simulate_Nₓ₁(x,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO;
+ iter = 500)
+
+Computes the expected value and the standard deviation of the minimal plant library size for full coverage of all single gene knockouts (E[Nx,1] and σ[Nx,1]) using simulation
+
+***INPUT***
+x: number of target genes in the experiment
+g: number of gRNAs designed per target gene
+r: number of gRNA sequences per combinatorial gRNA/Cas construct
+n_gRNA_total: total number of gRNAs in the experiment
+p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
+p_gRNA_edit: vector with genome editing efficiencies for all gRNAs
+ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
+iter: number of CRISPR/Cas experiments that are simulated to obtain E[Nₓ₁] and σ[Nₓ₁]
+
+***OUTPUT***
+E_Nₓ₁: expected value of the plant library size for full coverage of all single gene knockouts
+sd_Nₓ₁: standard deviation on the plant library size for full coverage of all single gene knockouts
+"""
+function simulate_Nₓ₁(x,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO;
+ iter = 500)
+
+ @assert x * g == n_gRNA_total
+ Nₓ₁_vec = [] #stores number of plants to reach full coverage for each simulated experiment
+ for i in 1:iter
+ genes_vec = [] # Initialize vector to store single gene knockouts that are observed in plants
+ Nₓ₁ = 0
+ while genes_vec != collect(1:x) # check if all possible single gene knockouts are present: if no full coverage, sample an additional plant
+ Nₓ₁ += 1 # count how many plants must be sampled to observe all single gene knockouts
+
+ # sample combinatorial gRNA/Cas9 construct
+ gRNA_indices_construct = findall((rand(Multinomial(r, p_gRNA_freq))) .!= 0)
+
+ # execute mutations
+ gRNA_indices_mutations = [gRNA for gRNA in gRNA_indices_construct if rand(Binomial(1, p_gRNA_edit[gRNA])) == 1]
+
+ # effective gene knockout (loss-of-function) ?
+ gRNA_indices_KO = [gRNA for gRNA in gRNA_indices_mutations if rand(Binomial(1, ϵ_KO)) == 1]
+
+ # which genes are knocked out?
+ genes_indices_KO = Int.(ceil.(gRNA_indices_KO / g))
+ append!(genes_vec, genes_indices_KO)
+
+ # update vector with observed gene knockouts
+ genes_vec = Int.(sort(unique(genes_vec)))
+ end
+ push!(Nₓ₁_vec, Nₓ₁) # add plant library size for full coverage of current experiment to vector
+ end
+
+ # Calculate expected value and standard deviation
+ E_Nₓ₁ = mean(Nₓ₁_vec);
+ sd_Nₓ₁ = std(Nₓ₁_vec)
+
+ return E_Nₓ₁, sd_Nₓ₁
+end
+
+
+"""
+ BioCCP_Nₓ₁(x,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO)
+
+Computes the expected value and the standard deviation of the minimal plant library size for
+full coverage of all single gene knockouts (E[Nx,1] and σ[Nx,1]) using BioCCP
+
+***INPUT***
+x: number of target genes in the experiment
+g: number of gRNAs designed per target gene
+r: number of gRNA sequences per combinatorial gRNA/Cas construct
+n_gRNA_total: total number of gRNAs in the experiment
+p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
+p_gRNA_edit: vector with genome editing efficiencies for all gRNAs
+ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
+
+***OUTPUT***
+E_Nₓ₁ : expected value of the plant library size for full coverage of all single gene knockouts
+sd_Nₓ₁ : standard deviation on the plant library size for full coverage of all single gene knockouts
+"""
+function BioCCP_Nₓ₁(x,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO)
+
+ # prepare input for BioCCP functions
+ p_gRNAs = p_gRNA_freq .* p_gRNA_edit * ϵ_KO # calculate probability for each gRNA to induce effective gene knockout
+ p_genes = [sum(p_gRNAs[i:i+g-1]) for i in 1:g:n_gRNA_total] # obtain probability of single gene knockout by summing up probability of all corresponding gRNAs to induce effective gene knockout
+
+ # Apply BioCCP functions
+ E_Nₓ₁ = expectation_minsamplesize(x; p=p_genes, r=r, normalize=false)
+ sd_Nₓ₁ = std_minsamplesize(x; p=p_genes, r=r, normalize=false)
+
+ return E_Nₓ₁, sd_Nₓ₁
+end
+
+
+"""
+ simulate_Nₓ₂(x,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO;
+ iter=500)
+
+Computes the expected value and the standard deviation of the minimal plant library size for full coverage of all pairwise combinations of gene knockouts
+in a multiplex CRISPR/Cas experiment (E[Nx,2] and σ[Nx,2]) using simulation
+
+***INPUT***
+x: number of target genes in the experiment
+g: number of gRNAs designed per target gene
+r: number of gRNA sequences per combinatorial gRNA/Cas construct
+n_gRNA_total: total number of gRNAs in the experiment
+p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
+p_gRNA_edit: vector with genome editing efficiencies for all gRNAs
+ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
+iter: number of CRISPR/Cas experiments that are simulated to obtain E[Nₓ₂] and σ[Nₓ₂]
+
+***OUTPUT***
+E_Nₓ₂ : expected value of the plant library size for full coverage of all pairwise combinations of gene knockouts
+sd_Nₓ₂ : standard deviation on the plant library size for full coverage of all pairwise combinations of gene knockouts
+"""
+function simulate_Nₓ₂(x,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO;
+ iter = 500)
+
+ @assert x * g == n_gRNA_total
+ Nₓ₂_vec = [] #stores number of plants to reach full coverage of all pairwise combinations of gene knockouts for each simulated experiment
+
+ for i in 1:iter
+ X_interactions_count = zeros(x, x) # Initialize matrix to count pairwise interactions
+ Nₓ₂ = 0
+ while X_interactions_count != ones(x, x) # check if all pairwise combinations are present
+ Nₓ₂ += 1 # count how many plants must be sampled to fill pairwise interaction matrix
+
+ # sample combinatorial gRNA/Cas9 construct
+ gRNA_indices_construct = findall((rand(Multinomial(r, p_gRNA_freq))) .!= 0)
+
+ # execute mutations
+ gRNA_indices_mutations = [gRNA for gRNA in gRNA_indices_construct if rand(Binomial(1, p_gRNA_edit[gRNA])) == 1]
+
+ # effective gene knockout (loss-of-function) ?
+ gRNA_indices_KO = [gRNA for gRNA in gRNA_indices_mutations if rand(Binomial(1, ϵ_KO)) == 1]
+
+ # which genes are knocked out?
+ genes_indices_KO = Int.(ceil.(gRNA_indices_KO / g))
+
+ # which pairwise combinations are present?
+ interactions = collect(combinations(genes_indices_KO, 2))
+
+ # Store represented combinations in matrix
+ for interaction in interactions
+ j = interaction[1]; k = interaction[2]
+ X_interactions_count[j,k] = 1; X_interactions_count[k,j] = 1; X_interactions_count[j,j] = 1; X_interactions_count[k,k] = 1
+ end
+ end
+
+ push!(Nₓ₂_vec, Nₓ₂)
+ end
+
+ # Calculate expected value and standard deviation
+ E_Nₓ₂ = mean(Nₓ₂_vec)
+ sd_Nₓ₂ = std(Nₓ₂_vec)
+
+ return E_Nₓ₂, sd_Nₓ₂
+end
+
+"""
+ BioCCP_Nₓ₂(x,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit, ϵ_KO)
+
+Computes the expected value and the standard deviation of the minimal plant library size for full coverage of all pairwise combinations of gene knockouts in a multiplex CRISPR/Cas experiment (E[Nx,2] and σ[Nx,2]) using BioCCP
+
+ ***INPUT***
+x: number of target genes in the experiment
+g: number of gRNAs designed per target gene
+r: number of gRNA sequences per combinatorial gRNA/Cas construct
+n_gRNA_total: total number of gRNAs in the experiment
+p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
+p_gRNA_edit: vector with genome editing efficiencies for all gRNAs
+ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
+
+***OUTPUT***
+E_Nₓ₂: expected value of the plant library size for full coverage of all pairwise combinations of gene knockouts
+sd_Nₓ₂: standard deviation on the plant library size for full coverage of all pairwise combinations of gene knockouts
+"""
+function BioCCP_Nₓ₂(x,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit, ϵ_KO)
+
+ # how many pairwise combinations of gRNAs
+ ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 2))
+ n_combinations_gRNA = length(ind_combinations_gRNA)
+
+ # calculate probability and activity of gRNA combinations
+ p_combinations_gRNA_library = zeros(n_combinations_gRNA)
+ p_combinations_gRNA_act = zeros(n_combinations_gRNA)
+ for i in 1:n_combinations_gRNA
+ p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]]
+ p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]]
+ end
+
+ # normalize probability gRNA combinations
+ p_combinations_gRNA_library /= sum(p_combinations_gRNA_library)
+
+ # select pairwise gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes)
+ p_combinations_gRNA_library_interest = []
+ p_combinations_gRNA_act_interest = []
+ ind_combinations_gRNA_interest = []
+ for i in 1:n_combinations_gRNA
+ if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g)
+ push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i])
+ push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i])
+ push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i])
+ end
+ end
+
+ n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest)
+ p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^2
+
+ # sum up probabilities or gRNA combinations for corresponding gene knockout combinations
+ p_genes_matrix = zeros(x, x)
+ for i in 1:n_combinations_gRNA_interest
+ gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g))
+ gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g))
+ p_genes_matrix[gene1, gene2] += p_combinations_gRNA[i]
+ end
+ p_genes = collect([p_genes_matrix[i, j] for j in 2:size(p_genes_matrix, 1) for i in 1:j-1])
+ n_combinations_genes = length(p_genes)
+ combinations_pp = length(collect(combinations(1:r, 2)))
+
+ # Apply BioCCP functions
+ E_Nₓ₂ = expectation_minsamplesize(n_combinations_genes; p=p_genes, r=combinations_pp, normalize=false)
+ sd_Nₓ₂ = std_minsamplesize(n_combinations_genes; p=p_genes, r=combinations_pp, normalize=false)
+
+ return E_Nₓ₂, sd_Nₓ₂
+end
+
+"""
+ simulate_Nₓ₂_countKOs(x,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit, ϵ_KO; iter=100000)
+
+Counts the number of knockouts per plant in the experiment.
+
+***INPUT***
+x: number of target genes in the experiment
+g: number of gRNAs designed per target gene
+r: number of gRNA sequences per combinatorial gRNA/Cas construct
+n_gRNA_total: total number of gRNAs in the experiment
+p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
+p_gRNA_edit: vector with genome editing efficiencies for all gRNAs
+ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
+iter: number of plants that are sampled
+
+***OUTPUT***
+n_KOs_vec: vector with the number of knockouts for each plant
+"""
+function simulate_Nₓ₂_countKOs(x,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO;
+ iter = 100000)
+
+ @assert x * g == n_gRNA_total
+
+ n_KOs_vec = []
+
+ for j in 1:iter
+ # sample combinatorial gRNA/Cas9 construct
+ gRNA_indices_construct = findall((rand(Multinomial(r, p_gRNA_freq))) .!= 0)
+
+ # execute mutations
+ gRNA_indices_mutations = [gRNA for gRNA in gRNA_indices_construct if rand(Binomial(1, p_gRNA_edit[gRNA])) == 1]
+
+ # effective gene knockout (loss-of-function) ?
+ gRNA_indices_KO = [gRNA for gRNA in gRNA_indices_mutations if rand(Binomial(1, ϵ_KO)) == 1]
+
+ # which genes are knocked out?
+ genes_indices_KO = Int.(ceil.(gRNA_indices_KO / g))
+
+ push!(n_KOs_vec, length(unique((genes_indices_KO))))
+ end
+
+ return n_KOs_vec
+end
+
+"""
+ simulate_Nₓ₃(x,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO;
+ iter=500)
+
+Computes the expected value and the standard deviation of the minimal plant library size for full coverage of all triple combinations of gene knockouts in
+a multiplex CRISPR/Cas experiment (E[Nx,3] and σ[Nx,3]) using simulation
+
+***INPUT***
+x: number of target genes in the experiment
+g: number of gRNAs designed per target gene
+r: number of gRNA sequences per combinatorial gRNA/Cas construct
+n_gRNA_total: total number of gRNAs in the experiment
+p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
+p_gRNA_edit: vector with genome editing efficiencies for all gRNAs
+ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
+iter: number of CRISPR/Cas experiments that are simulated to obtain E[Nₓ₃] and σ[Nₓ₃]
+
+***OUTPUT***
+E_Nₓ₃: expecteded value of the plant library size for full coverage of all triple combinations of gene knockouts
+sd_Nₓ₃: standard deviation on the plant library size for full coverage of all triple combinations of gene knockouts
+"""
+function simulate_Nₓ₃(x,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO;
+ iter = 500)
+
+ @assert x * g == n_gRNA_total
+
+ Nₓ₃_vec = [] # stores number of plants required for each experiment
+
+ for i in 1:iter
+ # println("got till here")
+ X_interactions_count = zeros(x, x, x) # Initialize matrix to count triple interactions
+ # println(X_interactions_count)
+ # println(ones(x, x, x))
+ Nₓ₃ = 0
+
+ while X_interactions_count != ones(x, x, x) # check if all triple combinations are present
+ Nₓ₃ += 1 # count how many plants must be sampled to fill triple interaction matrix
+
+ # sample combinatorial gRNA/Cas9 construct
+ gRNA_indices_construct = findall((rand(Multinomial(r, p_gRNA_freq))) .!= 0)
+
+ # execute mutations
+ gRNA_indices_mutations = [gRNA for gRNA in gRNA_indices_construct if rand(Binomial(1, p_gRNA_edit[gRNA])) == 1]
+
+ # effective gene knockout (loss-of-function) ?
+ gRNA_indices_KO = [gRNA for gRNA in gRNA_indices_mutations if rand(Binomial(1, ϵ_KO)) == 1]
+
+ # which genes are knocked out?
+ genes_indices_KO = Int.(ceil.(gRNA_indices_KO / g))
+
+ # which triple combinations are present?
+ interactions = collect(combinations(genes_indices_KO, 3))
+
+ # Store represented triple combinations in 3D-matrix
+ for interaction in interactions
+ j = interaction[1]
+ k = interaction[2]
+ l = interaction[3]
+ X_interactions_count[j,k,l] = 1
+ X_interactions_count[k,j,l] = 1
+ X_interactions_count[l,j,k] = 1
+ X_interactions_count[l,k,j] = 1
+ X_interactions_count[j,l,k] = 1
+ X_interactions_count[k,l,j] = 1
+
+ X_interactions_count[:,l,l] .= 1
+ X_interactions_count[:,k,k] .= 1
+ X_interactions_count[:,j,j] .= 1
+ X_interactions_count[l,:,l] .= 1
+ X_interactions_count[k,:,k] .= 1
+ X_interactions_count[j,:,j] .= 1
+
+ X_interactions_count[j,j,:] .= 1
+ X_interactions_count[k,k,:] .= 1
+ X_interactions_count[l,l,:] .= 1
+ end
+ end
+ push!(Nₓ₃_vec, Nₓ₃)
+ end
+
+ # calculate expected value and standard deviation
+ E_Nₓ₃ = mean(Nₓ₃_vec); sd_Nₓ₃ = std(Nₓ₃_vec)
+
+ return E_Nₓ₃, sd_Nₓ₃
+end
+
+
+"""
+ BioCCP_Nₓ₃(x,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO)
+
+Computes the expected value and the standard deviation of the minimal plant library size
+for full coverage of all triple combinations of gene knockouts in a multiplex CRISPR/Cas experiment (E[Nx,3] and σ[Nx,3]) using BioCCP
+
+***INPUT***
+x: number of target genes in the experiment
+g: number of gRNAs designed per target gene
+r: number of gRNA sequences per combinatorial gRNA/Cas construct
+n_gRNA_total: total number of gRNAs in the experiment
+p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
+p_gRNA_edit: vector with genome editing efficiencies for all gRNAs
+ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
+
+***OUTPUT***
+E_Nₓ₃: expecteded value of the plant library size for full coverage of all triple combinations of gene knockouts
+sd_Nₓ₃: standard deviation on the plant library size for full coverage of all triple combinations of gene knockouts
+"""
+function BioCCP_Nₓ₃(x,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO)
+
+ # how many triple combinations of gRNAs
+ ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 3))
+ n_combinations_gRNA = length(ind_combinations_gRNA)
+
+ # calculate probability and activity of triple gRNA combinations
+ p_combinations_gRNA_library = zeros(n_combinations_gRNA)
+ p_combinations_gRNA_act = zeros(n_combinations_gRNA)
+ for i in 1:n_combinations_gRNA
+ p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]] * p_gRNA_freq[ind_combinations_gRNA[i][3]]
+ p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]] * p_gRNA_edit[ind_combinations_gRNA[i][3]]
+ end
+
+ # normalize probability gRNA combinations
+ p_combinations_gRNA_library /= sum(p_combinations_gRNA_library)
+
+ # select triple gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes)
+ p_combinations_gRNA_library_interest = []
+ p_combinations_gRNA_act_interest = []
+ ind_combinations_gRNA_interest = []
+ for i in 1:n_combinations_gRNA
+ if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g) && ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][3]/g) && ceil(ind_combinations_gRNA[i][3]/g) != ceil(ind_combinations_gRNA[i][2]/g)
+ push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i])
+ push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i])
+ push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i])
+ end
+ end
+
+ n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest)
+ p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^3
+
+ # sum up probabilities or gRNA combinations for corresponding gene knockout combinations
+ p_genes_matrix = zeros(x, x, x)
+ for i in 1:n_combinations_gRNA_interest
+ gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g))
+ gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g))
+ gene3 = Int(ceil(ind_combinations_gRNA_interest[i][3]/g))
+ p_genes_matrix[gene1, gene2, gene3] += p_combinations_gRNA[i]
+ end
+
+ combinations_genes = collect(combinations(1:x, 3))
+ p_genes = []
+ for combination in combinations_genes
+ push!(p_genes, p_genes_matrix[combination[1], combination[2], combination[3]])
+ end
+
+ n_combinations_genes = length(p_genes)
+ combinations_pp = length(collect(combinations(1:r, 3)))
+
+ # apply BioCCP functions
+ E_Nₓ₃ = expectation_minsamplesize(n_combinations_genes; p=p_genes, r=combinations_pp, normalize=false)
+ sd_Nₓ₃ = std_minsamplesize(n_combinations_genes; p=p_genes, r=combinations_pp, normalize=false)
+
+ return E_Nₓ₃, sd_Nₓ₃
+end
+
+"""
+ BioCCP_Pₓ₁(x,
+ N,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO)
+
+Computes the probability of full coverage of all single gene knockouts (Px,1) for an experiment with given plant library size using BioCCP
+
+***INPUT***
+x: number of target genes in the experiment
+N: plant library size
+g: number of gRNAs designed per target gene
+r: number of gRNA sequences per combinatorial gRNA/Cas construct
+n_gRNA_total: total number of gRNAs in the experiment
+p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
+p_gRNA_edit: vector with genome editing efficiencies for all gRNAs
+ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
+
+***OUTPUT***
+Pₓ₁: probability of full coverage of all single gene knockouts
+
+"""
+function BioCCP_Pₓ₁(x,
+ N,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO)
+
+ # prepare input
+ p_gRNAs = p_gRNA_freq .* p_gRNA_edit * ϵ_KO
+ p_genes = [sum(p_gRNAs[i:i+g-1]) for i in 1:g:n_gRNA_total]
+
+ # apply BioCCP function
+ Pₓ₁ = success_probability(x, N; p=p_genes, r=r, normalize=false)
+
+ return Pₓ₁
+end
+
+"""
+BioCCP_γₓ₁(x,
+ N,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO)
+
+Computes the expected coverage of all single gene knockouts (Px,1) for an experiment with given plant library size using BioCCP
+
+***INPUT***
+x: number of target genes in the experiment
+N: plant library size
+g: number of gRNAs designed per target gene
+r: number of gRNA sequences per combinatorial gRNA/Cas construct
+n_gRNA_total: total number of gRNAs in the experiment
+p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
+p_gRNA_edit: vector with genome editing efficiencies for all gRNAs
+ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
+
+***OUTPUT***
+γₓ₁: expected coverage of all single gene knockouts
+"""
+function BioCCP_γₓ₁(x,
+ N,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO)
+
+ p_gRNAs = p_gRNA_freq .* p_gRNA_edit * ϵ_KO
+ p_genes = [sum(p_gRNAs[i:i+g-1]) for i in 1:g:n_gRNA_total]
+
+ # Apply BioCCP function
+ γₓ₁ = expectation_fraction_collected(x, N; p=p_genes, r=r, normalize=false)
+
+ return γₓ₁
+end
+
+
+"""
+ BioCCP_Pₓ₂(x,
+ N,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO)
+
+Computes the probability of full coverage of all pairwise combinations of gene knockouts (Px,2) for an experiment with given plant library size using BioCCP
+
+***INPUT***
+x: number of target genes in the experiment
+N: plant library size
+g: number of gRNAs designed per target gene
+r: number of gRNA sequences per combinatorial gRNA/Cas construct
+n_gRNA_total: total number of gRNAs in the experiment
+p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
+p_gRNA_edit: vector with genome editing efficiencies for all gRNAs
+ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
+
+***OUTPUT***
+Pₓ₂: probability of full coverage of all pairwise combinations of gene knockouts
+"""
+function BioCCP_Pₓ₂(x,
+ N,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO)
+
+ # how many pairwise combinations of gRNAs
+ ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 2))
+ n_combinations_gRNA = length(ind_combinations_gRNA)
+
+ # calculate probability and activity of gRNA combinations
+ p_combinations_gRNA_library = zeros(n_combinations_gRNA)
+ p_combinations_gRNA_act = zeros(n_combinations_gRNA)
+ for i in 1:n_combinations_gRNA
+ p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]]
+ p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]]
+ end
+
+ # normalize probability gRNA combinations
+ p_combinations_gRNA_library /= sum(p_combinations_gRNA_library)
+
+ # select pairwise gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes)
+ p_combinations_gRNA_library_interest = []
+ p_combinations_gRNA_act_interest = []
+ ind_combinations_gRNA_interest = []
+ for i in 1:n_combinations_gRNA
+ if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g)
+ push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i])
+ push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i])
+ push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i])
+ end
+ end
+
+ n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest)
+ p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^2
+
+ # sum up probabilities or gRNA combinations for corresponding gene knockout combinations
+ p_genes_matrix = zeros(x, x)
+ for i in 1:n_combinations_gRNA_interest
+ gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g))
+ gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g))
+ p_genes_matrix[gene1, gene2] += p_combinations_gRNA[i]
+ end
+
+ p_genes = collect([p_genes_matrix[i, j] for j in 2:size(p_genes_matrix, 1) for i in 1:j-1])
+ n_combinations_genes = length(p_genes)
+ combinations_pp = length(collect(combinations(1:r, 2)))
+
+ # Apply BioCCP function
+ Pₓ₂ = success_probability(n_combinations_genes, N; p=p_genes, r=combinations_pp, normalize=false)
+
+ return Pₓ₂
+end
+
+"""
+ BioCCP_γₓ₂(x,
+ N,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO)
+
+Computes the expected coverage of all pairwise combinations of gene knockouts (γx,2) for an experiment with given plant library size using BioCCP
+
+***INPUT***
+x: number of target genes in the experiment
+N: plant library size
+g: number of gRNAs designed per target gene
+r: number of gRNA sequences per combinatorial gRNA/Cas construct
+n_gRNA_total: total number of gRNAs in the experiment
+p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
+p_gRNA_edit: vector with genome editing efficiencies for all gRNAs
+ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
+
+***OUTPUT***
+γₓ₂: expected coverage of all pairwise combinations of gene knockouts
+"""
+function BioCCP_γₓ₂(x,
+ N,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO)
+
+ # how many pairwise combinations of gRNAs
+ ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 2))
+ n_combinations_gRNA = length(ind_combinations_gRNA)
+
+ # calculate probability and activity of gRNA combinations
+ p_combinations_gRNA_library = zeros(n_combinations_gRNA)
+ p_combinations_gRNA_act = zeros(n_combinations_gRNA)
+ for i in 1:n_combinations_gRNA
+ p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]]
+ p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]]
+ end
+
+ # normalize probability gRNA combinations
+ p_combinations_gRNA_library /= sum(p_combinations_gRNA_library)
+
+ # select pairwise gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes)
+ p_combinations_gRNA_library_interest = []
+ p_combinations_gRNA_act_interest = []
+ ind_combinations_gRNA_interest = []
+ for i in 1:n_combinations_gRNA
+ if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g)
+ push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i])
+ push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i])
+ push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i])
+ end
+ end
+
+ n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest)
+ p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^2
+
+ # sum up probabilities or gRNA combinations for corresponding gene knockout combinations
+ p_genes_matrix = zeros(x, x)
+ for i in 1:n_combinations_gRNA_interest
+ gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g))
+ gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g))
+ p_genes_matrix[gene1, gene2] += p_combinations_gRNA[i]
+ end
+
+ p_genes = collect([p_genes_matrix[i, j] for j in 2:size(p_genes_matrix, 1) for i in 1:j-1])
+ n_combinations_genes = length(p_genes)
+ combinations_pp = length(collect(combinations(1:r, 2)))
+
+ # Apply BioCCP function
+ γₓ₂ = expectation_fraction_collected(n_combinations_genes, N; p=p_genes, r=combinations_pp, normalize=false)
+
+ return γₓ₂
+end
+
+
+"""
+ BioCCP_γₓ₃(x,
+ N,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO)
+
+Computes the expected coverage of all triple combinations of gene knockouts (γx,3) for an experiment with given plant library size using BioCCP
+
+***INPUT***
+x: number of target genes in the experiment
+N: plant library size
+g: number of gRNAs designed per target gene
+r: number of gRNA sequences per combinatorial gRNA/Cas construct
+n_gRNA_total: total number of gRNAs in the experiment
+p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
+p_gRNA_edit: vector with genome editing efficiencies for all gRNAs
+ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
+
+***OUTPUT***
+γₓ₃: expected coverage of all triple combinations of gene knockouts
+"""
+function BioCCP_γₓ₃(x,
+ N,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO)
+
+ # how many triple combinations of gRNAs
+ ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 3))
+ n_combinations_gRNA = length(ind_combinations_gRNA)
+
+ # calculate probability and activity of triple gRNA combinations
+ p_combinations_gRNA_library = zeros(n_combinations_gRNA)
+ p_combinations_gRNA_act = zeros(n_combinations_gRNA)
+ for i in 1:n_combinations_gRNA
+ p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]] * p_gRNA_freq[ind_combinations_gRNA[i][3]]
+ p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]] * p_gRNA_edit[ind_combinations_gRNA[i][3]]
+ end
+
+ # normalize probability gRNA combinations
+ p_combinations_gRNA_library /= sum(p_combinations_gRNA_library)
+
+ # select triple gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes)
+ p_combinations_gRNA_library_interest = []
+ p_combinations_gRNA_act_interest = []
+ ind_combinations_gRNA_interest = []
+ for i in 1:n_combinations_gRNA
+ if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g) && ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][3]/g) && ceil(ind_combinations_gRNA[i][3]/g) != ceil(ind_combinations_gRNA[i][2]/g)
+ push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i])
+ push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i])
+ push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i])
+ end
+ end
+
+ n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest)
+ p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^3
+
+ # sum up probabilities or gRNA combinations for corresponding gene knockout combinations
+ p_genes_matrix = zeros(x, x, x)
+ for i in 1:n_combinations_gRNA_interest
+ gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g))
+ gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g))
+ gene3 = Int(ceil(ind_combinations_gRNA_interest[i][3]/g))
+ p_genes_matrix[gene1, gene2, gene3] += p_combinations_gRNA[i]
+ end
+
+ combinations_genes = collect(combinations(1:x, 3))
+ p_genes = []
+ for combination in combinations_genes
+ push!(p_genes, p_genes_matrix[combination[1], combination[2], combination[3]])
+ end
+
+ n_combinations_genes = length(p_genes)
+ combinations_pp = length(collect(combinations(1:r, 3)))
+
+ # Apply BioCCP function
+ γₓ₃ = expectation_fraction_collected(n_combinations_genes, N; p=p_genes, r=combinations_pp, normalize=false)
+
+ return γₓ₃
+end
+
+"""
+ BioCCP_Pₓ₃(x,
+ N,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO)
+
+Computes the probability of full coverage of all triple combinations of gene knockouts (Px,3) for an experiment with given plant library size using BioCCP
+
+***INPUT***
+x: number of target genes in the experiment
+N: plant library size
+g: number of gRNAs designed per target gene
+r: number of gRNA sequences per combinatorial gRNA/Cas construct
+n_gRNA_total: total number of gRNAs in the experiment
+p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
+p_gRNA_edit: vector with genome editing efficiencies for all gRNAs
+ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
+
+***OUTPUT***
+Pₓ₃: probability of full coverage of all triple combinations of gene knockouts
+"""
+function BioCCP_Pₓ₃(x,
+ N,
+ g,
+ r,
+ n_gRNA_total,
+ p_gRNA_freq,
+ p_gRNA_edit,
+ ϵ_KO)
+
+ # how many triple combinations of gRNAs
+ ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 3))
+ n_combinations_gRNA = length(ind_combinations_gRNA)
+
+ # calculate probability and activity of triple gRNA combinations
+ p_combinations_gRNA_library = zeros(n_combinations_gRNA)
+ p_combinations_gRNA_act = zeros(n_combinations_gRNA)
+ for i in 1:n_combinations_gRNA
+ p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]] * p_gRNA_freq[ind_combinations_gRNA[i][3]]
+ p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]] * p_gRNA_edit[ind_combinations_gRNA[i][3]]
+ end
+
+ # normalize probability gRNA combinations
+ p_combinations_gRNA_library /= sum(p_combinations_gRNA_library)
+
+ # select triple gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes)
+ p_combinations_gRNA_library_interest = []
+ p_combinations_gRNA_act_interest = []
+ ind_combinations_gRNA_interest = []
+ for i in 1:n_combinations_gRNA
+ if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g) && ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][3]/g) && ceil(ind_combinations_gRNA[i][3]/g) != ceil(ind_combinations_gRNA[i][2]/g)
+ push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i])
+ push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i])
+ push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i])
+ end
+ end
+
+ n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest)
+ p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^3
+
+ # sum up probabilities or gRNA combinations for corresponding gene knockout combinations
+ p_genes_matrix = zeros(x, x, x)
+ for i in 1:n_combinations_gRNA_interest
+ gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g))
+ gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g))
+ gene3 = Int(ceil(ind_combinations_gRNA_interest[i][3]/g))
+ p_genes_matrix[gene1, gene2, gene3] += p_combinations_gRNA[i]
+ end
+
+ combinations_genes = collect(combinations(1:x, 3))
+ p_genes = []
+ for combination in combinations_genes
+ push!(p_genes, p_genes_matrix[combination[1], combination[2], combination[3]])
+ end
+
+ n_combinations_genes = length(p_genes)
+ combinations_pp = length(collect(combinations(1:r, 3)))
+
+ # Apply BioCCP function
+ Pₓ₃ = success_probability(n_combinations_genes, N; p=p_genes, r=combinations_pp, normalize=false)
+
+ return Pₓ₃
+end
+
+
\ No newline at end of file
diff -r 000000000000 -r cc0957c46408 README.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst Thu May 12 17:39:18 2022 +0000
@@ -0,0 +1,4 @@
+MultiplexCrisprDOE
+==================
+provides simulation- and BioCCP-based approaches for computing the minimal plant library size
+that guarantees full combinatorial coverage (and other relevant statistics) for multiplex CRISPR/Cas experiments in plants.
\ No newline at end of file
diff -r 000000000000 -r cc0957c46408 main.jl
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/main.jl Thu May 12 17:39:18 2022 +0000
@@ -0,0 +1,612 @@
+using Pkg
+Pkg.add("Plots");
+Pkg.add("Distributions");
+Pkg.add("LinearAlgebra");
+Pkg.add("Combinatorics");
+Pkg.add("BioCCP");
+Pkg.add("ArgParse");
+Pkg.add("XLSX");
+Pkg.add("DataFrames");
+Pkg.add("Weave");
+Pkg.add("DataStructures");
+Pkg.add("PrettyTables");
+
+using Random
+using Plots
+using Distributions
+using LinearAlgebra
+using Combinatorics
+using BioCCP
+using ArgParse
+using XLSX
+using DataFrames
+using Weave
+using DataStructures
+using PrettyTables
+
+global current_dir = pwd()
+include("MultiplexCrisprDOE.jl");
+
+function main(args)
+
+ aps = ArgParseSettings("MultiplexCrisprDOE")
+
+ @add_arg_table! aps begin
+ "gfd" #, "gRNA_freq_dist"
+ action = :command # adds a command which will be read from an argument
+ help = "gRNA/Cas9 frequencies"
+ "ged" #, "gRNA_edit_dist"
+ action = :command
+ help = "gRNA/Cas9 editing efficiencies"
+ "sim" # simulation
+ action = :command
+ help = "simulation-based approaches for computing the minimal plant library size that guarantees full combinatorial coverage (and other relevant statistics)"
+ "ccp" # bioccp
+ action = :command
+ help = "BioCCP-based approaches for computing the minimal plant library size that guarantees full combinatorial coverage (and other relevant statistics)"
+ end
+
+ @add_arg_table! aps["gfd"] begin # add command arg_table: same as usual, but invoked on s["grna"]
+ "m"
+ arg_type = Int
+ help = "plant library size"
+ "sd"
+ arg_type = Int
+ help = "the standard deviation on the gRNA abundances (in terms of absolute or relative frequency)"
+ "l"
+ arg_type = Int
+ help = "minimal gRNA abundance (in terms of absolute or relative frequency)"
+ "u"
+ arg_type = Int
+ help = "maximal gRNA abundance (in terms of absolute or relative frequency)"
+ "n" #, "--n_gRNA_total"
+ arg_type = Int
+ help = "the total number of gRNAs in the experiment"
+ "--normalize"
+ action = :store_true
+ # arg_type = Bool
+ # default = true
+ help = "if provided, the gRNA abundances (absolute frequencies) are converted into relative frequencies"
+ "--visualize"
+ action = :store_true
+ # arg_type = Bool
+ # default = false
+ help = "if provided, a histogram of all gRNA abundances is plotted"
+ "--out_file"
+ arg_type = String
+ default = "gRNA_reads"
+ help = "Output excel file prefix"
+ end
+
+ @add_arg_table! aps["ged"] begin # add command arg_table: same as usual, but invoked on s["grna"]
+ "f_act"
+ arg_type = Float16
+ help = "fraction of all gRNAs that is active"
+ "eps_edit_act"
+ arg_type = Float16
+ help = "Average genome editing efficiency for active gRNAs - mean of the genome editing efficiency distribution for active gRNAs"
+ "eps_edit_inact"
+ arg_type = Float16
+ help = "Average genome editing efficiency for inactive gRNAs - mean of the genome editing efficiency distribution for inactive gRNAs"
+ "sd_act"
+ arg_type = Float16
+ help = "standard deviation of the genome editing efficiency distributions for active and inactive gRNAs"
+ "n_gRNA_total"
+ arg_type = Int
+ help = "the total number of gRNAs in the experiment"
+ "--visualize"
+ action = :store_true
+ # arg_type = Bool
+ # default = false
+ help = "if provided a histogram of all genome editing efficiency is plotted"
+ "--out_file"
+ arg_type = String
+ default = "gRNA_edit"
+ help = "Output excel file prefix"
+ end
+
+ @add_arg_table! aps["sim"] begin
+ "M" #, "--mode"
+ # action = :command
+ # dest_name = "M"
+ arg_type = Int
+ range_tester = x -> 1 <= x <= 4
+ help = """Select simulation mode (1: simulate_Nₓ₁; 2: simulate_Nₓ₂; 3: simulate_Nₓ₃; 4: simulate_Nₓ₂_countKOs)"""
+ "x"
+ arg_type = Int
+ help = "number of target genes in the experiment"
+ "g"
+ arg_type = Int
+ help = "number of gRNAs designed per target gene"
+ "r"
+ arg_type = Int
+ help = "number of gRNA sequences per combinatorial gRNA/Cas construct"
+ "t"#, "--n_gRNA_total"
+ arg_type = Int
+ help = "total number of gRNAs in the experiment"
+ "f"#, "--p_gRNA_freq"
+ arg_type = String #Vector{Float64}
+ help = "vector with relative frequencies for all gRNAs in the construct library (normalized!)"
+ "e"#, "--p_gRNA_edit"
+ arg_type = String #Vector{Float64}
+ help = "vector with genome editing efficiencies for all gRNAs"
+ "E"#, "--ϵ_KO"
+ arg_type=Float16
+ help = "global knockout efficiency; fraction of mutations leading to effective gene knockout"
+ "--i", "--iter"
+ arg_type = Int
+ default = 500
+ help = "number of CRISPR/Cas experiments that are simulated"
+ end
+
+ @add_arg_table! aps["ccp"] begin
+ "M"#, "--mode"
+ arg_type = Int
+ range_tester = x -> 1 <= x <= 9
+ help = """Select BioCCP mode (1: BioCCP_Nₓ₁; 2: BioCCP_Nₓ₂; 3: BioCCP_Nₓ₃; 4: BioCCP_Pₓ₁; 5: BioCCP_Pₓ₂ ;
+ 6: BioCCP_Pₓ₃; 7: BioCCP_γₓ₁; 8: BioCCP_γₓ₂; 9: BioCCP_γₓ₃)"""
+ "x"
+ arg_type = Int
+ help = "number of target genes in the experiment"
+ "N"
+ arg_type = Int
+ help = "(Minimum) plant library size"
+ "--s", "--step"
+ arg_type = Int
+ default = 5
+ range_tester = x -> 1 <= x <= 10
+ help = "Step size for plant library size (optional for calculating expected combinatorial coverage / plant library size)"
+ "--MN", "--max_pl_size"
+ arg_type = Int
+ default = 4000
+ help = "Maximum plant library size (optional for calculating expected combinatorial coverage / plant library size)"
+ "g"
+ arg_type = Int
+ help = "number of gRNAs designed per target gene"
+ "r"
+ arg_type = Int
+ help = "number of gRNA sequences per combinatorial gRNA/Cas construct"
+ "t"#, "--n_gRNA_total"
+ arg_type = Int
+ help = "total number of gRNAs in the experiment"
+ "f"#, "--p_gRNA_freq"
+ arg_type = String #Vector{Float64}
+ help = "File containing vector with relative frequencies for all gRNAs in the construct library (normalized!)"
+ "e"#, "--p_gRNA_edit"
+ arg_type = String #Vector{Float64}
+ help = "File containing vector with genome editing efficiencies for all gRNAs"
+ "E"#, "--ϵ_KO"
+ arg_type=Float16
+ help = "global knockout efficiency; fraction of mutations leading to effective gene knockout"
+ end
+
+ parsed_args = parse_args(args, aps)
+ command_args = parsed_args[parsed_args["%COMMAND%"]]
+ println(command_args)
+
+ tool_info = OrderedDict()
+ args_info = OrderedDict()
+ grna_dict = Dict()
+ out_dict = Dict()
+ if parsed_args["%COMMAND%"] == "gfd"
+ tool_info["method"] = "gRNA_ frequency _distribution"
+ tool_info["description"] = "Generates vector with frequencies in the combinatorial "*
+ "gRNA/Cas9 construct library for all gRNAs"
+ tool_info["mode"] = ""
+ tool_info["mode_description"] = ""
+ args_info["Plant library size"] = command_args["m"]
+ args_info["SD on the gRNA abundances"] = command_args["sd"]
+ args_info["Minimal gRNA abundance"] = command_args["l"]
+ args_info["Maximal gRNA abundance"] = command_args["u"]
+ args_info["Total number of gRNAs"] = command_args["n"]
+ args_info["Convert gRNA abundances to relative frequencies"] = string(command_args["normalize"])
+ args_info["Plot gRNA abundances"] = string(command_args["visualize"])
+
+ m = command_args["m"]
+ sd = command_args["sd"]
+ l = command_args["l"]
+ u = command_args["u"]
+ n_gRNA_total = command_args["n"]
+ norm = command_args["normalize"]
+ viz = command_args["visualize"]
+
+ println(string(norm))
+ println(string(viz))
+
+ p_gRNA_reads = gRNA_frequency_distribution(m, sd, l, u, n_gRNA_total; normalize = norm, visualize = false)
+ grna_dict["p_gRNA_reads"] = p_gRNA_reads
+
+ # println(p_gRNA_reads)
+ # write to excel file
+ fn = command_args["out_file"] * ".xlsx"
+ labels = ["gRNA_read"]
+ columns = Vector()
+ push!(columns, p_gRNA_reads)
+ XLSX.openxlsx(fn, mode="w") do xf
+ sheet = xf[1]
+ XLSX.writetable!(sheet, columns, labels)
+ end
+
+ out_dict["output file"] = fn
+
+ elseif parsed_args["%COMMAND%"] == "ged"
+ tool_info["method"] = "gRNA_ edit _distribution"
+ tool_info["description"] = "Generates vector with genome editing efficiencies "*
+ "for all the gRNAs in the experiment"
+ tool_info["mode"] = ""
+ tool_info["mode_description"] = ""
+ args_info["Fraction of active gRNAs"] = command_args["f_act"]
+ args_info["Average genome editing efficiency of active gRNAs"] = command_args["eps_edit_act"]
+ args_info["Average genome editing efficiency of inactive gRNAs"] = command_args["eps_edit_inact"]
+ args_info["Standard deviation"] = command_args["sd_act"]
+ args_info["Total number of gRNAs"] = command_args["n_gRNA_total"]
+ args_info["Plot genome editing efficiency"] = string(command_args["visualize"])
+
+ f_act = command_args["f_act"]
+ eps_edit_act = command_args["eps_edit_act"]
+ eps_edit_inact = command_args["eps_edit_inact"]
+ sd_act = command_args["sd_act"]
+ n_gRNA_total = command_args["n_gRNA_total"]
+ viz = ["visualize"]
+
+ p_gRNA_edit = gRNA_edit_distribution(f_act, eps_edit_act, eps_edit_inact, sd_act, n_gRNA_total; visualize=false)
+ grna_dict["p_gRNA_edit"] = p_gRNA_edit
+ # write to excel file
+ fn = command_args["out_file"] * ".xlsx"
+ labels = ["gRNA_edit_efficiency"]
+ columns = Vector()
+ push!(columns, p_gRNA_edit)
+ XLSX.openxlsx(fn, mode="w") do xf
+ sheet = xf[1]
+ XLSX.writetable!(sheet, columns, labels)
+ end
+
+ out_dict["output file"] = fn
+
+ elseif parsed_args["%COMMAND%"] == "sim" || parsed_args["%COMMAND%"] == "ccp"
+
+ filename = command_args["f"]
+ sheet = 1
+ data = DataFrame(XLSX.readtable(filename, sheet)...)
+ p_gRNA_reads = data[!,"gRNA_read"]
+ p_gRNA_reads_normalized = p_gRNA_reads/sum(p_gRNA_reads) # normalize
+ f = p_gRNA_reads_normalized
+ grna_dict["p_gRNA_reads"] = f
+
+ filename = command_args["e"]
+ sheet = 1
+ data = DataFrame(XLSX.readtable(filename, sheet)...)
+ p_gRNA_edit = data[!,"gRNA_edit_efficiency"]
+ e = p_gRNA_edit
+ grna_dict["p_gRNA_edit"] = e
+
+ x = command_args["x"]
+ g = command_args["g"]
+ r = command_args["r"]
+ t = command_args["t"] # n_gRNA_total
+ E = command_args["E"] # ϵ_KO # iter = 500
+
+ args_info["# of target genes in the experiment"] = command_args["x"]
+ args_info["# of gRNAs designed per target gene"] = command_args["g"]
+ args_info["# of gRNAs / combi gRNA/Cas construct"] = command_args["r"]
+ args_info["Total number of gRNAs"] = command_args["t"]
+ args_info["Relative frequencies for all gRNAs"] = command_args["f"]
+ args_info["Genome editing efficiencies for all gRNAs"] = command_args["e"]
+ args_info["Global knockout efficiency"] = command_args["E"]
+
+ if parsed_args["%COMMAND%"] == "sim"
+ tool_info["method"] = "simulation"
+ tool_info["description"] = "simulation-based approaches for computing the minimal "*
+ "plant library size that guarantees full combinatorial "*
+ "coverage (and other relevant statistics)"
+ i = command_args["i"] # iter = 500
+ args_info["# of simulated experiments"] = command_args["i"]
+
+ if command_args["M"] == 1
+ tool_info["mode"] = "simulate_Nx1"
+ tool_info["mode_description"] = "Computes the expected value and the standard deviation "*
+ "of the minimal plant library size for full coverage of "*
+ "all single gene knockouts (E[Nx,1] and σ[Nx,1]) using simulation"
+ E_sim, sd_sim = simulate_Nₓ₁(x, g, r, t, f, e, E; iter=i)
+ out_dict["E_sim"] = E_sim
+ out_dict["sd_sim"] = sd_sim
+
+ elseif command_args["M"] == 2
+ tool_info["mode"] = "simulate_Nx2"
+ tool_info["mode_description"] = "Computes the expected value and the standard deviation of "*
+ "the minimal plant library size for full coverage of "*
+ "all pairwise combinations of gene knockouts in a "*
+ "multiplex CRISPR/Cas experiment (E[Nx,2] and σ[Nx,2]) using simulation"
+
+ E_sim, sd_sim = simulate_Nₓ₂(x, g, r, t, f, e, E; iter=i)
+ out_dict["E_sim"] = E_sim
+ out_dict["sd_sim"] = sd_sim
+
+ elseif command_args["M"] == 3
+ tool_info["mode"] = "simulate_Nx3"
+ tool_info["mode_description"] = "Computes the expected value and the standard deviation of "*
+ "the minimal plant library size for full coverage of "*
+ "all triple combinations of gene knockouts in a "*
+ "multiplex CRISPR/Cas experiment (E[Nx,3] and σ[Nx,3]) using simulation"
+
+ E_sim, sd_sim = simulate_Nₓ₃(x, g, r, t, f, e, E; iter=i)
+ out_dict["E_sim"] = E_sim
+ out_dict["sd_sim"] = sd_sim
+
+ elseif command_args["M"] == 4
+ tool_info["mode"] = "simulate_Nx2_countKOs"
+ tool_info["mode_description"] = "Counts of the number of knockouts per plant in the experiment"
+
+ n_KOs_vec = simulate_Nₓ₂_countKOs(x, g, r, t, f, e, E; iter=i)
+ out_dict["n_KOs_vec"] = n_KOs_vec
+
+ # write to excel file
+ fn = "countKOs.xlsx"
+ labels = ["countKOs"]
+ columns = Vector()
+ push!(columns, n_KOs_vec)
+ XLSX.openxlsx(fn, mode="w") do xf
+ sheet = xf[1]
+ XLSX.writetable!(sheet, columns, labels)
+ end
+
+ out_dict["output file"] = fn
+
+ end
+
+ elseif parsed_args["%COMMAND%"] == "ccp"
+ tool_info["method"] = "BioCCP"
+ tool_info["description"] = "BioCCP-based approaches for computing the minimal "*
+ "plant library size that guarantees full combinatorial "*
+ "coverage (and other relevant statistics)"
+
+ N = command_args["N"]
+ if haskey(command_args,"s") && haskey(command_args,"MN")
+ s = command_args["s"]
+ MN = command_args["MN"]
+ args_info["Step size"] = command_args["s"]
+ args_info["Maximum Plant library size"] = command_args["MN"]
+ end
+ args_info["Plant library size"] = command_args["N"]
+
+
+ if command_args["M"] == 1
+ tool_info["mode"] = "BioCCP_Nx1"
+ tool_info["mode_description"] = "Computes the expected value and the standard deviation of "*
+ "the minimal plant library size for full coverage of all "*
+ "single gene knockouts (E[Nx,1] and σ[Nx,1]) using BioCCP"
+
+ E_sim, sd_sim = BioCCP_Nₓ₁(x, g, r, t, f, e, E)
+ out_dict["E_sim"] = E_sim
+ out_dict["sd_sim"] = sd_sim
+
+ elseif command_args["M"] == 2
+ tool_info["mode"] = "BioCCP_Nx2"
+ tool_info["mode_description"] = "Computes the expected value and the standard deviation of "*
+ "the minimal plant library size for full coverage of all "*
+ "pairwise combinations of gene knockouts in a multiplex "*
+ "CRISPR/Cas experiment (E[Nx,2] and σ[Nx,2]) using BioCCP"
+
+ E_sim, sd_sim = BioCCP_Nₓ₂(x, g, r, t, f, e, E)
+ out_dict["E_sim"] = E_sim
+ out_dict["sd_sim"] = sd_sim
+
+ elseif command_args["M"] == 3
+ tool_info["mode"] = "BioCCP_Nx3"
+ tool_info["mode_description"] = "Computes the expected value and the standard deviation of "*
+ "the minimal plant library size for full coverage of all triple combinations of "*
+ "gene knockouts in a multiplex CRISPR/Cas experiment (E[Nx,3] and σ[Nx,3]) using BioCCP"
+
+ E_sim, sd_sim = BioCCP_Nₓ₃(x, g, r, t, f, e, E)
+ out_dict["E_sim"] = E_sim
+ out_dict["sd_sim"] = sd_sim
+
+ elseif command_args["M"] == 4
+ tool_info["mode"] = "BioCCP_Px1"
+ tool_info["mode_description"] = "Computes the probability of full coverage of "*
+ "all single gene knockouts (Px,1) for an experiment with given "*
+ "plant library size using BioCCP"
+
+ if s != nothing && MN != nothing
+ plant_library_sizes = N:s:MN
+ else
+ plant_library_sizes = N
+ end
+ PT = []
+ global N_95_P = nothing
+ for N in plant_library_sizes
+ Pr = BioCCP_Pₓ₁(x, N, g, r, t, f, e, E)
+ push!(PT, Pr)
+ if Pr < 0.95
+ N_95_P = N
+ end
+ end
+ # P = BioCCP_Pₓ₁(x, N, g, r, t, f, e, E)
+ out_dict["P_sim"] = PT
+ out_dict["N_95_P"] = N_95_P
+ out_dict["pls"] = plant_library_sizes
+
+ elseif command_args["M"] == 5
+ tool_info["mode"] = "BioCCP_Px2"
+ tool_info["mode_description"] = "Computes the probability of full coverage of "*
+ "all pairwise combinations of gene knockouts (Px,2) "*
+ "for an experiment with given plant library size using BioCCP"
+
+ if s != nothing && MN != nothing
+ plant_library_sizes = N:s:MN
+ else
+ plant_library_sizes = N
+ end
+ PT = []
+ global N_95_P = nothing
+ for N in plant_library_sizes
+ Pr = BioCCP_Pₓ₂(x, N, g, r, t, f, e, E)
+ push!(PT, Pr)
+ if Pr < 0.95
+ N_95_P = N
+ end
+ end
+ # P = BioCCP_Pₓ₂(x, N, g, r, t, f, e, E)
+ out_dict["P_sim"] = PT
+ out_dict["N_95_P"] = N_95_P
+ out_dict["pls"] = plant_library_sizes
+
+ elseif command_args["M"] == 6
+ tool_info["mode"] = "BioCCP_Px3"
+ tool_info["mode_description"] = "Computes the probability of full coverage of all "*
+ "triple combinations of gene knockouts (Px,3) for an experiment "*
+ "with given plant library size using BioCCP"
+
+ if s != nothing && MN != nothing
+ plant_library_sizes = N:s:MN
+ else
+ plant_library_sizes = N
+ end
+ PT = []
+ global N_95_P = nothing
+ for N in plant_library_sizes
+ Pr = BioCCP_Pₓ₃(x, N, g, r, t, f, e, E)
+ push!(PT, Pr)
+ if Pr < 0.95
+ N_95_P = N
+ end
+ end
+ # P = BioCCP_Pₓ₃(x, N, g, r, t, f, e, E)
+ out_dict["P_sim"] = PT
+ out_dict["N_95_P"] = N_95_P
+ out_dict["pls"] = plant_library_sizes
+
+ elseif command_args["M"] == 7
+ tool_info["mode"] = "BioCCP_γx1"
+ tool_info["mode_description"] = "Computes the expected coverage of all "*
+ "single gene knockouts (E[γx,1]) for an experiment "*
+ "with given plant library size using BioCCP"
+ if s != nothing && MN != nothing
+ plant_library_sizes = N:s:MN
+ else
+ plant_library_sizes = N
+ end
+ exp_cov = []
+ global N_95 = nothing
+ for N in plant_library_sizes
+ E_cov = BioCCP_γₓ₁(x, N, g, r, t, f, e, E)
+ push!(exp_cov, E_cov)
+ if E_cov < 0.95
+ N_95 = N
+ end
+ end
+ # E_sim, sd_sim = BioCCP_γₓ₁(x, N, g, r, t, f, e, E)
+ out_dict["E_cov"] = exp_cov
+ out_dict["N_95"] = N_95
+ out_dict["pls"] = plant_library_sizes
+
+ elseif command_args["M"] == 8
+ tool_info["mode"] = "BioCCP_γx2"
+ tool_info["mode_description"] = "Computes the expected coverage of all "*
+ "pairwise combinations of gene knockouts (E[γx,2]) for an experiment with "*
+ "given plant library size using BioCCP"
+
+ if s != nothing && MN != nothing
+ plant_library_sizes = N:s:MN
+ else
+ plant_library_sizes = N
+ end
+ exp_cov = []
+ global N_95 = nothing
+ for N in plant_library_sizes
+ E_cov = BioCCP_γₓ₂(x, N, g, r, t, f, e, E)
+ push!(exp_cov, E_cov)
+ if E_cov < 0.95
+ N_95 = N
+ end
+ end
+ # E_sim, sd_sim = BioCCP_γₓ₂(x, N, g, r, t, f, e, E)
+ out_dict["E_cov"] = exp_cov
+ out_dict["N_95"] = N_95
+ out_dict["pls"] = plant_library_sizes
+
+ elseif command_args["M"] == 9
+ tool_info["mode"] = "BioCCP_γx3"
+ tool_info["mode_description"] = "Computes the expected coverage of all "*
+ "triple combinations of gene knockouts (E[γx,3]) for an experiment with "*
+ "given plant library size using BioCCP"
+
+ if s != nothing && MN != nothing
+ plant_library_sizes = N:s:MN
+ else
+ plant_library_sizes = N
+ end
+ exp_cov = []
+ global N_95 = nothing
+ for N in plant_library_sizes
+ E_cov = BioCCP_γₓ₃(x, N, g, r, t, f, e, E)
+ push!(exp_cov, E_cov)
+ if E_cov < 0.95
+ N_95 = N
+ end
+ end
+ # E_sim, sd_sim = BioCCP_γₓ₃(x, N, g, r, t, f, e, E)
+ out_dict["E_cov"] = exp_cov
+ out_dict["N_95"] = N_95
+ out_dict["pls"] = plant_library_sizes
+
+ end
+ end
+ end
+
+ println(parsed_args)
+ println("Parsed args:")
+ for (key,val) in parsed_args
+ println(" $key => $(repr(val))")
+ end
+ println()
+ println("Command: ", parsed_args["%COMMAND%"])
+ # h1 = histogram(grna_dict["p_gRNA_reads"], label="",
+ # xlabel="Number of reads per gRNA",
+ # linecolor="white",
+ # normalize=:probability,
+ # xtickfontsize=10,ytickfontsize=10,
+ # color=:mediumturquoise, size=(600,350), bins = 25,
+ # ylabel="Relative frequency",
+ # title="gRNA frequency distribution")
+
+ # h2 = histogram(grna_dict["p_gRNA_edit"],
+ # normalize = :probability,
+ # linecolor = "white",
+ # label="",
+ # color=:turquoise4,
+ # xtickfontsize=10,ytickfontsize=10, xlim = (0, 1),
+ # xticks=(0:0.1:1),
+ # bins = 150,
+ # xlabel="gRNA editing efficiency",
+ # ylabel="Relative frequency",
+ # title="gRNA genome editing effiency distribution")
+
+ # p_plot = plot(plant_library_sizes, Pₓ₂, label="Pₓ₂",
+ # title="Probability of full combinatorial coverage with respect to plant library size",
+ # xlabel="N", ylabel="Pₓₖ",
+ # xticks = (0:500:50000, string.(0:500:50000)),
+ # size=(900,400), color=:turquoise4, linewidth=2)
+ # hline!([0.95], linestyle=:dash, color=:grey, label="Pₓₖ = 0.95", legend=:bottomright)
+
+ # exp_plot = plot(plant_library_sizes, expected_γₓ₂,
+ # label="E[γₓ₂]", title="Expected combinatorial coverage w.r.t. plant library size",
+ # xlabel="N", ylabel="E[γₓₖ]",
+ # xticks = (0:500:50000, string.(0:500:50000)),
+ # size=(800,400), color=:turquoise4, linewidth=2)
+ # hline!([0.95], linestyle=:dash, color=:grey, label="E[γₓₖ] = 0.95", legend=:bottomright)
+
+
+ ENV["GKSwstype"]="nul"
+ weave(string(@__DIR__) * "/report.jmd",
+ args = (parsed_args = parsed_args,
+ tool_info = tool_info,
+ args_info = args_info,
+ grna_dict = grna_dict,
+ #h1 = h1, h2 = h2,
+ output = out_dict);
+ doctype = "md2html", out_path = :pwd)
+ ENV["GKSwstype"]="gksqt"
+end
+
+main(ARGS)
diff -r 000000000000 -r cc0957c46408 mcdoe.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mcdoe.xml Thu May 12 17:39:18 2022 +0000
@@ -0,0 +1,341 @@
+
Method: BioCCP
+Description: BioCCP-based approaches for computing the minimal plant library size that guarantees full combinatorial coverage (and other relevant statistics)
+Mode: BioCCP_γx3
+Mode description: Computes the expected coverage of all triple combinations of gene knockouts (E[γx,3]) for an experiment with given plant library size using BioCCP
+10 rows × 2 columns
Argument | Value | |
---|---|---|
Any | Any | |
1 | # of target genes in the experiment | 20 |
2 | # of gRNAs designed per target gene | 6 |
3 | # of gRNAs / combi gRNA/Cas construct | 3 |
4 | Total number of gRNAs | 120 |
5 | Relative frequencies for all gRNAs | example_data.xlsx |
6 | Genome editing efficiencies for all gRNAs | example_data.xlsx |
7 | Global knockout efficiency | 0.8 |
8 | Step size | 5 |
9 | Maximum Plant library size | 6000 |
10 | Plant library size | 0 |
+At a given number of plants, what is the expected coverage of pairwise gene + knockout combinations? +N_95: 6000 ++ + + + +
Method: gRNA_ edit _distribution
+Description: Generates vector with genome editing efficiencies for all the gRNAs in the experiment
+Mode:
+Mode description:
+6 rows × 2 columns
Argument | Value | |
---|---|---|
Any | Any | |
1 | Fraction of active gRNAs | 0.9 |
2 | Average genome editing efficiency of active gRNAs | 0.95 |
3 | Average genome editing efficiency of inactive gRNAs | 0.1 |
4 | Standard deviation | 0.01 |
5 | Total number of gRNAs | 120 |
6 | Plot genome editing efficiency | false |
+Output written to: +gRNA_edit.xlsx ++ + + +
Method: gRNA_ frequency _distribution
+Description: Generates vector with frequencies in the combinatorial gRNA/Cas9 construct library for all gRNAs
+Mode:
+Mode description:
+7 rows × 2 columns
Argument | Value | |
---|---|---|
Any | Any | |
1 | Plant library size | 75 |
2 | SD on the gRNA abundances | 25 |
3 | Minimal gRNA abundance | 50 |
4 | Maximal gRNA abundance | 100 |
5 | Total number of gRNAs | 120 |
6 | Convert gRNA abundances to relative frequencies | false |
7 | Plot gRNA abundances | false |
+Output written to: +gRNA_reads.xlsx ++ + + +
Method: simulation
+Description: simulation-based approaches for computing the minimal plant library size that guarantees full combinatorial coverage (and other relevant statistics)
+Mode: simulateNx2countKOs
+Mode description: Counts of the number of knockouts per plant in the experiment
+8 rows × 2 columns
Argument | Value | |
---|---|---|
Any | Any | |
1 | # of target genes in the experiment | 20 |
2 | # of gRNAs designed per target gene | 6 |
3 | # of gRNAs / combi gRNA/Cas construct | 2 |
4 | Total number of gRNAs | 120 |
5 | Relative frequencies for all gRNAs | example_data.xlsx |
6 | Genome editing efficiencies for all gRNAs | example_data.xlsx |
7 | Global knockout efficiency | 0.8 |
8 | # of simulated experiments | 10 |
+Output written to: +countKOs.xlsx ++ + + +