# HG changeset patch # User padge # Date 1652377158 0 # Node ID cc0957c46408f0af1f06f6ad73b192001188202a "planemo upload for repository https://github.com/kirstvh/MultiplexCrisprDOE commit b6c1b1860eee82b06ed4a592d1f9eee6886be318-dirty" diff -r 000000000000 -r cc0957c46408 MultiplexCrisprDOE.jl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MultiplexCrisprDOE.jl Thu May 12 17:39:18 2022 +0000 @@ -0,0 +1,1048 @@ +""" + gRNA_frequency_distribution(m, + sd, + l, + u, + n_gRNA_total; + normalize = true, + visualize = false) + +Generates vector with frequencies in the combinatorial gRNA/Cas9 construct library for all gRNAs + +***INPUT*** +m: the average abundance of the gRNAs (in terms of absolute or relative frequency) +sd: the standard deviation on the gRNA abundances (in terms of absolute or relative frequency) +l: minimal gRNA abundance (in terms of absolute or relative frequency) +u: maximal gRNA abundance (in terms of absolute or relative frequency) +n_gRNA_total: the total number of gRNAs in the experiment +normalize: if set to "true", the gRNA abundances (absolute frequencies) are converted into relative frequencies +visualize: if set to "true", a histogram of all gRNA abundances is plotted + +***OUTPUT*** +p_gRNA_freq: vector with frequencies for all gRNAs in the construct library +""" +function gRNA_frequency_distribution(m, + sd, + l, + u, + n_gRNA_total; + normalize = true, + visualize = false) + + d_gRNA_freq = truncated(Normal(m, sd), l, u) # gRNA frequency distribution + p_gRNA_freq = collect(rand(d_gRNA_freq, n_gRNA_total)) # sample gRNA frequencies from distribution + + if normalize # convert into relative frequencies + p_gRNA_freq /= sum(p_gRNA_freq) + end + + if visualize + return histogram(p_gRNA_freq, label="", + xlabel="Number of reads per gRNA", + linecolor="white", + normalize=:probability, + xtickfontsize=10,ytickfontsize=10, + color=:mediumturquoise, size=(600,350), bins = 25, + ylabel="Relative frequency", + title="gRNA frequency distribution") + else + return p_gRNA_freq + end +end + +""" + gRNA_edit_distribution(f_act, + ϵ_edit_act, + ϵ_edit_inact, + sd_act, + n_gRNA_total; + visualize = false) + +Generates vector with genome editing efficiencies for all the gRNAs in the experiment. + +***INPUT*** +f_act: fraction of all gRNAs that is active +ϵ_edit_act: Average genome editing efficiency for active gRNAs - mean of the genome editing efficiency distribution for active gRNAs +ϵ_edit_inact: Average genome editing efficiency for inactive gRNAs - mean of the genome editing efficiency distribution for inactive gRNAs +sd_act: standard deviation of the genome editing efficiency distributions for active and inactive gRNAs +n_gRNA_total: the total number of gRNAs in the experiment +visualize: if set to "true", a histogram of all genome editing efficiency is plotted + +***OUTPUT*** +p_gRNA_edit: vector with genome editing efficiencies for all gRNAs +""" +function gRNA_edit_distribution(f_act, ϵ_edit_act, ϵ_edit_inact, sd_act, n_gRNA_total; visualize=false) + d_act = Binomial(1, f_act) # there is a probability f_act that a gRNA is active + d_high_act = truncated(Normal(ϵ_edit_act, sd_act), 0.01, 1) # average genome editing efficiency for active gRNAs is equal to ϵ_edit_act + d_low_act = truncated(Normal(ϵ_edit_inact, sd_act), 0.01, 1) # average genome editing efficiency for inactive gRNAs is equal to ϵ_edit_inact + p_gRNA_edit = zeros(n_gRNA_total) # initialize vector with genome editing efficiencies for gRNAs + + for i in 1:n_gRNA_total + if rand(d_act, 1) == [1] # gRNA is active + p_gRNA_edit[i] = rand(d_high_act, 1)[1] + else # gRNA is inactive + p_gRNA_edit[i] = rand(d_low_act, 1)[1] + end + end + + if visualize + return histogram(p_gRNA_edit, + normalize = :probability, + linecolor = "white", + label="", + color=:turquoise4, + xtickfontsize=10,ytickfontsize=10, xlim = (0, 1), + xticks=(0:0.1:1), + bins = 150, + xlabel="gRNA editing efficiency", + ylabel="Relative frequency", + title="gRNA genome editing effiency distribution") + + else + return p_gRNA_edit + end +end + +""" + simulate_Nₓ₁(x, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO; + iter = 500) + +Computes the expected value and the standard deviation of the minimal plant library size for full coverage of all single gene knockouts (E[Nx,1] and σ[Nx,1]) using simulation + +***INPUT*** +x: number of target genes in the experiment +g: number of gRNAs designed per target gene +r: number of gRNA sequences per combinatorial gRNA/Cas construct +n_gRNA_total: total number of gRNAs in the experiment +p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!) +p_gRNA_edit: vector with genome editing efficiencies for all gRNAs +ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout +iter: number of CRISPR/Cas experiments that are simulated to obtain E[Nₓ₁] and σ[Nₓ₁] + +***OUTPUT*** +E_Nₓ₁: expected value of the plant library size for full coverage of all single gene knockouts +sd_Nₓ₁: standard deviation on the plant library size for full coverage of all single gene knockouts +""" +function simulate_Nₓ₁(x, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO; + iter = 500) + + @assert x * g == n_gRNA_total + Nₓ₁_vec = [] #stores number of plants to reach full coverage for each simulated experiment + for i in 1:iter + genes_vec = [] # Initialize vector to store single gene knockouts that are observed in plants + Nₓ₁ = 0 + while genes_vec != collect(1:x) # check if all possible single gene knockouts are present: if no full coverage, sample an additional plant + Nₓ₁ += 1 # count how many plants must be sampled to observe all single gene knockouts + + # sample combinatorial gRNA/Cas9 construct + gRNA_indices_construct = findall((rand(Multinomial(r, p_gRNA_freq))) .!= 0) + + # execute mutations + gRNA_indices_mutations = [gRNA for gRNA in gRNA_indices_construct if rand(Binomial(1, p_gRNA_edit[gRNA])) == 1] + + # effective gene knockout (loss-of-function) ? + gRNA_indices_KO = [gRNA for gRNA in gRNA_indices_mutations if rand(Binomial(1, ϵ_KO)) == 1] + + # which genes are knocked out? + genes_indices_KO = Int.(ceil.(gRNA_indices_KO / g)) + append!(genes_vec, genes_indices_KO) + + # update vector with observed gene knockouts + genes_vec = Int.(sort(unique(genes_vec))) + end + push!(Nₓ₁_vec, Nₓ₁) # add plant library size for full coverage of current experiment to vector + end + + # Calculate expected value and standard deviation + E_Nₓ₁ = mean(Nₓ₁_vec); + sd_Nₓ₁ = std(Nₓ₁_vec) + + return E_Nₓ₁, sd_Nₓ₁ +end + + +""" + BioCCP_Nₓ₁(x, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO) + +Computes the expected value and the standard deviation of the minimal plant library size for +full coverage of all single gene knockouts (E[Nx,1] and σ[Nx,1]) using BioCCP + +***INPUT*** +x: number of target genes in the experiment +g: number of gRNAs designed per target gene +r: number of gRNA sequences per combinatorial gRNA/Cas construct +n_gRNA_total: total number of gRNAs in the experiment +p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!) +p_gRNA_edit: vector with genome editing efficiencies for all gRNAs +ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout + +***OUTPUT*** +E_Nₓ₁ : expected value of the plant library size for full coverage of all single gene knockouts +sd_Nₓ₁ : standard deviation on the plant library size for full coverage of all single gene knockouts +""" +function BioCCP_Nₓ₁(x, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO) + + # prepare input for BioCCP functions + p_gRNAs = p_gRNA_freq .* p_gRNA_edit * ϵ_KO # calculate probability for each gRNA to induce effective gene knockout + p_genes = [sum(p_gRNAs[i:i+g-1]) for i in 1:g:n_gRNA_total] # obtain probability of single gene knockout by summing up probability of all corresponding gRNAs to induce effective gene knockout + + # Apply BioCCP functions + E_Nₓ₁ = expectation_minsamplesize(x; p=p_genes, r=r, normalize=false) + sd_Nₓ₁ = std_minsamplesize(x; p=p_genes, r=r, normalize=false) + + return E_Nₓ₁, sd_Nₓ₁ +end + + +""" + simulate_Nₓ₂(x, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO; + iter=500) + +Computes the expected value and the standard deviation of the minimal plant library size for full coverage of all pairwise combinations of gene knockouts +in a multiplex CRISPR/Cas experiment (E[Nx,2] and σ[Nx,2]) using simulation + +***INPUT*** +x: number of target genes in the experiment +g: number of gRNAs designed per target gene +r: number of gRNA sequences per combinatorial gRNA/Cas construct +n_gRNA_total: total number of gRNAs in the experiment +p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!) +p_gRNA_edit: vector with genome editing efficiencies for all gRNAs +ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout +iter: number of CRISPR/Cas experiments that are simulated to obtain E[Nₓ₂] and σ[Nₓ₂] + +***OUTPUT*** +E_Nₓ₂ : expected value of the plant library size for full coverage of all pairwise combinations of gene knockouts +sd_Nₓ₂ : standard deviation on the plant library size for full coverage of all pairwise combinations of gene knockouts +""" +function simulate_Nₓ₂(x, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO; + iter = 500) + + @assert x * g == n_gRNA_total + Nₓ₂_vec = [] #stores number of plants to reach full coverage of all pairwise combinations of gene knockouts for each simulated experiment + + for i in 1:iter + X_interactions_count = zeros(x, x) # Initialize matrix to count pairwise interactions + Nₓ₂ = 0 + while X_interactions_count != ones(x, x) # check if all pairwise combinations are present + Nₓ₂ += 1 # count how many plants must be sampled to fill pairwise interaction matrix + + # sample combinatorial gRNA/Cas9 construct + gRNA_indices_construct = findall((rand(Multinomial(r, p_gRNA_freq))) .!= 0) + + # execute mutations + gRNA_indices_mutations = [gRNA for gRNA in gRNA_indices_construct if rand(Binomial(1, p_gRNA_edit[gRNA])) == 1] + + # effective gene knockout (loss-of-function) ? + gRNA_indices_KO = [gRNA for gRNA in gRNA_indices_mutations if rand(Binomial(1, ϵ_KO)) == 1] + + # which genes are knocked out? + genes_indices_KO = Int.(ceil.(gRNA_indices_KO / g)) + + # which pairwise combinations are present? + interactions = collect(combinations(genes_indices_KO, 2)) + + # Store represented combinations in matrix + for interaction in interactions + j = interaction[1]; k = interaction[2] + X_interactions_count[j,k] = 1; X_interactions_count[k,j] = 1; X_interactions_count[j,j] = 1; X_interactions_count[k,k] = 1 + end + end + + push!(Nₓ₂_vec, Nₓ₂) + end + + # Calculate expected value and standard deviation + E_Nₓ₂ = mean(Nₓ₂_vec) + sd_Nₓ₂ = std(Nₓ₂_vec) + + return E_Nₓ₂, sd_Nₓ₂ +end + +""" + BioCCP_Nₓ₂(x, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, ϵ_KO) + +Computes the expected value and the standard deviation of the minimal plant library size for full coverage of all pairwise combinations of gene knockouts in a multiplex CRISPR/Cas experiment (E[Nx,2] and σ[Nx,2]) using BioCCP + + ***INPUT*** +x: number of target genes in the experiment +g: number of gRNAs designed per target gene +r: number of gRNA sequences per combinatorial gRNA/Cas construct +n_gRNA_total: total number of gRNAs in the experiment +p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!) +p_gRNA_edit: vector with genome editing efficiencies for all gRNAs +ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout + +***OUTPUT*** +E_Nₓ₂: expected value of the plant library size for full coverage of all pairwise combinations of gene knockouts +sd_Nₓ₂: standard deviation on the plant library size for full coverage of all pairwise combinations of gene knockouts +""" +function BioCCP_Nₓ₂(x, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, ϵ_KO) + + # how many pairwise combinations of gRNAs + ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 2)) + n_combinations_gRNA = length(ind_combinations_gRNA) + + # calculate probability and activity of gRNA combinations + p_combinations_gRNA_library = zeros(n_combinations_gRNA) + p_combinations_gRNA_act = zeros(n_combinations_gRNA) + for i in 1:n_combinations_gRNA + p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]] + p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]] + end + + # normalize probability gRNA combinations + p_combinations_gRNA_library /= sum(p_combinations_gRNA_library) + + # select pairwise gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes) + p_combinations_gRNA_library_interest = [] + p_combinations_gRNA_act_interest = [] + ind_combinations_gRNA_interest = [] + for i in 1:n_combinations_gRNA + if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g) + push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i]) + push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i]) + push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i]) + end + end + + n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest) + p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^2 + + # sum up probabilities or gRNA combinations for corresponding gene knockout combinations + p_genes_matrix = zeros(x, x) + for i in 1:n_combinations_gRNA_interest + gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g)) + gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g)) + p_genes_matrix[gene1, gene2] += p_combinations_gRNA[i] + end + p_genes = collect([p_genes_matrix[i, j] for j in 2:size(p_genes_matrix, 1) for i in 1:j-1]) + n_combinations_genes = length(p_genes) + combinations_pp = length(collect(combinations(1:r, 2))) + + # Apply BioCCP functions + E_Nₓ₂ = expectation_minsamplesize(n_combinations_genes; p=p_genes, r=combinations_pp, normalize=false) + sd_Nₓ₂ = std_minsamplesize(n_combinations_genes; p=p_genes, r=combinations_pp, normalize=false) + + return E_Nₓ₂, sd_Nₓ₂ +end + +""" + simulate_Nₓ₂_countKOs(x, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, ϵ_KO; iter=100000) + +Counts the number of knockouts per plant in the experiment. + +***INPUT*** +x: number of target genes in the experiment +g: number of gRNAs designed per target gene +r: number of gRNA sequences per combinatorial gRNA/Cas construct +n_gRNA_total: total number of gRNAs in the experiment +p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!) +p_gRNA_edit: vector with genome editing efficiencies for all gRNAs +ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout +iter: number of plants that are sampled + +***OUTPUT*** +n_KOs_vec: vector with the number of knockouts for each plant +""" +function simulate_Nₓ₂_countKOs(x, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO; + iter = 100000) + + @assert x * g == n_gRNA_total + + n_KOs_vec = [] + + for j in 1:iter + # sample combinatorial gRNA/Cas9 construct + gRNA_indices_construct = findall((rand(Multinomial(r, p_gRNA_freq))) .!= 0) + + # execute mutations + gRNA_indices_mutations = [gRNA for gRNA in gRNA_indices_construct if rand(Binomial(1, p_gRNA_edit[gRNA])) == 1] + + # effective gene knockout (loss-of-function) ? + gRNA_indices_KO = [gRNA for gRNA in gRNA_indices_mutations if rand(Binomial(1, ϵ_KO)) == 1] + + # which genes are knocked out? + genes_indices_KO = Int.(ceil.(gRNA_indices_KO / g)) + + push!(n_KOs_vec, length(unique((genes_indices_KO)))) + end + + return n_KOs_vec +end + +""" + simulate_Nₓ₃(x, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO; + iter=500) + +Computes the expected value and the standard deviation of the minimal plant library size for full coverage of all triple combinations of gene knockouts in +a multiplex CRISPR/Cas experiment (E[Nx,3] and σ[Nx,3]) using simulation + +***INPUT*** +x: number of target genes in the experiment +g: number of gRNAs designed per target gene +r: number of gRNA sequences per combinatorial gRNA/Cas construct +n_gRNA_total: total number of gRNAs in the experiment +p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!) +p_gRNA_edit: vector with genome editing efficiencies for all gRNAs +ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout +iter: number of CRISPR/Cas experiments that are simulated to obtain E[Nₓ₃] and σ[Nₓ₃] + +***OUTPUT*** +E_Nₓ₃: expecteded value of the plant library size for full coverage of all triple combinations of gene knockouts +sd_Nₓ₃: standard deviation on the plant library size for full coverage of all triple combinations of gene knockouts +""" +function simulate_Nₓ₃(x, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO; + iter = 500) + + @assert x * g == n_gRNA_total + + Nₓ₃_vec = [] # stores number of plants required for each experiment + + for i in 1:iter + # println("got till here") + X_interactions_count = zeros(x, x, x) # Initialize matrix to count triple interactions + # println(X_interactions_count) + # println(ones(x, x, x)) + Nₓ₃ = 0 + + while X_interactions_count != ones(x, x, x) # check if all triple combinations are present + Nₓ₃ += 1 # count how many plants must be sampled to fill triple interaction matrix + + # sample combinatorial gRNA/Cas9 construct + gRNA_indices_construct = findall((rand(Multinomial(r, p_gRNA_freq))) .!= 0) + + # execute mutations + gRNA_indices_mutations = [gRNA for gRNA in gRNA_indices_construct if rand(Binomial(1, p_gRNA_edit[gRNA])) == 1] + + # effective gene knockout (loss-of-function) ? + gRNA_indices_KO = [gRNA for gRNA in gRNA_indices_mutations if rand(Binomial(1, ϵ_KO)) == 1] + + # which genes are knocked out? + genes_indices_KO = Int.(ceil.(gRNA_indices_KO / g)) + + # which triple combinations are present? + interactions = collect(combinations(genes_indices_KO, 3)) + + # Store represented triple combinations in 3D-matrix + for interaction in interactions + j = interaction[1] + k = interaction[2] + l = interaction[3] + X_interactions_count[j,k,l] = 1 + X_interactions_count[k,j,l] = 1 + X_interactions_count[l,j,k] = 1 + X_interactions_count[l,k,j] = 1 + X_interactions_count[j,l,k] = 1 + X_interactions_count[k,l,j] = 1 + + X_interactions_count[:,l,l] .= 1 + X_interactions_count[:,k,k] .= 1 + X_interactions_count[:,j,j] .= 1 + X_interactions_count[l,:,l] .= 1 + X_interactions_count[k,:,k] .= 1 + X_interactions_count[j,:,j] .= 1 + + X_interactions_count[j,j,:] .= 1 + X_interactions_count[k,k,:] .= 1 + X_interactions_count[l,l,:] .= 1 + end + end + push!(Nₓ₃_vec, Nₓ₃) + end + + # calculate expected value and standard deviation + E_Nₓ₃ = mean(Nₓ₃_vec); sd_Nₓ₃ = std(Nₓ₃_vec) + + return E_Nₓ₃, sd_Nₓ₃ +end + + +""" + BioCCP_Nₓ₃(x, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO) + +Computes the expected value and the standard deviation of the minimal plant library size +for full coverage of all triple combinations of gene knockouts in a multiplex CRISPR/Cas experiment (E[Nx,3] and σ[Nx,3]) using BioCCP + +***INPUT*** +x: number of target genes in the experiment +g: number of gRNAs designed per target gene +r: number of gRNA sequences per combinatorial gRNA/Cas construct +n_gRNA_total: total number of gRNAs in the experiment +p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!) +p_gRNA_edit: vector with genome editing efficiencies for all gRNAs +ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout + +***OUTPUT*** +E_Nₓ₃: expecteded value of the plant library size for full coverage of all triple combinations of gene knockouts +sd_Nₓ₃: standard deviation on the plant library size for full coverage of all triple combinations of gene knockouts +""" +function BioCCP_Nₓ₃(x, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO) + + # how many triple combinations of gRNAs + ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 3)) + n_combinations_gRNA = length(ind_combinations_gRNA) + + # calculate probability and activity of triple gRNA combinations + p_combinations_gRNA_library = zeros(n_combinations_gRNA) + p_combinations_gRNA_act = zeros(n_combinations_gRNA) + for i in 1:n_combinations_gRNA + p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]] * p_gRNA_freq[ind_combinations_gRNA[i][3]] + p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]] * p_gRNA_edit[ind_combinations_gRNA[i][3]] + end + + # normalize probability gRNA combinations + p_combinations_gRNA_library /= sum(p_combinations_gRNA_library) + + # select triple gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes) + p_combinations_gRNA_library_interest = [] + p_combinations_gRNA_act_interest = [] + ind_combinations_gRNA_interest = [] + for i in 1:n_combinations_gRNA + if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g) && ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][3]/g) && ceil(ind_combinations_gRNA[i][3]/g) != ceil(ind_combinations_gRNA[i][2]/g) + push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i]) + push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i]) + push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i]) + end + end + + n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest) + p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^3 + + # sum up probabilities or gRNA combinations for corresponding gene knockout combinations + p_genes_matrix = zeros(x, x, x) + for i in 1:n_combinations_gRNA_interest + gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g)) + gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g)) + gene3 = Int(ceil(ind_combinations_gRNA_interest[i][3]/g)) + p_genes_matrix[gene1, gene2, gene3] += p_combinations_gRNA[i] + end + + combinations_genes = collect(combinations(1:x, 3)) + p_genes = [] + for combination in combinations_genes + push!(p_genes, p_genes_matrix[combination[1], combination[2], combination[3]]) + end + + n_combinations_genes = length(p_genes) + combinations_pp = length(collect(combinations(1:r, 3))) + + # apply BioCCP functions + E_Nₓ₃ = expectation_minsamplesize(n_combinations_genes; p=p_genes, r=combinations_pp, normalize=false) + sd_Nₓ₃ = std_minsamplesize(n_combinations_genes; p=p_genes, r=combinations_pp, normalize=false) + + return E_Nₓ₃, sd_Nₓ₃ +end + +""" + BioCCP_Pₓ₁(x, + N, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO) + +Computes the probability of full coverage of all single gene knockouts (Px,1) for an experiment with given plant library size using BioCCP + +***INPUT*** +x: number of target genes in the experiment +N: plant library size +g: number of gRNAs designed per target gene +r: number of gRNA sequences per combinatorial gRNA/Cas construct +n_gRNA_total: total number of gRNAs in the experiment +p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!) +p_gRNA_edit: vector with genome editing efficiencies for all gRNAs +ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout + +***OUTPUT*** +Pₓ₁: probability of full coverage of all single gene knockouts + +""" +function BioCCP_Pₓ₁(x, + N, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO) + + # prepare input + p_gRNAs = p_gRNA_freq .* p_gRNA_edit * ϵ_KO + p_genes = [sum(p_gRNAs[i:i+g-1]) for i in 1:g:n_gRNA_total] + + # apply BioCCP function + Pₓ₁ = success_probability(x, N; p=p_genes, r=r, normalize=false) + + return Pₓ₁ +end + +""" +BioCCP_γₓ₁(x, + N, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO) + +Computes the expected coverage of all single gene knockouts (Px,1) for an experiment with given plant library size using BioCCP + +***INPUT*** +x: number of target genes in the experiment +N: plant library size +g: number of gRNAs designed per target gene +r: number of gRNA sequences per combinatorial gRNA/Cas construct +n_gRNA_total: total number of gRNAs in the experiment +p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!) +p_gRNA_edit: vector with genome editing efficiencies for all gRNAs +ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout + +***OUTPUT*** +γₓ₁: expected coverage of all single gene knockouts +""" +function BioCCP_γₓ₁(x, + N, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO) + + p_gRNAs = p_gRNA_freq .* p_gRNA_edit * ϵ_KO + p_genes = [sum(p_gRNAs[i:i+g-1]) for i in 1:g:n_gRNA_total] + + # Apply BioCCP function + γₓ₁ = expectation_fraction_collected(x, N; p=p_genes, r=r, normalize=false) + + return γₓ₁ +end + + +""" + BioCCP_Pₓ₂(x, + N, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO) + +Computes the probability of full coverage of all pairwise combinations of gene knockouts (Px,2) for an experiment with given plant library size using BioCCP + +***INPUT*** +x: number of target genes in the experiment +N: plant library size +g: number of gRNAs designed per target gene +r: number of gRNA sequences per combinatorial gRNA/Cas construct +n_gRNA_total: total number of gRNAs in the experiment +p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!) +p_gRNA_edit: vector with genome editing efficiencies for all gRNAs +ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout + +***OUTPUT*** +Pₓ₂: probability of full coverage of all pairwise combinations of gene knockouts +""" +function BioCCP_Pₓ₂(x, + N, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO) + + # how many pairwise combinations of gRNAs + ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 2)) + n_combinations_gRNA = length(ind_combinations_gRNA) + + # calculate probability and activity of gRNA combinations + p_combinations_gRNA_library = zeros(n_combinations_gRNA) + p_combinations_gRNA_act = zeros(n_combinations_gRNA) + for i in 1:n_combinations_gRNA + p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]] + p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]] + end + + # normalize probability gRNA combinations + p_combinations_gRNA_library /= sum(p_combinations_gRNA_library) + + # select pairwise gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes) + p_combinations_gRNA_library_interest = [] + p_combinations_gRNA_act_interest = [] + ind_combinations_gRNA_interest = [] + for i in 1:n_combinations_gRNA + if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g) + push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i]) + push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i]) + push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i]) + end + end + + n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest) + p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^2 + + # sum up probabilities or gRNA combinations for corresponding gene knockout combinations + p_genes_matrix = zeros(x, x) + for i in 1:n_combinations_gRNA_interest + gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g)) + gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g)) + p_genes_matrix[gene1, gene2] += p_combinations_gRNA[i] + end + + p_genes = collect([p_genes_matrix[i, j] for j in 2:size(p_genes_matrix, 1) for i in 1:j-1]) + n_combinations_genes = length(p_genes) + combinations_pp = length(collect(combinations(1:r, 2))) + + # Apply BioCCP function + Pₓ₂ = success_probability(n_combinations_genes, N; p=p_genes, r=combinations_pp, normalize=false) + + return Pₓ₂ +end + +""" + BioCCP_γₓ₂(x, + N, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO) + +Computes the expected coverage of all pairwise combinations of gene knockouts (γx,2) for an experiment with given plant library size using BioCCP + +***INPUT*** +x: number of target genes in the experiment +N: plant library size +g: number of gRNAs designed per target gene +r: number of gRNA sequences per combinatorial gRNA/Cas construct +n_gRNA_total: total number of gRNAs in the experiment +p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!) +p_gRNA_edit: vector with genome editing efficiencies for all gRNAs +ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout + +***OUTPUT*** +γₓ₂: expected coverage of all pairwise combinations of gene knockouts +""" +function BioCCP_γₓ₂(x, + N, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO) + + # how many pairwise combinations of gRNAs + ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 2)) + n_combinations_gRNA = length(ind_combinations_gRNA) + + # calculate probability and activity of gRNA combinations + p_combinations_gRNA_library = zeros(n_combinations_gRNA) + p_combinations_gRNA_act = zeros(n_combinations_gRNA) + for i in 1:n_combinations_gRNA + p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]] + p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]] + end + + # normalize probability gRNA combinations + p_combinations_gRNA_library /= sum(p_combinations_gRNA_library) + + # select pairwise gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes) + p_combinations_gRNA_library_interest = [] + p_combinations_gRNA_act_interest = [] + ind_combinations_gRNA_interest = [] + for i in 1:n_combinations_gRNA + if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g) + push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i]) + push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i]) + push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i]) + end + end + + n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest) + p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^2 + + # sum up probabilities or gRNA combinations for corresponding gene knockout combinations + p_genes_matrix = zeros(x, x) + for i in 1:n_combinations_gRNA_interest + gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g)) + gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g)) + p_genes_matrix[gene1, gene2] += p_combinations_gRNA[i] + end + + p_genes = collect([p_genes_matrix[i, j] for j in 2:size(p_genes_matrix, 1) for i in 1:j-1]) + n_combinations_genes = length(p_genes) + combinations_pp = length(collect(combinations(1:r, 2))) + + # Apply BioCCP function + γₓ₂ = expectation_fraction_collected(n_combinations_genes, N; p=p_genes, r=combinations_pp, normalize=false) + + return γₓ₂ +end + + +""" + BioCCP_γₓ₃(x, + N, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO) + +Computes the expected coverage of all triple combinations of gene knockouts (γx,3) for an experiment with given plant library size using BioCCP + +***INPUT*** +x: number of target genes in the experiment +N: plant library size +g: number of gRNAs designed per target gene +r: number of gRNA sequences per combinatorial gRNA/Cas construct +n_gRNA_total: total number of gRNAs in the experiment +p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!) +p_gRNA_edit: vector with genome editing efficiencies for all gRNAs +ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout + +***OUTPUT*** +γₓ₃: expected coverage of all triple combinations of gene knockouts +""" +function BioCCP_γₓ₃(x, + N, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO) + + # how many triple combinations of gRNAs + ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 3)) + n_combinations_gRNA = length(ind_combinations_gRNA) + + # calculate probability and activity of triple gRNA combinations + p_combinations_gRNA_library = zeros(n_combinations_gRNA) + p_combinations_gRNA_act = zeros(n_combinations_gRNA) + for i in 1:n_combinations_gRNA + p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]] * p_gRNA_freq[ind_combinations_gRNA[i][3]] + p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]] * p_gRNA_edit[ind_combinations_gRNA[i][3]] + end + + # normalize probability gRNA combinations + p_combinations_gRNA_library /= sum(p_combinations_gRNA_library) + + # select triple gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes) + p_combinations_gRNA_library_interest = [] + p_combinations_gRNA_act_interest = [] + ind_combinations_gRNA_interest = [] + for i in 1:n_combinations_gRNA + if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g) && ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][3]/g) && ceil(ind_combinations_gRNA[i][3]/g) != ceil(ind_combinations_gRNA[i][2]/g) + push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i]) + push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i]) + push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i]) + end + end + + n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest) + p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^3 + + # sum up probabilities or gRNA combinations for corresponding gene knockout combinations + p_genes_matrix = zeros(x, x, x) + for i in 1:n_combinations_gRNA_interest + gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g)) + gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g)) + gene3 = Int(ceil(ind_combinations_gRNA_interest[i][3]/g)) + p_genes_matrix[gene1, gene2, gene3] += p_combinations_gRNA[i] + end + + combinations_genes = collect(combinations(1:x, 3)) + p_genes = [] + for combination in combinations_genes + push!(p_genes, p_genes_matrix[combination[1], combination[2], combination[3]]) + end + + n_combinations_genes = length(p_genes) + combinations_pp = length(collect(combinations(1:r, 3))) + + # Apply BioCCP function + γₓ₃ = expectation_fraction_collected(n_combinations_genes, N; p=p_genes, r=combinations_pp, normalize=false) + + return γₓ₃ +end + +""" + BioCCP_Pₓ₃(x, + N, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO) + +Computes the probability of full coverage of all triple combinations of gene knockouts (Px,3) for an experiment with given plant library size using BioCCP + +***INPUT*** +x: number of target genes in the experiment +N: plant library size +g: number of gRNAs designed per target gene +r: number of gRNA sequences per combinatorial gRNA/Cas construct +n_gRNA_total: total number of gRNAs in the experiment +p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!) +p_gRNA_edit: vector with genome editing efficiencies for all gRNAs +ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout + +***OUTPUT*** +Pₓ₃: probability of full coverage of all triple combinations of gene knockouts +""" +function BioCCP_Pₓ₃(x, + N, + g, + r, + n_gRNA_total, + p_gRNA_freq, + p_gRNA_edit, + ϵ_KO) + + # how many triple combinations of gRNAs + ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 3)) + n_combinations_gRNA = length(ind_combinations_gRNA) + + # calculate probability and activity of triple gRNA combinations + p_combinations_gRNA_library = zeros(n_combinations_gRNA) + p_combinations_gRNA_act = zeros(n_combinations_gRNA) + for i in 1:n_combinations_gRNA + p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]] * p_gRNA_freq[ind_combinations_gRNA[i][3]] + p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]] * p_gRNA_edit[ind_combinations_gRNA[i][3]] + end + + # normalize probability gRNA combinations + p_combinations_gRNA_library /= sum(p_combinations_gRNA_library) + + # select triple gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes) + p_combinations_gRNA_library_interest = [] + p_combinations_gRNA_act_interest = [] + ind_combinations_gRNA_interest = [] + for i in 1:n_combinations_gRNA + if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g) && ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][3]/g) && ceil(ind_combinations_gRNA[i][3]/g) != ceil(ind_combinations_gRNA[i][2]/g) + push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i]) + push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i]) + push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i]) + end + end + + n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest) + p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^3 + + # sum up probabilities or gRNA combinations for corresponding gene knockout combinations + p_genes_matrix = zeros(x, x, x) + for i in 1:n_combinations_gRNA_interest + gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g)) + gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g)) + gene3 = Int(ceil(ind_combinations_gRNA_interest[i][3]/g)) + p_genes_matrix[gene1, gene2, gene3] += p_combinations_gRNA[i] + end + + combinations_genes = collect(combinations(1:x, 3)) + p_genes = [] + for combination in combinations_genes + push!(p_genes, p_genes_matrix[combination[1], combination[2], combination[3]]) + end + + n_combinations_genes = length(p_genes) + combinations_pp = length(collect(combinations(1:r, 3))) + + # Apply BioCCP function + Pₓ₃ = success_probability(n_combinations_genes, N; p=p_genes, r=combinations_pp, normalize=false) + + return Pₓ₃ +end + + \ No newline at end of file diff -r 000000000000 -r cc0957c46408 README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Thu May 12 17:39:18 2022 +0000 @@ -0,0 +1,4 @@ +MultiplexCrisprDOE +================== +provides simulation- and BioCCP-based approaches for computing the minimal plant library size +that guarantees full combinatorial coverage (and other relevant statistics) for multiplex CRISPR/Cas experiments in plants. \ No newline at end of file diff -r 000000000000 -r cc0957c46408 main.jl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/main.jl Thu May 12 17:39:18 2022 +0000 @@ -0,0 +1,612 @@ +using Pkg +Pkg.add("Plots"); +Pkg.add("Distributions"); +Pkg.add("LinearAlgebra"); +Pkg.add("Combinatorics"); +Pkg.add("BioCCP"); +Pkg.add("ArgParse"); +Pkg.add("XLSX"); +Pkg.add("DataFrames"); +Pkg.add("Weave"); +Pkg.add("DataStructures"); +Pkg.add("PrettyTables"); + +using Random +using Plots +using Distributions +using LinearAlgebra +using Combinatorics +using BioCCP +using ArgParse +using XLSX +using DataFrames +using Weave +using DataStructures +using PrettyTables + +global current_dir = pwd() +include("MultiplexCrisprDOE.jl"); + +function main(args) + + aps = ArgParseSettings("MultiplexCrisprDOE") + + @add_arg_table! aps begin + "gfd" #, "gRNA_freq_dist" + action = :command # adds a command which will be read from an argument + help = "gRNA/Cas9 frequencies" + "ged" #, "gRNA_edit_dist" + action = :command + help = "gRNA/Cas9 editing efficiencies" + "sim" # simulation + action = :command + help = "simulation-based approaches for computing the minimal plant library size that guarantees full combinatorial coverage (and other relevant statistics)" + "ccp" # bioccp + action = :command + help = "BioCCP-based approaches for computing the minimal plant library size that guarantees full combinatorial coverage (and other relevant statistics)" + end + + @add_arg_table! aps["gfd"] begin # add command arg_table: same as usual, but invoked on s["grna"] + "m" + arg_type = Int + help = "plant library size" + "sd" + arg_type = Int + help = "the standard deviation on the gRNA abundances (in terms of absolute or relative frequency)" + "l" + arg_type = Int + help = "minimal gRNA abundance (in terms of absolute or relative frequency)" + "u" + arg_type = Int + help = "maximal gRNA abundance (in terms of absolute or relative frequency)" + "n" #, "--n_gRNA_total" + arg_type = Int + help = "the total number of gRNAs in the experiment" + "--normalize" + action = :store_true + # arg_type = Bool + # default = true + help = "if provided, the gRNA abundances (absolute frequencies) are converted into relative frequencies" + "--visualize" + action = :store_true + # arg_type = Bool + # default = false + help = "if provided, a histogram of all gRNA abundances is plotted" + "--out_file" + arg_type = String + default = "gRNA_reads" + help = "Output excel file prefix" + end + + @add_arg_table! aps["ged"] begin # add command arg_table: same as usual, but invoked on s["grna"] + "f_act" + arg_type = Float16 + help = "fraction of all gRNAs that is active" + "eps_edit_act" + arg_type = Float16 + help = "Average genome editing efficiency for active gRNAs - mean of the genome editing efficiency distribution for active gRNAs" + "eps_edit_inact" + arg_type = Float16 + help = "Average genome editing efficiency for inactive gRNAs - mean of the genome editing efficiency distribution for inactive gRNAs" + "sd_act" + arg_type = Float16 + help = "standard deviation of the genome editing efficiency distributions for active and inactive gRNAs" + "n_gRNA_total" + arg_type = Int + help = "the total number of gRNAs in the experiment" + "--visualize" + action = :store_true + # arg_type = Bool + # default = false + help = "if provided a histogram of all genome editing efficiency is plotted" + "--out_file" + arg_type = String + default = "gRNA_edit" + help = "Output excel file prefix" + end + + @add_arg_table! aps["sim"] begin + "M" #, "--mode" + # action = :command + # dest_name = "M" + arg_type = Int + range_tester = x -> 1 <= x <= 4 + help = """Select simulation mode (1: simulate_Nₓ₁; 2: simulate_Nₓ₂; 3: simulate_Nₓ₃; 4: simulate_Nₓ₂_countKOs)""" + "x" + arg_type = Int + help = "number of target genes in the experiment" + "g" + arg_type = Int + help = "number of gRNAs designed per target gene" + "r" + arg_type = Int + help = "number of gRNA sequences per combinatorial gRNA/Cas construct" + "t"#, "--n_gRNA_total" + arg_type = Int + help = "total number of gRNAs in the experiment" + "f"#, "--p_gRNA_freq" + arg_type = String #Vector{Float64} + help = "vector with relative frequencies for all gRNAs in the construct library (normalized!)" + "e"#, "--p_gRNA_edit" + arg_type = String #Vector{Float64} + help = "vector with genome editing efficiencies for all gRNAs" + "E"#, "--ϵ_KO" + arg_type=Float16 + help = "global knockout efficiency; fraction of mutations leading to effective gene knockout" + "--i", "--iter" + arg_type = Int + default = 500 + help = "number of CRISPR/Cas experiments that are simulated" + end + + @add_arg_table! aps["ccp"] begin + "M"#, "--mode" + arg_type = Int + range_tester = x -> 1 <= x <= 9 + help = """Select BioCCP mode (1: BioCCP_Nₓ₁; 2: BioCCP_Nₓ₂; 3: BioCCP_Nₓ₃; 4: BioCCP_Pₓ₁; 5: BioCCP_Pₓ₂ ; + 6: BioCCP_Pₓ₃; 7: BioCCP_γₓ₁; 8: BioCCP_γₓ₂; 9: BioCCP_γₓ₃)""" + "x" + arg_type = Int + help = "number of target genes in the experiment" + "N" + arg_type = Int + help = "(Minimum) plant library size" + "--s", "--step" + arg_type = Int + default = 5 + range_tester = x -> 1 <= x <= 10 + help = "Step size for plant library size (optional for calculating expected combinatorial coverage / plant library size)" + "--MN", "--max_pl_size" + arg_type = Int + default = 4000 + help = "Maximum plant library size (optional for calculating expected combinatorial coverage / plant library size)" + "g" + arg_type = Int + help = "number of gRNAs designed per target gene" + "r" + arg_type = Int + help = "number of gRNA sequences per combinatorial gRNA/Cas construct" + "t"#, "--n_gRNA_total" + arg_type = Int + help = "total number of gRNAs in the experiment" + "f"#, "--p_gRNA_freq" + arg_type = String #Vector{Float64} + help = "File containing vector with relative frequencies for all gRNAs in the construct library (normalized!)" + "e"#, "--p_gRNA_edit" + arg_type = String #Vector{Float64} + help = "File containing vector with genome editing efficiencies for all gRNAs" + "E"#, "--ϵ_KO" + arg_type=Float16 + help = "global knockout efficiency; fraction of mutations leading to effective gene knockout" + end + + parsed_args = parse_args(args, aps) + command_args = parsed_args[parsed_args["%COMMAND%"]] + println(command_args) + + tool_info = OrderedDict() + args_info = OrderedDict() + grna_dict = Dict() + out_dict = Dict() + if parsed_args["%COMMAND%"] == "gfd" + tool_info["method"] = "gRNA_ frequency _distribution" + tool_info["description"] = "Generates vector with frequencies in the combinatorial "* + "gRNA/Cas9 construct library for all gRNAs" + tool_info["mode"] = "" + tool_info["mode_description"] = "" + args_info["Plant library size"] = command_args["m"] + args_info["SD on the gRNA abundances"] = command_args["sd"] + args_info["Minimal gRNA abundance"] = command_args["l"] + args_info["Maximal gRNA abundance"] = command_args["u"] + args_info["Total number of gRNAs"] = command_args["n"] + args_info["Convert gRNA abundances to relative frequencies"] = string(command_args["normalize"]) + args_info["Plot gRNA abundances"] = string(command_args["visualize"]) + + m = command_args["m"] + sd = command_args["sd"] + l = command_args["l"] + u = command_args["u"] + n_gRNA_total = command_args["n"] + norm = command_args["normalize"] + viz = command_args["visualize"] + + println(string(norm)) + println(string(viz)) + + p_gRNA_reads = gRNA_frequency_distribution(m, sd, l, u, n_gRNA_total; normalize = norm, visualize = false) + grna_dict["p_gRNA_reads"] = p_gRNA_reads + + # println(p_gRNA_reads) + # write to excel file + fn = command_args["out_file"] * ".xlsx" + labels = ["gRNA_read"] + columns = Vector() + push!(columns, p_gRNA_reads) + XLSX.openxlsx(fn, mode="w") do xf + sheet = xf[1] + XLSX.writetable!(sheet, columns, labels) + end + + out_dict["output file"] = fn + + elseif parsed_args["%COMMAND%"] == "ged" + tool_info["method"] = "gRNA_ edit _distribution" + tool_info["description"] = "Generates vector with genome editing efficiencies "* + "for all the gRNAs in the experiment" + tool_info["mode"] = "" + tool_info["mode_description"] = "" + args_info["Fraction of active gRNAs"] = command_args["f_act"] + args_info["Average genome editing efficiency of active gRNAs"] = command_args["eps_edit_act"] + args_info["Average genome editing efficiency of inactive gRNAs"] = command_args["eps_edit_inact"] + args_info["Standard deviation"] = command_args["sd_act"] + args_info["Total number of gRNAs"] = command_args["n_gRNA_total"] + args_info["Plot genome editing efficiency"] = string(command_args["visualize"]) + + f_act = command_args["f_act"] + eps_edit_act = command_args["eps_edit_act"] + eps_edit_inact = command_args["eps_edit_inact"] + sd_act = command_args["sd_act"] + n_gRNA_total = command_args["n_gRNA_total"] + viz = ["visualize"] + + p_gRNA_edit = gRNA_edit_distribution(f_act, eps_edit_act, eps_edit_inact, sd_act, n_gRNA_total; visualize=false) + grna_dict["p_gRNA_edit"] = p_gRNA_edit + # write to excel file + fn = command_args["out_file"] * ".xlsx" + labels = ["gRNA_edit_efficiency"] + columns = Vector() + push!(columns, p_gRNA_edit) + XLSX.openxlsx(fn, mode="w") do xf + sheet = xf[1] + XLSX.writetable!(sheet, columns, labels) + end + + out_dict["output file"] = fn + + elseif parsed_args["%COMMAND%"] == "sim" || parsed_args["%COMMAND%"] == "ccp" + + filename = command_args["f"] + sheet = 1 + data = DataFrame(XLSX.readtable(filename, sheet)...) + p_gRNA_reads = data[!,"gRNA_read"] + p_gRNA_reads_normalized = p_gRNA_reads/sum(p_gRNA_reads) # normalize + f = p_gRNA_reads_normalized + grna_dict["p_gRNA_reads"] = f + + filename = command_args["e"] + sheet = 1 + data = DataFrame(XLSX.readtable(filename, sheet)...) + p_gRNA_edit = data[!,"gRNA_edit_efficiency"] + e = p_gRNA_edit + grna_dict["p_gRNA_edit"] = e + + x = command_args["x"] + g = command_args["g"] + r = command_args["r"] + t = command_args["t"] # n_gRNA_total + E = command_args["E"] # ϵ_KO # iter = 500 + + args_info["# of target genes in the experiment"] = command_args["x"] + args_info["# of gRNAs designed per target gene"] = command_args["g"] + args_info["# of gRNAs / combi gRNA/Cas construct"] = command_args["r"] + args_info["Total number of gRNAs"] = command_args["t"] + args_info["Relative frequencies for all gRNAs"] = command_args["f"] + args_info["Genome editing efficiencies for all gRNAs"] = command_args["e"] + args_info["Global knockout efficiency"] = command_args["E"] + + if parsed_args["%COMMAND%"] == "sim" + tool_info["method"] = "simulation" + tool_info["description"] = "simulation-based approaches for computing the minimal "* + "plant library size that guarantees full combinatorial "* + "coverage (and other relevant statistics)" + i = command_args["i"] # iter = 500 + args_info["# of simulated experiments"] = command_args["i"] + + if command_args["M"] == 1 + tool_info["mode"] = "simulate_Nx1" + tool_info["mode_description"] = "Computes the expected value and the standard deviation "* + "of the minimal plant library size for full coverage of "* + "all single gene knockouts (E[Nx,1] and σ[Nx,1]) using simulation" + E_sim, sd_sim = simulate_Nₓ₁(x, g, r, t, f, e, E; iter=i) + out_dict["E_sim"] = E_sim + out_dict["sd_sim"] = sd_sim + + elseif command_args["M"] == 2 + tool_info["mode"] = "simulate_Nx2" + tool_info["mode_description"] = "Computes the expected value and the standard deviation of "* + "the minimal plant library size for full coverage of "* + "all pairwise combinations of gene knockouts in a "* + "multiplex CRISPR/Cas experiment (E[Nx,2] and σ[Nx,2]) using simulation" + + E_sim, sd_sim = simulate_Nₓ₂(x, g, r, t, f, e, E; iter=i) + out_dict["E_sim"] = E_sim + out_dict["sd_sim"] = sd_sim + + elseif command_args["M"] == 3 + tool_info["mode"] = "simulate_Nx3" + tool_info["mode_description"] = "Computes the expected value and the standard deviation of "* + "the minimal plant library size for full coverage of "* + "all triple combinations of gene knockouts in a "* + "multiplex CRISPR/Cas experiment (E[Nx,3] and σ[Nx,3]) using simulation" + + E_sim, sd_sim = simulate_Nₓ₃(x, g, r, t, f, e, E; iter=i) + out_dict["E_sim"] = E_sim + out_dict["sd_sim"] = sd_sim + + elseif command_args["M"] == 4 + tool_info["mode"] = "simulate_Nx2_countKOs" + tool_info["mode_description"] = "Counts of the number of knockouts per plant in the experiment" + + n_KOs_vec = simulate_Nₓ₂_countKOs(x, g, r, t, f, e, E; iter=i) + out_dict["n_KOs_vec"] = n_KOs_vec + + # write to excel file + fn = "countKOs.xlsx" + labels = ["countKOs"] + columns = Vector() + push!(columns, n_KOs_vec) + XLSX.openxlsx(fn, mode="w") do xf + sheet = xf[1] + XLSX.writetable!(sheet, columns, labels) + end + + out_dict["output file"] = fn + + end + + elseif parsed_args["%COMMAND%"] == "ccp" + tool_info["method"] = "BioCCP" + tool_info["description"] = "BioCCP-based approaches for computing the minimal "* + "plant library size that guarantees full combinatorial "* + "coverage (and other relevant statistics)" + + N = command_args["N"] + if haskey(command_args,"s") && haskey(command_args,"MN") + s = command_args["s"] + MN = command_args["MN"] + args_info["Step size"] = command_args["s"] + args_info["Maximum Plant library size"] = command_args["MN"] + end + args_info["Plant library size"] = command_args["N"] + + + if command_args["M"] == 1 + tool_info["mode"] = "BioCCP_Nx1" + tool_info["mode_description"] = "Computes the expected value and the standard deviation of "* + "the minimal plant library size for full coverage of all "* + "single gene knockouts (E[Nx,1] and σ[Nx,1]) using BioCCP" + + E_sim, sd_sim = BioCCP_Nₓ₁(x, g, r, t, f, e, E) + out_dict["E_sim"] = E_sim + out_dict["sd_sim"] = sd_sim + + elseif command_args["M"] == 2 + tool_info["mode"] = "BioCCP_Nx2" + tool_info["mode_description"] = "Computes the expected value and the standard deviation of "* + "the minimal plant library size for full coverage of all "* + "pairwise combinations of gene knockouts in a multiplex "* + "CRISPR/Cas experiment (E[Nx,2] and σ[Nx,2]) using BioCCP" + + E_sim, sd_sim = BioCCP_Nₓ₂(x, g, r, t, f, e, E) + out_dict["E_sim"] = E_sim + out_dict["sd_sim"] = sd_sim + + elseif command_args["M"] == 3 + tool_info["mode"] = "BioCCP_Nx3" + tool_info["mode_description"] = "Computes the expected value and the standard deviation of "* + "the minimal plant library size for full coverage of all triple combinations of "* + "gene knockouts in a multiplex CRISPR/Cas experiment (E[Nx,3] and σ[Nx,3]) using BioCCP" + + E_sim, sd_sim = BioCCP_Nₓ₃(x, g, r, t, f, e, E) + out_dict["E_sim"] = E_sim + out_dict["sd_sim"] = sd_sim + + elseif command_args["M"] == 4 + tool_info["mode"] = "BioCCP_Px1" + tool_info["mode_description"] = "Computes the probability of full coverage of "* + "all single gene knockouts (Px,1) for an experiment with given "* + "plant library size using BioCCP" + + if s != nothing && MN != nothing + plant_library_sizes = N:s:MN + else + plant_library_sizes = N + end + PT = [] + global N_95_P = nothing + for N in plant_library_sizes + Pr = BioCCP_Pₓ₁(x, N, g, r, t, f, e, E) + push!(PT, Pr) + if Pr < 0.95 + N_95_P = N + end + end + # P = BioCCP_Pₓ₁(x, N, g, r, t, f, e, E) + out_dict["P_sim"] = PT + out_dict["N_95_P"] = N_95_P + out_dict["pls"] = plant_library_sizes + + elseif command_args["M"] == 5 + tool_info["mode"] = "BioCCP_Px2" + tool_info["mode_description"] = "Computes the probability of full coverage of "* + "all pairwise combinations of gene knockouts (Px,2) "* + "for an experiment with given plant library size using BioCCP" + + if s != nothing && MN != nothing + plant_library_sizes = N:s:MN + else + plant_library_sizes = N + end + PT = [] + global N_95_P = nothing + for N in plant_library_sizes + Pr = BioCCP_Pₓ₂(x, N, g, r, t, f, e, E) + push!(PT, Pr) + if Pr < 0.95 + N_95_P = N + end + end + # P = BioCCP_Pₓ₂(x, N, g, r, t, f, e, E) + out_dict["P_sim"] = PT + out_dict["N_95_P"] = N_95_P + out_dict["pls"] = plant_library_sizes + + elseif command_args["M"] == 6 + tool_info["mode"] = "BioCCP_Px3" + tool_info["mode_description"] = "Computes the probability of full coverage of all "* + "triple combinations of gene knockouts (Px,3) for an experiment "* + "with given plant library size using BioCCP" + + if s != nothing && MN != nothing + plant_library_sizes = N:s:MN + else + plant_library_sizes = N + end + PT = [] + global N_95_P = nothing + for N in plant_library_sizes + Pr = BioCCP_Pₓ₃(x, N, g, r, t, f, e, E) + push!(PT, Pr) + if Pr < 0.95 + N_95_P = N + end + end + # P = BioCCP_Pₓ₃(x, N, g, r, t, f, e, E) + out_dict["P_sim"] = PT + out_dict["N_95_P"] = N_95_P + out_dict["pls"] = plant_library_sizes + + elseif command_args["M"] == 7 + tool_info["mode"] = "BioCCP_γx1" + tool_info["mode_description"] = "Computes the expected coverage of all "* + "single gene knockouts (E[γx,1]) for an experiment "* + "with given plant library size using BioCCP" + if s != nothing && MN != nothing + plant_library_sizes = N:s:MN + else + plant_library_sizes = N + end + exp_cov = [] + global N_95 = nothing + for N in plant_library_sizes + E_cov = BioCCP_γₓ₁(x, N, g, r, t, f, e, E) + push!(exp_cov, E_cov) + if E_cov < 0.95 + N_95 = N + end + end + # E_sim, sd_sim = BioCCP_γₓ₁(x, N, g, r, t, f, e, E) + out_dict["E_cov"] = exp_cov + out_dict["N_95"] = N_95 + out_dict["pls"] = plant_library_sizes + + elseif command_args["M"] == 8 + tool_info["mode"] = "BioCCP_γx2" + tool_info["mode_description"] = "Computes the expected coverage of all "* + "pairwise combinations of gene knockouts (E[γx,2]) for an experiment with "* + "given plant library size using BioCCP" + + if s != nothing && MN != nothing + plant_library_sizes = N:s:MN + else + plant_library_sizes = N + end + exp_cov = [] + global N_95 = nothing + for N in plant_library_sizes + E_cov = BioCCP_γₓ₂(x, N, g, r, t, f, e, E) + push!(exp_cov, E_cov) + if E_cov < 0.95 + N_95 = N + end + end + # E_sim, sd_sim = BioCCP_γₓ₂(x, N, g, r, t, f, e, E) + out_dict["E_cov"] = exp_cov + out_dict["N_95"] = N_95 + out_dict["pls"] = plant_library_sizes + + elseif command_args["M"] == 9 + tool_info["mode"] = "BioCCP_γx3" + tool_info["mode_description"] = "Computes the expected coverage of all "* + "triple combinations of gene knockouts (E[γx,3]) for an experiment with "* + "given plant library size using BioCCP" + + if s != nothing && MN != nothing + plant_library_sizes = N:s:MN + else + plant_library_sizes = N + end + exp_cov = [] + global N_95 = nothing + for N in plant_library_sizes + E_cov = BioCCP_γₓ₃(x, N, g, r, t, f, e, E) + push!(exp_cov, E_cov) + if E_cov < 0.95 + N_95 = N + end + end + # E_sim, sd_sim = BioCCP_γₓ₃(x, N, g, r, t, f, e, E) + out_dict["E_cov"] = exp_cov + out_dict["N_95"] = N_95 + out_dict["pls"] = plant_library_sizes + + end + end + end + + println(parsed_args) + println("Parsed args:") + for (key,val) in parsed_args + println(" $key => $(repr(val))") + end + println() + println("Command: ", parsed_args["%COMMAND%"]) + # h1 = histogram(grna_dict["p_gRNA_reads"], label="", + # xlabel="Number of reads per gRNA", + # linecolor="white", + # normalize=:probability, + # xtickfontsize=10,ytickfontsize=10, + # color=:mediumturquoise, size=(600,350), bins = 25, + # ylabel="Relative frequency", + # title="gRNA frequency distribution") + + # h2 = histogram(grna_dict["p_gRNA_edit"], + # normalize = :probability, + # linecolor = "white", + # label="", + # color=:turquoise4, + # xtickfontsize=10,ytickfontsize=10, xlim = (0, 1), + # xticks=(0:0.1:1), + # bins = 150, + # xlabel="gRNA editing efficiency", + # ylabel="Relative frequency", + # title="gRNA genome editing effiency distribution") + + # p_plot = plot(plant_library_sizes, Pₓ₂, label="Pₓ₂", + # title="Probability of full combinatorial coverage with respect to plant library size", + # xlabel="N", ylabel="Pₓₖ", + # xticks = (0:500:50000, string.(0:500:50000)), + # size=(900,400), color=:turquoise4, linewidth=2) + # hline!([0.95], linestyle=:dash, color=:grey, label="Pₓₖ = 0.95", legend=:bottomright) + + # exp_plot = plot(plant_library_sizes, expected_γₓ₂, + # label="E[γₓ₂]", title="Expected combinatorial coverage w.r.t. plant library size", + # xlabel="N", ylabel="E[γₓₖ]", + # xticks = (0:500:50000, string.(0:500:50000)), + # size=(800,400), color=:turquoise4, linewidth=2) + # hline!([0.95], linestyle=:dash, color=:grey, label="E[γₓₖ] = 0.95", legend=:bottomright) + + + ENV["GKSwstype"]="nul" + weave(string(@__DIR__) * "/report.jmd", + args = (parsed_args = parsed_args, + tool_info = tool_info, + args_info = args_info, + grna_dict = grna_dict, + #h1 = h1, h2 = h2, + output = out_dict); + doctype = "md2html", out_path = :pwd) + ENV["GKSwstype"]="gksqt" +end + +main(ARGS) diff -r 000000000000 -r cc0957c46408 mcdoe.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mcdoe.xml Thu May 12 17:39:18 2022 +0000 @@ -0,0 +1,341 @@ + + + julia + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + tools['tool_selector']=='gfd' + + + tools['tool_selector']=='ged' + + + tools['tool_selector']=='sim' and tools['bioccp_mode_selector']=='4' + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +@misc{githubMultiplexCrisprDOE, + author = {LastTODO, FirstTODO}, + year = {TODO}, + title = {MultiplexCrisprDOE}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/kirstvh/MultiplexCrisprDOE}, +} + + \ No newline at end of file diff -r 000000000000 -r cc0957c46408 report.jmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/report.jmd Thu May 12 17:39:18 2022 +0000 @@ -0,0 +1,102 @@ +--- +title: MultiplexCrisprDOE +--- + + +```julia; echo = false; results = "hidden" +using Pkg +"Plots" ∉ keys(Pkg.project().dependencies) && Pkg.add("Plots") +#"DSP" ∉ keys(Pkg.project().dependencies) && Pkg.add("DSP") +#"Images" ∉ keys(Pkg.project().dependencies) && Pkg.add("Images") +"DataStructures" ∉ keys(Pkg.project().dependencies) && Pkg.add("DataStructures") +"PrettyTables" ∉ keys(Pkg.project().dependencies) && Pkg.add("PrettyTables") +"DataFrames" ∉ keys(Pkg.project().dependencies) && Pkg.add("DataFrames") +"Latexify" ∉ keys(Pkg.project().dependencies) && Pkg.add("Latexify") +``` +## Tool + +* **Method:** `j println(WEAVE_ARGS.tool_info["method"])` + +* **Description:** `j println(WEAVE_ARGS.tool_info["description"])` + +* **Mode:** `j println(WEAVE_ARGS.tool_info["mode"])` + +* **Mode description:** `j println(WEAVE_ARGS.tool_info["mode_description"])` + +## Variables + +```julia; echo = false +using DataFrames +using PrettyTables +df = DataFrame("Argument" => collect(keys(WEAVE_ARGS.args_info)), "Value" => collect(values(WEAVE_ARGS.args_info))) +#pt = pretty_table(df, nosubheader=true; alignment=:l) +``` + +```julia; echo = false + using Plots + if haskey(WEAVE_ARGS.grna_dict,"p_gRNA_reads") + h1 = histogram(WEAVE_ARGS.grna_dict["p_gRNA_reads"], label="", + xlabel="Number of reads per gRNA", + linecolor="white", + normalize=:probability, + xtickfontsize=10,ytickfontsize=10, + color=:mediumturquoise, size=(600,350), bins = 25, + ylabel="Relative frequency", + title="gRNA frequency distribution") + display(h1) + end +``` + +```julia; echo = false + using Plots + if haskey(WEAVE_ARGS.grna_dict,"p_gRNA_edit") + h2 = histogram(WEAVE_ARGS.grna_dict["p_gRNA_edit"], + normalize = :probability, + linecolor = "white", + label="", + color=:turquoise4, + xtickfontsize=10,ytickfontsize=10, xlim = (0, 1), + xticks=(0:0.1:1), + bins = 150, + xlabel="gRNA editing efficiency", + ylabel="Relative frequency", + title="gRNA genome editing effiency distribution") + display(h2) + end +``` + +```julia; echo = false + using Plots + if haskey(WEAVE_ARGS.output,"output file") + println("Output written to:") + println(WEAVE_ARGS.output["output file"]) + elseif haskey(WEAVE_ARGS.output,"E_sim") + E_sim = WEAVE_ARGS.output["E_sim"] + sd_sim = WEAVE_ARGS.output["sd_sim"] + k = WEAVE_ARGS.args_info["# of gRNAs / combi gRNA/Cas construct"] + x = WEAVE_ARGS.args_info["# of target genes in the experiment"] + println("**How many plants need to be included in the plant library (on average) to obtain full coverage of all k-combinations of gene knockouts?**") + println("On average, ", Int(ceil(E_sim)), " plants need to be sampled at random to observe all ", k, "-combinations of ", x, " gene knockouts. Standard deviation = ", Int(ceil(sd_sim)), " plants") + elseif haskey(WEAVE_ARGS.output,"P_sim") + p = plot(WEAVE_ARGS.output["pls"], WEAVE_ARGS.output["P_sim"], label="Pₓ₂", + title="Probability of full combinatorial coverage with respect to plant library size", + xlabel="N", ylabel="Pₓₖ", + xticks = (0:500:50000, string.(0:500:50000)), + size=(900,400), color=:turquoise4, linewidth=2) + hline!([0.95], linestyle=:dash, color=:grey, label="Pₓₖ = 0.95", legend=:bottomright) + display(p) + println("At a given number of plants, what is the probability that all pairwise combinations of gene knockouts are observed?") + println("N_95_P: ", WEAVE_ARGS.output["N_95_P"]) + elseif haskey(WEAVE_ARGS.output,"E_cov") + p = plot(WEAVE_ARGS.output["pls"], WEAVE_ARGS.output["E_cov"], + label="E[γₓ₂]", title="Expected combinatorial coverage w.r.t. plant library size", + xlabel="N", ylabel="E[γₓₖ]", + xticks = (0:500:50000, string.(0:500:50000)), + size=(800,400), color=:turquoise4, linewidth=2) + hline!([0.95], linestyle=:dash, color=:grey, label="E[γₓₖ] = 0.95", legend=:bottomright) + display(p) + println("At a given number of plants, what is the expected coverage of pairwise gene knockout combinations?") + println("N_95: ", WEAVE_ARGS.output["N_95"]) + end +``` + diff -r 000000000000 -r cc0957c46408 test-data/example_data.xlsx Binary file test-data/example_data.xlsx has changed diff -r 000000000000 -r cc0957c46408 test-data/test_ccp_report.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_ccp_report.html Thu May 12 17:39:18 2022 +0000 @@ -0,0 +1,707 @@ + + + + + + MultiplexCrisprDOE + + + + + + + + + + + + + + + +
+
+
+
+

MultiplexCrisprDOE

+ + +
+ + + + + + +

Tool

+
    +
  • Method: BioCCP

    +
  • +
  • Description: BioCCP-based approaches for computing the minimal plant library size that guarantees full combinatorial coverage (and other relevant statistics)

    +
  • +
  • Mode: BioCCP_γx3

    +
  • +
  • Mode description: Computes the expected coverage of all triple combinations of gene knockouts (E[γx,3]) for an experiment with given plant library size using BioCCP

    +
  • +
+

Variables

+ + +

10 rows × 2 columns

ArgumentValue
AnyAny
1# of target genes in the experiment20
2# of gRNAs designed per target gene6
3# of gRNAs / combi gRNA/Cas construct3
4Total number of gRNAs120
5Relative frequencies for all gRNAsexample_data.xlsx
6Genome editing efficiencies for all gRNAsexample_data.xlsx
7Global knockout efficiency0.8
8Step size5
9Maximum Plant library size6000
10Plant library size0
+ + + + + +
+At a given number of plants, what is the expected coverage of pairwise gene
+ knockout combinations?
+N_95: 6000
+
+ + + + +
+ +
+
+
+ + + diff -r 000000000000 -r cc0957c46408 test-data/test_countKOs.xlsx Binary file test-data/test_countKOs.xlsx has changed diff -r 000000000000 -r cc0957c46408 test-data/test_gRNA_edit.xlsx Binary file test-data/test_gRNA_edit.xlsx has changed diff -r 000000000000 -r cc0957c46408 test-data/test_gRNA_reads.xlsx Binary file test-data/test_gRNA_reads.xlsx has changed diff -r 000000000000 -r cc0957c46408 test-data/test_ged_report.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_ged_report.html Thu May 12 17:39:18 2022 +0000 @@ -0,0 +1,704 @@ + + + + + + MultiplexCrisprDOE + + + + + + + + + + + + + + + +
+
+
+
+

MultiplexCrisprDOE

+ + +
+ + + + + + +

Tool

+
    +
  • Method: gRNA_ edit _distribution

    +
  • +
  • Description: Generates vector with genome editing efficiencies for all the gRNAs in the experiment

    +
  • +
  • Mode:

    +
  • +
  • Mode description:

    +
  • +
+

Variables

+ + +

6 rows × 2 columns

ArgumentValue
AnyAny
1Fraction of active gRNAs0.9
2Average genome editing efficiency of active gRNAs0.95
3Average genome editing efficiency of inactive gRNAs0.1
4Standard deviation0.01
5Total number of gRNAs120
6Plot genome editing efficiencyfalse
+ + + + +
+Output written to:
+gRNA_edit.xlsx
+
+ + + +
+ +
+
+
+ + + diff -r 000000000000 -r cc0957c46408 test-data/test_gfd_report.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_gfd_report.html Thu May 12 17:39:18 2022 +0000 @@ -0,0 +1,704 @@ + + + + + + MultiplexCrisprDOE + + + + + + + + + + + + + + + +
+
+
+
+

MultiplexCrisprDOE

+ + +
+ + + + + + +

Tool

+
    +
  • Method: gRNA_ frequency _distribution

    +
  • +
  • Description: Generates vector with frequencies in the combinatorial gRNA/Cas9 construct library for all gRNAs

    +
  • +
  • Mode:

    +
  • +
  • Mode description:

    +
  • +
+

Variables

+ + +

7 rows × 2 columns

ArgumentValue
AnyAny
1Plant library size75
2SD on the gRNA abundances25
3Minimal gRNA abundance50
4Maximal gRNA abundance100
5Total number of gRNAs120
6Convert gRNA abundances to relative frequenciesfalse
7Plot gRNA abundancesfalse
+ + + + +
+Output written to:
+gRNA_reads.xlsx
+
+ + + +
+ +
+
+
+ + + diff -r 000000000000 -r cc0957c46408 test-data/test_sim_report.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_sim_report.html Thu May 12 17:39:18 2022 +0000 @@ -0,0 +1,705 @@ + + + + + + MultiplexCrisprDOE + + + + + + + + + + + + + + + +
+
+
+
+

MultiplexCrisprDOE

+ + +
+ + + + + + +

Tool

+
    +
  • Method: simulation

    +
  • +
  • Description: simulation-based approaches for computing the minimal plant library size that guarantees full combinatorial coverage (and other relevant statistics)

    +
  • +
  • Mode: simulateNx2countKOs

    +
  • +
  • Mode description: Counts of the number of knockouts per plant in the experiment

    +
  • +
+

Variables

+ + +

8 rows × 2 columns

ArgumentValue
AnyAny
1# of target genes in the experiment20
2# of gRNAs designed per target gene6
3# of gRNAs / combi gRNA/Cas construct2
4Total number of gRNAs120
5Relative frequencies for all gRNAsexample_data.xlsx
6Genome editing efficiencies for all gRNAsexample_data.xlsx
7Global knockout efficiency0.8
8# of simulated experiments10
+ + + + + +
+Output written to:
+countKOs.xlsx
+
+ + + +
+ +
+
+
+ + +