view MultiplexCrisprDOE.jl @ 0:cc0957c46408 draft

"planemo upload for repository https://github.com/kirstvh/MultiplexCrisprDOE commit b6c1b1860eee82b06ed4a592d1f9eee6886be318-dirty"
author padge
date Thu, 12 May 2022 17:39:18 +0000
parents
children
line wrap: on
line source

"""
    gRNA_frequency_distribution(m, 
                                sd, 
                                l, 
                                u, 
                                n_gRNA_total; 
                                normalize = true, 
                                visualize = false)

Generates vector with frequencies in the combinatorial gRNA/Cas9 construct library for all gRNAs

***INPUT***
m: the average abundance of the gRNAs (in terms of absolute or relative frequency)
sd: the standard deviation on the gRNA abundances (in terms of absolute or relative frequency)
l: minimal gRNA abundance (in terms of absolute or relative frequency)
u: maximal gRNA abundance (in terms of absolute or relative frequency)
n_gRNA_total: the total number of gRNAs in the experiment
normalize: if set to "true", the gRNA abundances (absolute frequencies) are converted into relative frequencies
visualize: if set to "true", a histogram of all gRNA abundances is plotted

***OUTPUT***
p_gRNA_freq: vector with frequencies for all gRNAs in the construct library
"""
function gRNA_frequency_distribution(m, 
                                    sd, 
                                    l, 
                                    u, 
                                    n_gRNA_total; 
                                    normalize = true, 
                                    visualize = false)

    d_gRNA_freq = truncated(Normal(m, sd), l, u)  # gRNA frequency distribution
    p_gRNA_freq = collect(rand(d_gRNA_freq, n_gRNA_total))  # sample gRNA frequencies from distribution
    
    if normalize # convert into relative frequencies
        p_gRNA_freq /= sum(p_gRNA_freq)
    end

    if visualize
        return histogram(p_gRNA_freq, label="", 
            xlabel="Number of reads per gRNA", 
            linecolor="white", 
            normalize=:probability,
            xtickfontsize=10,ytickfontsize=10,
            color=:mediumturquoise, size=(600,350), bins = 25,
            ylabel="Relative frequency", 
            title="gRNA frequency distribution")
    else
        return p_gRNA_freq
    end
end

"""
    gRNA_edit_distribution(f_act, 
                            ϵ_edit_act, 
                            ϵ_edit_inact, 
                            sd_act, 
                            n_gRNA_total; 
                            visualize = false)   

Generates vector with genome editing efficiencies for all the gRNAs in the experiment. 

***INPUT***
f_act: fraction of all gRNAs that is active
ϵ_edit_act: Average genome editing efficiency for active gRNAs - mean of the genome editing efficiency distribution for active gRNAs
ϵ_edit_inact: Average genome editing efficiency for inactive gRNAs - mean of the genome editing efficiency distribution for inactive gRNAs
sd_act: standard deviation of the genome editing efficiency distributions for active and inactive gRNAs
n_gRNA_total: the total number of gRNAs in the experiment
visualize: if set to "true", a histogram of all genome editing efficiency is plotted

***OUTPUT***
p_gRNA_edit: vector with genome editing efficiencies for all gRNAs
"""
function gRNA_edit_distribution(f_act, ϵ_edit_act, ϵ_edit_inact, sd_act, n_gRNA_total; visualize=false)   
    d_act = Binomial(1, f_act) # there is a probability f_act that a gRNA is active
    d_high_act = truncated(Normal(ϵ_edit_act, sd_act), 0.01, 1)  # average genome editing efficiency for active gRNAs is equal to ϵ_edit_act
    d_low_act = truncated(Normal(ϵ_edit_inact, sd_act), 0.01, 1) # average genome editing efficiency for inactive gRNAs is equal to ϵ_edit_inact
    p_gRNA_edit = zeros(n_gRNA_total) # initialize vector with genome editing efficiencies for gRNAs
    
    for i in 1:n_gRNA_total
        if rand(d_act, 1) == [1]  # gRNA is active
            p_gRNA_edit[i] = rand(d_high_act, 1)[1]
        else  # gRNA is inactive
            p_gRNA_edit[i] = rand(d_low_act, 1)[1]
        end
    end

    if visualize
        return histogram(p_gRNA_edit, 
                normalize = :probability,
                linecolor = "white",
                label="", 
                color=:turquoise4,
                xtickfontsize=10,ytickfontsize=10, xlim = (0, 1),
                xticks=(0:0.1:1),
                bins = 150,
                xlabel="gRNA editing efficiency", 
                ylabel="Relative frequency", 
                title="gRNA genome editing effiency distribution")

    else
        return p_gRNA_edit
    end
end

"""
    simulate_Nₓ₁(x, 
                g, 
                r, 
                n_gRNA_total, 
                p_gRNA_freq, 
                p_gRNA_edit, 
                ϵ_KO; 
                iter = 500)

Computes the expected value and the standard deviation of the minimal plant library size for full coverage of all single gene knockouts (E[Nx,1] and σ[Nx,1]) using simulation

***INPUT***
x: number of target genes in the experiment
g: number of gRNAs designed per target gene
r: number of gRNA sequences per combinatorial gRNA/Cas construct
n_gRNA_total: total number of gRNAs in the experiment
p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
p_gRNA_edit: vector with genome editing efficiencies for all gRNAs 
ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
iter: number of CRISPR/Cas experiments that are simulated to obtain E[Nₓ₁] and σ[Nₓ₁]

***OUTPUT***
E_Nₓ₁: expected value of the plant library size for full coverage of all single gene knockouts
sd_Nₓ₁: standard deviation on the plant library size for full coverage of all single gene knockouts
"""
function simulate_Nₓ₁(x, 
                    g, 
                    r, 
                    n_gRNA_total, 
                    p_gRNA_freq, 
                    p_gRNA_edit, 
                    ϵ_KO; 
                    iter = 500)

    @assert x * g == n_gRNA_total     
    Nₓ₁_vec = [] #stores number of plants to reach full coverage for each simulated experiment
        for i in 1:iter       
            genes_vec = [] # Initialize vector to store single gene knockouts that are observed in plants
            Nₓ₁ = 0
            while genes_vec != collect(1:x) # check if all possible single gene knockouts are present: if no full coverage, sample an additional plant           
                Nₓ₁ += 1 # count how many plants must be sampled to observe all single gene knockouts
                
                # sample combinatorial gRNA/Cas9 construct
                gRNA_indices_construct = findall((rand(Multinomial(r, p_gRNA_freq))) .!= 0)
                
                # execute mutations
                gRNA_indices_mutations = [gRNA for gRNA in gRNA_indices_construct if rand(Binomial(1, p_gRNA_edit[gRNA])) == 1]
            
                # effective gene knockout (loss-of-function) ?
                gRNA_indices_KO = [gRNA for gRNA in gRNA_indices_mutations if rand(Binomial(1, ϵ_KO)) == 1]
            
                # which genes are knocked out?
                genes_indices_KO = Int.(ceil.(gRNA_indices_KO / g)) 
                append!(genes_vec, genes_indices_KO)

                # update vector with observed gene knockouts
                genes_vec = Int.(sort(unique(genes_vec)))
            end
            push!(Nₓ₁_vec, Nₓ₁) # add plant library size for full coverage of current experiment to vector         
        end

    # Calculate expected value and standard deviation
    E_Nₓ₁ = mean(Nₓ₁_vec); 
    sd_Nₓ₁ = std(Nₓ₁_vec)

    return E_Nₓ₁, sd_Nₓ₁
end
   

"""
    BioCCP_Nₓ₁(x, 
                g, 
                r, 
                n_gRNA_total, 
                p_gRNA_freq, 
                p_gRNA_edit, 
                ϵ_KO)

Computes the expected value and the standard deviation of the minimal plant library size for 
full coverage of all single gene knockouts (E[Nx,1] and σ[Nx,1]) using BioCCP

***INPUT***
x: number of target genes in the experiment
g: number of gRNAs designed per target gene
r: number of gRNA sequences per combinatorial gRNA/Cas construct
n_gRNA_total: total number of gRNAs in the experiment
p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
p_gRNA_edit: vector with genome editing efficiencies for all gRNAs 
ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout

***OUTPUT***
E_Nₓ₁ : expected value of the plant library size for full coverage of all single gene knockouts
sd_Nₓ₁ : standard deviation on the plant library size for full coverage of all single gene knockouts
"""
function BioCCP_Nₓ₁(x, 
                    g, 
                    r, 
                    n_gRNA_total, 
                    p_gRNA_freq, 
                    p_gRNA_edit, 
                    ϵ_KO)
    
    # prepare input for BioCCP functions
    p_gRNAs = p_gRNA_freq .* p_gRNA_edit * ϵ_KO  # calculate probability for each gRNA to induce effective gene knockout
    p_genes = [sum(p_gRNAs[i:i+g-1]) for i in 1:g:n_gRNA_total]  # obtain probability of single gene knockout by summing up probability of all corresponding gRNAs to induce effective gene knockout
    
    # Apply BioCCP functions
    E_Nₓ₁ = expectation_minsamplesize(x; p=p_genes, r=r, normalize=false)
    sd_Nₓ₁ = std_minsamplesize(x; p=p_genes, r=r, normalize=false)

    return E_Nₓ₁, sd_Nₓ₁
end


"""
    simulate_Nₓ₂(x, 
                    g, 
                    r, 
                    n_gRNA_total, 
                    p_gRNA_freq, 
                    p_gRNA_edit, 
                    ϵ_KO; 
                    iter=500)

Computes the expected value and the standard deviation of the minimal plant library size for full coverage of all pairwise combinations of gene knockouts 
in a multiplex CRISPR/Cas experiment (E[Nx,2] and σ[Nx,2]) using simulation

***INPUT***
x: number of target genes in the experiment
g: number of gRNAs designed per target gene
r: number of gRNA sequences per combinatorial gRNA/Cas construct
n_gRNA_total: total number of gRNAs in the experiment
p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
p_gRNA_edit: vector with genome editing efficiencies for all gRNAs 
ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
iter: number of CRISPR/Cas experiments that are simulated to obtain E[Nₓ₂] and σ[Nₓ₂]

***OUTPUT***
E_Nₓ₂ : expected value of the plant library size for full coverage of all pairwise combinations of gene knockouts
sd_Nₓ₂ : standard deviation on the plant library size for full coverage of all pairwise combinations of gene knockouts
"""
function simulate_Nₓ₂(x, 
                    g, 
                    r, 
                    n_gRNA_total, 
                    p_gRNA_freq, 
                    p_gRNA_edit, 
                    ϵ_KO; 
                    iter = 500)

    @assert x * g == n_gRNA_total    
    Nₓ₂_vec = [] #stores number of plants to reach full coverage of all pairwise combinations of gene knockouts for each simulated experiment
       
    for i in 1:iter     
        X_interactions_count = zeros(x, x) # Initialize matrix to count pairwise interactions
        Nₓ₂ = 0
        while X_interactions_count != ones(x, x) # check if all pairwise combinations are present
            Nₓ₂ += 1 # count how many plants must be sampled to fill pairwise interaction matrix
                
            # sample combinatorial gRNA/Cas9 construct
            gRNA_indices_construct = findall((rand(Multinomial(r, p_gRNA_freq))) .!= 0)
                
            # execute mutations
            gRNA_indices_mutations = [gRNA for gRNA in gRNA_indices_construct if rand(Binomial(1, p_gRNA_edit[gRNA])) == 1]
            
            # effective gene knockout (loss-of-function) ?
            gRNA_indices_KO = [gRNA for gRNA in gRNA_indices_mutations if rand(Binomial(1, ϵ_KO)) == 1]
            
            # which genes are knocked out?
            genes_indices_KO = Int.(ceil.(gRNA_indices_KO / g))
            
            # which pairwise combinations are present?
            interactions = collect(combinations(genes_indices_KO, 2))
                
            # Store represented combinations in matrix
            for interaction in interactions
                j = interaction[1]; k = interaction[2]
                X_interactions_count[j,k] = 1; X_interactions_count[k,j] = 1; X_interactions_count[j,j] = 1; X_interactions_count[k,k] = 1          
            end  
        end

        push!(Nₓ₂_vec, Nₓ₂)               
        end

    # Calculate expected value and standard deviation
    E_Nₓ₂ = mean(Nₓ₂_vec)
    sd_Nₓ₂ = std(Nₓ₂_vec)
        
    return E_Nₓ₂, sd_Nₓ₂
end

"""
    BioCCP_Nₓ₂(x, 
                g, 
                r, 
                n_gRNA_total, 
                p_gRNA_freq, 
                p_gRNA_edit, ϵ_KO)

Computes the expected value and the standard deviation of the minimal plant library size for full coverage of all pairwise combinations of gene knockouts in a multiplex CRISPR/Cas experiment (E[Nx,2] and σ[Nx,2]) using BioCCP

    ***INPUT***
x: number of target genes in the experiment
g: number of gRNAs designed per target gene
r: number of gRNA sequences per combinatorial gRNA/Cas construct
n_gRNA_total: total number of gRNAs in the experiment
p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
p_gRNA_edit: vector with genome editing efficiencies for all gRNAs 
ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout

***OUTPUT***
E_Nₓ₂: expected value of the plant library size for full coverage of all pairwise combinations of gene knockouts
sd_Nₓ₂: standard deviation on the plant library size for full coverage of all pairwise combinations of gene knockouts
"""
function BioCCP_Nₓ₂(x, 
                                         g, 
                                         r, 
                                         n_gRNA_total, 
                                         p_gRNA_freq, 
                                         p_gRNA_edit, ϵ_KO)
    
    # how many pairwise combinations of gRNAs
    ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 2))
    n_combinations_gRNA = length(ind_combinations_gRNA)
    
    # calculate probability and activity of gRNA combinations
    p_combinations_gRNA_library = zeros(n_combinations_gRNA)
    p_combinations_gRNA_act = zeros(n_combinations_gRNA)
    for i in 1:n_combinations_gRNA
        p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]]
        p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]]
    end
    
    # normalize probability gRNA combinations
    p_combinations_gRNA_library /= sum(p_combinations_gRNA_library)

    # select pairwise gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes)
    p_combinations_gRNA_library_interest = []
    p_combinations_gRNA_act_interest = []
    ind_combinations_gRNA_interest = []
    for i in 1:n_combinations_gRNA
        if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g)
            push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i])
            push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i])
            push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i])
        end
    end
        
    n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest)
    p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^2

    # sum up probabilities or gRNA combinations for corresponding gene knockout combinations
    p_genes_matrix = zeros(x, x)
    for i in 1:n_combinations_gRNA_interest
        gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g))
        gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g))
        p_genes_matrix[gene1, gene2] += p_combinations_gRNA[i]
    end
    p_genes = collect([p_genes_matrix[i, j] for j in 2:size(p_genes_matrix, 1) for i in 1:j-1])  
    n_combinations_genes = length(p_genes)
    combinations_pp = length(collect(combinations(1:r, 2)))
    
    # Apply BioCCP functions
    E_Nₓ₂ = expectation_minsamplesize(n_combinations_genes; p=p_genes, r=combinations_pp, normalize=false)
    sd_Nₓ₂ = std_minsamplesize(n_combinations_genes; p=p_genes, r=combinations_pp, normalize=false)

    return E_Nₓ₂, sd_Nₓ₂
end

"""
    simulate_Nₓ₂_countKOs(x, 
                                         g, 
                                         r, 
                                         n_gRNA_total, 
                                         p_gRNA_freq, 
                                         p_gRNA_edit, ϵ_KO; iter=100000)

Counts the number of knockouts per plant in the experiment.

***INPUT***
x: number of target genes in the experiment
g: number of gRNAs designed per target gene
r: number of gRNA sequences per combinatorial gRNA/Cas construct
n_gRNA_total: total number of gRNAs in the experiment
p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
p_gRNA_edit: vector with genome editing efficiencies for all gRNAs 
ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
iter: number of plants that are sampled 

***OUTPUT***
n_KOs_vec: vector with the number of knockouts for each plant 
"""
function simulate_Nₓ₂_countKOs(x, 
                                g, 
                                r, 
                                n_gRNA_total, 
                                p_gRNA_freq, 
                                p_gRNA_edit, 
                                ϵ_KO; 
                                iter = 100000)
     
    @assert x * g == n_gRNA_total 

    n_KOs_vec = []  

    for j in 1:iter                           
        # sample combinatorial gRNA/Cas9 construct
        gRNA_indices_construct = findall((rand(Multinomial(r, p_gRNA_freq))) .!= 0)
                
        # execute mutations
        gRNA_indices_mutations = [gRNA for gRNA in gRNA_indices_construct if rand(Binomial(1, p_gRNA_edit[gRNA])) == 1]
            
        # effective gene knockout (loss-of-function) ?
        gRNA_indices_KO = [gRNA for gRNA in gRNA_indices_mutations if rand(Binomial(1, ϵ_KO)) == 1]
            
        # which genes are knocked out?
        genes_indices_KO = Int.(ceil.(gRNA_indices_KO / g))
            
        push!(n_KOs_vec, length(unique((genes_indices_KO))))
    end  

    return n_KOs_vec
end

"""
    simulate_Nₓ₃(x, 
                    g, 
                    r, 
                    n_gRNA_total, 
                    p_gRNA_freq, 
                    p_gRNA_edit, 
                    ϵ_KO; 
                    iter=500)

Computes the expected value and the standard deviation of the minimal plant library size for full coverage of all triple combinations of gene knockouts in 
a multiplex CRISPR/Cas experiment (E[Nx,3] and σ[Nx,3]) using simulation

***INPUT***
x: number of target genes in the experiment
g: number of gRNAs designed per target gene
r: number of gRNA sequences per combinatorial gRNA/Cas construct
n_gRNA_total: total number of gRNAs in the experiment
p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
p_gRNA_edit: vector with genome editing efficiencies for all gRNAs 
ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
iter: number of CRISPR/Cas experiments that are simulated to obtain E[Nₓ₃] and σ[Nₓ₃]

***OUTPUT***
E_Nₓ₃: expecteded value of the plant library size for full coverage of all triple combinations of gene knockouts
sd_Nₓ₃: standard deviation on the plant library size for full coverage of all triple combinations of gene knockouts
"""
function simulate_Nₓ₃(x, 
                    g, 
                    r, 
                    n_gRNA_total, 
                    p_gRNA_freq, 
                    p_gRNA_edit, 
                    ϵ_KO; 
                    iter = 500)

    @assert x * g == n_gRNA_total

    Nₓ₃_vec = [] # stores number of plants required for each experiment

    for i in 1:iter       
        # println("got till here")
        X_interactions_count = zeros(x, x, x) # Initialize matrix to count triple interactions
        # println(X_interactions_count)
        # println(ones(x, x, x))
        Nₓ₃ = 0

        while X_interactions_count != ones(x, x, x) # check if all triple combinations are present
            Nₓ₃ += 1 # count how many plants must be sampled to fill triple interaction matrix
                
            # sample combinatorial gRNA/Cas9 construct
            gRNA_indices_construct = findall((rand(Multinomial(r, p_gRNA_freq))) .!= 0)
                
            # execute mutations
            gRNA_indices_mutations = [gRNA for gRNA in gRNA_indices_construct if rand(Binomial(1, p_gRNA_edit[gRNA])) == 1]
            
            # effective gene knockout (loss-of-function) ?
            gRNA_indices_KO = [gRNA for gRNA in gRNA_indices_mutations if rand(Binomial(1, ϵ_KO)) == 1]
            
            # which genes are knocked out?
            genes_indices_KO = Int.(ceil.(gRNA_indices_KO / g))
            
            # which triple combinations are present?
            interactions = collect(combinations(genes_indices_KO, 3))
                
            # Store represented triple combinations in 3D-matrix
            for interaction in interactions
                j = interaction[1]
                k = interaction[2]
                l = interaction[3]
                X_interactions_count[j,k,l] = 1
                X_interactions_count[k,j,l] = 1
                X_interactions_count[l,j,k] = 1
                X_interactions_count[l,k,j] = 1
                X_interactions_count[j,l,k] = 1
                X_interactions_count[k,l,j] = 1
        
                X_interactions_count[:,l,l] .= 1
                X_interactions_count[:,k,k] .= 1
                X_interactions_count[:,j,j] .= 1
                X_interactions_count[l,:,l] .= 1
                X_interactions_count[k,:,k] .= 1
                X_interactions_count[j,:,j] .= 1
        
                X_interactions_count[j,j,:] .= 1
                X_interactions_count[k,k,:] .= 1
                X_interactions_count[l,l,:] .= 1
            end  
        end
        push!(Nₓ₃_vec, Nₓ₃)       
    end

    # calculate expected value and standard deviation
    E_Nₓ₃ = mean(Nₓ₃_vec); sd_Nₓ₃ = std(Nₓ₃_vec)

    return E_Nₓ₃, sd_Nₓ₃
end
   

"""
    BioCCP_Nₓ₃(x, 
            g, 
            r, 
            n_gRNA_total, 
            p_gRNA_freq, 
            p_gRNA_edit, 
            ϵ_KO)

Computes the expected value and the standard deviation of the minimal plant library size 
for full coverage of all triple combinations of gene knockouts in a multiplex CRISPR/Cas experiment (E[Nx,3] and σ[Nx,3]) using BioCCP

***INPUT***
x: number of target genes in the experiment
g: number of gRNAs designed per target gene
r: number of gRNA sequences per combinatorial gRNA/Cas construct
n_gRNA_total: total number of gRNAs in the experiment
p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
p_gRNA_edit: vector with genome editing efficiencies for all gRNAs 
ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout

***OUTPUT***
E_Nₓ₃: expecteded value of the plant library size for full coverage of all triple combinations of gene knockouts
sd_Nₓ₃: standard deviation on the plant library size for full coverage of all triple combinations of gene knockouts
"""
function BioCCP_Nₓ₃(x, 
                    g, 
                    r, 
                    n_gRNA_total, 
                    p_gRNA_freq, 
                    p_gRNA_edit, 
                    ϵ_KO)
    
    # how many triple combinations of gRNAs
    ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 3))
    n_combinations_gRNA = length(ind_combinations_gRNA)
    
    # calculate probability and activity of triple gRNA combinations
    p_combinations_gRNA_library = zeros(n_combinations_gRNA)
    p_combinations_gRNA_act = zeros(n_combinations_gRNA)
    for i in 1:n_combinations_gRNA
        p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]] * p_gRNA_freq[ind_combinations_gRNA[i][3]]
        p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]] * p_gRNA_edit[ind_combinations_gRNA[i][3]]
    end
    
    # normalize probability gRNA combinations
    p_combinations_gRNA_library /= sum(p_combinations_gRNA_library)

    # select triple gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes)
    p_combinations_gRNA_library_interest = []
    p_combinations_gRNA_act_interest = []
    ind_combinations_gRNA_interest = []
    for i in 1:n_combinations_gRNA
        if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g) && ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][3]/g) && ceil(ind_combinations_gRNA[i][3]/g) != ceil(ind_combinations_gRNA[i][2]/g)
            push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i])
            push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i])
            push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i])
        end
    end
        
    n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest)
    p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^3

    # sum up probabilities or gRNA combinations for corresponding gene knockout combinations
    p_genes_matrix = zeros(x, x, x)
    for i in 1:n_combinations_gRNA_interest
        gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g))
        gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g))
        gene3 = Int(ceil(ind_combinations_gRNA_interest[i][3]/g))
        p_genes_matrix[gene1, gene2, gene3] += p_combinations_gRNA[i]
    end
    
    combinations_genes = collect(combinations(1:x, 3))
    p_genes = []
        for combination in combinations_genes
            push!(p_genes, p_genes_matrix[combination[1], combination[2], combination[3]])
        end
        
    n_combinations_genes = length(p_genes)
    combinations_pp = length(collect(combinations(1:r, 3)))
    
    # apply BioCCP functions
    E_Nₓ₃ = expectation_minsamplesize(n_combinations_genes; p=p_genes, r=combinations_pp, normalize=false)
    sd_Nₓ₃ = std_minsamplesize(n_combinations_genes; p=p_genes, r=combinations_pp, normalize=false)

    return E_Nₓ₃, sd_Nₓ₃
end

"""
    BioCCP_Pₓ₁(x, 
                N,
                g, 
                r, 
                n_gRNA_total, 
                p_gRNA_freq, 
                p_gRNA_edit, 
                ϵ_KO)

Computes the probability of full coverage of all single gene knockouts (Px,1) for an experiment with given plant library size using BioCCP

***INPUT***
x: number of target genes in the experiment
N: plant library size
g: number of gRNAs designed per target gene
r: number of gRNA sequences per combinatorial gRNA/Cas construct
n_gRNA_total: total number of gRNAs in the experiment
p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
p_gRNA_edit: vector with genome editing efficiencies for all gRNAs 
ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout

***OUTPUT***
Pₓ₁: probability of full coverage of all single gene knockouts

"""
function BioCCP_Pₓ₁(x, 
                    N,
                    g, 
                    r, 
                    n_gRNA_total, 
                    p_gRNA_freq, 
                    p_gRNA_edit, 
                    ϵ_KO)
    
    # prepare input
    p_gRNAs = p_gRNA_freq .* p_gRNA_edit * ϵ_KO
    p_genes = [sum(p_gRNAs[i:i+g-1]) for i in 1:g:n_gRNA_total]
    
    # apply BioCCP function
    Pₓ₁ = success_probability(x, N; p=p_genes, r=r, normalize=false) 

    return Pₓ₁
end

"""
BioCCP_γₓ₁(x, 
            N,
            g, 
            r, 
            n_gRNA_total, 
            p_gRNA_freq, 
            p_gRNA_edit, 
            ϵ_KO)

Computes the expected coverage of all single gene knockouts (Px,1) for an experiment with given plant library size using BioCCP

***INPUT***
x: number of target genes in the experiment
N: plant library size
g: number of gRNAs designed per target gene
r: number of gRNA sequences per combinatorial gRNA/Cas construct
n_gRNA_total: total number of gRNAs in the experiment
p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
p_gRNA_edit: vector with genome editing efficiencies for all gRNAs 
ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout

***OUTPUT***
γₓ₁: expected coverage of all single gene knockouts
"""
function BioCCP_γₓ₁(x, 
                    N,
                    g, 
                    r, 
                    n_gRNA_total, 
                    p_gRNA_freq, 
                    p_gRNA_edit, 
                    ϵ_KO)
    
    p_gRNAs = p_gRNA_freq .* p_gRNA_edit * ϵ_KO
    p_genes = [sum(p_gRNAs[i:i+g-1]) for i in 1:g:n_gRNA_total]

    # Apply BioCCP function
    γₓ₁ = expectation_fraction_collected(x, N; p=p_genes, r=r, normalize=false) 

    return γₓ₁ 
end


"""
    BioCCP_Pₓ₂(x, 
                N,
                g, 
                r, 
                n_gRNA_total, 
                p_gRNA_freq, 
                p_gRNA_edit, 
                ϵ_KO)

Computes the probability of full coverage of all pairwise combinations of gene knockouts (Px,2) for an experiment with given plant library size using BioCCP

***INPUT***
x: number of target genes in the experiment
N: plant library size
g: number of gRNAs designed per target gene
r: number of gRNA sequences per combinatorial gRNA/Cas construct
n_gRNA_total: total number of gRNAs in the experiment
p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
p_gRNA_edit: vector with genome editing efficiencies for all gRNAs 
ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
                
***OUTPUT***
Pₓ₂: probability of full coverage of all pairwise combinations of gene knockouts
"""
function BioCCP_Pₓ₂(x, 
                    N,
                    g, 
                    r, 
                    n_gRNA_total, 
                    p_gRNA_freq, 
                    p_gRNA_edit, 
                    ϵ_KO)
    
    # how many pairwise combinations of gRNAs
    ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 2))
    n_combinations_gRNA = length(ind_combinations_gRNA)
    
    # calculate probability and activity of gRNA combinations
    p_combinations_gRNA_library = zeros(n_combinations_gRNA)
    p_combinations_gRNA_act = zeros(n_combinations_gRNA)
    for i in 1:n_combinations_gRNA
        p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]]
        p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]]
    end
    
    # normalize probability gRNA combinations
    p_combinations_gRNA_library /= sum(p_combinations_gRNA_library)

    # select pairwise gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes)
    p_combinations_gRNA_library_interest = []
    p_combinations_gRNA_act_interest = []
    ind_combinations_gRNA_interest = []
    for i in 1:n_combinations_gRNA
        if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g)
            push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i])
            push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i])
            push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i])
        end
    end
        
    n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest)
    p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^2

    # sum up probabilities or gRNA combinations for corresponding gene knockout combinations
    p_genes_matrix = zeros(x, x)
    for i in 1:n_combinations_gRNA_interest
        gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g))
        gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g))
        p_genes_matrix[gene1, gene2] += p_combinations_gRNA[i]
    end

    p_genes = collect([p_genes_matrix[i, j] for j in 2:size(p_genes_matrix, 1) for i in 1:j-1])  
    n_combinations_genes = length(p_genes)
    combinations_pp = length(collect(combinations(1:r, 2)))
    
    # Apply BioCCP function
    Pₓ₂ = success_probability(n_combinations_genes, N; p=p_genes, r=combinations_pp, normalize=false)

    return Pₓ₂ 
end

"""
    BioCCP_γₓ₂(x, 
                N,
                g, 
                r, 
                n_gRNA_total, 
                p_gRNA_freq, 
                p_gRNA_edit, 
                ϵ_KO)

Computes the expected coverage of all pairwise combinations of gene knockouts (γx,2) for an experiment with given plant library size using BioCCP

***INPUT***
x: number of target genes in the experiment
N: plant library size
g: number of gRNAs designed per target gene
r: number of gRNA sequences per combinatorial gRNA/Cas construct
n_gRNA_total: total number of gRNAs in the experiment
p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
p_gRNA_edit: vector with genome editing efficiencies for all gRNAs 
ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
                
***OUTPUT***
γₓ₂: expected coverage of all pairwise combinations of gene knockouts
"""
function BioCCP_γₓ₂(x, 
                    N,
                    g, 
                    r, 
                    n_gRNA_total, 
                    p_gRNA_freq, 
                    p_gRNA_edit, 
                    ϵ_KO)
    
    # how many pairwise combinations of gRNAs
    ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 2))
    n_combinations_gRNA = length(ind_combinations_gRNA)
    
    # calculate probability and activity of gRNA combinations
    p_combinations_gRNA_library = zeros(n_combinations_gRNA)
    p_combinations_gRNA_act = zeros(n_combinations_gRNA)
    for i in 1:n_combinations_gRNA
        p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]]
        p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]]
    end
    
    # normalize probability gRNA combinations
    p_combinations_gRNA_library /= sum(p_combinations_gRNA_library)

    # select pairwise gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes)
    p_combinations_gRNA_library_interest = []
    p_combinations_gRNA_act_interest = []
    ind_combinations_gRNA_interest = []
    for i in 1:n_combinations_gRNA
        if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g)
            push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i])
            push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i])
            push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i])
        end
    end
        
    n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest)
    p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^2

    # sum up probabilities or gRNA combinations for corresponding gene knockout combinations
    p_genes_matrix = zeros(x, x)
    for i in 1:n_combinations_gRNA_interest
        gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g))
        gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g))
        p_genes_matrix[gene1, gene2] += p_combinations_gRNA[i]
    end

    p_genes = collect([p_genes_matrix[i, j] for j in 2:size(p_genes_matrix, 1) for i in 1:j-1])  
    n_combinations_genes = length(p_genes)
    combinations_pp = length(collect(combinations(1:r, 2)))
    
    # Apply BioCCP function
    γₓ₂ = expectation_fraction_collected(n_combinations_genes, N; p=p_genes, r=combinations_pp, normalize=false)

    return γₓ₂ 
end


"""
    BioCCP_γₓ₃(x, 
                N,
                g, 
                r, 
                n_gRNA_total, 
                p_gRNA_freq, 
                p_gRNA_edit, 
                ϵ_KO)

Computes the expected coverage of all triple combinations of gene knockouts (γx,3) for an experiment with given plant library size using BioCCP

***INPUT***
x: number of target genes in the experiment
N: plant library size
g: number of gRNAs designed per target gene
r: number of gRNA sequences per combinatorial gRNA/Cas construct
n_gRNA_total: total number of gRNAs in the experiment
p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
p_gRNA_edit: vector with genome editing efficiencies for all gRNAs 
ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
                
***OUTPUT***
γₓ₃: expected coverage of all triple combinations of gene knockouts
"""
function BioCCP_γₓ₃(x, 
                N,
                g, 
                r, 
                n_gRNA_total, 
                p_gRNA_freq, 
                p_gRNA_edit, 
                ϵ_KO)
    
    # how many triple combinations of gRNAs
    ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 3))
    n_combinations_gRNA = length(ind_combinations_gRNA)
    
    # calculate probability and activity of triple gRNA combinations
    p_combinations_gRNA_library = zeros(n_combinations_gRNA)
    p_combinations_gRNA_act = zeros(n_combinations_gRNA)
    for i in 1:n_combinations_gRNA
        p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]] * p_gRNA_freq[ind_combinations_gRNA[i][3]]
        p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]] * p_gRNA_edit[ind_combinations_gRNA[i][3]]
    end
    
    # normalize probability gRNA combinations
    p_combinations_gRNA_library /= sum(p_combinations_gRNA_library)

    # select triple gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes)
    p_combinations_gRNA_library_interest = []
    p_combinations_gRNA_act_interest = []
    ind_combinations_gRNA_interest = []
    for i in 1:n_combinations_gRNA
        if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g) && ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][3]/g) && ceil(ind_combinations_gRNA[i][3]/g) != ceil(ind_combinations_gRNA[i][2]/g)
            push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i])
            push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i])
            push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i])
        end
    end
        
    n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest)
    p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^3

    # sum up probabilities or gRNA combinations for corresponding gene knockout combinations
    p_genes_matrix = zeros(x, x, x)
    for i in 1:n_combinations_gRNA_interest
        gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g))
        gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g))
        gene3 = Int(ceil(ind_combinations_gRNA_interest[i][3]/g))
        p_genes_matrix[gene1, gene2, gene3] += p_combinations_gRNA[i]
    end
    
    combinations_genes = collect(combinations(1:x, 3))
    p_genes = []
        for combination in combinations_genes
            push!(p_genes, p_genes_matrix[combination[1], combination[2], combination[3]])
        end
        
    n_combinations_genes = length(p_genes)
    combinations_pp = length(collect(combinations(1:r, 3)))
    
    # Apply BioCCP function
    γₓ₃ = expectation_fraction_collected(n_combinations_genes, N; p=p_genes, r=combinations_pp, normalize=false)

    return γₓ₃ 
end

"""
    BioCCP_Pₓ₃(x, 
                N,
                g, 
                r, 
                n_gRNA_total, 
                p_gRNA_freq, 
                p_gRNA_edit, 
                ϵ_KO)

Computes the probability of full coverage of all triple combinations of gene knockouts (Px,3) for an experiment with given plant library size using BioCCP

***INPUT***
x: number of target genes in the experiment
N: plant library size
g: number of gRNAs designed per target gene
r: number of gRNA sequences per combinatorial gRNA/Cas construct
n_gRNA_total: total number of gRNAs in the experiment
p_gRNA_freq: vector with relative frequencies for all gRNAs in the construct library (normalized!)
p_gRNA_edit: vector with genome editing efficiencies for all gRNAs 
ϵ_KO: global knockout efficiency; fraction of mutations leading to effective gene knockout
                
***OUTPUT***
Pₓ₃: probability of full coverage of all triple combinations of gene knockouts
"""
function BioCCP_Pₓ₃(x, 
                N,
                g, 
                r, 
                n_gRNA_total, 
                p_gRNA_freq, 
                p_gRNA_edit, 
                ϵ_KO)
    
    # how many triple combinations of gRNAs
    ind_combinations_gRNA = collect(combinations(1:n_gRNA_total, 3))
    n_combinations_gRNA = length(ind_combinations_gRNA)
    
    # calculate probability and activity of triple gRNA combinations
    p_combinations_gRNA_library = zeros(n_combinations_gRNA)
    p_combinations_gRNA_act = zeros(n_combinations_gRNA)
    for i in 1:n_combinations_gRNA
        p_combinations_gRNA_library[i] = p_gRNA_freq[ind_combinations_gRNA[i][1]] * p_gRNA_freq[ind_combinations_gRNA[i][2]] * p_gRNA_freq[ind_combinations_gRNA[i][3]]
        p_combinations_gRNA_act[i] = p_gRNA_edit[ind_combinations_gRNA[i][1]] * p_gRNA_edit[ind_combinations_gRNA[i][2]] * p_gRNA_edit[ind_combinations_gRNA[i][3]]
    end
    
    # normalize probability gRNA combinations
    p_combinations_gRNA_library /= sum(p_combinations_gRNA_library)

    # select triple gRNA combinations of which each component codes for different gene (goal is to study combinations of knockouts in different genes)
    p_combinations_gRNA_library_interest = []
    p_combinations_gRNA_act_interest = []
    ind_combinations_gRNA_interest = []
    for i in 1:n_combinations_gRNA
        if ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][2]/g) && ceil(ind_combinations_gRNA[i][1]/g) != ceil(ind_combinations_gRNA[i][3]/g) && ceil(ind_combinations_gRNA[i][3]/g) != ceil(ind_combinations_gRNA[i][2]/g)
            push!(p_combinations_gRNA_library_interest, p_combinations_gRNA_library[i])
            push!(p_combinations_gRNA_act_interest, p_combinations_gRNA_act[i])
            push!(ind_combinations_gRNA_interest, ind_combinations_gRNA[i])
        end
    end
        
    n_combinations_gRNA_interest = length(p_combinations_gRNA_library_interest)
    p_combinations_gRNA = p_combinations_gRNA_library_interest .* p_combinations_gRNA_act_interest * ϵ_KO^3

    # sum up probabilities or gRNA combinations for corresponding gene knockout combinations
    p_genes_matrix = zeros(x, x, x)
    for i in 1:n_combinations_gRNA_interest
        gene1 = Int(ceil(ind_combinations_gRNA_interest[i][1]/g))
        gene2 = Int(ceil(ind_combinations_gRNA_interest[i][2]/g))
        gene3 = Int(ceil(ind_combinations_gRNA_interest[i][3]/g))
        p_genes_matrix[gene1, gene2, gene3] += p_combinations_gRNA[i]
    end
    
    combinations_genes = collect(combinations(1:x, 3))
    p_genes = []
        for combination in combinations_genes
            push!(p_genes, p_genes_matrix[combination[1], combination[2], combination[3]])
        end
        
    n_combinations_genes = length(p_genes)
    combinations_pp = length(collect(combinations(1:r, 3)))
    
    # Apply BioCCP function
    Pₓ₃ = success_probability(n_combinations_genes, N; p=p_genes, r=combinations_pp, normalize=false)

    return Pₓ₃ 
end