view KinaMine-Galaxy-7-7/src/kinamine/Run.java @ 0:67635b462045 draft

Uploaded
author jfb
date Tue, 20 Feb 2018 14:31:15 -0500
parents
children
line wrap: on
line source

/**
 *****************************************************************************
 * <p>
 * Copyright (c) Regents of the University of Minnesota. All Rights Reserved.
 * <p>
 * Author: Kevin Murray University of Minnesota - (murra668@umn.edu)
 * <p>
 *****************************************************************************
 */
package kinamine;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Main KinaMine object container. A run holds the phospho-tyrosine peptides
 * accepted from the peptide report, the proteins those peptides reference
 * (keyed by accession), and the non-redundant motifs generated around each
 * phospho-tyrosine.
 *
 * @version 1.0
 * @author murra668
 */
public final class Run {

    /** Number of residues kept on each side of the phospho-tyrosine. */
    private static final int FLANK = 7;

    /**
     * Number of tab-separated columns a report line must have so that every
     * column read by {@link #extractPeptides} (indices 3, 7 and 9) exists.
     */
    private static final int MIN_PEPTIDE_COLUMNS = 10;

    /** Peptides accepted from the peptide report. */
    public final ArrayList<Peptide> pepList;

    /**
     * Non-redundant database, keyed by accession. A key mapped to
     * {@code null} is an accession referenced by a peptide for which no
     * FASTA entry was found.
     */
    public final Map<String, Protein> database;

    /** Non-redundant collection of motifs, keyed by motif sequence. */
    public final Map<String, Motif> motifs;

    /** Amino Acids and Properties. */
    public static final AminoAcid ACIDS = new AminoAcid();

    /**
     * Constructs a run and processes the submitted peptide report and
     * fasta database for motif generation.
     *
     * @param peps lines from peptide report
     * @param prots lines for fasta database
     * @param score FDR score; only peptides scoring strictly above it are kept
     */
    public Run(ArrayList<String> peps, ArrayList<String> prots, double score) {

        this.pepList = new ArrayList<>();
        this.database = new HashMap<>();
        this.motifs = new HashMap<>();

        // Order matters: the peptides define which accessions are wanted,
        // the database pass fills those accessions in, and motif generation
        // reads both.
        extractPeptides(peps, score);
        extractDatabase(prots);
        generateMotifs();

    }

    /**
     * Extracts individual peptides from the peptide report, appends a new
     * peptide object to {@link #pepList} for every accepted line, and
     * registers the accessions it references in {@link #database}.
     * <p>
     * A line is accepted when its score (column 7) is strictly greater than
     * the threshold, its accession column (column 3) is not empty, and its
     * modification column (column 9) contains {@code Phospho(Y)}. Lines with
     * too few columns to be tested are skipped.
     *
     * @param lines Distinct peptide summary, one tab-separated record per line.
     * @param score FDR score.
     * @throws NumberFormatException if column 7 of a line is not a number
     */
    private void extractPeptides(ArrayList<String> lines,
            double score) {

        for (String line : lines) {

            // Peptide report is tabular.
            String[] pepInfo = line.split("\\t");

            // Blank or truncated lines cannot satisfy the filter below;
            // skip them instead of failing on a missing column.
            if (pepInfo.length < MIN_PEPTIDE_COLUMNS) {
                continue;
            }

            if (Double.parseDouble(pepInfo[7]) > score
                    && !pepInfo[3].isEmpty()
                    && pepInfo[9].contains("Phospho(Y)")) {

                ArrayList<String> ids = splitIds(pepInfo[3]);

                // Add IDs to the inclusion list, if not present. Empty IDs
                // are left out because an empty string is contained in every
                // FASTA header and would match every entry. IDs containing
                // "RRRRR" are left out as well (presumably reversed/decoy
                // entries - TODO confirm).
                for (String ref : ids) {
                    if (!ref.isEmpty()
                            && !ref.contains("RRRRR")
                            && !database.containsKey(ref)) {
                        this.database.put(ref, null);
                    }
                }

                // Every accepted line yields a peptide; no de-duplication
                // is performed here.
                this.pepList.add(new Peptide(pepInfo, ids));
            }
        }
    }

    /**
     * Extracts proteins from the FASTA database lines and stores a new
     * protein object under every registered accession that occurs in the
     * first column of the line. Accessions without a matching line keep
     * their {@code null} value.
     * <p>
     * NOTE(review): matching is by substring, so an accession that is a
     * prefix of another accession will also match the longer one's entry.
     *
     * @param fastaDatabase FASTA entries, one tab-separated record per line
     */
    private void extractDatabase(ArrayList<String> fastaDatabase) {

        Set<String> protList = this.database.keySet();

        for (String line : fastaDatabase) {

            // FASTA database is tabular file.
            String[] protInfo = line.split("\\t");
            String header = protInfo[0];

            // Search if protein is in inclusion list.
            for (String name : protList) {
                if (header.contains(name)) {
                    // replace() only changes the value of an existing key,
                    // so the key set being iterated is not modified.
                    this.database.replace(name, new Protein(protInfo));
                }
            }
        }
    }

    /**
     * Generates motifs around each phospho-tyrosine of each peptide in
     * pepList. Peptides without any protein ID are excluded.
     */
    private void generateMotifs() {

        for (Peptide peptide : pepList) {

            // Check to see if peptide has reference accession.
            if (hasProtID(peptide)) {

                // For each phospho-tyrosine in the peptide.
                for (int index : peptide.tyrIndex) {
                    genSeq(peptide, index);
                }
            }
        }
    }

    /**
     * Increments the phospho-tyrosine counter of every protein in the
     * database that is referenced by the given IDs. IDs that are unknown, or
     * known but without a FASTA entry, are ignored.
     *
     * @param id protein accessions to mark
     */
    private void markMod(List<String> id) {

        for (String ref : id) {
            Protein protein = database.get(ref);

            // A registered accession may still map to null when the FASTA
            // database had no entry for it.
            if (protein != null) {
                protein.phosphoTyr++;
            }
        }
    }

    /**
     * Generates the peptide motif around the given position and records it.
     * The motif is the window of up to 7 residues on each side of the
     * position; it is shorter when the peptide does not extend that far.
     *
     * @param peptide peptide the motif is taken from
     * @param index position of the phospho-tyrosine in the peptide sequence
     *              (the window arithmetic treats it as 1-based)
     */
    private void genSeq(Peptide peptide, int index) {

        String seq = peptide.seq;

        // Re-base the position onto the motif: 8 when the full left flank
        // is available, otherwise unchanged because the motif starts at the
        // beginning of the peptide.
        int motifIndex = index - windowStart(index);

        addMotif(window(seq, index), peptide, motifIndex);

    }

    /**
     * Determine if peptide has a protein ID.
     *
     * @param peptide peptide to inspect
     * @return {@code true} if the peptide's ID list is not empty
     */
    public boolean hasProtID(Peptide peptide) {
        return !peptide.id.isEmpty();
    }

    /**
     * Adds seq to the motif map. A new sequence is stored with the peptide's
     * references; for a known sequence the stored motif is replaced only if
     * the peptide contributes protein IDs that were not recorded yet. Newly
     * seen proteins have their phospho-tyrosine counter incremented.
     *
     * @param seq motif sequence
     * @param peptide peptide the motif was generated from
     * @param index position of the phospho-tyrosine within seq
     */
    private void addMotif(String seq, Peptide peptide, int index) {

        Motif existing = motifs.get(seq);

        if (existing == null) {
            ArrayList<String> regenSeqs = regenSeq(peptide.id, seq, index);
            motifs.put(seq, new Motif(seq, peptide.ref, index, regenSeqs));
            markMod(peptide.id);
            return;
        }

        List<String> refs = peptide.id;
        List<String> ids = parseRef(existing.ref);
        ArrayList<String> newID = new ArrayList<>();

        for (String ref : refs) {
            if (!ids.contains(ref)) {
                newID.add(ref);
            }
        }

        if (newID.isEmpty()) {
            return;
        }

        markMod(newID);
        ids.addAll(newID);

        // References are stored as "id;id;...;" with a trailing separator.
        String temp = String.join(";", ids) + ";";

        ArrayList<String> regenSeqs = regenSeq(ids, seq, index);
        motifs.put(seq, new Motif(seq, temp, index, regenSeqs));
    }

    /**
     * Parse the reference string of peptide.
     *
     * @param ref semicolon-separated references
     * @return the trimmed references, in order of appearance
     */
    public List<String> parseRef(String ref) {
        return splitIds(ref);
    }

    /**
     * Regenerates the motif window from the sequence of every referenced
     * protein that has a FASTA entry and contains the motif.
     *
     * @param ids protein accessions to look up
     * @param seq motif sequence to locate in each protein
     * @param i position of the phospho-tyrosine within seq
     * @return distinct windows, in order of first appearance
     */
    private ArrayList<String> regenSeq(List<String> ids, String seq, int i) {

        ArrayList<String> seqs = new ArrayList<>();

        for (String id : ids) {
            Protein protein = database.get(id);

            // Unknown accession, or accession without a FASTA entry.
            if (protein == null) {
                continue;
            }

            String prot = protein.seq;
            int offset = prot.indexOf(seq);

            // The motif does not occur in this protein; a window computed
            // from offset -1 would come from an unrelated position.
            if (offset < 0) {
                continue;
            }

            String motif = window(prot, offset + i);

            if (!seqs.contains(motif)) {
                seqs.add(motif);
            }
        }

        return seqs;

    }

    /**
     * Splits a semicolon-separated reference string into trimmed IDs.
     *
     * @param ref semicolon-separated references
     * @return mutable list of trimmed IDs
     */
    private static ArrayList<String> splitIds(String ref) {
        ArrayList<String> ids = new ArrayList<>();
        for (String id : ref.split(";")) {
            ids.add(id.trim());
        }
        return ids;
    }

    /**
     * Zero-based start of the window around the given position.
     *
     * @param index position of the central residue
     * @return start offset, clamped to the beginning of the sequence
     */
    private static int windowStart(int index) {
        return Math.max(0, index - FLANK - 1);
    }

    /**
     * Cuts the window of up to {@link #FLANK} residues on each side of the
     * given position, clamped to the bounds of the sequence.
     *
     * @param seq sequence to cut from
     * @param index position of the central residue
     * @return the window, at most {@code 2 * FLANK + 1} residues long
     */
    private static String window(String seq, int index) {
        int end = Math.min(seq.length(), index + FLANK);
        return seq.substring(windowStart(index), end);
    }

}