Mercurial > repos > jfb > kinamine7_7
diff KinaMine-Galaxy-7-7/src/kinamine/Run.java @ 0:67635b462045 draft
Uploaded
author | jfb |
---|---|
date | Tue, 20 Feb 2018 14:31:15 -0500 |
parents | |
children |
line wrap: on
line diff
/**
 *****************************************************************************
 * <p>
 * Copyright (c) Regents of the University of Minnesota. All Rights Reserved.
 * <p>
 * Author: Kevin Murray University of Minnesota - (murra668@umn.edu)
 * <p>
 *****************************************************************************
 */
package kinamine;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Main KinaMine object container. A run holds the list of accepted peptides,
 * a map of the proteins those peptides reference, and a map of the motifs
 * generated around each phospho-tyrosine.
 *
 * @version 1.0
 * @author murra668
 */
public final class Run {

    /** Number of residues kept on each side of the phospho-tyrosine. */
    private static final int FLANK = 7;

    /** Column of the peptide report holding the ';'-separated accessions. */
    private static final int COL_IDS = 3;

    /** Column of the peptide report holding the confidence score. */
    private static final int COL_CONF = 7;

    /** Column of the peptide report holding the modification list. */
    private static final int COL_MODS = 9;

    /** List of peptide. */
    public final ArrayList<Peptide> pepList;

    /**
     * Non-redundant database, keyed by accession. A key whose value is
     * {@code null} was referenced by a peptide but never matched by an entry
     * of the FASTA database.
     */
    public final Map<String, Protein> database;

    /** Non-redundant collection of motifs, keyed by motif sequence. */
    public final Map<String, Motif> motifs;

    /** Amino Acids and Properties. */
    public static final AminoAcid ACIDS = new AminoAcid();

    /**
     * Constructs a run and processes the submitted peptide report and
     * fasta database for motif generation.
     *
     * @param peps lines from peptide report
     * @param prots lines for fasta database
     * @param score threshold; only report rows whose confidence column is
     *              strictly greater than this value are kept
     */
    public Run(ArrayList<String> peps, ArrayList<String> prots, double score) {

        this.pepList = new ArrayList<>();
        this.database = new HashMap<>();
        this.motifs = new HashMap<>();

        // Order matters: the peptide pass builds the inclusion list that the
        // database pass fills in, and motif generation reads both.
        extractPeptides(peps, score);
        extractDatabase(prots);
        generateMotifs();
    }

    /**
     * Extracts individual peptides from the tab-separated peptide report.
     * Every accepted row is appended to {@link #pepList}, and each accession
     * it references is registered in {@link #database} with a {@code null}
     * placeholder.
     * <p>
     * A row is accepted when its confidence column is greater than
     * {@code score}, its accession column is not empty, and its modification
     * column contains {@code Phospho(Y)}.
     *
     * @param lines rows of the distinct peptide summary
     * @param score confidence threshold
     * @throws NumberFormatException if the confidence column of a row with
     *         enough columns is not numeric
     */
    private void extractPeptides(ArrayList<String> lines,
            double score) {

        for (String line : lines) {

            // Peptide report is tabular.
            String[] pepInfo = line.split("\\t");

            // split() drops trailing empty columns, so a blank line or a row
            // without modifications can be shorter than expected. Such a row
            // cannot carry a Phospho(Y) annotation, so skip it instead of
            // failing on the column access below.
            if (pepInfo.length <= COL_MODS) {
                continue;
            }

            if (Double.parseDouble(pepInfo[COL_CONF]) > score
                    && !pepInfo[COL_IDS].isEmpty()
                    && pepInfo[COL_MODS].contains("Phospho(Y)")) {

                ArrayList<String> ids = splitIds(pepInfo[COL_IDS]);

                // Add IDs to inclusion list, if not present. Accessions
                // containing "RRRRR" are excluded (presumably decoy entries;
                // confirm against the report format). An empty accession is
                // excluded too, because extractDatabase matches by substring
                // and "" would match every FASTA entry.
                for (String ref : ids) {
                    if (!ref.isEmpty()
                            && !ref.contains("RRRRR")
                            && !database.containsKey(ref)) {
                        this.database.put(ref, null);
                    }
                }

                this.pepList.add(new Peptide(pepInfo, ids));
            }
        }
    }

    /**
     * Fills the placeholders of {@link #database} from the FASTA database.
     * For every entry whose first column contains an accession of the
     * inclusion list, a new protein object is stored under that accession.
     * If several entries match the same accession the last one wins.
     *
     * @param fastaDatabase tab-separated lines of the FASTA database
     */
    private void extractDatabase(ArrayList<String> fastaDatabase) {

        Set<String> protList = this.database.keySet();

        for (String line : fastaDatabase) {

            // FASTA database is tabular file.
            String[] protInfo = line.split("\\t");

            for (String name : protList) {
                if (protInfo[0].contains(name)) {
                    // replace() only swaps the value of an existing key, so
                    // the key set being iterated is not structurally changed.
                    this.database.replace(name, new Protein(protInfo));
                }
            }
        }
    }

    /**
     * Generates a motif around each phospho-tyrosine of each peptide in
     * pepList. Peptides without any protein accession are skipped.
     */
    private void generateMotifs() {

        for (Peptide peptide : pepList) {
            if (hasProtID(peptide)) {
                for (int index : peptide.tyrIndex) {
                    genSeq(peptide, index);
                }
            }
        }
    }

    /**
     * Increments the phospho-tyrosine counter of every protein in the
     * database that is referenced by the given accessions. Accessions that
     * are unknown, or that were never matched by a FASTA entry, are ignored.
     *
     * @param id accessions to mark
     */
    private void markMod(List<String> id) {

        for (String ref : id) {
            // The value is null when the accession is on the inclusion list
            // but no FASTA entry matched it.
            Protein protein = database.get(ref);
            if (protein != null) {
                protein.phosphoTyr++;
            }
        }
    }

    /**
     * Generates the peptide motif around the given position and registers
     * it. The motif covers up to {@value #FLANK} residues on each side of the
     * position and is truncated at either end of the peptide sequence when
     * the peptide is too short.
     *
     * @param peptide peptide providing the sequence
     * @param index 1-based position of the phospho-tyrosine in the peptide
     *              sequence
     */
    private void genSeq(Peptide peptide, int index) {

        String seq = peptide.seq;
        int start = windowStart(index);

        // After cutting, the tyrosine sits at (index - start) within the
        // motif, still counted from 1.
        addMotif(window(seq, index), peptide, index - start);
    }

    /**
     * Determine if peptide has a protein ID.
     *
     * @param peptide peptide to inspect
     * @return {@code true} if the peptide's accession list is not empty
     */
    public boolean hasProtID(Peptide peptide) {
        return !peptide.id.isEmpty();
    }

    /**
     * Adds seq to the motif map. A motif that is not yet known is stored
     * with the peptide's reference string. For a known motif, accessions of
     * the peptide that the motif does not list yet are appended and the
     * motif is rebuilt. Newly associated proteins get their phospho-tyrosine
     * counter incremented.
     *
     * @param seq motif sequence
     * @param peptide peptide the motif was cut from
     * @param index 1-based position of the phospho-tyrosine within seq
     */
    private void addMotif(String seq, Peptide peptide, int index) {

        Motif known = motifs.get(seq);

        if (known == null) {
            ArrayList<String> regenSeqs = regenSeq(peptide.id, seq, index);
            motifs.put(seq, new Motif(seq, peptide.ref, index, regenSeqs));
            markMod(peptide.id);
            return;
        }

        List<String> ids = parseRef(known.ref);
        List<String> newIds = new ArrayList<>();
        for (String ref : peptide.id) {
            if (!ids.contains(ref)) {
                newIds.add(ref);
            }
        }

        if (newIds.isEmpty()) {
            return;
        }

        markMod(newIds);
        ids.addAll(newIds);

        // Every accession is followed by ';', including the last one.
        StringBuilder joined = new StringBuilder();
        for (String id : ids) {
            joined.append(id).append(';');
        }

        ArrayList<String> regenSeqs = regenSeq(ids, seq, index);
        motifs.put(seq, new Motif(seq, joined.toString(), index, regenSeqs));
    }

    /**
     * Parse the reference string of peptide.
     *
     * @param ref ';'-separated accessions
     * @return the accessions with surrounding whitespace removed, in a new
     *         mutable list
     */
    public List<String> parseRef(String ref) {
        return splitIds(ref);
    }

    /**
     * Cuts the motif window out of each referenced protein. For every
     * accession with a protein in the database, the motif is located in the
     * protein sequence and the window around the phospho-tyrosine is taken
     * from the protein, so flanks missing from the peptide can be filled in.
     * Proteins that do not contain the motif are skipped.
     *
     * @param ids accessions to look up
     * @param seq motif sequence cut from the peptide
     * @param i 1-based position of the phospho-tyrosine within seq
     * @return the distinct windows found, in lookup order
     */
    private ArrayList<String> regenSeq(List<String> ids, String seq, int i) {

        ArrayList<String> seqs = new ArrayList<>();

        for (String id : ids) {
            Protein protein = database.get(id);
            if (protein == null) {
                // Unknown accession, or never matched by a FASTA entry.
                continue;
            }

            String prot = protein.seq;
            int offset = prot.indexOf(seq);
            if (offset < 0) {
                // Motif absent from this protein: there is no position to
                // centre a window on.
                continue;
            }

            // 0-based motif start plus the 1-based position inside the motif
            // gives the 1-based position in the protein.
            String motif = window(prot, offset + i);

            if (!seqs.contains(motif)) {
                seqs.add(motif);
            }
        }

        return seqs;
    }

    /**
     * Splits a ';'-separated accession string and trims each element.
     *
     * @param ref ';'-separated accessions
     * @return new mutable list of trimmed accessions
     */
    private static ArrayList<String> splitIds(String ref) {
        ArrayList<String> ids = new ArrayList<>();
        for (String id : ref.split(";")) {
            ids.add(id.trim());
        }
        return ids;
    }

    /**
     * Returns the 0-based start of the window around a 1-based position.
     *
     * @param index 1-based position of the central residue
     * @return 0-based start offset, never negative
     */
    private static int windowStart(int index) {
        return Math.max(0, index - 1 - FLANK);
    }

    /**
     * Cuts the window of up to {@value #FLANK} residues on each side of the
     * given position, truncated at the ends of the sequence.
     *
     * @param seq sequence to cut from
     * @param index 1-based position of the central residue
     * @return the window
     */
    private static String window(String seq, int index) {
        int end = Math.min(seq.length(), index + FLANK);
        return seq.substring(windowStart(index), end);
    }

}