0
|
1 /**
|
|
2 *****************************************************************************
|
|
3 * <p>
|
|
4 * Copyright (c) Regents of the University of Minnesota. All Rights Reserved.
|
|
5 * <p>
|
|
6 * Author: Kevin Murray University of Minnesota - (murra668@umn.edu)
|
|
7 * <p>
|
|
8 *****************************************************************************
|
|
9 */
|
|
10 package kinamine;
|
|
11
|
|
12 import java.util.ArrayList;
|
|
13 import java.util.Arrays;
|
|
14 import java.util.HashMap;
|
|
15 import java.util.List;
|
|
16 import java.util.Map;
|
|
17 import java.util.Set;
|
|
18
|
|
19 /**
|
|
20 * Main KinaMine object container. A run contains ArrayLists of peptide and
|
|
21 * proteins from the extracted file and list of amino acid chars.
|
|
22 *
|
|
23 * @version 1.0
|
|
24 * @author murra668
|
|
25 */
|
|
26 public final class Run {
|
|
27
|
|
28 /** List of peptide. */
|
|
29 public final ArrayList<Peptide> pepList;
|
|
30
|
|
31 /** Non-redundant database. */
|
|
32 public final Map<String, Protein> database;
|
|
33
|
|
34 /** Non-redundant collection of motifs. */
|
|
35 public final Map<String, Motif> motifs;
|
|
36
|
|
37 /** Amino Acids and Properties. */
|
|
38 public static final AminoAcid ACIDS = new AminoAcid();
|
|
39
|
|
40 /**
|
|
41 * Constructs a run and processes the submitted peptide report and
|
|
42 * fasta database for motif generation.
|
|
43 *
|
|
44 * @param peps lines from peptide report
|
|
45 * @param prots lines for fasta database
|
|
46 * @param score FDR score
|
|
47 */
|
|
48 public Run(ArrayList<String> peps, ArrayList<String> prots, double score) {
|
|
49
|
|
50 this.pepList = new ArrayList<>();
|
|
51 this.database = new HashMap();
|
|
52 this.motifs = new HashMap();
|
|
53
|
|
54 /** Extract peptides and generate custom protein database. */
|
|
55 extractPeptides(peps, score);
|
|
56
|
|
57 /** Extracts the proteins from the fasta database. */
|
|
58 extractDatabase(prots);
|
|
59
|
|
60 /** Generate motifs for each peptide. */
|
|
61 generateMotifs();
|
|
62
|
|
63 }
|
|
64
|
|
65 /**
|
|
66 * Extracts individual peptides from the peptide report and creates new
|
|
67 * peptide objects, returns a list of peptides.
|
|
68 *
|
|
69 * @param lines Distinct peptide summary.
|
|
70 * @param score FDR score.
|
|
71 * @return ArrayList of peptides
|
|
72 */
|
|
73 private void extractPeptides(ArrayList<String> lines,
|
|
74 double score) {
|
|
75
|
|
76 /** Process each line of the peptide report */
|
|
77 for (String line : lines) {
|
|
78
|
|
79 /** Peptide report is tabular. */
|
|
80 String[] pepInfo = line.split("\\t");
|
|
81
|
|
82 /**
|
|
83 * Check to see if the peptide possesses a Conf score greater than
|
|
84 * the FDR threshold, has a peptide id, and contains a
|
|
85 * phospho-tyrosine.
|
|
86 */
|
|
87 if (Double.valueOf(pepInfo[7]) > score
|
|
88 & !"".equals(pepInfo[3])
|
|
89 & pepInfo[9].contains("Phospho(Y)")) {
|
|
90
|
|
91 List<String> temp = Arrays.asList(pepInfo[3].split(";"));
|
|
92 ArrayList<String> ids = new ArrayList<>();
|
|
93 temp.stream().forEach((id) -> {
|
|
94 ids.add(id.trim());
|
|
95 });
|
|
96
|
|
97 /** Add IDs to inclusion list, if not present. */
|
|
98 for (String ref : ids) {
|
|
99 if (!ref.contains("RRRRR")
|
|
100 & !database.containsKey(ref.trim())) {
|
|
101 this.database.put(ref, null);
|
|
102 }
|
|
103 }
|
|
104
|
|
105 /** Construct a new peptide object. */
|
|
106 Peptide peptide = new Peptide(pepInfo, ids);
|
|
107
|
|
108 /** Add the peptide to list if unique. */
|
|
109 this.pepList.add(peptide);
|
|
110 }
|
|
111 }
|
|
112 }
|
|
113
|
|
114 /**
|
|
115 * Extracts individual proteins from a FASTA database, creating new protein
|
|
116 * objects from each entry and adding them to a master list.
|
|
117 *
|
|
118 * @param fastaDatabase
|
|
119 * @return ArrayList of Proteins.
|
|
120 */
|
|
121 private void extractDatabase(ArrayList<String> fastaDatabase) {
|
|
122
|
|
123 Set<String> protList = this.database.keySet();
|
|
124
|
|
125 /** Loop through each entry. */
|
|
126 for (String line : fastaDatabase) {
|
|
127
|
|
128 /** FASTA database is tabular file. */
|
|
129 String[] protInfo = line.split("\\t");
|
|
130
|
|
131 /** Search if protein is in inclusion list. */
|
|
132 for (String name : protList) {
|
|
133
|
|
134 if (protInfo[0].contains(name)) {
|
|
135
|
|
136 /** Add protein to list. */
|
|
137 this.database.replace(name, new Protein(protInfo));
|
|
138 }
|
|
139 }
|
|
140 }
|
|
141 }
|
|
142
|
|
143 /**
|
|
144 * Generates motifs around each phospho-tyrosine of each peptide in pepList.
|
|
145 * Motifs are -4 to 4 amino acids surrounding tyr. Not all entries from
|
|
146 * distinct peptide summary have IDs. Presently, those entries are excluded.
|
|
147 * For each peptide, find the corresponding protein, so the number of
|
|
148 * phospho-tyr can be recorded and the sequence can be utilized if the motif
|
|
149 * can not be generated from peptide sequence alone.
|
|
150 */
|
|
151 private void generateMotifs() {
|
|
152
|
|
153 for (Peptide peptide : pepList) {
|
|
154
|
|
155 /** Check to see if peptide has reference accession. */
|
|
156 if (hasProtID(peptide)) {
|
|
157
|
|
158 /** For phospho-tyrosine in the peptide. */
|
|
159 for (int index : peptide.tyrIndex) {
|
|
160
|
|
161 /** Generate the motif using peptide sequence. */
|
|
162 genSeq(peptide, index);
|
|
163
|
|
164 }
|
|
165 }
|
|
166 }
|
|
167 }
|
|
168
|
|
169 /**
|
|
170 * Find the proteins associated with the peptide ID in the database. Mark
|
|
171 * each protein's phospho-tyrosine field if visited.
|
|
172 *
|
|
173 * @param id
|
|
174 * @return protein sequence
|
|
175 */
|
|
176 private void markMod(List<String> id) {
|
|
177
|
|
178 /** Loop through each reference. */
|
|
179 for (String ref : id) {
|
|
180
|
|
181 /** If found, mark pY and capture sequence. */
|
|
182 if (database.containsKey(ref)) {
|
|
183 database.get(ref).phosphoTyr++;
|
|
184 }
|
|
185 }
|
|
186 }
|
|
187
|
|
188 /**
|
|
189 * Generate the peptide motif using the given index and sequence. Peptide
|
|
190 * motifs are the immediate -4 to +4 around a given index.
|
|
191 * <p>
|
|
192 * Some peptides may not have enough sequence to generate full motif.
|
|
193 *
|
|
194 * @param peptide
|
|
195 * @param index index of phospho-tyrosine in seq
|
|
196 * @param pSeq protein sequence
|
|
197 */
|
|
198 private void genSeq(Peptide peptide, int index) {
|
|
199
|
|
200 String motif = "";
|
|
201 String seq = peptide.seq;
|
|
202
|
|
203 /** Select surrounding amino acids. */
|
|
204 // if (index - 4 >= 1 & index + 4 <= seq.length()) {
|
|
205 // motif = seq.substring(index - 5, index + 4);
|
|
206 // index = 5;
|
|
207 // } else if (index - 4 < 1 & index + 4 <= seq.length()) {
|
|
208 // motif = seq.substring(0, index + 4);
|
|
209 // } else if (index - 4 >= 1 & index + 4 > seq.length()) {
|
|
210 // motif = seq.substring(index - 5, seq.length());
|
|
211 // index = 5;
|
|
212 // } else {
|
|
213 // motif = seq;
|
|
214 // }
|
|
215 if (index - 7 >= 1 & index + 7 <= seq.length()) {
|
|
216 motif = seq.substring(index - 8, index + 7);
|
|
217 index = 8;
|
|
218 } else if (index - 7 < 1 & index + 7 <= seq.length()) {
|
|
219 motif = seq.substring(0, index + 7);
|
|
220 } else if (index - 7 >= 1 & index + 7 > seq.length()) {
|
|
221 motif = seq.substring(index - 8, seq.length());
|
|
222 index = 8;
|
|
223 } else {
|
|
224 motif = seq;
|
|
225 }
|
|
226
|
|
227 addMotif(motif, peptide, index);
|
|
228
|
|
229 }
|
|
230
|
|
231 /**
|
|
232 * Determine if peptide has a protein ID.
|
|
233 *
|
|
234 * @param peptide
|
|
235 * @return
|
|
236 */
|
|
237 public boolean hasProtID(Peptide peptide) {
|
|
238 return !peptide.id.isEmpty();
|
|
239 }
|
|
240
|
|
241 /**
|
|
242 * Adds seq to motif map. Also pair peptide refs and index of phospho-
|
|
243 * -tyrosine.
|
|
244 *
|
|
245 * @param seq
|
|
246 * @param ref
|
|
247 * @param index
|
|
248 */
|
|
249 private void addMotif(String seq, Peptide peptide, int index) {
|
|
250
|
|
251 /** Check if sequence is unique. */
|
|
252 if (!motifs.containsKey(seq)) {
|
|
253 ArrayList<String> regenSeqs = regenSeq(peptide.id, seq, index);
|
|
254 motifs.put(seq, new Motif(seq, peptide.ref, index, regenSeqs));
|
|
255 markMod(peptide.id);
|
|
256 } else {
|
|
257 List<String> refs = peptide.id;
|
|
258 List<String> ids = parseRef(motifs.get(seq).ref);
|
|
259 ArrayList<String> newID = new ArrayList<>();
|
|
260
|
|
261 for (String ref : refs) {
|
|
262 if (!ids.contains(ref)) {
|
|
263 newID.add(ref);
|
|
264 }
|
|
265 }
|
|
266
|
|
267 if (!newID.isEmpty()) {
|
|
268 markMod(newID);
|
|
269 ids.addAll(newID);
|
|
270 String temp = "";
|
|
271 for (String id : ids) {
|
|
272 temp += id + ";";
|
|
273 }
|
|
274 ArrayList<String> regenSeqs = regenSeq(ids, seq, index);
|
|
275 motifs.put(seq, new Motif(seq, temp, index, regenSeqs));
|
|
276 }
|
|
277 }
|
|
278 }
|
|
279
|
|
280 /** Parse the reference string of peptide.
|
|
281 *
|
|
282 * @param ref
|
|
283 * @return
|
|
284 */
|
|
285 public List<String> parseRef(String ref) {
|
|
286 List<String> temp = Arrays.asList(ref.split(";"));
|
|
287 ArrayList<String> ids = new ArrayList<>();
|
|
288 temp.stream().forEach((id) -> {
|
|
289 ids.add(id.trim());
|
|
290 });
|
|
291
|
|
292 return ids;
|
|
293 }
|
|
294
|
|
295 private ArrayList<String> regenSeq(List<String> ids, String seq, int i) {
|
|
296
|
|
297 ArrayList<String> seqs = new ArrayList<>();
|
|
298
|
|
299 for (String id : ids) {
|
|
300 if (database.containsKey(id)) {
|
|
301 String prot = database.get(id).seq;
|
|
302 int index = prot.indexOf(seq) + i;
|
|
303
|
|
304 String motif = "";
|
|
305
|
|
306 if (index - 7 >= 1 & index + 7 <= prot.length()) {
|
|
307 motif = prot.substring(index - 8, index + 7);
|
|
308 } else if (index - 7 < 1 & index + 7 <= prot.length()) {
|
|
309 motif = prot.substring(0, index + 7);
|
|
310 } else if (index - 7 >= 1 & index + 7 > prot.length()) {
|
|
311 motif = prot.substring(index - 8, prot.length());
|
|
312 } else {
|
|
313 motif = prot;
|
|
314 }
|
|
315
|
|
316 if (!seqs.contains(motif)){
|
|
317 seqs.add(motif);
|
|
318 }
|
|
319
|
|
320 }
|
|
321 }
|
|
322
|
|
323 return seqs;
|
|
324
|
|
325 }
|
|
326
|
|
327 }
|