Mercurial > repos > jfb > kinamine7_7
comparison KinaMine-Galaxy-7-7/src/kinamine/Run.java @ 0:67635b462045 draft
Uploaded
author | jfb |
---|---|
date | Tue, 20 Feb 2018 14:31:15 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:67635b462045 |
---|---|
1 /** | |
2 ***************************************************************************** | |
3 * <p> | |
4 * Copyright (c) Regents of the University of Minnesota. All Rights Reserved. | |
5 * <p> | |
6 * Author: Kevin Murray University of Minnesota - (murra668@umn.edu) | |
7 * <p> | |
8 ***************************************************************************** | |
9 */ | |
10 package kinamine; | |
11 | |
12 import java.util.ArrayList; | |
13 import java.util.Arrays; | |
14 import java.util.HashMap; | |
15 import java.util.List; | |
16 import java.util.Map; | |
17 import java.util.Set; | |
18 | |
19 /** | |
20 * Main KinaMine object container. A run contains ArrayLists of peptide and | |
21 * proteins from the extracted file and list of amino acid chars. | |
22 * | |
23 * @version 1.0 | |
24 * @author murra668 | |
25 */ | |
26 public final class Run { | |
27 | |
28 /** List of peptide. */ | |
29 public final ArrayList<Peptide> pepList; | |
30 | |
31 /** Non-redundant database. */ | |
32 public final Map<String, Protein> database; | |
33 | |
34 /** Non-redundant collection of motifs. */ | |
35 public final Map<String, Motif> motifs; | |
36 | |
37 /** Amino Acids and Properties. */ | |
38 public static final AminoAcid ACIDS = new AminoAcid(); | |
39 | |
40 /** | |
41 * Constructs a run and processes the submitted peptide report and | |
42 * fasta database for motif generation. | |
43 * | |
44 * @param peps lines from peptide report | |
45 * @param prots lines for fasta database | |
46 * @param score FDR score | |
47 */ | |
48 public Run(ArrayList<String> peps, ArrayList<String> prots, double score) { | |
49 | |
50 this.pepList = new ArrayList<>(); | |
51 this.database = new HashMap(); | |
52 this.motifs = new HashMap(); | |
53 | |
54 /** Extract peptides and generate custom protein database. */ | |
55 extractPeptides(peps, score); | |
56 | |
57 /** Extracts the proteins from the fasta database. */ | |
58 extractDatabase(prots); | |
59 | |
60 /** Generate motifs for each peptide. */ | |
61 generateMotifs(); | |
62 | |
63 } | |
64 | |
65 /** | |
66 * Extracts individual peptides from the peptide report and creates new | |
67 * peptide objects, returns a list of peptides. | |
68 * | |
69 * @param lines Distinct peptide summary. | |
70 * @param score FDR score. | |
71 * @return ArrayList of peptides | |
72 */ | |
73 private void extractPeptides(ArrayList<String> lines, | |
74 double score) { | |
75 | |
76 /** Process each line of the peptide report */ | |
77 for (String line : lines) { | |
78 | |
79 /** Peptide report is tabular. */ | |
80 String[] pepInfo = line.split("\\t"); | |
81 | |
82 /** | |
83 * Check to see if the peptide possesses a Conf score greater than | |
84 * the FDR threshold, has a peptide id, and contains a | |
85 * phospho-tyrosine. | |
86 */ | |
87 if (Double.valueOf(pepInfo[7]) > score | |
88 & !"".equals(pepInfo[3]) | |
89 & pepInfo[9].contains("Phospho(Y)")) { | |
90 | |
91 List<String> temp = Arrays.asList(pepInfo[3].split(";")); | |
92 ArrayList<String> ids = new ArrayList<>(); | |
93 temp.stream().forEach((id) -> { | |
94 ids.add(id.trim()); | |
95 }); | |
96 | |
97 /** Add IDs to inclusion list, if not present. */ | |
98 for (String ref : ids) { | |
99 if (!ref.contains("RRRRR") | |
100 & !database.containsKey(ref.trim())) { | |
101 this.database.put(ref, null); | |
102 } | |
103 } | |
104 | |
105 /** Construct a new peptide object. */ | |
106 Peptide peptide = new Peptide(pepInfo, ids); | |
107 | |
108 /** Add the peptide to list if unique. */ | |
109 this.pepList.add(peptide); | |
110 } | |
111 } | |
112 } | |
113 | |
114 /** | |
115 * Extracts individual proteins from a FASTA database, creating new protein | |
116 * objects from each entry and adding them to a master list. | |
117 * | |
118 * @param fastaDatabase | |
119 * @return ArrayList of Proteins. | |
120 */ | |
121 private void extractDatabase(ArrayList<String> fastaDatabase) { | |
122 | |
123 Set<String> protList = this.database.keySet(); | |
124 | |
125 /** Loop through each entry. */ | |
126 for (String line : fastaDatabase) { | |
127 | |
128 /** FASTA database is tabular file. */ | |
129 String[] protInfo = line.split("\\t"); | |
130 | |
131 /** Search if protein is in inclusion list. */ | |
132 for (String name : protList) { | |
133 | |
134 if (protInfo[0].contains(name)) { | |
135 | |
136 /** Add protein to list. */ | |
137 this.database.replace(name, new Protein(protInfo)); | |
138 } | |
139 } | |
140 } | |
141 } | |
142 | |
143 /** | |
144 * Generates motifs around each phospho-tyrosine of each peptide in pepList. | |
145 * Motifs are -4 to 4 amino acids surrounding tyr. Not all entries from | |
146 * distinct peptide summary have IDs. Presently, those entries are excluded. | |
147 * For each peptide, find the corresponding protein, so the number of | |
148 * phospho-tyr can be recorded and the sequence can be utilized if the motif | |
149 * can not be generated from peptide sequence alone. | |
150 */ | |
151 private void generateMotifs() { | |
152 | |
153 for (Peptide peptide : pepList) { | |
154 | |
155 /** Check to see if peptide has reference accession. */ | |
156 if (hasProtID(peptide)) { | |
157 | |
158 /** For phospho-tyrosine in the peptide. */ | |
159 for (int index : peptide.tyrIndex) { | |
160 | |
161 /** Generate the motif using peptide sequence. */ | |
162 genSeq(peptide, index); | |
163 | |
164 } | |
165 } | |
166 } | |
167 } | |
168 | |
169 /** | |
170 * Find the proteins associated with the peptide ID in the database. Mark | |
171 * each protein's phospho-tyrosine field if visited. | |
172 * | |
173 * @param id | |
174 * @return protein sequence | |
175 */ | |
176 private void markMod(List<String> id) { | |
177 | |
178 /** Loop through each reference. */ | |
179 for (String ref : id) { | |
180 | |
181 /** If found, mark pY and capture sequence. */ | |
182 if (database.containsKey(ref)) { | |
183 database.get(ref).phosphoTyr++; | |
184 } | |
185 } | |
186 } | |
187 | |
188 /** | |
189 * Generate the peptide motif using the given index and sequence. Peptide | |
190 * motifs are the immediate -4 to +4 around a given index. | |
191 * <p> | |
192 * Some peptides may not have enough sequence to generate full motif. | |
193 * | |
194 * @param peptide | |
195 * @param index index of phospho-tyrosine in seq | |
196 * @param pSeq protein sequence | |
197 */ | |
198 private void genSeq(Peptide peptide, int index) { | |
199 | |
200 String motif = ""; | |
201 String seq = peptide.seq; | |
202 | |
203 /** Select surrounding amino acids. */ | |
204 // if (index - 4 >= 1 & index + 4 <= seq.length()) { | |
205 // motif = seq.substring(index - 5, index + 4); | |
206 // index = 5; | |
207 // } else if (index - 4 < 1 & index + 4 <= seq.length()) { | |
208 // motif = seq.substring(0, index + 4); | |
209 // } else if (index - 4 >= 1 & index + 4 > seq.length()) { | |
210 // motif = seq.substring(index - 5, seq.length()); | |
211 // index = 5; | |
212 // } else { | |
213 // motif = seq; | |
214 // } | |
215 if (index - 7 >= 1 & index + 7 <= seq.length()) { | |
216 motif = seq.substring(index - 8, index + 7); | |
217 index = 8; | |
218 } else if (index - 7 < 1 & index + 7 <= seq.length()) { | |
219 motif = seq.substring(0, index + 7); | |
220 } else if (index - 7 >= 1 & index + 7 > seq.length()) { | |
221 motif = seq.substring(index - 8, seq.length()); | |
222 index = 8; | |
223 } else { | |
224 motif = seq; | |
225 } | |
226 | |
227 addMotif(motif, peptide, index); | |
228 | |
229 } | |
230 | |
231 /** | |
232 * Determine if peptide has a protein ID. | |
233 * | |
234 * @param peptide | |
235 * @return | |
236 */ | |
237 public boolean hasProtID(Peptide peptide) { | |
238 return !peptide.id.isEmpty(); | |
239 } | |
240 | |
241 /** | |
242 * Adds seq to motif map. Also pair peptide refs and index of phospho- | |
243 * -tyrosine. | |
244 * | |
245 * @param seq | |
246 * @param ref | |
247 * @param index | |
248 */ | |
249 private void addMotif(String seq, Peptide peptide, int index) { | |
250 | |
251 /** Check if sequence is unique. */ | |
252 if (!motifs.containsKey(seq)) { | |
253 ArrayList<String> regenSeqs = regenSeq(peptide.id, seq, index); | |
254 motifs.put(seq, new Motif(seq, peptide.ref, index, regenSeqs)); | |
255 markMod(peptide.id); | |
256 } else { | |
257 List<String> refs = peptide.id; | |
258 List<String> ids = parseRef(motifs.get(seq).ref); | |
259 ArrayList<String> newID = new ArrayList<>(); | |
260 | |
261 for (String ref : refs) { | |
262 if (!ids.contains(ref)) { | |
263 newID.add(ref); | |
264 } | |
265 } | |
266 | |
267 if (!newID.isEmpty()) { | |
268 markMod(newID); | |
269 ids.addAll(newID); | |
270 String temp = ""; | |
271 for (String id : ids) { | |
272 temp += id + ";"; | |
273 } | |
274 ArrayList<String> regenSeqs = regenSeq(ids, seq, index); | |
275 motifs.put(seq, new Motif(seq, temp, index, regenSeqs)); | |
276 } | |
277 } | |
278 } | |
279 | |
280 /** Parse the reference string of peptide. | |
281 * | |
282 * @param ref | |
283 * @return | |
284 */ | |
285 public List<String> parseRef(String ref) { | |
286 List<String> temp = Arrays.asList(ref.split(";")); | |
287 ArrayList<String> ids = new ArrayList<>(); | |
288 temp.stream().forEach((id) -> { | |
289 ids.add(id.trim()); | |
290 }); | |
291 | |
292 return ids; | |
293 } | |
294 | |
295 private ArrayList<String> regenSeq(List<String> ids, String seq, int i) { | |
296 | |
297 ArrayList<String> seqs = new ArrayList<>(); | |
298 | |
299 for (String id : ids) { | |
300 if (database.containsKey(id)) { | |
301 String prot = database.get(id).seq; | |
302 int index = prot.indexOf(seq) + i; | |
303 | |
304 String motif = ""; | |
305 | |
306 if (index - 7 >= 1 & index + 7 <= prot.length()) { | |
307 motif = prot.substring(index - 8, index + 7); | |
308 } else if (index - 7 < 1 & index + 7 <= prot.length()) { | |
309 motif = prot.substring(0, index + 7); | |
310 } else if (index - 7 >= 1 & index + 7 > prot.length()) { | |
311 motif = prot.substring(index - 8, prot.length()); | |
312 } else { | |
313 motif = prot; | |
314 } | |
315 | |
316 if (!seqs.contains(motif)){ | |
317 seqs.add(motif); | |
318 } | |
319 | |
320 } | |
321 } | |
322 | |
323 return seqs; | |
324 | |
325 } | |
326 | |
327 } |