comparison KinaMine-Galaxy-7-7/src/kinamine/Run.java @ 0:67635b462045 draft

Uploaded
author jfb
date Tue, 20 Feb 2018 14:31:15 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:67635b462045
1 /**
2 *****************************************************************************
3 * <p>
4 * Copyright (c) Regents of the University of Minnesota. All Rights Reserved.
5 * <p>
6 * Author: Kevin Murray University of Minnesota - (murra668@umn.edu)
7 * <p>
8 *****************************************************************************
9 */
10 package kinamine;
11
12 import java.util.ArrayList;
13 import java.util.Arrays;
14 import java.util.HashMap;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Set;
18
19 /**
20 * Main KinaMine object container. A run contains ArrayLists of peptide and
21 * proteins from the extracted file and list of amino acid chars.
22 *
23 * @version 1.0
24 * @author murra668
25 */
26 public final class Run {
27
28 /** List of peptide. */
29 public final ArrayList<Peptide> pepList;
30
31 /** Non-redundant database. */
32 public final Map<String, Protein> database;
33
34 /** Non-redundant collection of motifs. */
35 public final Map<String, Motif> motifs;
36
37 /** Amino Acids and Properties. */
38 public static final AminoAcid ACIDS = new AminoAcid();
39
40 /**
41 * Constructs a run and processes the submitted peptide report and
42 * fasta database for motif generation.
43 *
44 * @param peps lines from peptide report
45 * @param prots lines for fasta database
46 * @param score FDR score
47 */
48 public Run(ArrayList<String> peps, ArrayList<String> prots, double score) {
49
50 this.pepList = new ArrayList<>();
51 this.database = new HashMap();
52 this.motifs = new HashMap();
53
54 /** Extract peptides and generate custom protein database. */
55 extractPeptides(peps, score);
56
57 /** Extracts the proteins from the fasta database. */
58 extractDatabase(prots);
59
60 /** Generate motifs for each peptide. */
61 generateMotifs();
62
63 }
64
65 /**
66 * Extracts individual peptides from the peptide report and creates new
67 * peptide objects, returns a list of peptides.
68 *
69 * @param lines Distinct peptide summary.
70 * @param score FDR score.
71 * @return ArrayList of peptides
72 */
73 private void extractPeptides(ArrayList<String> lines,
74 double score) {
75
76 /** Process each line of the peptide report */
77 for (String line : lines) {
78
79 /** Peptide report is tabular. */
80 String[] pepInfo = line.split("\\t");
81
82 /**
83 * Check to see if the peptide possesses a Conf score greater than
84 * the FDR threshold, has a peptide id, and contains a
85 * phospho-tyrosine.
86 */
87 if (Double.valueOf(pepInfo[7]) > score
88 & !"".equals(pepInfo[3])
89 & pepInfo[9].contains("Phospho(Y)")) {
90
91 List<String> temp = Arrays.asList(pepInfo[3].split(";"));
92 ArrayList<String> ids = new ArrayList<>();
93 temp.stream().forEach((id) -> {
94 ids.add(id.trim());
95 });
96
97 /** Add IDs to inclusion list, if not present. */
98 for (String ref : ids) {
99 if (!ref.contains("RRRRR")
100 & !database.containsKey(ref.trim())) {
101 this.database.put(ref, null);
102 }
103 }
104
105 /** Construct a new peptide object. */
106 Peptide peptide = new Peptide(pepInfo, ids);
107
108 /** Add the peptide to list if unique. */
109 this.pepList.add(peptide);
110 }
111 }
112 }
113
114 /**
115 * Extracts individual proteins from a FASTA database, creating new protein
116 * objects from each entry and adding them to a master list.
117 *
118 * @param fastaDatabase
119 * @return ArrayList of Proteins.
120 */
121 private void extractDatabase(ArrayList<String> fastaDatabase) {
122
123 Set<String> protList = this.database.keySet();
124
125 /** Loop through each entry. */
126 for (String line : fastaDatabase) {
127
128 /** FASTA database is tabular file. */
129 String[] protInfo = line.split("\\t");
130
131 /** Search if protein is in inclusion list. */
132 for (String name : protList) {
133
134 if (protInfo[0].contains(name)) {
135
136 /** Add protein to list. */
137 this.database.replace(name, new Protein(protInfo));
138 }
139 }
140 }
141 }
142
143 /**
144 * Generates motifs around each phospho-tyrosine of each peptide in pepList.
145 * Motifs are -4 to 4 amino acids surrounding tyr. Not all entries from
146 * distinct peptide summary have IDs. Presently, those entries are excluded.
147 * For each peptide, find the corresponding protein, so the number of
148 * phospho-tyr can be recorded and the sequence can be utilized if the motif
149 * can not be generated from peptide sequence alone.
150 */
151 private void generateMotifs() {
152
153 for (Peptide peptide : pepList) {
154
155 /** Check to see if peptide has reference accession. */
156 if (hasProtID(peptide)) {
157
158 /** For phospho-tyrosine in the peptide. */
159 for (int index : peptide.tyrIndex) {
160
161 /** Generate the motif using peptide sequence. */
162 genSeq(peptide, index);
163
164 }
165 }
166 }
167 }
168
169 /**
170 * Find the proteins associated with the peptide ID in the database. Mark
171 * each protein's phospho-tyrosine field if visited.
172 *
173 * @param id
174 * @return protein sequence
175 */
176 private void markMod(List<String> id) {
177
178 /** Loop through each reference. */
179 for (String ref : id) {
180
181 /** If found, mark pY and capture sequence. */
182 if (database.containsKey(ref)) {
183 database.get(ref).phosphoTyr++;
184 }
185 }
186 }
187
188 /**
189 * Generate the peptide motif using the given index and sequence. Peptide
190 * motifs are the immediate -4 to +4 around a given index.
191 * <p>
192 * Some peptides may not have enough sequence to generate full motif.
193 *
194 * @param peptide
195 * @param index index of phospho-tyrosine in seq
196 * @param pSeq protein sequence
197 */
198 private void genSeq(Peptide peptide, int index) {
199
200 String motif = "";
201 String seq = peptide.seq;
202
203 /** Select surrounding amino acids. */
204 // if (index - 4 >= 1 & index + 4 <= seq.length()) {
205 // motif = seq.substring(index - 5, index + 4);
206 // index = 5;
207 // } else if (index - 4 < 1 & index + 4 <= seq.length()) {
208 // motif = seq.substring(0, index + 4);
209 // } else if (index - 4 >= 1 & index + 4 > seq.length()) {
210 // motif = seq.substring(index - 5, seq.length());
211 // index = 5;
212 // } else {
213 // motif = seq;
214 // }
215 if (index - 7 >= 1 & index + 7 <= seq.length()) {
216 motif = seq.substring(index - 8, index + 7);
217 index = 8;
218 } else if (index - 7 < 1 & index + 7 <= seq.length()) {
219 motif = seq.substring(0, index + 7);
220 } else if (index - 7 >= 1 & index + 7 > seq.length()) {
221 motif = seq.substring(index - 8, seq.length());
222 index = 8;
223 } else {
224 motif = seq;
225 }
226
227 addMotif(motif, peptide, index);
228
229 }
230
231 /**
232 * Determine if peptide has a protein ID.
233 *
234 * @param peptide
235 * @return
236 */
237 public boolean hasProtID(Peptide peptide) {
238 return !peptide.id.isEmpty();
239 }
240
241 /**
242 * Adds seq to motif map. Also pair peptide refs and index of phospho-
243 * -tyrosine.
244 *
245 * @param seq
246 * @param ref
247 * @param index
248 */
249 private void addMotif(String seq, Peptide peptide, int index) {
250
251 /** Check if sequence is unique. */
252 if (!motifs.containsKey(seq)) {
253 ArrayList<String> regenSeqs = regenSeq(peptide.id, seq, index);
254 motifs.put(seq, new Motif(seq, peptide.ref, index, regenSeqs));
255 markMod(peptide.id);
256 } else {
257 List<String> refs = peptide.id;
258 List<String> ids = parseRef(motifs.get(seq).ref);
259 ArrayList<String> newID = new ArrayList<>();
260
261 for (String ref : refs) {
262 if (!ids.contains(ref)) {
263 newID.add(ref);
264 }
265 }
266
267 if (!newID.isEmpty()) {
268 markMod(newID);
269 ids.addAll(newID);
270 String temp = "";
271 for (String id : ids) {
272 temp += id + ";";
273 }
274 ArrayList<String> regenSeqs = regenSeq(ids, seq, index);
275 motifs.put(seq, new Motif(seq, temp, index, regenSeqs));
276 }
277 }
278 }
279
280 /** Parse the reference string of peptide.
281 *
282 * @param ref
283 * @return
284 */
285 public List<String> parseRef(String ref) {
286 List<String> temp = Arrays.asList(ref.split(";"));
287 ArrayList<String> ids = new ArrayList<>();
288 temp.stream().forEach((id) -> {
289 ids.add(id.trim());
290 });
291
292 return ids;
293 }
294
295 private ArrayList<String> regenSeq(List<String> ids, String seq, int i) {
296
297 ArrayList<String> seqs = new ArrayList<>();
298
299 for (String id : ids) {
300 if (database.containsKey(id)) {
301 String prot = database.get(id).seq;
302 int index = prot.indexOf(seq) + i;
303
304 String motif = "";
305
306 if (index - 7 >= 1 & index + 7 <= prot.length()) {
307 motif = prot.substring(index - 8, index + 7);
308 } else if (index - 7 < 1 & index + 7 <= prot.length()) {
309 motif = prot.substring(0, index + 7);
310 } else if (index - 7 >= 1 & index + 7 > prot.length()) {
311 motif = prot.substring(index - 8, prot.length());
312 } else {
313 motif = prot;
314 }
315
316 if (!seqs.contains(motif)){
317 seqs.add(motif);
318 }
319
320 }
321 }
322
323 return seqs;
324
325 }
326
327 }