annotate KinaMine-Galaxy-7-7/src/kinamine/Run.java @ 0:67635b462045 draft

Uploaded
author jfb
date Tue, 20 Feb 2018 14:31:15 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
67635b462045 Uploaded
jfb
parents:
diff changeset
1 /**
67635b462045 Uploaded
jfb
parents:
diff changeset
2 *****************************************************************************
67635b462045 Uploaded
jfb
parents:
diff changeset
3 * <p>
67635b462045 Uploaded
jfb
parents:
diff changeset
4 * Copyright (c) Regents of the University of Minnesota. All Rights Reserved.
67635b462045 Uploaded
jfb
parents:
diff changeset
5 * <p>
67635b462045 Uploaded
jfb
parents:
diff changeset
6 * Author: Kevin Murray University of Minnesota - (murra668@umn.edu)
67635b462045 Uploaded
jfb
parents:
diff changeset
7 * <p>
67635b462045 Uploaded
jfb
parents:
diff changeset
8 *****************************************************************************
67635b462045 Uploaded
jfb
parents:
diff changeset
9 */
67635b462045 Uploaded
jfb
parents:
diff changeset
10 package kinamine;
67635b462045 Uploaded
jfb
parents:
diff changeset
11
67635b462045 Uploaded
jfb
parents:
diff changeset
12 import java.util.ArrayList;
67635b462045 Uploaded
jfb
parents:
diff changeset
13 import java.util.Arrays;
67635b462045 Uploaded
jfb
parents:
diff changeset
14 import java.util.HashMap;
67635b462045 Uploaded
jfb
parents:
diff changeset
15 import java.util.List;
67635b462045 Uploaded
jfb
parents:
diff changeset
16 import java.util.Map;
67635b462045 Uploaded
jfb
parents:
diff changeset
17 import java.util.Set;
67635b462045 Uploaded
jfb
parents:
diff changeset
18
67635b462045 Uploaded
jfb
parents:
diff changeset
19 /**
67635b462045 Uploaded
jfb
parents:
diff changeset
20 * Main KinaMine object container. A run contains ArrayLists of peptide and
67635b462045 Uploaded
jfb
parents:
diff changeset
21 * proteins from the extracted file and list of amino acid chars.
67635b462045 Uploaded
jfb
parents:
diff changeset
22 *
67635b462045 Uploaded
jfb
parents:
diff changeset
23 * @version 1.0
67635b462045 Uploaded
jfb
parents:
diff changeset
24 * @author murra668
67635b462045 Uploaded
jfb
parents:
diff changeset
25 */
67635b462045 Uploaded
jfb
parents:
diff changeset
26 public final class Run {
67635b462045 Uploaded
jfb
parents:
diff changeset
27
67635b462045 Uploaded
jfb
parents:
diff changeset
28 /** List of peptide. */
67635b462045 Uploaded
jfb
parents:
diff changeset
29 public final ArrayList<Peptide> pepList;
67635b462045 Uploaded
jfb
parents:
diff changeset
30
67635b462045 Uploaded
jfb
parents:
diff changeset
31 /** Non-redundant database. */
67635b462045 Uploaded
jfb
parents:
diff changeset
32 public final Map<String, Protein> database;
67635b462045 Uploaded
jfb
parents:
diff changeset
33
67635b462045 Uploaded
jfb
parents:
diff changeset
34 /** Non-redundant collection of motifs. */
67635b462045 Uploaded
jfb
parents:
diff changeset
35 public final Map<String, Motif> motifs;
67635b462045 Uploaded
jfb
parents:
diff changeset
36
67635b462045 Uploaded
jfb
parents:
diff changeset
37 /** Amino Acids and Properties. */
67635b462045 Uploaded
jfb
parents:
diff changeset
38 public static final AminoAcid ACIDS = new AminoAcid();
67635b462045 Uploaded
jfb
parents:
diff changeset
39
67635b462045 Uploaded
jfb
parents:
diff changeset
40 /**
67635b462045 Uploaded
jfb
parents:
diff changeset
41 * Constructs a run and processes the submitted peptide report and
67635b462045 Uploaded
jfb
parents:
diff changeset
42 * fasta database for motif generation.
67635b462045 Uploaded
jfb
parents:
diff changeset
43 *
67635b462045 Uploaded
jfb
parents:
diff changeset
44 * @param peps lines from peptide report
67635b462045 Uploaded
jfb
parents:
diff changeset
45 * @param prots lines for fasta database
67635b462045 Uploaded
jfb
parents:
diff changeset
46 * @param score FDR score
67635b462045 Uploaded
jfb
parents:
diff changeset
47 */
67635b462045 Uploaded
jfb
parents:
diff changeset
48 public Run(ArrayList<String> peps, ArrayList<String> prots, double score) {
67635b462045 Uploaded
jfb
parents:
diff changeset
49
67635b462045 Uploaded
jfb
parents:
diff changeset
50 this.pepList = new ArrayList<>();
67635b462045 Uploaded
jfb
parents:
diff changeset
51 this.database = new HashMap();
67635b462045 Uploaded
jfb
parents:
diff changeset
52 this.motifs = new HashMap();
67635b462045 Uploaded
jfb
parents:
diff changeset
53
67635b462045 Uploaded
jfb
parents:
diff changeset
54 /** Extract peptides and generate custom protein database. */
67635b462045 Uploaded
jfb
parents:
diff changeset
55 extractPeptides(peps, score);
67635b462045 Uploaded
jfb
parents:
diff changeset
56
67635b462045 Uploaded
jfb
parents:
diff changeset
57 /** Extracts the proteins from the fasta database. */
67635b462045 Uploaded
jfb
parents:
diff changeset
58 extractDatabase(prots);
67635b462045 Uploaded
jfb
parents:
diff changeset
59
67635b462045 Uploaded
jfb
parents:
diff changeset
60 /** Generate motifs for each peptide. */
67635b462045 Uploaded
jfb
parents:
diff changeset
61 generateMotifs();
67635b462045 Uploaded
jfb
parents:
diff changeset
62
67635b462045 Uploaded
jfb
parents:
diff changeset
63 }
67635b462045 Uploaded
jfb
parents:
diff changeset
64
67635b462045 Uploaded
jfb
parents:
diff changeset
65 /**
67635b462045 Uploaded
jfb
parents:
diff changeset
66 * Extracts individual peptides from the peptide report and creates new
67635b462045 Uploaded
jfb
parents:
diff changeset
67 * peptide objects, returns a list of peptides.
67635b462045 Uploaded
jfb
parents:
diff changeset
68 *
67635b462045 Uploaded
jfb
parents:
diff changeset
69 * @param lines Distinct peptide summary.
67635b462045 Uploaded
jfb
parents:
diff changeset
70 * @param score FDR score.
67635b462045 Uploaded
jfb
parents:
diff changeset
71 * @return ArrayList of peptides
67635b462045 Uploaded
jfb
parents:
diff changeset
72 */
67635b462045 Uploaded
jfb
parents:
diff changeset
73 private void extractPeptides(ArrayList<String> lines,
67635b462045 Uploaded
jfb
parents:
diff changeset
74 double score) {
67635b462045 Uploaded
jfb
parents:
diff changeset
75
67635b462045 Uploaded
jfb
parents:
diff changeset
76 /** Process each line of the peptide report */
67635b462045 Uploaded
jfb
parents:
diff changeset
77 for (String line : lines) {
67635b462045 Uploaded
jfb
parents:
diff changeset
78
67635b462045 Uploaded
jfb
parents:
diff changeset
79 /** Peptide report is tabular. */
67635b462045 Uploaded
jfb
parents:
diff changeset
80 String[] pepInfo = line.split("\\t");
67635b462045 Uploaded
jfb
parents:
diff changeset
81
67635b462045 Uploaded
jfb
parents:
diff changeset
82 /**
67635b462045 Uploaded
jfb
parents:
diff changeset
83 * Check to see if the peptide possesses a Conf score greater than
67635b462045 Uploaded
jfb
parents:
diff changeset
84 * the FDR threshold, has a peptide id, and contains a
67635b462045 Uploaded
jfb
parents:
diff changeset
85 * phospho-tyrosine.
67635b462045 Uploaded
jfb
parents:
diff changeset
86 */
67635b462045 Uploaded
jfb
parents:
diff changeset
87 if (Double.valueOf(pepInfo[7]) > score
67635b462045 Uploaded
jfb
parents:
diff changeset
88 & !"".equals(pepInfo[3])
67635b462045 Uploaded
jfb
parents:
diff changeset
89 & pepInfo[9].contains("Phospho(Y)")) {
67635b462045 Uploaded
jfb
parents:
diff changeset
90
67635b462045 Uploaded
jfb
parents:
diff changeset
91 List<String> temp = Arrays.asList(pepInfo[3].split(";"));
67635b462045 Uploaded
jfb
parents:
diff changeset
92 ArrayList<String> ids = new ArrayList<>();
67635b462045 Uploaded
jfb
parents:
diff changeset
93 temp.stream().forEach((id) -> {
67635b462045 Uploaded
jfb
parents:
diff changeset
94 ids.add(id.trim());
67635b462045 Uploaded
jfb
parents:
diff changeset
95 });
67635b462045 Uploaded
jfb
parents:
diff changeset
96
67635b462045 Uploaded
jfb
parents:
diff changeset
97 /** Add IDs to inclusion list, if not present. */
67635b462045 Uploaded
jfb
parents:
diff changeset
98 for (String ref : ids) {
67635b462045 Uploaded
jfb
parents:
diff changeset
99 if (!ref.contains("RRRRR")
67635b462045 Uploaded
jfb
parents:
diff changeset
100 & !database.containsKey(ref.trim())) {
67635b462045 Uploaded
jfb
parents:
diff changeset
101 this.database.put(ref, null);
67635b462045 Uploaded
jfb
parents:
diff changeset
102 }
67635b462045 Uploaded
jfb
parents:
diff changeset
103 }
67635b462045 Uploaded
jfb
parents:
diff changeset
104
67635b462045 Uploaded
jfb
parents:
diff changeset
105 /** Construct a new peptide object. */
67635b462045 Uploaded
jfb
parents:
diff changeset
106 Peptide peptide = new Peptide(pepInfo, ids);
67635b462045 Uploaded
jfb
parents:
diff changeset
107
67635b462045 Uploaded
jfb
parents:
diff changeset
108 /** Add the peptide to list if unique. */
67635b462045 Uploaded
jfb
parents:
diff changeset
109 this.pepList.add(peptide);
67635b462045 Uploaded
jfb
parents:
diff changeset
110 }
67635b462045 Uploaded
jfb
parents:
diff changeset
111 }
67635b462045 Uploaded
jfb
parents:
diff changeset
112 }
67635b462045 Uploaded
jfb
parents:
diff changeset
113
67635b462045 Uploaded
jfb
parents:
diff changeset
114 /**
67635b462045 Uploaded
jfb
parents:
diff changeset
115 * Extracts individual proteins from a FASTA database, creating new protein
67635b462045 Uploaded
jfb
parents:
diff changeset
116 * objects from each entry and adding them to a master list.
67635b462045 Uploaded
jfb
parents:
diff changeset
117 *
67635b462045 Uploaded
jfb
parents:
diff changeset
118 * @param fastaDatabase
67635b462045 Uploaded
jfb
parents:
diff changeset
119 * @return ArrayList of Proteins.
67635b462045 Uploaded
jfb
parents:
diff changeset
120 */
67635b462045 Uploaded
jfb
parents:
diff changeset
121 private void extractDatabase(ArrayList<String> fastaDatabase) {
67635b462045 Uploaded
jfb
parents:
diff changeset
122
67635b462045 Uploaded
jfb
parents:
diff changeset
123 Set<String> protList = this.database.keySet();
67635b462045 Uploaded
jfb
parents:
diff changeset
124
67635b462045 Uploaded
jfb
parents:
diff changeset
125 /** Loop through each entry. */
67635b462045 Uploaded
jfb
parents:
diff changeset
126 for (String line : fastaDatabase) {
67635b462045 Uploaded
jfb
parents:
diff changeset
127
67635b462045 Uploaded
jfb
parents:
diff changeset
128 /** FASTA database is tabular file. */
67635b462045 Uploaded
jfb
parents:
diff changeset
129 String[] protInfo = line.split("\\t");
67635b462045 Uploaded
jfb
parents:
diff changeset
130
67635b462045 Uploaded
jfb
parents:
diff changeset
131 /** Search if protein is in inclusion list. */
67635b462045 Uploaded
jfb
parents:
diff changeset
132 for (String name : protList) {
67635b462045 Uploaded
jfb
parents:
diff changeset
133
67635b462045 Uploaded
jfb
parents:
diff changeset
134 if (protInfo[0].contains(name)) {
67635b462045 Uploaded
jfb
parents:
diff changeset
135
67635b462045 Uploaded
jfb
parents:
diff changeset
136 /** Add protein to list. */
67635b462045 Uploaded
jfb
parents:
diff changeset
137 this.database.replace(name, new Protein(protInfo));
67635b462045 Uploaded
jfb
parents:
diff changeset
138 }
67635b462045 Uploaded
jfb
parents:
diff changeset
139 }
67635b462045 Uploaded
jfb
parents:
diff changeset
140 }
67635b462045 Uploaded
jfb
parents:
diff changeset
141 }
67635b462045 Uploaded
jfb
parents:
diff changeset
142
67635b462045 Uploaded
jfb
parents:
diff changeset
143 /**
67635b462045 Uploaded
jfb
parents:
diff changeset
144 * Generates motifs around each phospho-tyrosine of each peptide in pepList.
67635b462045 Uploaded
jfb
parents:
diff changeset
145 * Motifs are -4 to 4 amino acids surrounding tyr. Not all entries from
67635b462045 Uploaded
jfb
parents:
diff changeset
146 * distinct peptide summary have IDs. Presently, those entries are excluded.
67635b462045 Uploaded
jfb
parents:
diff changeset
147 * For each peptide, find the corresponding protein, so the number of
67635b462045 Uploaded
jfb
parents:
diff changeset
148 * phospho-tyr can be recorded and the sequence can be utilized if the motif
67635b462045 Uploaded
jfb
parents:
diff changeset
149 * can not be generated from peptide sequence alone.
67635b462045 Uploaded
jfb
parents:
diff changeset
150 */
67635b462045 Uploaded
jfb
parents:
diff changeset
151 private void generateMotifs() {
67635b462045 Uploaded
jfb
parents:
diff changeset
152
67635b462045 Uploaded
jfb
parents:
diff changeset
153 for (Peptide peptide : pepList) {
67635b462045 Uploaded
jfb
parents:
diff changeset
154
67635b462045 Uploaded
jfb
parents:
diff changeset
155 /** Check to see if peptide has reference accession. */
67635b462045 Uploaded
jfb
parents:
diff changeset
156 if (hasProtID(peptide)) {
67635b462045 Uploaded
jfb
parents:
diff changeset
157
67635b462045 Uploaded
jfb
parents:
diff changeset
158 /** For phospho-tyrosine in the peptide. */
67635b462045 Uploaded
jfb
parents:
diff changeset
159 for (int index : peptide.tyrIndex) {
67635b462045 Uploaded
jfb
parents:
diff changeset
160
67635b462045 Uploaded
jfb
parents:
diff changeset
161 /** Generate the motif using peptide sequence. */
67635b462045 Uploaded
jfb
parents:
diff changeset
162 genSeq(peptide, index);
67635b462045 Uploaded
jfb
parents:
diff changeset
163
67635b462045 Uploaded
jfb
parents:
diff changeset
164 }
67635b462045 Uploaded
jfb
parents:
diff changeset
165 }
67635b462045 Uploaded
jfb
parents:
diff changeset
166 }
67635b462045 Uploaded
jfb
parents:
diff changeset
167 }
67635b462045 Uploaded
jfb
parents:
diff changeset
168
67635b462045 Uploaded
jfb
parents:
diff changeset
169 /**
67635b462045 Uploaded
jfb
parents:
diff changeset
170 * Find the proteins associated with the peptide ID in the database. Mark
67635b462045 Uploaded
jfb
parents:
diff changeset
171 * each protein's phospho-tyrosine field if visited.
67635b462045 Uploaded
jfb
parents:
diff changeset
172 *
67635b462045 Uploaded
jfb
parents:
diff changeset
173 * @param id
67635b462045 Uploaded
jfb
parents:
diff changeset
174 * @return protein sequence
67635b462045 Uploaded
jfb
parents:
diff changeset
175 */
67635b462045 Uploaded
jfb
parents:
diff changeset
176 private void markMod(List<String> id) {
67635b462045 Uploaded
jfb
parents:
diff changeset
177
67635b462045 Uploaded
jfb
parents:
diff changeset
178 /** Loop through each reference. */
67635b462045 Uploaded
jfb
parents:
diff changeset
179 for (String ref : id) {
67635b462045 Uploaded
jfb
parents:
diff changeset
180
67635b462045 Uploaded
jfb
parents:
diff changeset
181 /** If found, mark pY and capture sequence. */
67635b462045 Uploaded
jfb
parents:
diff changeset
182 if (database.containsKey(ref)) {
67635b462045 Uploaded
jfb
parents:
diff changeset
183 database.get(ref).phosphoTyr++;
67635b462045 Uploaded
jfb
parents:
diff changeset
184 }
67635b462045 Uploaded
jfb
parents:
diff changeset
185 }
67635b462045 Uploaded
jfb
parents:
diff changeset
186 }
67635b462045 Uploaded
jfb
parents:
diff changeset
187
67635b462045 Uploaded
jfb
parents:
diff changeset
188 /**
67635b462045 Uploaded
jfb
parents:
diff changeset
189 * Generate the peptide motif using the given index and sequence. Peptide
67635b462045 Uploaded
jfb
parents:
diff changeset
190 * motifs are the immediate -4 to +4 around a given index.
67635b462045 Uploaded
jfb
parents:
diff changeset
191 * <p>
67635b462045 Uploaded
jfb
parents:
diff changeset
192 * Some peptides may not have enough sequence to generate full motif.
67635b462045 Uploaded
jfb
parents:
diff changeset
193 *
67635b462045 Uploaded
jfb
parents:
diff changeset
194 * @param peptide
67635b462045 Uploaded
jfb
parents:
diff changeset
195 * @param index index of phospho-tyrosine in seq
67635b462045 Uploaded
jfb
parents:
diff changeset
196 * @param pSeq protein sequence
67635b462045 Uploaded
jfb
parents:
diff changeset
197 */
67635b462045 Uploaded
jfb
parents:
diff changeset
198 private void genSeq(Peptide peptide, int index) {
67635b462045 Uploaded
jfb
parents:
diff changeset
199
67635b462045 Uploaded
jfb
parents:
diff changeset
200 String motif = "";
67635b462045 Uploaded
jfb
parents:
diff changeset
201 String seq = peptide.seq;
67635b462045 Uploaded
jfb
parents:
diff changeset
202
67635b462045 Uploaded
jfb
parents:
diff changeset
203 /** Select surrounding amino acids. */
67635b462045 Uploaded
jfb
parents:
diff changeset
204 // if (index - 4 >= 1 & index + 4 <= seq.length()) {
67635b462045 Uploaded
jfb
parents:
diff changeset
205 // motif = seq.substring(index - 5, index + 4);
67635b462045 Uploaded
jfb
parents:
diff changeset
206 // index = 5;
67635b462045 Uploaded
jfb
parents:
diff changeset
207 // } else if (index - 4 < 1 & index + 4 <= seq.length()) {
67635b462045 Uploaded
jfb
parents:
diff changeset
208 // motif = seq.substring(0, index + 4);
67635b462045 Uploaded
jfb
parents:
diff changeset
209 // } else if (index - 4 >= 1 & index + 4 > seq.length()) {
67635b462045 Uploaded
jfb
parents:
diff changeset
210 // motif = seq.substring(index - 5, seq.length());
67635b462045 Uploaded
jfb
parents:
diff changeset
211 // index = 5;
67635b462045 Uploaded
jfb
parents:
diff changeset
212 // } else {
67635b462045 Uploaded
jfb
parents:
diff changeset
213 // motif = seq;
67635b462045 Uploaded
jfb
parents:
diff changeset
214 // }
67635b462045 Uploaded
jfb
parents:
diff changeset
215 if (index - 7 >= 1 & index + 7 <= seq.length()) {
67635b462045 Uploaded
jfb
parents:
diff changeset
216 motif = seq.substring(index - 8, index + 7);
67635b462045 Uploaded
jfb
parents:
diff changeset
217 index = 8;
67635b462045 Uploaded
jfb
parents:
diff changeset
218 } else if (index - 7 < 1 & index + 7 <= seq.length()) {
67635b462045 Uploaded
jfb
parents:
diff changeset
219 motif = seq.substring(0, index + 7);
67635b462045 Uploaded
jfb
parents:
diff changeset
220 } else if (index - 7 >= 1 & index + 7 > seq.length()) {
67635b462045 Uploaded
jfb
parents:
diff changeset
221 motif = seq.substring(index - 8, seq.length());
67635b462045 Uploaded
jfb
parents:
diff changeset
222 index = 8;
67635b462045 Uploaded
jfb
parents:
diff changeset
223 } else {
67635b462045 Uploaded
jfb
parents:
diff changeset
224 motif = seq;
67635b462045 Uploaded
jfb
parents:
diff changeset
225 }
67635b462045 Uploaded
jfb
parents:
diff changeset
226
67635b462045 Uploaded
jfb
parents:
diff changeset
227 addMotif(motif, peptide, index);
67635b462045 Uploaded
jfb
parents:
diff changeset
228
67635b462045 Uploaded
jfb
parents:
diff changeset
229 }
67635b462045 Uploaded
jfb
parents:
diff changeset
230
67635b462045 Uploaded
jfb
parents:
diff changeset
231 /**
67635b462045 Uploaded
jfb
parents:
diff changeset
232 * Determine if peptide has a protein ID.
67635b462045 Uploaded
jfb
parents:
diff changeset
233 *
67635b462045 Uploaded
jfb
parents:
diff changeset
234 * @param peptide
67635b462045 Uploaded
jfb
parents:
diff changeset
235 * @return
67635b462045 Uploaded
jfb
parents:
diff changeset
236 */
67635b462045 Uploaded
jfb
parents:
diff changeset
237 public boolean hasProtID(Peptide peptide) {
67635b462045 Uploaded
jfb
parents:
diff changeset
238 return !peptide.id.isEmpty();
67635b462045 Uploaded
jfb
parents:
diff changeset
239 }
67635b462045 Uploaded
jfb
parents:
diff changeset
240
67635b462045 Uploaded
jfb
parents:
diff changeset
241 /**
67635b462045 Uploaded
jfb
parents:
diff changeset
242 * Adds seq to motif map. Also pair peptide refs and index of phospho-
67635b462045 Uploaded
jfb
parents:
diff changeset
243 * -tyrosine.
67635b462045 Uploaded
jfb
parents:
diff changeset
244 *
67635b462045 Uploaded
jfb
parents:
diff changeset
245 * @param seq
67635b462045 Uploaded
jfb
parents:
diff changeset
246 * @param ref
67635b462045 Uploaded
jfb
parents:
diff changeset
247 * @param index
67635b462045 Uploaded
jfb
parents:
diff changeset
248 */
67635b462045 Uploaded
jfb
parents:
diff changeset
249 private void addMotif(String seq, Peptide peptide, int index) {
67635b462045 Uploaded
jfb
parents:
diff changeset
250
67635b462045 Uploaded
jfb
parents:
diff changeset
251 /** Check if sequence is unique. */
67635b462045 Uploaded
jfb
parents:
diff changeset
252 if (!motifs.containsKey(seq)) {
67635b462045 Uploaded
jfb
parents:
diff changeset
253 ArrayList<String> regenSeqs = regenSeq(peptide.id, seq, index);
67635b462045 Uploaded
jfb
parents:
diff changeset
254 motifs.put(seq, new Motif(seq, peptide.ref, index, regenSeqs));
67635b462045 Uploaded
jfb
parents:
diff changeset
255 markMod(peptide.id);
67635b462045 Uploaded
jfb
parents:
diff changeset
256 } else {
67635b462045 Uploaded
jfb
parents:
diff changeset
257 List<String> refs = peptide.id;
67635b462045 Uploaded
jfb
parents:
diff changeset
258 List<String> ids = parseRef(motifs.get(seq).ref);
67635b462045 Uploaded
jfb
parents:
diff changeset
259 ArrayList<String> newID = new ArrayList<>();
67635b462045 Uploaded
jfb
parents:
diff changeset
260
67635b462045 Uploaded
jfb
parents:
diff changeset
261 for (String ref : refs) {
67635b462045 Uploaded
jfb
parents:
diff changeset
262 if (!ids.contains(ref)) {
67635b462045 Uploaded
jfb
parents:
diff changeset
263 newID.add(ref);
67635b462045 Uploaded
jfb
parents:
diff changeset
264 }
67635b462045 Uploaded
jfb
parents:
diff changeset
265 }
67635b462045 Uploaded
jfb
parents:
diff changeset
266
67635b462045 Uploaded
jfb
parents:
diff changeset
267 if (!newID.isEmpty()) {
67635b462045 Uploaded
jfb
parents:
diff changeset
268 markMod(newID);
67635b462045 Uploaded
jfb
parents:
diff changeset
269 ids.addAll(newID);
67635b462045 Uploaded
jfb
parents:
diff changeset
270 String temp = "";
67635b462045 Uploaded
jfb
parents:
diff changeset
271 for (String id : ids) {
67635b462045 Uploaded
jfb
parents:
diff changeset
272 temp += id + ";";
67635b462045 Uploaded
jfb
parents:
diff changeset
273 }
67635b462045 Uploaded
jfb
parents:
diff changeset
274 ArrayList<String> regenSeqs = regenSeq(ids, seq, index);
67635b462045 Uploaded
jfb
parents:
diff changeset
275 motifs.put(seq, new Motif(seq, temp, index, regenSeqs));
67635b462045 Uploaded
jfb
parents:
diff changeset
276 }
67635b462045 Uploaded
jfb
parents:
diff changeset
277 }
67635b462045 Uploaded
jfb
parents:
diff changeset
278 }
67635b462045 Uploaded
jfb
parents:
diff changeset
279
67635b462045 Uploaded
jfb
parents:
diff changeset
280 /** Parse the reference string of peptide.
67635b462045 Uploaded
jfb
parents:
diff changeset
281 *
67635b462045 Uploaded
jfb
parents:
diff changeset
282 * @param ref
67635b462045 Uploaded
jfb
parents:
diff changeset
283 * @return
67635b462045 Uploaded
jfb
parents:
diff changeset
284 */
67635b462045 Uploaded
jfb
parents:
diff changeset
285 public List<String> parseRef(String ref) {
67635b462045 Uploaded
jfb
parents:
diff changeset
286 List<String> temp = Arrays.asList(ref.split(";"));
67635b462045 Uploaded
jfb
parents:
diff changeset
287 ArrayList<String> ids = new ArrayList<>();
67635b462045 Uploaded
jfb
parents:
diff changeset
288 temp.stream().forEach((id) -> {
67635b462045 Uploaded
jfb
parents:
diff changeset
289 ids.add(id.trim());
67635b462045 Uploaded
jfb
parents:
diff changeset
290 });
67635b462045 Uploaded
jfb
parents:
diff changeset
291
67635b462045 Uploaded
jfb
parents:
diff changeset
292 return ids;
67635b462045 Uploaded
jfb
parents:
diff changeset
293 }
67635b462045 Uploaded
jfb
parents:
diff changeset
294
67635b462045 Uploaded
jfb
parents:
diff changeset
295 private ArrayList<String> regenSeq(List<String> ids, String seq, int i) {
67635b462045 Uploaded
jfb
parents:
diff changeset
296
67635b462045 Uploaded
jfb
parents:
diff changeset
297 ArrayList<String> seqs = new ArrayList<>();
67635b462045 Uploaded
jfb
parents:
diff changeset
298
67635b462045 Uploaded
jfb
parents:
diff changeset
299 for (String id : ids) {
67635b462045 Uploaded
jfb
parents:
diff changeset
300 if (database.containsKey(id)) {
67635b462045 Uploaded
jfb
parents:
diff changeset
301 String prot = database.get(id).seq;
67635b462045 Uploaded
jfb
parents:
diff changeset
302 int index = prot.indexOf(seq) + i;
67635b462045 Uploaded
jfb
parents:
diff changeset
303
67635b462045 Uploaded
jfb
parents:
diff changeset
304 String motif = "";
67635b462045 Uploaded
jfb
parents:
diff changeset
305
67635b462045 Uploaded
jfb
parents:
diff changeset
306 if (index - 7 >= 1 & index + 7 <= prot.length()) {
67635b462045 Uploaded
jfb
parents:
diff changeset
307 motif = prot.substring(index - 8, index + 7);
67635b462045 Uploaded
jfb
parents:
diff changeset
308 } else if (index - 7 < 1 & index + 7 <= prot.length()) {
67635b462045 Uploaded
jfb
parents:
diff changeset
309 motif = prot.substring(0, index + 7);
67635b462045 Uploaded
jfb
parents:
diff changeset
310 } else if (index - 7 >= 1 & index + 7 > prot.length()) {
67635b462045 Uploaded
jfb
parents:
diff changeset
311 motif = prot.substring(index - 8, prot.length());
67635b462045 Uploaded
jfb
parents:
diff changeset
312 } else {
67635b462045 Uploaded
jfb
parents:
diff changeset
313 motif = prot;
67635b462045 Uploaded
jfb
parents:
diff changeset
314 }
67635b462045 Uploaded
jfb
parents:
diff changeset
315
67635b462045 Uploaded
jfb
parents:
diff changeset
316 if (!seqs.contains(motif)){
67635b462045 Uploaded
jfb
parents:
diff changeset
317 seqs.add(motif);
67635b462045 Uploaded
jfb
parents:
diff changeset
318 }
67635b462045 Uploaded
jfb
parents:
diff changeset
319
67635b462045 Uploaded
jfb
parents:
diff changeset
320 }
67635b462045 Uploaded
jfb
parents:
diff changeset
321 }
67635b462045 Uploaded
jfb
parents:
diff changeset
322
67635b462045 Uploaded
jfb
parents:
diff changeset
323 return seqs;
67635b462045 Uploaded
jfb
parents:
diff changeset
324
67635b462045 Uploaded
jfb
parents:
diff changeset
325 }
67635b462045 Uploaded
jfb
parents:
diff changeset
326
67635b462045 Uploaded
jfb
parents:
diff changeset
327 }