annotate igblast/igblast.r @ 52:124b7fd92a3e draft

Uploaded
author davidvanzessen
date Thu, 25 Feb 2021 13:36:15 +0000
parents afe85eb6572e
children 81b3eb11ed2c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
# igblast/igblast.r
# Provenance: changeset 52:124b7fd92a3e ("Uploaded"), author davidvanzessen.
#
# Reads a tab-separated IgBLAST result table and writes a tab-separated
# per-read report.
#
# Usage: Rscript igblast.r <infile> <outfile>
#
# Input columns read (names as produced by read.table's name mangling):
#   X.reads_count, inFrame, noStop, vSegment, dSegment, jSegment,
#   cdr1aa, cdr2aa, cdr3aa, cdr3nt
# Output columns:
#   ID, VDJ Frame, Top V Gene, Top D Gene, Top J Gene, CDR1 Seq, CDR1 Length,
#   CDR2 Seq, CDR2 Length, CDR3 Seq, CDR3 Length, CDR3 Seq DNA,
#   CDR3 Length DNA, Strand, CDR3 Found How, AA JUNCTION
# Each input row is repeated X.reads_count times and IDs are renumbered
# consecutively after that expansion.

# Coerce a column to character, mapping missing values to "".
# read.table turns an entirely empty column into logical NA, which would
# otherwise break nchar() and the == "" comparison further down.
as_text <- function(x) {
  x <- as.character(x)
  x[is.na(x)] <- ""
  x
}

# Lower-cased text form of a flag column. read.table converts a column that
# holds only true/false into logical, whose text form is "TRUE"/"FALSE";
# lower-casing makes both representations comparable with "true"/"false".
flag_text <- function(x) {
  tolower(as.character(x))
}

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop("usage: Rscript igblast.r <infile> <outfile>", call. = FALSE)
}
infile <- args[1]
outfile <- args[2]

blasted <- read.table(
  infile,
  header = TRUE,
  sep = "\t",
  fill = TRUE,
  stringsAsFactors = FALSE,
  comment.char = ""
)

required <- c(
  "X.reads_count", "inFrame", "noStop",
  "vSegment", "dSegment", "jSegment",
  "cdr1aa", "cdr2aa", "cdr3aa", "cdr3nt"
)
absent <- setdiff(required, names(blasted))
if (length(absent) > 0) {
  stop(
    "input table is missing required column(s): ",
    paste(absent, collapse = ", "),
    call. = FALSE
  )
}

# The original script echoed the expected columns that are not available.
# With the required columns verified above, "Functionality" is the only one
# that can still be absent; it is reported but not used in the output.
print(setdiff("Functionality", names(blasted)))

n_rows <- nrow(blasted)

# Frame classification. %in% never yields NA, so rows with a missing flag
# simply keep the "Out-of-frame" default instead of aborting the script.
in_frame <- flag_text(blasted$inFrame) %in% "true"
no_stop <- flag_text(blasted$noStop)
vdj_frame <- rep("Out-of-frame", n_rows)
vdj_frame[in_frame & no_stop %in% "false"] <- "In-frame with stop codon"
vdj_frame[in_frame & no_stop %in% "true"] <- "In-frame"

cdr1_aa <- as_text(blasted$cdr1aa)
cdr2_aa <- as_text(blasted$cdr2aa)
cdr3_aa <- as_text(blasted$cdr3aa)
cdr3_nt <- as_text(blasted$cdr3nt)

cdr3_found <- rep("found", n_rows)
cdr3_found[cdr3_nt == ""] <- "NOT_FOUND"

# Duplicate rows based on the read count.
counts <- as.integer(blasted$X.reads_count)
if (anyNA(counts) || any(counts < 0L)) {
  stop(
    "X.reads_count must contain non-negative whole numbers",
    call. = FALSE
  )
}
# seq_len() keeps a header-only table valid (1:nrow() would give c(1, 0)).
expand <- if (n_rows > 0) {
  rep(seq_len(n_rows), times = counts)
} else {
  integer(0)
}

report <- data.frame(
  "ID" = seq_along(expand),
  "VDJ Frame" = vdj_frame[expand],
  "Top V Gene" = as_text(blasted$vSegment)[expand],
  "Top D Gene" = as_text(blasted$dSegment)[expand],
  "Top J Gene" = as_text(blasted$jSegment)[expand],
  "CDR1 Seq" = cdr1_aa[expand],
  "CDR1 Length" = nchar(cdr1_aa)[expand],
  "CDR2 Seq" = cdr2_aa[expand],
  "CDR2 Length" = nchar(cdr2_aa)[expand],
  "CDR3 Seq" = cdr3_aa[expand],
  "CDR3 Length" = nchar(cdr3_aa)[expand],
  "CDR3 Seq DNA" = cdr3_nt[expand],
  "CDR3 Length DNA" = nchar(cdr3_nt)[expand],
  "Strand" = rep("+/-", length(expand)),
  "CDR3 Found How" = cdr3_found[expand],
  "AA JUNCTION" = cdr3_aa[expand],
  check.names = FALSE,
  stringsAsFactors = FALSE
)

write.table(
  report,
  outfile,
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)