Mercurial > repos > davidvanzessen > argalaxy_tools
annotate igblast/igblast.r @ 54:81b3eb11ed2c draft
"planemo upload commit 6c0195cc4de6a34e2c46d875be4fc2157a21cdf6"
author | rhpvorderman |
---|---|
date | Tue, 16 Nov 2021 15:42:32 +0000 |
parents | 124b7fd92a3e |
children |
rev | line source |
---|---|
54
81b3eb11ed2c
"planemo upload commit 6c0195cc4de6a34e2c46d875be4fc2157a21cdf6"
rhpvorderman
parents:
52
diff
changeset
|
# Reshape a tab-separated annotation table into one output row per read.
# (Presumably the input is parsed IgBLAST output -- TODO confirm with the
# step that produces it.)
#
# Usage: Rscript igblast.r <infile> <outfile>
#   infile   Tab-separated table with a header row. Columns used, as named
#            after read.table() has sanitised the header: X.reads_count,
#            inFrame, noStop, vSegment, dSegment, jSegment, cdr1aa, cdr2aa,
#            cdr3aa, cdr3nt.
#   outfile  Tab-separated table with columns: ID, VDJ Frame, Top V/D/J Gene,
#            CDR1/CDR2/CDR3 Seq and Length, CDR3 Seq DNA, CDR3 Length DNA,
#            Strand, CDR3 Found How, AA JUNCTION. Every input row is repeated
#            X.reads_count times and IDs are renumbered 1..N afterwards.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop("usage: Rscript igblast.r <infile> <outfile>", call. = FALSE)
}

infile <- args[1]
outfile <- args[2]

blasted <- read.table(
  infile,
  header = TRUE,
  sep = "\t",
  fill = TRUE,
  stringsAsFactors = FALSE,
  comment.char = ""
)

# Fail early, with the offending names, instead of hitting an obscure
# "undefined columns selected" / "replacement has 0 rows" error further down
# (or, for inFrame/noStop, silently labelling every row "Out-of-frame").
required_cols <- c(
  "X.reads_count", "inFrame", "noStop",
  "vSegment", "dSegment", "jSegment",
  "cdr1aa", "cdr2aa", "cdr3aa", "cdr3nt"
)
missing_cols <- setdiff(required_cols, names(blasted))
if (length(missing_cols) > 0) {
  stop(
    "input file is missing required column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

n_rows <- nrow(blasted)

# Frame classification. `%in%` never yields NA, so rows whose inFrame/noStop
# value is missing simply keep the "Out-of-frame" default instead of aborting
# the script.
in_frame <- blasted$inFrame %in% "true"
has_stop <- blasted$noStop %in% "false"
no_stop <- blasted$noStop %in% "true"

vdj_frame <- rep("Out-of-frame", n_rows)
vdj_frame[in_frame & has_stop] <- "In-frame with stop codon"
vdj_frame[in_frame & no_stop] <- "In-frame"

# A CDR3 counts as not found when its nucleotide sequence is empty. A column
# that is blank on every row is read as NA, so NA is treated the same way.
cdr3_missing <- is.na(blasted$cdr3nt) | blasted$cdr3nt == ""
cdr3_found_how <- rep("found", n_rows)
cdr3_found_how[cdr3_missing] <- "NOT_FOUND"

# Output table, already in its final column order and with its final names.
# rep(..., n_rows) keeps the constant columns valid for a header-only input.
per_row <- data.frame(
  "ID" = seq_len(n_rows),
  "VDJ Frame" = vdj_frame,
  "Top V Gene" = blasted$vSegment,
  "Top D Gene" = blasted$dSegment,
  "Top J Gene" = blasted$jSegment,
  "CDR1 Seq" = blasted$cdr1aa,
  "CDR1 Length" = nchar(blasted$cdr1aa),
  "CDR2 Seq" = blasted$cdr2aa,
  "CDR2 Length" = nchar(blasted$cdr2aa),
  "CDR3 Seq" = blasted$cdr3aa,
  "CDR3 Length" = nchar(blasted$cdr3aa),
  "CDR3 Seq DNA" = blasted$cdr3nt,
  "CDR3 Length DNA" = nchar(blasted$cdr3nt),
  "Strand" = rep("+/-", n_rows),
  "CDR3 Found How" = cdr3_found_how,
  "AA JUNCTION" = blasted$cdr3aa,
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# Duplicate every row according to its read count, then renumber the IDs so
# they run 1..N over the expanded table.
row_index <- integer(0)
if (n_rows > 0) {
  read_counts <- blasted$X.reads_count
  if (!is.numeric(read_counts) || anyNA(read_counts) || any(read_counts < 0)) {
    stop(
      "column X.reads_count must hold non-negative numbers without gaps",
      call. = FALSE
    )
  }
  row_index <- rep(seq_len(n_rows), times = read_counts)
}

expanded <- per_row[row_index, , drop = FALSE]
expanded$ID <- seq_len(nrow(expanded))

write.table(
  expanded,
  outfile,
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = TRUE
)