Mercurial > repos > davidvanzessen > argalaxy_tools
diff igblast/igblast.r @ 52:124b7fd92a3e draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 25 Feb 2021 13:36:15 +0000 |
parents | afe85eb6572e |
children | 81b3eb11ed2c |
line wrap: on
line diff
--- a/igblast/igblast.r Thu Aug 08 07:40:36 2019 -0400 +++ b/igblast/igblast.r Thu Feb 25 13:36:15 2021 +0000 @@ -1,56 +1,56 @@ -args <- commandArgs(trailingOnly = TRUE) - -infile=args[1] -outfile=args[2] - -blasted = read.table(infile, header=T, sep="\t", fill=T, stringsAsFactors=F, comment.char="") - -blasted$ID = 1:nrow(blasted) -blasted$VDJ.Frame = "Out-of-frame" - -search = blasted$inFrame == "true" & blasted$noStop == "false" -if(sum(search) > 0){ - blasted[search ,]$VDJ.Frame = "In-frame with stop codon" -} - -search = blasted$inFrame == "true" & blasted$noStop == "true" -if(sum(search) > 0){ - blasted[search ,]$VDJ.Frame = "In-frame" -} - -blasted$Top.V.Gene = blasted$vSegment -blasted$Top.D.Gene = blasted$dSegment -blasted$Top.J.Gene = blasted$jSegment -blasted$CDR1.Seq = blasted$cdr1aa -blasted$CDR1.Length = nchar(blasted$CDR1.Seq) -blasted$CDR2.Seq = blasted$cdr2aa -blasted$CDR2.Length = nchar(blasted$CDR2.Seq) -blasted$CDR3.Seq = blasted$cdr3aa -blasted$CDR3.Length = nchar(blasted$CDR3.Seq) -blasted$CDR3.Seq.DNA = blasted$cdr3nt -blasted$CDR3.Length.DNA = nchar(blasted$CDR3.Seq.DNA) -blasted$Strand = "+/-" -blasted$CDR3.Found.How = "found" - -search = blasted$cdr3nt == "" -if(sum(search) > 0){ - blasted[search,]$CDR3.Found.How = "NOT_FOUND" -} - -blasted$AA.JUNCTION = blasted$CDR3.Seq - -n = c("X.reads_count", "ID", "VDJ.Frame", "Top.V.Gene", "Top.D.Gene", "Top.J.Gene", "CDR1.Seq", "CDR1.Length", "CDR2.Seq", "CDR2.Length", "CDR3.Seq", "CDR3.Length", "CDR3.Seq.DNA", "CDR3.Length.DNA", "Strand", "CDR3.Found.How", "Functionality", "AA.JUNCTION") - -n[!(n %in% names(blasted))] - -blasted = blasted[,c("X.reads_count", "ID", "VDJ.Frame", "Top.V.Gene", "Top.D.Gene", "Top.J.Gene", "CDR1.Seq", "CDR1.Length", "CDR2.Seq", "CDR2.Length", "CDR3.Seq", "CDR3.Length", "CDR3.Seq.DNA", "CDR3.Length.DNA", "Strand", "CDR3.Found.How", "AA.JUNCTION")] - -names(blasted) = c("frequency.count", "ID", "VDJ Frame", "Top V Gene", "Top D Gene", "Top J Gene", "CDR1 Seq", "CDR1 Length", "CDR2 Seq", "CDR2 Length", "CDR3 Seq", "CDR3 Length", "CDR3 Seq DNA", "CDR3 Length DNA", "Strand", "CDR3 Found How", "AA JUNCTION") - -#duplicate rows based on frequency.count -blasted = blasted[rep(seq_len(nrow(blasted)), blasted$frequency.count),] -blasted$ID = 1:nrow(blasted) - -blasted = blasted[,c("ID", "VDJ Frame", "Top V Gene", "Top D Gene", "Top J Gene", "CDR1 Seq", "CDR1 Length", "CDR2 Seq", "CDR2 Length", "CDR3 Seq", "CDR3 Length", "CDR3 Seq DNA", "CDR3 Length DNA", "Strand", "CDR3 Found How", "AA JUNCTION")] - -write.table(blasted, outfile, quote=F, sep="\t", row.names=F, col.names=T) +args <- commandArgs(trailingOnly = TRUE) + +infile=args[1] +outfile=args[2] + +blasted = read.table(infile, header=T, sep="\t", fill=T, stringsAsFactors=F, comment.char="") + +blasted$ID = 1:nrow(blasted) +blasted$VDJ.Frame = "Out-of-frame" + +search = blasted$inFrame == "true" & blasted$noStop == "false" +if(sum(search) > 0){ + blasted[search ,]$VDJ.Frame = "In-frame with stop codon" +} + +search = blasted$inFrame == "true" & blasted$noStop == "true" +if(sum(search) > 0){ + blasted[search ,]$VDJ.Frame = "In-frame" +} + +blasted$Top.V.Gene = blasted$vSegment +blasted$Top.D.Gene = blasted$dSegment +blasted$Top.J.Gene = blasted$jSegment +blasted$CDR1.Seq = blasted$cdr1aa +blasted$CDR1.Length = nchar(blasted$CDR1.Seq) +blasted$CDR2.Seq = blasted$cdr2aa +blasted$CDR2.Length = nchar(blasted$CDR2.Seq) +blasted$CDR3.Seq = blasted$cdr3aa +blasted$CDR3.Length = nchar(blasted$CDR3.Seq) +blasted$CDR3.Seq.DNA = blasted$cdr3nt +blasted$CDR3.Length.DNA = nchar(blasted$CDR3.Seq.DNA) +blasted$Strand = "+/-" +blasted$CDR3.Found.How = "found" + +search = blasted$cdr3nt == "" +if(sum(search) > 0){ + blasted[search,]$CDR3.Found.How = "NOT_FOUND" +} + +blasted$AA.JUNCTION = blasted$CDR3.Seq + +n = c("X.reads_count", "ID", "VDJ.Frame", "Top.V.Gene", "Top.D.Gene", "Top.J.Gene", "CDR1.Seq", "CDR1.Length", "CDR2.Seq", "CDR2.Length", "CDR3.Seq", "CDR3.Length", "CDR3.Seq.DNA", "CDR3.Length.DNA", "Strand", "CDR3.Found.How", "Functionality", "AA.JUNCTION") + +n[!(n %in% names(blasted))] + +blasted = blasted[,c("X.reads_count", "ID", "VDJ.Frame", "Top.V.Gene", "Top.D.Gene", "Top.J.Gene", "CDR1.Seq", "CDR1.Length", "CDR2.Seq", "CDR2.Length", "CDR3.Seq", "CDR3.Length", "CDR3.Seq.DNA", "CDR3.Length.DNA", "Strand", "CDR3.Found.How", "AA.JUNCTION")] + +names(blasted) = c("frequency.count", "ID", "VDJ Frame", "Top V Gene", "Top D Gene", "Top J Gene", "CDR1 Seq", "CDR1 Length", "CDR2 Seq", "CDR2 Length", "CDR3 Seq", "CDR3 Length", "CDR3 Seq DNA", "CDR3 Length DNA", "Strand", "CDR3 Found How", "AA JUNCTION") + +#duplicate rows based on frequency.count +blasted = blasted[rep(seq_len(nrow(blasted)), blasted$frequency.count),] +blasted$ID = 1:nrow(blasted) + +blasted = blasted[,c("ID", "VDJ Frame", "Top V Gene", "Top D Gene", "Top J Gene", "CDR1 Seq", "CDR1 Length", "CDR2 Seq", "CDR2 Length", "CDR3 Seq", "CDR3 Length", "CDR3 Seq DNA", "CDR3 Length DNA", "Strand", "CDR3 Found How", "AA JUNCTION")] + +write.table(blasted, outfile, quote=F, sep="\t", row.names=F, col.names=T)