# HG changeset patch # User davidvanzessen # Date 1478009737 14400 # Node ID 012a738edf5acb20af56e92d7f5d99400ba664a1 # Parent 477e95b098fda88ee772b52c294a283705843c7f Uploaded diff -r 477e95b098fd -r 012a738edf5a gene_identification.py --- a/gene_identification.py Mon Oct 31 05:05:26 2016 -0400 +++ b/gene_identification.py Tue Nov 01 10:15:37 2016 -0400 @@ -47,10 +47,12 @@ #lambda/kappa reference sequence searchstrings = {"ca": "catccccgaccagccccaaggtcttcccgctgagcctctgcagcacccagccagatgggaacgtggtcatcgcctgcctgg", "cg": "ctccaccaagggcccatcggtcttccccctggcaccctcctccaagagcacctctgggggcacagcggcc", + "ce": "gcctccacacagagcccatccgtcttccccttgacccgctgctgcaaaaacattccctcc", "cm": "gggagtgcatccgccccaacc"} #new (shorter) cm sequence compiledregex = {"ca": [], "cg": [], + "ce": [], "cm": []} #lambda/kappa reference sequence variable nucleotides @@ -102,6 +104,8 @@ for i in range(0, len(searchstrings["cm"]) - chunklength, chunklength / 2): compiledregex["cm"].append((re.compile(searchstrings["cm"][i:i+chunklength]), False)) +for i in range(0, len(searchstrings["ce"]) - chunklength, chunklength / 2): + compiledregex["ce"].append((re.compile(searchstrings["ce"][i:i+chunklength]), False)) def removeAndReturnMaxIndex(x): #simplifies a list comprehension @@ -114,11 +118,11 @@ start_location = dict() hits = dict() alltotal = 0 -for key in compiledregex.keys(): #for ca/cg/cm +for key in compiledregex.keys(): #for ca/cg/cm/ce regularexpressions = compiledregex[key] #get the compiled regular expressions for ID in dic.keys()[0:]: #for every ID if ID not in hits.keys(): #ensure that the dictionairy that keeps track of the hits for every gene exists - hits[ID] = {"ca_hits": 0, "cg_hits": 0, "cm_hits": 0, "ca1": 0, "ca2": 0, "cg1": 0, "cg2": 0, "cg3": 0, "cg4": 0} + hits[ID] = {"ca_hits": 0, "cg_hits": 0, "cm_hits": 0, "ce_hits": 0, "ca1": 0, "ca2": 0, "cg1": 0, "cg2": 0, "cg3": 0, "cg4": 0} currentIDHits = hits[ID] seq = dic[ID] lastindex = 0 @@ -130,7 +134,7 @@ break regex, hasVar = regexp matches = regex.finditer(seq[lastindex:]) - for match in matches: #for every match with the current regex, only uses the first hit + for match in matches: #for every match with the current regex, only uses the first hit because of the break at the end of this loop lastindex += match.start() start[relativeStartLocation + start_zero] += 1 if hasVar: #if the regex has a variable nt in it @@ -144,7 +148,7 @@ currentIDHits["cg2"] += len([1 for x in cg2 if chunkstart <= x < chunkend and cg2[x] == seq[lastindex + x - chunkstart]]) currentIDHits["cg3"] += len([1 for x in cg3 if chunkstart <= x < chunkend and cg3[x] == seq[lastindex + x - chunkstart]]) currentIDHits["cg4"] += len([1 for x in cg4 if chunkstart <= x < chunkend and cg4[x] == seq[lastindex + x - chunkstart]]) - else: #key == "cm" #no variable regions in 'cm' + else: #key == "cm" #no variable regions in 'cm' or 'ce' pass break #this only breaks when there was a match with the regex, breaking means the 'else:' clause is skipped else: #only runs if there were no hits @@ -155,13 +159,10 @@ #start_location[ID + "_" + key] = str(start.index(max(start))) -chunksInCA = len(compiledregex["ca"]) -chunksInCG = len(compiledregex["cg"]) -chunksInCM = len(compiledregex["cm"]) -requiredChunkPercentage = 0.7 varsInCA = float(len(ca1.keys()) * 2) varsInCG = float(len(cg1.keys()) * 2) - 2 # -2 because the sliding window doesn't hit the first and last nt twice varsInCM = 0 +varsInCE = 0 @@ -183,17 +184,19 @@ possibleca = float(len(compiledregex["ca"])) possiblecg = float(len(compiledregex["cg"])) possiblecm = float(len(compiledregex["cm"])) + possiblece = float(len(compiledregex["ce"])) cahits = currentIDHits["ca_hits"] cghits = currentIDHits["cg_hits"] cmhits = currentIDHits["cm_hits"] - if cahits >= cghits and cahits >= cmhits: #its a ca gene + cehits = currentIDHits["ce_hits"] + if cahits >= cghits and cahits >= cmhits and cahits >= cehits: #its a ca gene ca1hits = currentIDHits["ca1"] ca2hits = currentIDHits["ca2"] if ca1hits >= ca2hits: o.write(ID + "\tIGA1\t" + str(int(ca1hits / varsInCA * 100)) + "\t" + str(int(cahits / possibleca * 100)) + "\t" + start_location[ID + "_ca"] + "\n") else: o.write(ID + "\tIGA2\t" + str(int(ca2hits / varsInCA * 100)) + "\t" + str(int(cahits / possibleca * 100)) + "\t" + start_location[ID + "_ca"] + "\n") - elif cghits >= cahits and cghits >= cmhits: #its a cg gene + elif cghits >= cahits and cghits >= cmhits and cghits >= cehits: #its a cg gene cg1hits = currentIDHits["cg1"] cg2hits = currentIDHits["cg2"] cg3hits = currentIDHits["cg3"] @@ -206,8 +209,11 @@ o.write(ID + "\tIGG3\t" + str(int(cg3hits / varsInCG * 100)) + "\t" + str(int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n") else: #cg4 gene o.write(ID + "\tIGG4\t" + str(int(cg4hits / varsInCG * 100)) + "\t" + str(int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n") - else: #its a cm gene - o.write(ID + "\tIGM\t100\t" + str(int(cmhits / possiblecm * 100)) + "\t" + start_location[ID + "_cg"] + "\n") + else: #its a cm or ce gene + if cmhits >= cehits: + o.write(ID + "\tIGM\t100\t" + str(int(cmhits / possiblecm * 100)) + "\t" + start_location[ID + "_cm"] + "\n") + else: + o.write(ID + "\tIGE\t100\t" + str(int(cehits / possiblece * 100)) + "\t" + start_location[ID + "_ce"] + "\n") seq_write_count += 1 print "Time: %i" % (int(time.time() * 1000) - starttime) diff -r 477e95b098fd -r 012a738edf5a merge_and_filter.r --- a/merge_and_filter.r Mon Oct 31 05:05:26 2016 -0400 +++ b/merge_and_filter.r Tue Nov 01 10:15:37 2016 -0400 @@ -141,44 +141,6 @@ result[is.na(result[,col]),] = 0 } -write.table(result, before.unique.file, sep="\t", quote=F,row.names=F,col.names=T) - -if(filter.unique != "no"){ - clmns = names(result) - - if(empty.region.filter == "leader"){ - result$unique.def = paste(result$FR1.IMGT.seq, result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) - } else if(empty.region.filter == "FR1"){ - result$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) - } else if(empty.region.filter == "CDR1"){ - rresult$unique.def = paste(result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) - } else if(empty.region.filter == "FR2"){ - result$unique.def = paste(result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) - } - - if(grepl("_c", filter.unique)){ - result$unique.def = paste(result$unique.def, result$best_match) - } - - #fltr = result$unique.def %in% result.filtered$unique.def - - if(grepl("keep", filter.unique)){ - result$unique.def = paste(result$unique.def, result$best_match) #keep the unique sequences that are in multiple classes - result = result[!duplicated(result$unique.def),] - } else { - result = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),] - result$unique.def = paste(result$unique.def, result$best_match) #keep the unique sequences that are in multiple classes - result = result[!duplicated(result$unique.def),] - } - - #result = result[,clmns] - - #write.table(inputdata.removed, "unique_removed.csv", sep=",",quote=F,row.names=F,col.names=T) -} - -filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result))) - - splt = strsplit(class.filter, "_")[[1]] chunk_hit_threshold = as.numeric(splt[1]) nt_hit_threshold = as.numeric(splt[2]) @@ -198,10 +160,39 @@ result$best_match = "all" } -if(any(higher_than, na.rm=T)){ - #summ = summ[higher_than,] +write.table(result, before.unique.file, sep="\t", quote=F,row.names=F,col.names=T) + +if(filter.unique != "no"){ + clmns = names(result) + + if(empty.region.filter == "leader"){ + result$unique.def = paste(result$FR1.IMGT.seq, result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) + } else if(empty.region.filter == "FR1"){ + result$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) + } else if(empty.region.filter == "CDR1"){ + result$unique.def = paste(result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) + } else if(empty.region.filter == "FR2"){ + result$unique.def = paste(result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) + } + + if(grepl("_c", filter.unique)){ + result$unique.def = paste(result$unique.def, result$best_match) + } + + if(grepl("keep", filter.unique)){ + result$unique.def = paste(result$unique.def, result$best_match) #keep the unique sequences that are in multiple classes + result = result[!duplicated(result$unique.def),] + } else { + result = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),] + result$unique.def = paste(result$unique.def, result$best_match) #keep the unique sequences that are in multiple classes + result = result[!duplicated(result$unique.def),] + } } +write.table(result, gsub("before_unique_filter.txt", "after_unique_filter.txt", before.unique.file), sep="\t", quote=F,row.names=F,col.names=T) + +filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result))) + if(nrow(summ) == 0){ stop("No data remaining after filter") } diff -r 477e95b098fd -r 012a738edf5a pattern_plots.r --- a/pattern_plots.r Mon Oct 31 05:05:26 2016 -0400 +++ b/pattern_plots.r Tue Nov 01 10:15:37 2016 -0400 @@ -22,7 +22,7 @@ -classes = c("IGA", "IGA1", "IGA2", "IGG", "IGG1", "IGG2", "IGG3", "IGG4", "IGM") +classes = c("IGA", "IGA1", "IGA2", "IGG", "IGG1", "IGG2", "IGG3", "IGG4", "IGM", "IGE") xyz = c("x", "y", "z") new.names = c(paste(rep(classes, each=3), xyz, sep="."), paste("un", xyz, sep="."), paste("all", xyz, sep=".")) @@ -45,7 +45,7 @@ write.table(data1, plot1.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) p = ggplot(data1, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of mutations") + guides(fill=guide_legend(title=NULL)) -p = p + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=13, colour="black")) + scale_fill_manual(values=c("RGYW.WRCY" = "white", "TW.WA" = "blue4")) +p = p + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("RGYW.WRCY" = "white", "TW.WA" = "blue4")) #p = p + scale_colour_manual(values=c("RGYW.WRCY" = "black", "TW.WA" = "blue4")) png(filename=plot1.png, width=480, height=300) print(p) @@ -79,7 +79,7 @@ write.table(data2, plot2.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) p = ggplot(data2, aes(x=Class, y=value, fill=Type)) + geom_bar(position="fill", stat="identity", colour = "black") + scale_y_continuous(labels=percent_format()) + guides(fill=guide_legend(title=NULL)) + ylab("% of mutations") -p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white")) +p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white")) #p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black")) png(filename=plot2.png, width=480, height=300) print(p) @@ -115,7 +115,7 @@ write.table(data3, plot3.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) p = ggplot(data3, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of nucleotides") + guides(fill=guide_legend(title=NULL)) -p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white")) +p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white")) #p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black")) png(filename=plot3.png, width=480, height=300) print(p) diff -r 477e95b098fd -r 012a738edf5a shm_csr.r --- a/shm_csr.r Mon Oct 31 05:05:26 2016 -0400 +++ b/shm_csr.r Tue Nov 01 10:15:37 2016 -0400 @@ -280,15 +280,13 @@ transition2 = merge(transition2, base.order, by.x="variable", by.y="base") transition2[is.na(transition2$value),]$value = 0 - - print(transition2) if(any(transition2$value != 0)){ #having rows of data but a transition table filled with 0 is bad print("Plotting stacked transition") png(filename=paste("transitions_stacked_", name, ".png", sep="")) p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL)) - p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4")) + p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4")) #p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black")) print(p) dev.off() @@ -374,7 +372,7 @@ pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=Gene)) pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGA1" = "lightblue1", "IGA2" = "blue4")) pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL) - pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) + pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank()) pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGA subclasses", "( n =", sum(genesForPlot$Freq), ")")) write.table(genesForPlot, "IGA_pie.txt", sep="\t",quote=F,row.names=F,col.names=T) @@ -395,7 +393,7 @@ pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=Gene)) pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred")) pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL) - pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) + pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank()) pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGG subclasses", "( n =", sum(genesForPlot$Freq), ")")) write.table(genesForPlot, "IGG_pie.txt", sep="\t",quote=F,row.names=F,col.names=T) @@ -415,7 +413,7 @@ p = ggplot(dat.clss, aes(best_match, percentage_mutations)) p = p + geom_point(aes(colour=best_match), position="jitter") + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) -p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) +p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) p = p + scale_fill_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "all" = "blue4")) p = p + scale_colour_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "all" = "blue4")) @@ -442,7 +440,7 @@ frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) p = ggplot(frequency_bins_data, aes(frequency_bins, frequency)) -p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) +p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "black", "all" = "blue4")) png(filename="frequency_ranges.png") diff -r 477e95b098fd -r 012a738edf5a shm_csr.xml --- a/shm_csr.xml Mon Oct 31 05:05:26 2016 -0400 +++ b/shm_csr.xml Tue Nov 01 10:15:37 2016 -0400 @@ -1,7 +1,7 @@ - wrapper.sh $in_file custom $out_file $out_file.files_path ${in_file.name} "-" $functionality $unique $naive_output_ca $naive_output_cg $naive_output_cm $filter_uniques $class_filter $empty_region_filter + wrapper.sh $in_file custom $out_file $out_file.files_path ${in_file.name} "-" $functionality $unique $naive_output_ca $naive_output_cg $naive_output_cm $filter_uniques $class_filter $empty_region_filter $fast @@ -33,7 +33,7 @@ - + @@ -46,6 +46,14 @@ + + + + + + + + diff -r 477e95b098fd -r 012a738edf5a wrapper.sh --- a/wrapper.sh Mon Oct 31 05:05:26 2016 -0400 +++ b/wrapper.sh Tue Nov 01 10:15:37 2016 -0400 @@ -16,6 +16,7 @@ filter_unique=${12} class_filter=${13} empty_region_filter=${14} +fast=${15} mkdir $outdir tar -xzf $dir/style.tar.gz -C $outdir @@ -62,101 +63,112 @@ Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} ${empty_region_filter} 2>&1 -echo "---------------- creating new IMGT zips ----------------" -echo "---------------- creating new IMGT zips ----------------
" >> $log +if [[ "$fast" == "no" ]] ; then -mkdir $outdir/new_IMGT + echo "---------------- creating new IMGT zips ----------------" + echo "---------------- creating new IMGT zips ----------------
" >> $log + + mkdir $outdir/new_IMGT -cat `find $PWD/files/ -name "1_*"` > "$outdir/new_IMGT/1_Summary.txt" -cat `find $PWD/files/ -name "2_*"` > "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt" -cat `find $PWD/files/ -name "3_*"` > "$outdir/new_IMGT/3_Nt-sequences.txt" -cat `find $PWD/files/ -name "4_*"` > "$outdir/new_IMGT/4_IMGT-gapped-AA-sequences.txt" -cat `find $PWD/files/ -name "5_*"` > "$outdir/new_IMGT/5_AA-sequences.txt" -cat `find $PWD/files/ -name "6_*"` > "$outdir/new_IMGT/6_Junction.txt" -cat `find $PWD/files/ -name "7_*"` > "$outdir/new_IMGT/7_V-REGION-mutation-and-AA-change-table.txt" -cat `find $PWD/files/ -name "8_*"` > "$outdir/new_IMGT/8_V-REGION-nt-mutation-statistics.txt" -cat `find $PWD/files/ -name "9_*"` > "$outdir/new_IMGT/9_V-REGION-AA-change-statistics.txt" -cat `find $PWD/files/ -name "10_*"` > "$outdir/new_IMGT/10_V-REGION-mutation-hotspots.txt" + cat `find $PWD/files/ -name "1_*"` > "$outdir/new_IMGT/1_Summary.txt" + cat `find $PWD/files/ -name "2_*"` > "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt" + cat `find $PWD/files/ -name "3_*"` > "$outdir/new_IMGT/3_Nt-sequences.txt" + cat `find $PWD/files/ -name "4_*"` > "$outdir/new_IMGT/4_IMGT-gapped-AA-sequences.txt" + cat `find $PWD/files/ -name "5_*"` > "$outdir/new_IMGT/5_AA-sequences.txt" + cat `find $PWD/files/ -name "6_*"` > "$outdir/new_IMGT/6_Junction.txt" + cat `find $PWD/files/ -name "7_*"` > "$outdir/new_IMGT/7_V-REGION-mutation-and-AA-change-table.txt" + cat `find $PWD/files/ -name "8_*"` > "$outdir/new_IMGT/8_V-REGION-nt-mutation-statistics.txt" + cat `find $PWD/files/ -name "9_*"` > "$outdir/new_IMGT/9_V-REGION-AA-change-statistics.txt" + cat `find $PWD/files/ -name "10_*"` > "$outdir/new_IMGT/10_V-REGION-mutation-hotspots.txt" -mkdir $outdir/new_IMGT_IGA -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA + mkdir $outdir/new_IMGT_IGA + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA -mkdir $outdir/new_IMGT_IGA1 -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA1 + mkdir $outdir/new_IMGT_IGA1 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA1 -mkdir $outdir/new_IMGT_IGA2 -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA2 + mkdir $outdir/new_IMGT_IGA2 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA2 -mkdir $outdir/new_IMGT_IGG -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG + mkdir $outdir/new_IMGT_IGG + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG -mkdir $outdir/new_IMGT_IGG1 -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG1 + mkdir $outdir/new_IMGT_IGG1 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG1 -mkdir $outdir/new_IMGT_IGG2 -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG2 + mkdir $outdir/new_IMGT_IGG2 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG2 -mkdir $outdir/new_IMGT_IGG3 -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG3 + mkdir $outdir/new_IMGT_IGG3 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG3 -mkdir $outdir/new_IMGT_IGG4 -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG4 + mkdir $outdir/new_IMGT_IGG4 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG4 + + mkdir $outdir/new_IMGT_IGM + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGM -mkdir $outdir/new_IMGT_IGM -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGM + mkdir $outdir/new_IMGT_IGE + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGE -Rscript $dir/new_imgt.r $outdir/new_IMGT/ $outdir/merged.txt "-" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT/ $outdir/merged.txt "-" 2>&1 + + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA/ $outdir/merged.txt "IGA" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA1/ $outdir/merged.txt "IGA1" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA2/ $outdir/merged.txt "IGA2" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA/ $outdir/merged.txt "IGA" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA1/ $outdir/merged.txt "IGA1" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA2/ $outdir/merged.txt "IGA2" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG/ $outdir/merged.txt "IGG" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG1/ $outdir/merged.txt "IGG1" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG2/ $outdir/merged.txt "IGG2" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG3/ $outdir/merged.txt "IGG3" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG4/ $outdir/merged.txt "IGG4" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG/ $outdir/merged.txt "IGG" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG1/ $outdir/merged.txt "IGG1" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG2/ $outdir/merged.txt "IGG2" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG3/ $outdir/merged.txt "IGG3" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG4/ $outdir/merged.txt "IGG4" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGM/ $outdir/merged.txt "IGM" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGM/ $outdir/merged.txt "IGM" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGE/ $outdir/merged.txt "IGE" 2>&1 -tmp="$PWD" -cd $outdir/new_IMGT/ #tar weirdness... -tar -cJf ../new_IMGT.txz * + tmp="$PWD" + cd $outdir/new_IMGT/ #tar weirdness... + tar -cJf ../new_IMGT.txz * -cd $outdir/new_IMGT_IGA/ -tar -cJf ../new_IMGT_IGA.txz * + cd $outdir/new_IMGT_IGA/ + tar -cJf ../new_IMGT_IGA.txz * -cd $outdir/new_IMGT_IGA1/ -tar -cJf ../new_IMGT_IGA1.txz * + cd $outdir/new_IMGT_IGA1/ + tar -cJf ../new_IMGT_IGA1.txz * -cd $outdir/new_IMGT_IGA2/ -tar -cJf ../new_IMGT_IGA2.txz * + cd $outdir/new_IMGT_IGA2/ + tar -cJf ../new_IMGT_IGA2.txz * -cd $outdir/new_IMGT_IGG/ -tar -cJf ../new_IMGT_IGG.txz * + cd $outdir/new_IMGT_IGG/ + tar -cJf ../new_IMGT_IGG.txz * -cd $outdir/new_IMGT_IGG1/ -tar -cJf ../new_IMGT_IGG1.txz * + cd $outdir/new_IMGT_IGG1/ + tar -cJf ../new_IMGT_IGG1.txz * -cd $outdir/new_IMGT_IGG2/ -tar -cJf ../new_IMGT_IGG2.txz * + cd $outdir/new_IMGT_IGG2/ + tar -cJf ../new_IMGT_IGG2.txz * + + cd $outdir/new_IMGT_IGG3/ + tar -cJf ../new_IMGT_IGG3.txz * -cd $outdir/new_IMGT_IGG3/ -tar -cJf ../new_IMGT_IGG3.txz * + cd $outdir/new_IMGT_IGG4/ + tar -cJf ../new_IMGT_IGG4.txz * + + cd $outdir/new_IMGT_IGM/ + tar -cJf ../new_IMGT_IGM.txz * -cd $outdir/new_IMGT_IGG4/ -tar -cJf ../new_IMGT_IGG4.txz * + cd $outdir/new_IMGT_IGE/ + tar -cJf ../new_IMGT_IGE.txz * -cd $outdir/new_IMGT_IGM/ -tar -cJf ../new_IMGT_IGM.txz * - -cd $tmp + cd $tmp +fi echo "---------------- shm_csr.r ----------------" echo "---------------- shm_csr.r ----------------
" >> $log -classes="IGA,IGA1,IGA2,IGG,IGG1,IGG2,IGG3,IGG4,IGM,unmatched" +classes="IGA,IGA1,IGA2,IGG,IGG1,IGG2,IGG3,IGG4,IGM,IGE,unmatched" echo "R mutation analysis" Rscript $dir/shm_csr.r $outdir/merged.txt $classes $outdir ${empty_region_filter} 2>&1 @@ -168,13 +180,13 @@ echo "---------------- aa_histogram.r ----------------" echo "---------------- aa_histogram.r ----------------
" >> $log -Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "IGA,IGG,IGM" $outdir/ 2>&1 +Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "IGA,IGG,IGM,IGE" $outdir/ 2>&1 if [ -e "$outdir/aa_histogram_.png" ]; then mv $outdir/aa_histogram_.png $outdir/aa_histogram.png mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt fi -genes=(IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM) +genes=(IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM IGE) funcs=(sum mean median) funcs=(sum) @@ -247,14 +259,14 @@ tmp=`cat $outdir/unmatched_${func}_n.txt` echo "unmatched (N = ${unmatched_count})" >> $output - while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz + while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz cex cey cez unx uny unz allx ally allz do if [ "$name" == "FR R/S (ratio)" ] || [ "$name" == "CDR R/S (ratio)" ] ; then #meh - echo "$name${cax}/${cay} (${caz})${ca1x}/${ca1y} (${ca1z})${ca2x}/${ca2y} (${ca2z})${cgx}/${cgy} (${cgz})${cg1x}/${cg1y} (${cg1z})${cg2x}/${cg2y} (${cg2z})${cg3x}/${cg3y} (${cg3z})${cg4x}/${cg4y} (${cg4z})${cmx}/${cmy} (${cmz})${allx}/${ally} (${allz})${unx}/${uny} (${unz})" >> $output + echo "$name${cax}/${cay} (${caz})${ca1x}/${ca1y} (${ca1z})${ca2x}/${ca2y} (${ca2z})${cgx}/${cgy} (${cgz})${cg1x}/${cg1y} (${cg1z})${cg2x}/${cg2y} (${cg2z})${cg3x}/${cg3y} (${cg3z})${cg4x}/${cg4y} (${cg4z})${cmx}/${cmy} (${cmz})${cex}/${cey} (${cez})${allx}/${ally} (${allz})${unx}/${uny} (${unz})" >> $output elif [ "$name" == "Median of Number of Mutations (%)" ] ; then - echo "$name${caz}%${ca1z}%${ca2z}%${cgz}%${cg1z}%${cg2z}%${cg3z}%${cg4z}%${cmz}%${allz}%${unz}%" >> $output + echo "$name${caz}%${ca1z}%${ca2z}%${cgz}%${cg1z}%${cg2z}%${cg3z}%${cg4z}%${cmz}%${cez}%${allz}%${unz}%" >> $output else - echo "$name${cax}/${cay} (${caz}%)${ca1x}/${ca1y} (${ca1z}%)${ca2x}/${ca2y} (${ca2z}%)${cgx}/${cgy} (${cgz}%)${cg1x}/${cg1y} (${cg1z}%)${cg2x}/${cg2y} (${cg2z}%)${cg3x}/${cg3y} (${cg3z}%)${cg4x}/${cg4y} (${cg4z}%)${cmx}/${cmy} (${cmz}%)${allx}/${ally} (${allz}%)${unx}/${uny} (${unz}%)" >> $output + echo "$name${cax}/${cay} (${caz}%)${ca1x}/${ca1y} (${ca1z}%)${ca2x}/${ca2y} (${ca2z}%)${cgx}/${cgy} (${cgz}%)${cg1x}/${cg1y} (${cg1z}%)${cg2x}/${cg2y} (${cg2z}%)${cg3x}/${cg3y} (${cg3z}%)${cg4x}/${cg4y} (${cg4z}%)${cmx}/${cmy} (${cmz}%)${cex}/${cey} (${cez}%)${allx}/${ally} (${allz}%)${unx}/${uny} (${unz}%)" >> $output fi done < $outdir/data_${func}.txt @@ -262,7 +274,7 @@ tmp=`cat $outdir/all_${func}_n.txt` echo "all (N = $tmp)" >> $output - while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz + while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz cex cey cez unx uny unz allx ally allz do if [ "$name" == "FR R/S (ratio)" ] || [ "$name" == "CDR R/S (ratio)" ] ; then #meh echo "$name${allx}/${ally}" >> $output @@ -381,46 +393,48 @@ echo "" >> $output #CSR tab end -echo "---------------- change-o MakeDB ----------------" +if [[ "$fast" == "no" ]] ; then + + echo "---------------- change-o MakeDB ----------------" -mkdir $outdir/change_o + mkdir $outdir/change_o -tmp="$PWD" + tmp="$PWD" -cd $outdir/change_o + cd $outdir/change_o -bash $dir/change_o/makedb.sh $outdir/new_IMGT.txz false false false $outdir/change_o/change-o-db.txt -bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-defined_clones-summary.txt + bash $dir/change_o/makedb.sh $outdir/new_IMGT.txz false false false $outdir/change_o/change-o-db.txt + bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-defined_clones-summary.txt -Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" $outdir/change_o/change-o-db-defined_clones.txt 2>&1 + Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" $outdir/change_o/change-o-db-defined_clones.txt 2>&1 -echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/$outdir/merged.txt 'all' 'Sequence.ID,best_match' 'Sequence.ID' 'Sequence.ID' '\t' $outdir/change_o/change-o-db-defined_clones.txt 2>&1" + echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/$outdir/merged.txt 'all' 'Sequence.ID,best_match' 'Sequence.ID' 'Sequence.ID' '\t' $outdir/change_o/change-o-db-defined_clones.txt 2>&1" -if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then - bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGA.txz false false false $outdir/change_o/change-o-db-IGA.txt - bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGA.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-defined_clones-summary-IGA.txt -else - echo "No IGA sequences" > "$outdir/change_o/change-o-db-defined_clones-IGA.txt" - echo "No IGA sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGA.txt" -fi + if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then + bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGA.txz false false false $outdir/change_o/change-o-db-IGA.txt + bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGA.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-defined_clones-summary-IGA.txt + else + echo "No IGA sequences" > "$outdir/change_o/change-o-db-defined_clones-IGA.txt" + echo "No IGA sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGA.txt" + fi -if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then - bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGG.txz false false false $outdir/change_o/change-o-db-IGG.txt - bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGG.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-defined_clones-summary-IGG.txt -else - echo "No IGG sequences" > "$outdir/change_o/change-o-db-defined_clones-IGG.txt" - echo "No IGG sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGG.txt" -fi + if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then + bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGG.txz false false false $outdir/change_o/change-o-db-IGG.txt + bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGG.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-defined_clones-summary-IGG.txt + else + echo "No IGG sequences" > "$outdir/change_o/change-o-db-defined_clones-IGG.txt" + echo "No IGG sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGG.txt" + fi -if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then - bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGM.txz false false false $outdir/change_o/change-o-db-IGM.txt - bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGM.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-defined_clones-summary-IGM.txt -else - echo "No IGM sequences" > "$outdir/change_o/change-o-db-defined_clones-IGM.txt" - echo "No IGM sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGM.txt" -fi + if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then + bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGM.txz false false false $outdir/change_o/change-o-db-IGM.txt + bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGM.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-defined_clones-summary-IGM.txt + else + echo "No IGM sequences" > "$outdir/change_o/change-o-db-defined_clones-IGM.txt" + echo "No IGM sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGM.txt" + fi -PWD="$tmp" + PWD="$tmp" echo "
" >> $output #clonality tab @@ -471,6 +485,8 @@ echo "
" >> $output #clonality tab end +fi + echo "
" >> $output echo "" >> $output @@ -553,54 +569,59 @@ echo "" >> $output -echo "---------------- baseline ----------------" -echo "---------------- baseline ----------------
" >> $log -tmp="$PWD" + +if [[ "$fast" == "no" ]] ; then -mkdir $outdir/baseline + echo "---------------- baseline ----------------" + echo "---------------- baseline ----------------
" >> $log + tmp="$PWD" + + mkdir $outdir/baseline -mkdir $outdir/baseline/IGA_IGG_IGM -if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then - cd $outdir/baseline/IGA_IGG_IGM - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "IGA_IGG_IGM" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" -else - echo "No sequences" > "$outdir/baseline.txt" -fi + mkdir $outdir/baseline/IGA_IGG_IGM + if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then + cd $outdir/baseline/IGA_IGG_IGM + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "IGA_IGG_IGM" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" + else + echo "No sequences" > "$outdir/baseline.txt" + fi -mkdir $outdir/baseline/IGA -if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then - cd $outdir/baseline/IGA - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGA.txz "IGA" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGA.pdf" "Sequence.ID" "$outdir/baseline_IGA.txt" -else - echo "No IGA sequences" > "$outdir/baseline_IGA.txt" -fi + mkdir $outdir/baseline/IGA + if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then + cd $outdir/baseline/IGA + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGA.txz "IGA" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGA.pdf" "Sequence.ID" "$outdir/baseline_IGA.txt" + else + echo "No IGA sequences" > "$outdir/baseline_IGA.txt" + fi -mkdir $outdir/baseline/IGG -if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then - cd $outdir/baseline/IGG - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGG.txz "cg" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGG.pdf" "Sequence.ID" "$outdir/baseline_IGG.txt" -else - echo "No IGG sequences" > "$outdir/baseline_IGG.txt" -fi + mkdir $outdir/baseline/IGG + if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then + cd $outdir/baseline/IGG + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGG.txz "cg" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGG.pdf" "Sequence.ID" "$outdir/baseline_IGG.txt" + else + echo "No IGG sequences" > "$outdir/baseline_IGG.txt" + fi -mkdir $outdir/baseline/IGM -if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then - cd $outdir/baseline/IGM - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGM.txz "IGM" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGM.pdf" "Sequence.ID" "$outdir/baseline_IGM.txt" -else - echo "No IGM sequences" > "$outdir/baseline_IGM.txt" + mkdir $outdir/baseline/IGM + if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then + cd $outdir/baseline/IGM + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGM.txz "IGM" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGM.pdf" "Sequence.ID" "$outdir/baseline_IGM.txt" + else + echo "No IGM sequences" > "$outdir/baseline_IGM.txt" + fi + + cd $tmp + + echo "Cleaning up *.RData files" + find $outdir/baseline -name "*.RData" -type f -delete + fi -cd $tmp - -echo "Cleaning up *.RData files" -find $outdir/baseline -name "*.RData" -type f -delete - echo "---------------- naive_output.r ----------------" echo "---------------- naive_output.r ----------------
" >> $log -if [[ "$naive_output" != "None" ]] +if [[ "$naive_output" == "yes" ]] then cp $outdir/new_IMGT_IGA.txz ${naive_output_ca} cp $outdir/new_IMGT_IGG.txz ${naive_output_cg}