Mercurial > repos > davidvanzessen > shm_csr
changeset 5:012a738edf5a draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 01 Nov 2016 10:15:37 -0400 |
parents | 477e95b098fd |
children | 2ddb9a21f635 |
files | gene_identification.py merge_and_filter.r pattern_plots.r shm_csr.r shm_csr.xml wrapper.sh |
diffstat | 6 files changed, 228 insertions(+), 204 deletions(-) [+] |
line wrap: on
line diff
--- a/gene_identification.py Mon Oct 31 05:05:26 2016 -0400 +++ b/gene_identification.py Tue Nov 01 10:15:37 2016 -0400 @@ -47,10 +47,12 @@ #lambda/kappa reference sequence searchstrings = {"ca": "catccccgaccagccccaaggtcttcccgctgagcctctgcagcacccagccagatgggaacgtggtcatcgcctgcctgg", "cg": "ctccaccaagggcccatcggtcttccccctggcaccctcctccaagagcacctctgggggcacagcggcc", + "ce": "gcctccacacagagcccatccgtcttccccttgacccgctgctgcaaaaacattccctcc", "cm": "gggagtgcatccgccccaacc"} #new (shorter) cm sequence compiledregex = {"ca": [], "cg": [], + "ce": [], "cm": []} #lambda/kappa reference sequence variable nucleotides @@ -102,6 +104,8 @@ for i in range(0, len(searchstrings["cm"]) - chunklength, chunklength / 2): compiledregex["cm"].append((re.compile(searchstrings["cm"][i:i+chunklength]), False)) +for i in range(0, len(searchstrings["ce"]) - chunklength, chunklength / 2): + compiledregex["ce"].append((re.compile(searchstrings["ce"][i:i+chunklength]), False)) def removeAndReturnMaxIndex(x): #simplifies a list comprehension @@ -114,11 +118,11 @@ start_location = dict() hits = dict() alltotal = 0 -for key in compiledregex.keys(): #for ca/cg/cm +for key in compiledregex.keys(): #for ca/cg/cm/ce regularexpressions = compiledregex[key] #get the compiled regular expressions for ID in dic.keys()[0:]: #for every ID if ID not in hits.keys(): #ensure that the dictionairy that keeps track of the hits for every gene exists - hits[ID] = {"ca_hits": 0, "cg_hits": 0, "cm_hits": 0, "ca1": 0, "ca2": 0, "cg1": 0, "cg2": 0, "cg3": 0, "cg4": 0} + hits[ID] = {"ca_hits": 0, "cg_hits": 0, "cm_hits": 0, "ce_hits": 0, "ca1": 0, "ca2": 0, "cg1": 0, "cg2": 0, "cg3": 0, "cg4": 0} currentIDHits = hits[ID] seq = dic[ID] lastindex = 0 @@ -130,7 +134,7 @@ break regex, hasVar = regexp matches = regex.finditer(seq[lastindex:]) - for match in matches: #for every match with the current regex, only uses the first hit + for match in matches: #for every match with the current regex, only uses the first hit because of the break at the end of this loop lastindex += match.start() start[relativeStartLocation + start_zero] += 1 if hasVar: #if the regex has a variable nt in it @@ -144,7 +148,7 @@ currentIDHits["cg2"] += len([1 for x in cg2 if chunkstart <= x < chunkend and cg2[x] == seq[lastindex + x - chunkstart]]) currentIDHits["cg3"] += len([1 for x in cg3 if chunkstart <= x < chunkend and cg3[x] == seq[lastindex + x - chunkstart]]) currentIDHits["cg4"] += len([1 for x in cg4 if chunkstart <= x < chunkend and cg4[x] == seq[lastindex + x - chunkstart]]) - else: #key == "cm" #no variable regions in 'cm' + else: #key == "cm" #no variable regions in 'cm' or 'ce' pass break #this only breaks when there was a match with the regex, breaking means the 'else:' clause is skipped else: #only runs if there were no hits @@ -155,13 +159,10 @@ #start_location[ID + "_" + key] = str(start.index(max(start))) -chunksInCA = len(compiledregex["ca"]) -chunksInCG = len(compiledregex["cg"]) -chunksInCM = len(compiledregex["cm"]) -requiredChunkPercentage = 0.7 varsInCA = float(len(ca1.keys()) * 2) varsInCG = float(len(cg1.keys()) * 2) - 2 # -2 because the sliding window doesn't hit the first and last nt twice varsInCM = 0 +varsInCE = 0 @@ -183,17 +184,19 @@ possibleca = float(len(compiledregex["ca"])) possiblecg = float(len(compiledregex["cg"])) possiblecm = float(len(compiledregex["cm"])) + possiblece = float(len(compiledregex["ce"])) cahits = currentIDHits["ca_hits"] cghits = currentIDHits["cg_hits"] cmhits = currentIDHits["cm_hits"] - if cahits >= cghits and cahits >= cmhits: #its a ca gene + cehits = currentIDHits["ce_hits"] + if cahits >= cghits and cahits >= cmhits and cahits >= cehits: #its a ca gene ca1hits = currentIDHits["ca1"] ca2hits = currentIDHits["ca2"] if ca1hits >= ca2hits: o.write(ID + "\tIGA1\t" + str(int(ca1hits / varsInCA * 100)) + "\t" + str(int(cahits / possibleca * 100)) + "\t" + start_location[ID + "_ca"] + "\n") else: o.write(ID + "\tIGA2\t" + str(int(ca2hits / varsInCA * 100)) + "\t" + str(int(cahits / possibleca * 100)) + "\t" + start_location[ID + "_ca"] + "\n") - elif cghits >= cahits and cghits >= cmhits: #its a cg gene + elif cghits >= cahits and cghits >= cmhits and cghits >= cehits: #its a cg gene cg1hits = currentIDHits["cg1"] cg2hits = currentIDHits["cg2"] cg3hits = currentIDHits["cg3"] @@ -206,8 +209,11 @@ o.write(ID + "\tIGG3\t" + str(int(cg3hits / varsInCG * 100)) + "\t" + str(int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n") else: #cg4 gene o.write(ID + "\tIGG4\t" + str(int(cg4hits / varsInCG * 100)) + "\t" + str(int(cghits / possiblecg * 100)) + "\t" + start_location[ID + "_cg"] + "\n") - else: #its a cm gene - o.write(ID + "\tIGM\t100\t" + str(int(cmhits / possiblecm * 100)) + "\t" + start_location[ID + "_cg"] + "\n") + else: #its a cm or ce gene + if cmhits >= cehits: + o.write(ID + "\tIGM\t100\t" + str(int(cmhits / possiblecm * 100)) + "\t" + start_location[ID + "_cm"] + "\n") + else: + o.write(ID + "\tIGE\t100\t" + str(int(cehits / possiblece * 100)) + "\t" + start_location[ID + "_ce"] + "\n") seq_write_count += 1 print "Time: %i" % (int(time.time() * 1000) - starttime)
--- a/merge_and_filter.r Mon Oct 31 05:05:26 2016 -0400 +++ b/merge_and_filter.r Tue Nov 01 10:15:37 2016 -0400 @@ -141,44 +141,6 @@ result[is.na(result[,col]),] = 0 } -write.table(result, before.unique.file, sep="\t", quote=F,row.names=F,col.names=T) - -if(filter.unique != "no"){ - clmns = names(result) - - if(empty.region.filter == "leader"){ - result$unique.def = paste(result$FR1.IMGT.seq, result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) - } else if(empty.region.filter == "FR1"){ - result$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) - } else if(empty.region.filter == "CDR1"){ - rresult$unique.def = paste(result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) - } else if(empty.region.filter == "FR2"){ - result$unique.def = paste(result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) - } - - if(grepl("_c", filter.unique)){ - result$unique.def = paste(result$unique.def, result$best_match) - } - - #fltr = result$unique.def %in% result.filtered$unique.def - - if(grepl("keep", filter.unique)){ - result$unique.def = paste(result$unique.def, result$best_match) #keep the unique sequences that are in multiple classes - result = result[!duplicated(result$unique.def),] - } else { - result = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),] - result$unique.def = paste(result$unique.def, result$best_match) #keep the unique sequences that are in multiple classes - result = result[!duplicated(result$unique.def),] - } - - #result = result[,clmns] - - #write.table(inputdata.removed, "unique_removed.csv", sep=",",quote=F,row.names=F,col.names=T) -} - -filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result))) - - splt = strsplit(class.filter, "_")[[1]] chunk_hit_threshold = as.numeric(splt[1]) nt_hit_threshold = as.numeric(splt[2]) @@ -198,10 +160,39 @@ result$best_match = "all" } -if(any(higher_than, na.rm=T)){ - #summ = summ[higher_than,] +write.table(result, before.unique.file, sep="\t", quote=F,row.names=F,col.names=T) + +if(filter.unique != "no"){ + clmns = names(result) + + if(empty.region.filter == "leader"){ + result$unique.def = paste(result$FR1.IMGT.seq, result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) + } else if(empty.region.filter == "FR1"){ + result$unique.def = paste(result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) + } else if(empty.region.filter == "CDR1"){ + result$unique.def = paste(result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) + } else if(empty.region.filter == "FR2"){ + result$unique.def = paste(result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) + } + + if(grepl("_c", filter.unique)){ + result$unique.def = paste(result$unique.def, result$best_match) + } + + if(grepl("keep", filter.unique)){ + result$unique.def = paste(result$unique.def, result$best_match) #keep the unique sequences that are in multiple classes + result = result[!duplicated(result$unique.def),] + } else { + result = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),] + result$unique.def = paste(result$unique.def, result$best_match) #keep the unique sequences that are in multiple classes + result = result[!duplicated(result$unique.def),] + } } +write.table(result, gsub("before_unique_filter.txt", "after_unique_filter.txt", before.unique.file), sep="\t", quote=F,row.names=F,col.names=T) + +filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result))) + if(nrow(summ) == 0){ stop("No data remaining after filter") }
--- a/pattern_plots.r Mon Oct 31 05:05:26 2016 -0400 +++ b/pattern_plots.r Tue Nov 01 10:15:37 2016 -0400 @@ -22,7 +22,7 @@ -classes = c("IGA", "IGA1", "IGA2", "IGG", "IGG1", "IGG2", "IGG3", "IGG4", "IGM") +classes = c("IGA", "IGA1", "IGA2", "IGG", "IGG1", "IGG2", "IGG3", "IGG4", "IGM", "IGE") xyz = c("x", "y", "z") new.names = c(paste(rep(classes, each=3), xyz, sep="."), paste("un", xyz, sep="."), paste("all", xyz, sep=".")) @@ -45,7 +45,7 @@ write.table(data1, plot1.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) p = ggplot(data1, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of mutations") + guides(fill=guide_legend(title=NULL)) -p = p + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=13, colour="black")) + scale_fill_manual(values=c("RGYW.WRCY" = "white", "TW.WA" = "blue4")) +p = p + theme(panel.background = element_rect(fill = "white", colour="black"),text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("RGYW.WRCY" = "white", "TW.WA" = "blue4")) #p = p + scale_colour_manual(values=c("RGYW.WRCY" = "black", "TW.WA" = "blue4")) png(filename=plot1.png, width=480, height=300) print(p) @@ -79,7 +79,7 @@ write.table(data2, plot2.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) p = ggplot(data2, aes(x=Class, y=value, fill=Type)) + geom_bar(position="fill", stat="identity", colour = "black") + scale_y_continuous(labels=percent_format()) + guides(fill=guide_legend(title=NULL)) + ylab("% of mutations") -p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white")) +p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white")) #p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black")) png(filename=plot2.png, width=480, height=300) print(p) @@ -115,7 +115,7 @@ write.table(data3, plot3.txt, quote=F, sep="\t", na="", row.names=F, col.names=T) p = ggplot(data3, aes(Class, value)) + geom_bar(aes(fill=Type), stat="identity", position="dodge", colour = "black") + ylab("% of nucleotides") + guides(fill=guide_legend(title=NULL)) -p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white")) +p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=15, colour="black"), axis.text.x = element_text(angle = 45, hjust = 1)) + scale_fill_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "white")) #p = p + scale_colour_manual(values=c("A/T" = "blue4", "G/C transversions" = "gray74", "G/C transitions" = "black")) png(filename=plot3.png, width=480, height=300) print(p)
--- a/shm_csr.r Mon Oct 31 05:05:26 2016 -0400 +++ b/shm_csr.r Tue Nov 01 10:15:37 2016 -0400 @@ -280,15 +280,13 @@ transition2 = merge(transition2, base.order, by.x="variable", by.y="base") transition2[is.na(transition2$value),]$value = 0 - - print(transition2) if(any(transition2$value != 0)){ #having rows of data but a transition table filled with 0 is bad print("Plotting stacked transition") png(filename=paste("transitions_stacked_", name, ".png", sep="")) p = ggplot(transition2, aes(factor(reorder(id, order.x)), y=value, fill=factor(reorder(variable, order.y)))) + geom_bar(position="fill", stat="identity", colour="black") #stacked bar p = p + xlab("From base") + ylab("To base") + ggtitle("Mutations frequency from base to base") + guides(fill=guide_legend(title=NULL)) - p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4")) + p = p + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) + scale_fill_manual(values=c("A" = "blue4", "G" = "lightblue1", "C" = "olivedrab3", "T" = "olivedrab4")) #p = p + scale_colour_manual(values=c("A" = "black", "G" = "black", "C" = "black", "T" = "black")) print(p) dev.off() @@ -374,7 +372,7 @@ pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=Gene)) pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGA1" = "lightblue1", "IGA2" = "blue4")) pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL) - pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) + pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank()) pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGA subclasses", "( n =", sum(genesForPlot$Freq), ")")) write.table(genesForPlot, "IGA_pie.txt", sep="\t",quote=F,row.names=F,col.names=T) @@ -395,7 +393,7 @@ pc = ggplot(genesForPlot, aes(x = factor(1), y=Freq, fill=Gene)) pc = pc + geom_bar(width = 1, stat = "identity") + scale_fill_manual(labels=genesForPlot$label, values=c("IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred")) pc = pc + coord_polar(theta="y") + scale_y_continuous(breaks=NULL) - pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) + pc = pc + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black"), axis.title=element_blank(), axis.text=element_blank(), axis.ticks=element_blank()) pc = pc + xlab(" ") + ylab(" ") + ggtitle(paste("IGG subclasses", "( n =", sum(genesForPlot$Freq), ")")) write.table(genesForPlot, "IGG_pie.txt", sep="\t",quote=F,row.names=F,col.names=T) @@ -415,7 +413,7 @@ p = ggplot(dat.clss, aes(best_match, percentage_mutations)) p = p + geom_point(aes(colour=best_match), position="jitter") + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) -p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) +p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) p = p + scale_fill_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "all" = "blue4")) p = p + scale_colour_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "all" = "blue4")) @@ -442,7 +440,7 @@ frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) p = ggplot(frequency_bins_data, aes(frequency_bins, frequency)) -p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=13, colour="black")) +p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "black", "all" = "blue4")) png(filename="frequency_ranges.png")
--- a/shm_csr.xml Mon Oct 31 05:05:26 2016 -0400 +++ b/shm_csr.xml Tue Nov 01 10:15:37 2016 -0400 @@ -1,7 +1,7 @@ <tool id="shm_csr" name="SHM & CSR pipeline" version="1.0"> <description></description> <command interpreter="bash"> - wrapper.sh $in_file custom $out_file $out_file.files_path ${in_file.name} "-" $functionality $unique $naive_output_ca $naive_output_cg $naive_output_cm $filter_uniques $class_filter $empty_region_filter + wrapper.sh $in_file custom $out_file $out_file.files_path ${in_file.name} "-" $functionality $unique $naive_output_ca $naive_output_cg $naive_output_cm $filter_uniques $class_filter $empty_region_filter $fast </command> <inputs> <param name="in_file" type="data" label="IMGT zip file to be analysed" /> @@ -33,7 +33,7 @@ <option value="CDR3.IMGT.seq">CDR3 (nt)</option> <option value="Sequence.ID">Don't remove duplicates</option> </param> - <param name="class_filter" type="select" label="Class/Subclass filter" help="" > + <param name="class_filter" type="select" label="Human Class/Subclass filter" help="" > <option value="70_70" selected="true">>70% class and >70% subclass</option> <option value="60_55">>60% class and >55% subclass</option> <option value="70_0">>70% class</option> @@ -46,6 +46,14 @@ <option value="no" selected="true">No</option> </param> </conditional> + + + <param name="fast" type="select" label="Fast" help="Skips generating the new ZIP files and Change-O/Baseline" > + <option value="yes">Yes</option> + <option value="no" selected="true">No</option> + </param> + + </inputs> <outputs> <data format="html" name="out_file" label = "SHM & CSR on ${in_file.name}"/>
--- a/wrapper.sh Mon Oct 31 05:05:26 2016 -0400 +++ b/wrapper.sh Tue Nov 01 10:15:37 2016 -0400 @@ -16,6 +16,7 @@ filter_unique=${12} class_filter=${13} empty_region_filter=${14} +fast=${15} mkdir $outdir tar -xzf $dir/style.tar.gz -C $outdir @@ -62,101 +63,112 @@ Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} ${empty_region_filter} 2>&1 -echo "---------------- creating new IMGT zips ----------------" -echo "---------------- creating new IMGT zips ----------------<br />" >> $log +if [[ "$fast" == "no" ]] ; then -mkdir $outdir/new_IMGT + echo "---------------- creating new IMGT zips ----------------" + echo "---------------- creating new IMGT zips ----------------<br />" >> $log + + mkdir $outdir/new_IMGT -cat `find $PWD/files/ -name "1_*"` > "$outdir/new_IMGT/1_Summary.txt" -cat `find $PWD/files/ -name "2_*"` > "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt" -cat `find $PWD/files/ -name "3_*"` > "$outdir/new_IMGT/3_Nt-sequences.txt" -cat `find $PWD/files/ -name "4_*"` > "$outdir/new_IMGT/4_IMGT-gapped-AA-sequences.txt" -cat `find $PWD/files/ -name "5_*"` > "$outdir/new_IMGT/5_AA-sequences.txt" -cat `find $PWD/files/ -name "6_*"` > "$outdir/new_IMGT/6_Junction.txt" -cat `find $PWD/files/ -name "7_*"` > "$outdir/new_IMGT/7_V-REGION-mutation-and-AA-change-table.txt" -cat `find $PWD/files/ -name "8_*"` > "$outdir/new_IMGT/8_V-REGION-nt-mutation-statistics.txt" -cat `find $PWD/files/ -name "9_*"` > "$outdir/new_IMGT/9_V-REGION-AA-change-statistics.txt" -cat `find $PWD/files/ -name "10_*"` > "$outdir/new_IMGT/10_V-REGION-mutation-hotspots.txt" + cat `find $PWD/files/ -name "1_*"` > "$outdir/new_IMGT/1_Summary.txt" + cat `find $PWD/files/ -name "2_*"` > "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt" + cat `find $PWD/files/ -name "3_*"` > "$outdir/new_IMGT/3_Nt-sequences.txt" + cat `find $PWD/files/ -name "4_*"` > "$outdir/new_IMGT/4_IMGT-gapped-AA-sequences.txt" + cat `find $PWD/files/ -name "5_*"` > "$outdir/new_IMGT/5_AA-sequences.txt" + cat `find $PWD/files/ -name "6_*"` > "$outdir/new_IMGT/6_Junction.txt" + cat `find $PWD/files/ -name "7_*"` > "$outdir/new_IMGT/7_V-REGION-mutation-and-AA-change-table.txt" + cat `find $PWD/files/ -name "8_*"` > "$outdir/new_IMGT/8_V-REGION-nt-mutation-statistics.txt" + cat `find $PWD/files/ -name "9_*"` > "$outdir/new_IMGT/9_V-REGION-AA-change-statistics.txt" + cat `find $PWD/files/ -name "10_*"` > "$outdir/new_IMGT/10_V-REGION-mutation-hotspots.txt" -mkdir $outdir/new_IMGT_IGA -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA + mkdir $outdir/new_IMGT_IGA + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA -mkdir $outdir/new_IMGT_IGA1 -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA1 + mkdir $outdir/new_IMGT_IGA1 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA1 -mkdir $outdir/new_IMGT_IGA2 -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA2 + mkdir $outdir/new_IMGT_IGA2 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA2 -mkdir $outdir/new_IMGT_IGG -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG + mkdir $outdir/new_IMGT_IGG + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG -mkdir $outdir/new_IMGT_IGG1 -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG1 + mkdir $outdir/new_IMGT_IGG1 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG1 -mkdir $outdir/new_IMGT_IGG2 -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG2 + mkdir $outdir/new_IMGT_IGG2 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG2 -mkdir $outdir/new_IMGT_IGG3 -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG3 + mkdir $outdir/new_IMGT_IGG3 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG3 -mkdir $outdir/new_IMGT_IGG4 -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG4 + mkdir $outdir/new_IMGT_IGG4 + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGG4 + + mkdir $outdir/new_IMGT_IGM + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGM -mkdir $outdir/new_IMGT_IGM -cp $outdir/new_IMGT/* $outdir/new_IMGT_IGM + mkdir $outdir/new_IMGT_IGE + cp $outdir/new_IMGT/* $outdir/new_IMGT_IGE -Rscript $dir/new_imgt.r $outdir/new_IMGT/ $outdir/merged.txt "-" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT/ $outdir/merged.txt "-" 2>&1 + + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA/ $outdir/merged.txt "IGA" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA1/ $outdir/merged.txt "IGA1" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA2/ $outdir/merged.txt "IGA2" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA/ $outdir/merged.txt "IGA" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA1/ $outdir/merged.txt "IGA1" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGA2/ $outdir/merged.txt "IGA2" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG/ $outdir/merged.txt "IGG" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG1/ $outdir/merged.txt "IGG1" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG2/ $outdir/merged.txt "IGG2" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG3/ $outdir/merged.txt "IGG3" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG4/ $outdir/merged.txt "IGG4" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG/ $outdir/merged.txt "IGG" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG1/ $outdir/merged.txt "IGG1" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG2/ $outdir/merged.txt "IGG2" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG3/ $outdir/merged.txt "IGG3" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGG4/ $outdir/merged.txt "IGG4" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGM/ $outdir/merged.txt "IGM" 2>&1 -Rscript $dir/new_imgt.r $outdir/new_IMGT_IGM/ $outdir/merged.txt "IGM" 2>&1 + Rscript $dir/new_imgt.r $outdir/new_IMGT_IGE/ $outdir/merged.txt "IGE" 2>&1 -tmp="$PWD" -cd $outdir/new_IMGT/ #tar weirdness... -tar -cJf ../new_IMGT.txz * + tmp="$PWD" + cd $outdir/new_IMGT/ #tar weirdness... + tar -cJf ../new_IMGT.txz * -cd $outdir/new_IMGT_IGA/ -tar -cJf ../new_IMGT_IGA.txz * + cd $outdir/new_IMGT_IGA/ + tar -cJf ../new_IMGT_IGA.txz * -cd $outdir/new_IMGT_IGA1/ -tar -cJf ../new_IMGT_IGA1.txz * + cd $outdir/new_IMGT_IGA1/ + tar -cJf ../new_IMGT_IGA1.txz * -cd $outdir/new_IMGT_IGA2/ -tar -cJf ../new_IMGT_IGA2.txz * + cd $outdir/new_IMGT_IGA2/ + tar -cJf ../new_IMGT_IGA2.txz * -cd $outdir/new_IMGT_IGG/ -tar -cJf ../new_IMGT_IGG.txz * + cd $outdir/new_IMGT_IGG/ + tar -cJf ../new_IMGT_IGG.txz * -cd $outdir/new_IMGT_IGG1/ -tar -cJf ../new_IMGT_IGG1.txz * + cd $outdir/new_IMGT_IGG1/ + tar -cJf ../new_IMGT_IGG1.txz * -cd $outdir/new_IMGT_IGG2/ -tar -cJf ../new_IMGT_IGG2.txz * + cd $outdir/new_IMGT_IGG2/ + tar -cJf ../new_IMGT_IGG2.txz * + + cd $outdir/new_IMGT_IGG3/ + tar -cJf ../new_IMGT_IGG3.txz * -cd $outdir/new_IMGT_IGG3/ -tar -cJf ../new_IMGT_IGG3.txz * + cd $outdir/new_IMGT_IGG4/ + tar -cJf ../new_IMGT_IGG4.txz * + + cd $outdir/new_IMGT_IGM/ + tar -cJf ../new_IMGT_IGM.txz * -cd $outdir/new_IMGT_IGG4/ -tar -cJf ../new_IMGT_IGG4.txz * + cd $outdir/new_IMGT_IGE/ + tar -cJf ../new_IMGT_IGE.txz * -cd $outdir/new_IMGT_IGM/ -tar -cJf ../new_IMGT_IGM.txz * - -cd $tmp + cd $tmp +fi echo "---------------- shm_csr.r ----------------" echo "---------------- shm_csr.r ----------------<br />" >> $log -classes="IGA,IGA1,IGA2,IGG,IGG1,IGG2,IGG3,IGG4,IGM,unmatched" +classes="IGA,IGA1,IGA2,IGG,IGG1,IGG2,IGG3,IGG4,IGM,IGE,unmatched" echo "R mutation analysis" Rscript $dir/shm_csr.r $outdir/merged.txt $classes $outdir ${empty_region_filter} 2>&1 @@ -168,13 +180,13 @@ echo "---------------- aa_histogram.r ----------------" echo "---------------- aa_histogram.r ----------------<br />" >> $log -Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "IGA,IGG,IGM" $outdir/ 2>&1 +Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "IGA,IGG,IGM,IGE" $outdir/ 2>&1 if [ -e "$outdir/aa_histogram_.png" ]; then mv $outdir/aa_histogram_.png $outdir/aa_histogram.png mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt fi -genes=(IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM) +genes=(IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM IGE) funcs=(sum mean median) funcs=(sum) @@ -247,14 +259,14 @@ tmp=`cat $outdir/unmatched_${func}_n.txt` echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th><tr></thead>" >> $output - while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz + while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz cex cey cez unx uny unz allx ally allz do if [ "$name" == "FR R/S (ratio)" ] || [ "$name" == "CDR R/S (ratio)" ] ; then #meh - echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td><td>${unx}/${uny} (${unz})</td></tr>" >> $output + echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${cex}/${cey} (${cez})</td><td>${allx}/${ally} (${allz})</td><td>${unx}/${uny} (${unz})</td></tr>" >> $output elif [ "$name" == "Median of Number of Mutations (%)" ] ; then - echo "<tr><td>$name</td><td>${caz}%</td><td>${ca1z}%</td><td>${ca2z}%</td><td>${cgz}%</td><td>${cg1z}%</td><td>${cg2z}%</td><td>${cg3z}%</td><td>${cg4z}%</td><td>${cmz}%</td><td>${allz}%</td><td>${unz}%</td></tr>" >> $output + echo "<tr><td>$name</td><td>${caz}%</td><td>${ca1z}%</td><td>${ca2z}%</td><td>${cgz}%</td><td>${cg1z}%</td><td>${cg2z}%</td><td>${cg3z}%</td><td>${cg4z}%</td><td>${cmz}%</td><td>${cez}%</td><td>${allz}%</td><td>${unz}%</td></tr>" >> $output else - echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td><td>${unx}/${uny} (${unz}%)</td></tr>" >> $output + echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${cex}/${cey} (${cez}%)</td><td>${allx}/${ally} (${allz}%)</td><td>${unx}/${uny} (${unz}%)</td></tr>" >> $output fi done < $outdir/data_${func}.txt @@ -262,7 +274,7 @@ tmp=`cat $outdir/all_${func}_n.txt` echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output - while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz + while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz cex cey cez unx uny unz allx ally allz do if [ "$name" == "FR R/S (ratio)" ] || [ "$name" == "CDR R/S (ratio)" ] ; then #meh echo "<tr><td>$name</td><td>${allx}/${ally}</td></tr>" >> $output @@ -381,46 +393,48 @@ echo "</div>" >> $output #CSR tab end -echo "---------------- change-o MakeDB ----------------" +if [[ "$fast" == "no" ]] ; then + + echo "---------------- change-o MakeDB ----------------" -mkdir $outdir/change_o + mkdir $outdir/change_o -tmp="$PWD" + tmp="$PWD" -cd $outdir/change_o + cd $outdir/change_o -bash $dir/change_o/makedb.sh $outdir/new_IMGT.txz false false false $outdir/change_o/change-o-db.txt -bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-defined_clones-summary.txt + bash $dir/change_o/makedb.sh $outdir/new_IMGT.txz false false false $outdir/change_o/change-o-db.txt + bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-defined_clones-summary.txt -Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" $outdir/change_o/change-o-db-defined_clones.txt 2>&1 + Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" $outdir/change_o/change-o-db-defined_clones.txt 2>&1 -echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/$outdir/merged.txt 'all' 'Sequence.ID,best_match' 'Sequence.ID' 'Sequence.ID' '\t' $outdir/change_o/change-o-db-defined_clones.txt 2>&1" + echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/$outdir/merged.txt 'all' 'Sequence.ID,best_match' 'Sequence.ID' 'Sequence.ID' '\t' $outdir/change_o/change-o-db-defined_clones.txt 2>&1" -if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then - bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGA.txz false false false $outdir/change_o/change-o-db-IGA.txt - bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGA.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-defined_clones-summary-IGA.txt -else - echo "No IGA sequences" > "$outdir/change_o/change-o-db-defined_clones-IGA.txt" - echo "No IGA sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGA.txt" -fi + if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then + bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGA.txz false false false $outdir/change_o/change-o-db-IGA.txt + bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGA.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-defined_clones-summary-IGA.txt + else + echo "No IGA sequences" > "$outdir/change_o/change-o-db-defined_clones-IGA.txt" + echo "No IGA sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGA.txt" + fi -if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then - bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGG.txz false false false $outdir/change_o/change-o-db-IGG.txt - bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGG.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-defined_clones-summary-IGG.txt -else - echo "No IGG sequences" > "$outdir/change_o/change-o-db-defined_clones-IGG.txt" - echo "No IGG sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGG.txt" -fi + if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then + bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGG.txz false false false $outdir/change_o/change-o-db-IGG.txt + bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGG.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-defined_clones-summary-IGG.txt + else + echo "No IGG sequences" > "$outdir/change_o/change-o-db-defined_clones-IGG.txt" + echo "No IGG sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGG.txt" + fi -if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then - bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGM.txz false false false $outdir/change_o/change-o-db-IGM.txt - bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGM.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-defined_clones-summary-IGM.txt -else - echo "No IGM sequences" > "$outdir/change_o/change-o-db-defined_clones-IGM.txt" - echo "No IGM sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGM.txt" -fi + if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then + bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGM.txz false false false $outdir/change_o/change-o-db-IGM.txt + bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGM.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-defined_clones-summary-IGM.txt + else + echo "No IGM sequences" > "$outdir/change_o/change-o-db-defined_clones-IGM.txt" + echo "No IGM sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGM.txt" + fi -PWD="$tmp" + PWD="$tmp" echo "<div class='tabbertab' title='Clonality'>" >> $output #clonality tab @@ -471,6 +485,8 @@ echo "</div>" >> $output #clonality tab end +fi + echo "<div class='tabbertab' title='Downloads'>" >> $output echo "<table class='pure-table pure-table-striped'>" >> $output @@ -553,54 +569,59 @@ echo "</html>" >> $output -echo "---------------- baseline ----------------" -echo "---------------- baseline ----------------<br />" >> $log -tmp="$PWD" + +if [[ "$fast" == "no" ]] ; then -mkdir $outdir/baseline + echo "---------------- baseline ----------------" + echo "---------------- baseline ----------------<br />" >> $log + tmp="$PWD" + + mkdir $outdir/baseline -mkdir $outdir/baseline/IGA_IGG_IGM -if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then - cd $outdir/baseline/IGA_IGG_IGM - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "IGA_IGG_IGM" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" -else - echo "No sequences" > "$outdir/baseline.txt" -fi + mkdir $outdir/baseline/IGA_IGG_IGM + if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then + cd $outdir/baseline/IGA_IGG_IGM + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "IGA_IGG_IGM" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" + else + echo "No sequences" > "$outdir/baseline.txt" + fi -mkdir $outdir/baseline/IGA -if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then - cd $outdir/baseline/IGA - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGA.txz "IGA" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGA.pdf" "Sequence.ID" "$outdir/baseline_IGA.txt" -else - echo "No IGA sequences" > "$outdir/baseline_IGA.txt" -fi + mkdir $outdir/baseline/IGA + if [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then + cd $outdir/baseline/IGA + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGA.txz "IGA" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGA.pdf" "Sequence.ID" "$outdir/baseline_IGA.txt" + else + echo "No IGA sequences" > "$outdir/baseline_IGA.txt" + fi -mkdir $outdir/baseline/IGG -if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then - cd $outdir/baseline/IGG - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGG.txz "cg" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGG.pdf" "Sequence.ID" "$outdir/baseline_IGG.txt" -else - echo "No IGG sequences" > "$outdir/baseline_IGG.txt" -fi + mkdir $outdir/baseline/IGG + if [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then + cd $outdir/baseline/IGG + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGG.txz "cg" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGG.pdf" "Sequence.ID" "$outdir/baseline_IGG.txt" + else + echo "No IGG sequences" > "$outdir/baseline_IGG.txt" + fi -mkdir $outdir/baseline/IGM -if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then - cd $outdir/baseline/IGM - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGM.txz "IGM" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGM.pdf" "Sequence.ID" "$outdir/baseline_IGM.txt" -else - echo "No IGM sequences" > "$outdir/baseline_IGM.txt" + mkdir $outdir/baseline/IGM + if [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then + cd $outdir/baseline/IGM + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_IGM.txz "IGM" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_IGM.pdf" "Sequence.ID" "$outdir/baseline_IGM.txt" + else + echo "No IGM sequences" > "$outdir/baseline_IGM.txt" + fi + + cd $tmp + + echo "Cleaning up *.RData files" + find $outdir/baseline -name "*.RData" -type f -delete + fi -cd $tmp - -echo "Cleaning up *.RData files" -find $outdir/baseline -name "*.RData" -type f -delete - echo "---------------- naive_output.r ----------------" echo "---------------- naive_output.r ----------------<br />" >> $log -if [[ "$naive_output" != "None" ]] +if [[ "$naive_output" == "yes" ]] then cp $outdir/new_IMGT_IGA.txz ${naive_output_ca} cp $outdir/new_IMGT_IGG.txz ${naive_output_cg}