# HG changeset patch # User davidvanzessen # Date 1478867508 18000 # Node ID 59765d2c8890bf8a5ae82fe78374052b80c5a36d # Parent 933fb21568cecfd301573cd6a73ca58c0b7b99f5 Uploaded diff -r 933fb21568ce -r 59765d2c8890 merge_and_filter.r --- a/merge_and_filter.r Fri Nov 11 03:49:30 2016 -0500 +++ b/merge_and_filter.r Fri Nov 11 07:31:48 2016 -0500 @@ -6,22 +6,24 @@ mutationanalysisfile = args[3] mutationstatsfile = args[4] hotspotsfile = args[5] -gene_identification_file= args[6] -output = args[7] -before.unique.file = args[8] -unmatchedfile = args[9] -method=args[10] -functionality=args[11] -unique.type=args[12] -filter.unique=args[13] -class.filter=args[14] -empty.region.filter=args[15] +aafile = args[6] +gene_identification_file= args[7] +output = args[8] +before.unique.file = args[9] +unmatchedfile = args[10] +method=args[11] +functionality=args[12] +unique.type=args[13] +filter.unique=args[14] +class.filter=args[15] +empty.region.filter=args[16] summ = read.table(summaryfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") sequences = read.table(sequencesfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") mutationanalysis = read.table(mutationanalysisfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") mutationstats = read.table(mutationstatsfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") hotspots = read.table(hotspotsfile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") +AAs = read.table(aafile, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") gene_identification = read.table(gene_identification_file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="") if(method == "blastn"){ @@ -81,6 +83,10 @@ names(sequences) = c("Sequence.ID", "FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq") result = merge(result, sequences, by="Sequence.ID", all.x=T) +AAs = AAs[,c("Sequence.ID", "CDR3.IMGT")] +names(AAs) = c("Sequence.ID", "CDR3.IMGT.AA") +result = merge(result, AAs, by="Sequence.ID", all.x=T) + print(paste("Number of sequences in result after merging with sequences:", nrow(result))) result$VGene = gsub("^Homsap ", "", result$V.GENE.and.allele) diff -r 933fb21568ce -r 59765d2c8890 shm_csr.xml --- a/shm_csr.xml Fri Nov 11 03:49:30 2016 -0500 +++ b/shm_csr.xml Fri Nov 11 07:31:48 2016 -0500 @@ -22,10 +22,10 @@ - - - - + + + + diff -r 933fb21568ce -r 59765d2c8890 wrapper.sh --- a/wrapper.sh Fri Nov 11 03:49:30 2016 -0500 +++ b/wrapper.sh Fri Nov 11 07:31:48 2016 -0500 @@ -62,7 +62,7 @@ echo "---------------- merge_and_filter.r ----------------" echo "---------------- merge_and_filter.r ----------------
" >> $log -Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} ${empty_region_filter} 2>&1 +Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $PWD/aa.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} ${empty_region_filter} 2>&1 if [[ "$fast" == "no" ]] ; then