# HG changeset patch # User davidvanzessen # Date 1500366491 14400 # Node ID cb779a45537bc9eac45b23dd51823392e7fa1c43 # Parent ee807645b2241760ef454cbf81682280d4fa67b7 Uploaded diff -r ee807645b224 -r cb779a45537b check_unique_id.r --- a/check_unique_id.r Mon Jul 17 10:44:40 2017 -0400 +++ b/check_unique_id.r Tue Jul 18 04:28:11 2017 -0400 @@ -8,7 +8,7 @@ stop("First argument doesn't contain the 'Sequence number' column") } -tbl = table(current$Sequence.ID) +tbl = table(current[,"Sequence ID"]) l_tbl = length(tbl) check = any(tbl > 1) diff -r ee807645b224 -r cb779a45537b merge_and_filter.r --- a/merge_and_filter.r Mon Jul 17 10:44:40 2017 -0400 +++ b/merge_and_filter.r Tue Jul 18 04:28:11 2017 -0400 @@ -115,12 +115,6 @@ print("mutation analysis files columns") print(names(mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])])) -print(head(summ$Sequence.ID)) - -print("_-------------------------------------") - -print(head(mutationanalysis$Sequence.ID)) - result = merge(summ, mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])], by="Sequence.ID") print(paste("Number of sequences after merging with mutation analysis file:", nrow(result))) diff -r ee807645b224 -r cb779a45537b new_imgt.r --- a/new_imgt.r Mon Jul 17 10:44:40 2017 -0400 +++ b/new_imgt.r Tue Jul 18 04:28:11 2017 -0400 @@ -14,6 +14,8 @@ merged = merged[!grepl("unmatched", merged$best_match),] } +nrow_dat = 0 + for(f in list.files(imgt.dir, pattern="*.txt$")){ #print(paste("filtering", f)) path = file.path(imgt.dir, f) @@ -21,9 +23,13 @@ dat = dat[dat[,"Sequence ID"] %in% merged$Sequence.ID,] + nrow_dat = nrow(dat) + if(nrow(dat) > 0 & grepl("^8_", f)){ #change the FR1 columns to 0 in the "8_..." file dat[,grepl("^FR1", names(dat))] = 0 } write.table(dat, path, quote=F, sep="\t", row.names=F, col.names=T, na="") } + +print(paste("Creating new zip for ", gene, "with", nrow_dat, "sequences")) diff -r ee807645b224 -r cb779a45537b wrapper.sh --- a/wrapper.sh Mon Jul 17 10:44:40 2017 -0400 +++ b/wrapper.sh Tue Jul 18 04:28:11 2017 -0400 @@ -48,17 +48,19 @@ fi cat "`find $PWD/files/ -name "1_*"`" > $PWD/summary.txt +cat "`find $PWD/files/ -name "2_*"`" > $PWD/gapped_nt.txt cat "`find $PWD/files/ -name "3_*"`" > $PWD/sequences.txt cat "`find $PWD/files/ -name "4_*"`" > $PWD/gapped_aa.txt cat "`find $PWD/files/ -name "5_*"`" > $PWD/aa.txt cat "`find $PWD/files/ -name "6_*"`" > $PWD/junction.txt cat "`find $PWD/files/ -name "7_*"`" > $PWD/mutationanalysis.txt cat "`find $PWD/files/ -name "8_*"`" > $PWD/mutationstats.txt +cat "`find $PWD/files/ -name "9_*"`" > $PWD/aa_change_stats.txt cat "`find $PWD/files/ -name "10_*"`" > $PWD/hotspots.txt echo "---------------- unique id check ----------------" -Rscript $dir/check_unique_id.r $PWD/summary.txt $PWD/sequences.txt $PWD/gapped_aa.txt $PWD/aa.txt $PWD/junction.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt +Rscript $dir/check_unique_id.r $PWD/summary.txt $PWD/gapped_nt.txt $PWD/sequences.txt $PWD/gapped_aa.txt $PWD/aa.txt $PWD/junction.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/aa_change_stats.txt $PWD/hotspots.txt if [[ ${#BLASTN_DIR} -ge 5 ]] ; then echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}" @@ -77,23 +79,23 @@ Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt "$PWD/gapped_aa.txt" $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${filter_unique_count} ${class_filter} ${empty_region_filter} 2>&1 -if [[ "${naive_output}" == "yes" ]] ; then +if [[ "${naive_output}" == "yes" ]] || [[ "$fast" == "no" ]] ; then echo "---------------- creating new IMGT zips ----------------" echo "---------------- creating new IMGT zips ----------------
" >> $log mkdir $outdir/new_IMGT - cat "`find $PWD/files/ -name "1_*"`" > "$outdir/new_IMGT/1_Summary.txt" - cat "`find $PWD/files/ -name "2_*"`" > "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt" - cat "`find $PWD/files/ -name "3_*"`" > "$outdir/new_IMGT/3_Nt-sequences.txt" - cat "`find $PWD/files/ -name "4_*"`" > "$outdir/new_IMGT/4_IMGT-gapped-AA-sequences.txt" - cat "`find $PWD/files/ -name "5_*"`" > "$outdir/new_IMGT/5_AA-sequences.txt" - cat "`find $PWD/files/ -name "6_*"`" > "$outdir/new_IMGT/6_Junction.txt" - cat "`find $PWD/files/ -name "7_*"`" > "$outdir/new_IMGT/7_V-REGION-mutation-and-AA-change-table.txt" - cat "`find $PWD/files/ -name "8_*"`" > "$outdir/new_IMGT/8_V-REGION-nt-mutation-statistics.txt" - cat "`find $PWD/files/ -name "9_*"`" > "$outdir/new_IMGT/9_V-REGION-AA-change-statistics.txt" - cat "`find $PWD/files/ -name "10_*"`" > "$outdir/new_IMGT/10_V-REGION-mutation-hotspots.txt" + cp $PWD/summary.txt "$outdir/new_IMGT/1_Summary.txt" + cp $PWD/gapped_nt.txt "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt" + cp $PWD/sequences.txt "$outdir/new_IMGT/3_Nt-sequences.txt" + cp $PWD/gapped_aa.txt "$outdir/new_IMGT/4_IMGT-gapped-AA-sequences.txt" + cp $PWD/aa.txt "$outdir/new_IMGT/5_AA-sequences.txt" + cp $PWD/junction.txt "$outdir/new_IMGT/6_Junction.txt" + cp $PWD/mutationanalysis.txt "$outdir/new_IMGT/7_V-REGION-mutation-and-AA-change-table.txt" + cp $PWD/mutationstats.txt "$outdir/new_IMGT/8_V-REGION-nt-mutation-statistics.txt" + cp $PWD/aa_change_stats.txt "$outdir/new_IMGT/9_V-REGION-AA-change-statistics.txt" + cp $PWD/hotspots.txt "$outdir/new_IMGT/10_V-REGION-mutation-hotspots.txt" mkdir $outdir/new_IMGT_IGA cp $outdir/new_IMGT/* $outdir/new_IMGT_IGA