# HG changeset patch # User rhpvorderman # Date 1678102592 0 # Node ID 8fcf31272f6e7ec9f31e87d14766edb8ba79a8ca # Parent cf8ad181628f787e1a78959f0c38de632e1f842e planemo upload commit a43893724cc769bed8a1f19a5b19ec1ba20cb63c diff -r cf8ad181628f -r 8fcf31272f6e CHANGELOG.md --- a/CHANGELOG.md Mon Dec 12 12:32:44 2022 +0000 +++ b/CHANGELOG.md Mon Mar 06 11:36:32 2023 +0000 @@ -1,3 +1,13 @@ +version 1.7.0 +----------------- ++ Use the name of the input file to generate the name of the output IMGT + archives. ++ Add same duplicate filters as immune repertoire pipeline. ++ Add a new "Everything is IGM" class filter for captured IGM sequences. ++ Fix bug where empty tables would cause crashes when generating plots. ++ Fix bug where R script errors where not written to stderr, causing galaxy to + mistake the jobs as being successful. + version 1.6.0 ------------- + Faster runtime due to faster gene identification, sequence overview creation diff -r cf8ad181628f -r 8fcf31272f6e CONTROL_NWK377_PB_IGHC_MID1_40nt_2.txz Binary file CONTROL_NWK377_PB_IGHC_MID1_40nt_2.txz has changed diff -r cf8ad181628f -r 8fcf31272f6e __pycache__/igm_naive_mutations.cpython-39.pyc Binary file __pycache__/igm_naive_mutations.cpython-39.pyc has changed diff -r cf8ad181628f -r 8fcf31272f6e a.out Binary file a.out has changed diff -r cf8ad181628f -r 8fcf31272f6e merge_and_filter.r --- a/merge_and_filter.r Mon Dec 12 12:32:44 2022 +0000 +++ b/merge_and_filter.r Mon Mar 06 11:36:32 2023 +0000 @@ -163,8 +163,8 @@ result[!higher_than,"best_match"] = paste("unmatched,", result[!higher_than,"best_match"]) } -if(class.filter == "101_101"){ - result$best_match = "all" +if(splt[1] == "101" & splt[2] == "101"){ + result$best_match = splt[3] } write.table(x=result, file=gsub("merged.txt$", "before_filters.txt", output), sep="\t",quote=F,row.names=F,col.names=T) diff -r cf8ad181628f -r 8fcf31272f6e sequence_overview.py --- a/sequence_overview.py Mon Dec 12 12:32:44 2022 +0000 +++ b/sequence_overview.py Mon 
Mar 06 11:36:32 2023 +0000 @@ -30,7 +30,9 @@ "IGG3": 0, "IGG4": 0, "IGM": 0, - "unmatched": 0} + "unmatched": 0, + "all": 0, + } self.table_rows: List[SequenceTableRow] = [] diff -r cf8ad181628f -r 8fcf31272f6e shm_csr.r --- a/shm_csr.r Mon Dec 12 12:32:44 2022 +0000 +++ b/shm_csr.r Mon Mar 06 11:36:32 2023 +0000 @@ -439,19 +439,20 @@ dat.clss = rbind(dat, dat.clss) +write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T) + +if (nrow(dat) > 0) { p = ggplot(dat.clss, aes(best_match, percentage_mutations)) p = p + geom_point(aes(colour=best_match), position="jitter") + geom_boxplot(aes(middle=mean(percentage_mutations)), alpha=0.1, outlier.shape = NA) p = p + xlab("Subclass") + ylab("Frequency") + ggtitle("Frequency scatter plot") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) p = p + scale_fill_manual(values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) p = p + scale_colour_manual(guide = guide_legend(title = "Subclass"), values=c("IGA" = "blue4", "IGA1" = "lightblue1", "IGA2" = "blue4", "IGG" = "olivedrab3", "IGG1" = "olivedrab3", "IGG2" = "red", "IGG3" = "gold", "IGG4" = "darkred", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) - png(filename="scatter.png") print(p) dev.off() pdfplots[["scatter.pdf"]] <- p - -write.table(dat[,c("Sequence.ID", "best_match", "VRegionMutations", "VRegionNucleotides", "percentage_mutations")], "scatter.txt", sep="\t",quote=F,row.names=F,col.names=T) +} print("Plotting frequency ranges plot") @@ -467,6 +468,7 @@ frequency_bins_data$frequency = round(frequency_bins_data$frequency_count / frequency_bins_data$class_sum * 100, 2) +if (nrow(frequency_bins_data) > 0) { p 
= ggplot(frequency_bins_data, aes(frequency_bins, frequency)) p = p + geom_bar(aes(fill=best_match_class), stat="identity", position="dodge") + theme(panel.background = element_rect(fill = "white", colour="black"), text = element_text(size=16, colour="black")) p = p + xlab("Frequency ranges") + ylab("Frequency") + ggtitle("Mutation Frequencies by class") + scale_fill_manual(guide = guide_legend(title = "Class"), values=c("IGA" = "blue4", "IGG" = "olivedrab3", "IGM" = "darkviolet", "IGE" = "darkorange", "all" = "blue4")) @@ -476,6 +478,7 @@ dev.off() pdfplots[["frequency_ranges.pdf"]] <- p +} save(pdfplots, file="pdfplots.RData") @@ -483,10 +486,12 @@ frequency_bins_data_by_class = frequency_bins_data_by_class[order(frequency_bins_data_by_class$best_match_class, frequency_bins_data_by_class$frequency_bins),] + frequency_bins_data_by_class$frequency_bins = gsub("-", " to ", frequency_bins_data_by_class$frequency_bins) -frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "20", c("frequency_bins")] = "20 or higher" -frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "0", c("frequency_bins")] = "0 or lower" - +if (nrow(frequency_bins_data_by_class) > 0) { + frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "20", c("frequency_bins")] = "20 or higher" + frequency_bins_data_by_class[frequency_bins_data_by_class$frequency_bins == "0", c("frequency_bins")] = "0 or lower" +} write.table(frequency_bins_data_by_class, "frequency_ranges_classes.txt", sep="\t",quote=F,row.names=F,col.names=T) frequency_bins_data = data.frame(data.table(dat)[, list(frequency_count=.N), by=c("best_match", "best_match_class", "frequency_bins")]) @@ -499,9 +504,10 @@ frequency_bins_data = frequency_bins_data[order(frequency_bins_data$best_match, frequency_bins_data$frequency_bins),] frequency_bins_data$frequency_bins = gsub("-", " to ", frequency_bins_data$frequency_bins) -frequency_bins_data[frequency_bins_data$frequency_bins 
== "20", c("frequency_bins")] = "20 or higher" -frequency_bins_data[frequency_bins_data$frequency_bins == "0", c("frequency_bins")] = "0 or lower" - +if (nrow(frequency_bins_data) > 0) { + frequency_bins_data[frequency_bins_data$frequency_bins == "20", c("frequency_bins")] = "20 or higher" + frequency_bins_data[frequency_bins_data$frequency_bins == "0", c("frequency_bins")] = "0 or lower" +} write.table(frequency_bins_data, "frequency_ranges_subclasses.txt", sep="\t",quote=F,row.names=F,col.names=T) diff -r cf8ad181628f -r 8fcf31272f6e shm_csr.xml --- a/shm_csr.xml Mon Dec 12 12:32:44 2022 +0000 +++ b/shm_csr.xml Mon Mar 06 11:36:32 2023 +0000 @@ -1,4 +1,4 @@ - + python @@ -21,11 +21,34 @@ font-ttf-ubuntu + @@ -56,13 +79,16 @@ + - + + + @@ -72,14 +98,9 @@ - + + - - - - - - diff -r cf8ad181628f -r 8fcf31272f6e show_time_as_float Binary file show_time_as_float has changed diff -r cf8ad181628f -r 8fcf31272f6e show_time_as_float.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/show_time_as_float.c Mon Mar 06 11:36:32 2023 +0000 @@ -0,0 +1,16 @@ +/* script adapted from https://www.nu42.com/2021/07/windows-c-time-in-nanoseconds.html */ +#include +#include + +int main(void) +{ + struct timespec ts; + + if (timespec_get(&ts, TIME_UTC) != TIME_UTC) + { + fputs("timespec_get failed!", stderr); + return 1; + } + printf("%ld.%ld\n", ts.tv_sec, ts.tv_nsec); + return 0; +} diff -r cf8ad181628f -r 8fcf31272f6e tests/__pycache__/test_shm_csr.cpython-37-pytest-6.2.5.pyc Binary file tests/__pycache__/test_shm_csr.cpython-37-pytest-6.2.5.pyc has changed diff -r cf8ad181628f -r 8fcf31272f6e tests/__pycache__/test_shm_csr.cpython-39-pytest-7.2.0.pyc Binary file tests/__pycache__/test_shm_csr.cpython-39-pytest-7.2.0.pyc has changed diff -r cf8ad181628f -r 8fcf31272f6e tests/__pycache__/test_shm_csr.cpython-39-pytest-7.2.1.pyc Binary file tests/__pycache__/test_shm_csr.cpython-39-pytest-7.2.1.pyc has changed diff -r cf8ad181628f -r 8fcf31272f6e tests/data/.~lock.handleiding activeren 
pas.docx# --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/data/.~lock.handleiding activeren pas.docx# Mon Mar 06 11:36:32 2023 +0000 @@ -0,0 +1,1 @@ +Vorderman\, R.H.P. (MOLEPI) ,rhpvorderman,sasc-pc-6,21.02.2023 15:01,file:///home/rhpvorderman/.config/libreoffice/4; \ No newline at end of file diff -r cf8ad181628f -r 8fcf31272f6e tests/data/handleiding activeren pas.docx Binary file tests/data/handleiding activeren pas.docx has changed diff -r cf8ad181628f -r 8fcf31272f6e tests/test_shm_csr.py --- a/tests/test_shm_csr.py Mon Dec 12 12:32:44 2022 +0000 +++ b/tests/test_shm_csr.py Mon Mar 06 11:36:32 2023 +0000 @@ -43,11 +43,20 @@ return container.text +def ignore_files(src, files): + "Ignore virtualenv and git directories to prevent massive tmp folders" + if os.path.basename(src) in (".venv", ".git"): + return files + return () + @pytest.fixture(scope="module") def shm_csr_result(): temp_dir = Path(tempfile.mkdtemp()) tool_dir = temp_dir / "shm_csr" - shutil.copytree(GIT_ROOT, tool_dir) + shutil.copytree( + GIT_ROOT, tool_dir, + # Ignore .venv and .git directories. + ignore=ignore_files) working_dir = temp_dir / "working" working_dir.mkdir(parents=True) output_dir = temp_dir / "outputs" diff -r cf8ad181628f -r 8fcf31272f6e time_ns Binary file time_ns has changed diff -r cf8ad181628f -r 8fcf31272f6e wrapper.sh --- a/wrapper.sh Mon Dec 12 12:32:44 2022 +0000 +++ b/wrapper.sh Mon Mar 06 11:36:32 2023 +0000 @@ -22,12 +22,15 @@ empty_region_filter=${18} fast=${19} +BASENAME=$(basename $input) +# Cut off .txz or .tgz suffix +NEW_IMGT_PREFIX="new_IMGT_${BASENAME%.*}" + #exec 5> debug_output.txt #BASH_XTRACEFD="5" -## Busybox date does not support '+%s.%N'. So use the slower python instead. -## Using -S python does not do 'import site' which shortens the command -## to 10 milliseconds. -#PS4='$(python -Sc "import time; print(time.time())") $LINENO: ' +## Busybox date does not support '+%s.%N'. So use a custom program. 
Can be +## Compiled with cc -Os show_time_as_float.c -o show_time_as_float +#PS4='$(${dir}/show_time_as_float) $LINENO: ' #set -x mkdir -p $outdir @@ -39,7 +42,7 @@ echo "unpacking IMGT file" -type="`file $input`" +type="`file -L $input`" if [[ "$type" == *"Zip archive"* ]] ; then echo "Zip archive" echo "unzip $input -d $PWD/files/" @@ -85,13 +88,30 @@ echo "---------------- merge_and_filter.r ----------------" echo "---------------- merge_and_filter.r ----------------
" >> $log -Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt "$PWD/gapped_aa.txt" $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${filter_unique_count} ${class_filter} ${empty_region_filter} 2>&1 +Rscript $dir/merge_and_filter.r \ + $PWD/summary.txt \ + $PWD/sequences.txt \ + $PWD/mutationanalysis.txt \ + $PWD/mutationstats.txt \ + $PWD/hotspots.txt \ + "$PWD/gapped_aa.txt" \ + $outdir/identified_genes.txt \ + $outdir/merged.txt \ + $outdir/before_unique_filter.txt \ + $outdir/unmatched.txt \ + $method \ + $functionality \ + $unique \ + ${filter_unique} \ + ${filter_unique_count} \ + ${class_filter} \ + ${empty_region_filter} echo "---------------- creating new IMGT zips ----------------" echo "---------------- creating new IMGT zips ----------------
" >> $log python $dir/split_imgt_file.py --outdir $outdir $input $outdir/merged.txt \ - --prefix new_IMGT \ + --prefix "${NEW_IMGT_PREFIX}" \ - IGA IGA1 IGA2 IGG IGG1 IGG2 IGG3 IGG4 IGM IGE @@ -100,7 +120,7 @@ classes="IGA,IGA1,IGA2,IGG,IGG1,IGG2,IGG3,IGG4,IGM,IGE,unmatched" echo "R mutation analysis" -Rscript $dir/shm_csr.r $outdir/merged.txt $classes $outdir ${empty_region_filter} 2>&1 +Rscript $dir/shm_csr.r $outdir/merged.txt $classes $outdir ${empty_region_filter} echo "---------- Split naive memory IGM ---------" echo "---------- Split naive memory IGM ---------
" >> $log @@ -108,20 +128,20 @@ python $dir/igm_naive_mutations.py $outdir/scatter.txt $outdir/igm_naive_mutations.txt \ $outdir/igm_naive_memory_mutations.txt -python $dir/split_imgt_file.py --outdir $outdir $outdir/new_IMGT_IGM.txz \ +python $dir/split_imgt_file.py --outdir $outdir $outdir/${NEW_IMGT_PREFIX}_IGM.txz \ $outdir/igm_naive_mutations.txt \ - --prefix new_IMGT_IGM_NAIVE - + --prefix "${NEW_IMGT_PREFIX}_IGM_NAIVE" - -python $dir/split_imgt_file.py --outdir $outdir $outdir/new_IMGT_IGM.txz \ +python $dir/split_imgt_file.py --outdir $outdir $outdir/${NEW_IMGT_PREFIX}_IGM.txz \ $outdir/igm_naive_memory_mutations.txt \ - --prefix new_IMGT_IGM_NAIVE_MEMORY - + --prefix "${NEW_IMGT_PREFIX}_IGM_NAIVE_MEMORY" - echo "---------------- plot_pdfs.r ----------------" echo "---------------- plot_pdfs.r ----------------
" >> $log -echo "Rscript $dir/shm_csr.r $outdir/pdfplots.RData $outdir 2>&1" +echo "Rscript $dir/shm_csr.r $outdir/pdfplots.RData $outdir" -Rscript $dir/plot_pdf.r "$outdir/pdfplots.RData" "$outdir" 2>&1 +Rscript $dir/plot_pdf.r "$outdir/pdfplots.RData" "$outdir" echo "---------------- shm_csr.py ----------------" echo "---------------- shm_csr.py ----------------
" >> $log @@ -131,7 +151,11 @@ echo "---------------- aa_histogram.r ----------------" echo "---------------- aa_histogram.r ----------------
" >> $log -Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "IGA,IGG,IGM,IGE" $outdir/ 2>&1 +Rscript $dir/aa_histogram.r \ + $outdir/aa_id_mutations.txt \ + $outdir/absent_aa_id.txt "IGA,IGG,IGM,IGE" \ + $outdir/ + if [ -e "$outdir/aa_histogram_.png" ]; then mv $outdir/aa_histogram_.png $outdir/aa_histogram.png mv $outdir/aa_histogram_.pdf $outdir/aa_histogram.pdf @@ -153,7 +177,12 @@ python $dir/sequence_overview.py --before-unique $outdir/before_unique_filter.txt \ --outdir $outdir/sequence_overview --empty-region-filter ${empty_region_filter} -Rscript $dir/nt_overview.r $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt ${empty_region_filter} 2>&1 +Rscript $dir/nt_overview.r \ + $outdir/merged.txt \ + $outdir/sequence_overview \ + $classes \ + $outdir/hotspot_analysis_sum.txt \ + ${empty_region_filter} echo "" > $outdir/base_overview.html @@ -198,12 +227,17 @@ echo "---------------- pattern_plots.r ----------------" echo "---------------- pattern_plots.r ----------------
" >> $log - Rscript $dir/pattern_plots.r $outdir/data_${func}.txt $outdir/aid_motives $outdir/relative_mutations $outdir/absolute_mutations $outdir/shm_overview.txt 2>&1 + Rscript $dir/pattern_plots.r \ + $outdir/data_${func}.txt \ + $outdir/aid_motives \ + $outdir/relative_mutations \ + $outdir/absolute_mutations \ + $outdir/shm_overview.txt echo "
" >> $output echo "" >> $output - if [ "${class_filter}" != "101_101" ] ; then + if [ "${class_filter}" != "101_101_all" ] ; then for gene in ${genes[@]} do @@ -397,41 +431,41 @@ echo "

${header_substring}

" >> $output mkdir $outdir/baseline/IGA_IGG_IGM - if [[ "$(count_imgt_lines $outdir/new_IMGT.txz)" -gt "1" ]]; then + if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}.txz)" -gt "1" ]]; then cd $outdir/baseline/IGA_IGG_IGM - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT.txz "IGA_IGG_IGM_IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/${NEW_IMGT_PREFIX}.txz "IGA_IGG_IGM_IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" else echo "No sequences" > "$outdir/baseline.txt" fi mkdir $outdir/baseline/IGA - if [[ "$(count_imgt_lines $outdir/new_IMGT_IGA.txz)" -gt "1" ]]; then + if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}_IGA.txz)" -gt "1" ]]; then cd $outdir/baseline/IGA - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGA.txz "IGA" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGA.pdf" "Sequence.ID" "$outdir/baseline_IGA.txt" + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/${NEW_IMGT_PREFIX}_IGA.txz "IGA" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGA.pdf" "Sequence.ID" "$outdir/baseline_IGA.txt" else echo "No IGA sequences" > "$outdir/baseline_IGA.txt" fi mkdir $outdir/baseline/IGG - if [[ "$(count_imgt_lines $outdir/new_IMGT_IGG.txz)" -gt "1" ]]; then + if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}_IGG.txz)" -gt "1" ]]; then cd $outdir/baseline/IGG - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGG.txz "IGG" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGG.pdf" "Sequence.ID" "$outdir/baseline_IGG.txt" + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/${NEW_IMGT_PREFIX}_IGG.txz "IGG" 
"$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGG.pdf" "Sequence.ID" "$outdir/baseline_IGG.txt" else echo "No IGG sequences" > "$outdir/baseline_IGG.txt" fi mkdir $outdir/baseline/IGM - if [[ "$(count_imgt_lines $outdir/new_IMGT_IGM.txz)" -gt "1" ]]; then + if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}_IGM.txz)" -gt "1" ]]; then cd $outdir/baseline/IGM - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGM.txz "IGM" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGM.pdf" "Sequence.ID" "$outdir/baseline_IGM.txt" + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/${NEW_IMGT_PREFIX}_IGM.txz "IGM" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGM.pdf" "Sequence.ID" "$outdir/baseline_IGM.txt" else echo "No IGM sequences" > "$outdir/baseline_IGM.txt" fi mkdir $outdir/baseline/IGE - if [[ "$(count_imgt_lines $outdir/new_IMGT_IGE.txz)" -gt "1" ]]; then + if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}_IGE.txz)" -gt "1" ]]; then cd $outdir/baseline/IGE - bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT_IGE.txz "IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGE.pdf" "Sequence.ID" "$outdir/baseline_IGE.txt" + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/${NEW_IMGT_PREFIX}_IGE.txz "IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline_IGE.pdf" "Sequence.ID" "$outdir/baseline_IGE.txt" else echo "No IGE sequences" > "$outdir/baseline_IGE.txt" fi @@ -498,24 +532,32 @@ cd $outdir/change_o - bash $dir/change_o/makedb.sh $outdir/new_IMGT.txz false false false $outdir/change_o/change-o-db.txt + bash $dir/change_o/makedb.sh $outdir/${NEW_IMGT_PREFIX}.txz false false false $outdir/change_o/change-o-db.txt bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db.txt gene first ham none min complete 3.0 
$outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-defined_clones-summary.txt - Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-db-defined_first_clones.txt 2>&1 + Rscript $dir/change_o/select_first_in_clone.r \ + $outdir/change_o/change-o-db-defined_clones.txt \ + $outdir/change_o/change-o-db-defined_first_clones.txt - python $dir/split_imgt_file.py --outdir $outdir --prefix new_IMGT_first_seq_of_clone \ - $outdir/new_IMGT.txz $outdir/change_o/change-o-db-defined_first_clones.txt \ + python $dir/split_imgt_file.py --outdir $outdir --prefix ${NEW_IMGT_PREFIX}_first_seq_of_clone \ + $outdir/${NEW_IMGT_PREFIX}.txz $outdir/change_o/change-o-db-defined_first_clones.txt \ "-" - Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" $outdir/change_o/change-o-db-defined_clones.txt 2>&1 - echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/$outdir/merged.txt 'all' 'Sequence.ID,best_match' 'Sequence.ID' 'Sequence.ID' '\t' $outdir/change_o/change-o-db-defined_clones.txt 2>&1" + Rscript $dir/merge.r \ + $outdir/change_o/change-o-db-defined_clones.txt \ + $outdir/merged.txt \ + "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" \ + $outdir/change_o/change-o-db-defined_clones.txt + echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/$outdir/merged.txt 'all' 'Sequence.ID,best_match' 'Sequence.ID' 'Sequence.ID' '\t' $outdir/change_o/change-o-db-defined_clones.txt" - if [[ "$(count_imgt_lines $outdir/new_IMGT_IGA.txz)" -gt "1" ]]; then - bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGA.txz false false false $outdir/change_o/change-o-db-IGA.txt + if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}_IGA.txz)" -gt "1" ]]; then + bash $dir/change_o/makedb.sh $outdir/${NEW_IMGT_PREFIX}_IGA.txz false false false 
$outdir/change_o/change-o-db-IGA.txt bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGA.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-defined_clones-summary-IGA.txt - Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-db-defined_first_clones-IGA.txt 2>&1 + Rscript $dir/change_o/select_first_in_clone.r \ + $outdir/change_o/change-o-db-defined_clones-IGA.txt \ + $outdir/change_o/change-o-db-defined_first_clones-IGA.txt - python $dir/split_imgt_file.py --outdir $outdir --prefix new_IMGT_IGA_first_seq_of_clone \ - $outdir/new_IMGT.txz $outdir/change_o/change-o-db-defined_first_clones-IGA.txt \ + python $dir/split_imgt_file.py --outdir $outdir --prefix ${NEW_IMGT_PREFIX}_IGA_first_seq_of_clone \ + $outdir/${NEW_IMGT_PREFIX}.txz $outdir/change_o/change-o-db-defined_first_clones-IGA.txt \ "-" else @@ -523,13 +565,15 @@ echo "No IGA sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGA.txt" fi - if [[ "$(count_imgt_lines $outdir/new_IMGT_IGG.txz)" -gt "1" ]]; then - bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGG.txz false false false $outdir/change_o/change-o-db-IGG.txt + if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}_IGG.txz)" -gt "1" ]]; then + bash $dir/change_o/makedb.sh $outdir/${NEW_IMGT_PREFIX}_IGG.txz false false false $outdir/change_o/change-o-db-IGG.txt bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGG.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-defined_clones-summary-IGG.txt - Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-db-defined_first_clones-IGG.txt 2>&1 + Rscript $dir/change_o/select_first_in_clone.r \ + $outdir/change_o/change-o-db-defined_clones-IGG.txt \ + 
$outdir/change_o/change-o-db-defined_first_clones-IGG.txt - python $dir/split_imgt_file.py --outdir $outdir --prefix new_IMGT_IGG_first_seq_of_clone \ - $outdir/new_IMGT.txz $outdir/change_o/change-o-db-defined_first_clones-IGG.txt \ + python $dir/split_imgt_file.py --outdir $outdir --prefix ${NEW_IMGT_PREFIX}_IGG_first_seq_of_clone \ + $outdir/${NEW_IMGT_PREFIX}.txz $outdir/change_o/change-o-db-defined_first_clones-IGG.txt \ "-" else @@ -537,13 +581,15 @@ echo "No IGG sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGG.txt" fi - if [[ "$(count_imgt_lines $outdir/new_IMGT_IGM.txz)" -gt "1" ]]; then - bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGM.txz false false false $outdir/change_o/change-o-db-IGM.txt + if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}_IGM.txz)" -gt "1" ]]; then + bash $dir/change_o/makedb.sh $outdir/${NEW_IMGT_PREFIX}_IGM.txz false false false $outdir/change_o/change-o-db-IGM.txt bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGM.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-defined_clones-summary-IGM.txt - Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-db-defined_first_clones-IGM.txt 2>&1 + Rscript $dir/change_o/select_first_in_clone.r \ + $outdir/change_o/change-o-db-defined_clones-IGM.txt \ + $outdir/change_o/change-o-db-defined_first_clones-IGM.txt - python $dir/split_imgt_file.py --outdir $outdir --prefix new_IMGT_IGM_first_seq_of_clone \ - $outdir/new_IMGT.txz $outdir/change_o/change-o-db-defined_first_clones-IGM.txt \ + python $dir/split_imgt_file.py --outdir $outdir --prefix ${NEW_IMGT_PREFIX}_IGM_first_seq_of_clone \ + $outdir/${NEW_IMGT_PREFIX}.txz $outdir/change_o/change-o-db-defined_first_clones-IGM.txt \ "-" else @@ -551,13 +597,15 @@ echo "No IGM sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGM.txt" fi - if [[ 
"$(count_imgt_lines $outdir/new_IMGT_IGE.txz)" -gt "1" ]]; then - bash $dir/change_o/makedb.sh $outdir/new_IMGT_IGE.txz false false false $outdir/change_o/change-o-db-IGE.txt + if [[ "$(count_imgt_lines $outdir/${NEW_IMGT_PREFIX}_IGE.txz)" -gt "1" ]]; then + bash $dir/change_o/makedb.sh $outdir/${NEW_IMGT_PREFIX}_IGE.txz false false false $outdir/change_o/change-o-db-IGE.txt bash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGE.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGE.txt $outdir/change_o/change-o-defined_clones-summary-IGE.txt - Rscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGE.txt $outdir/change_o/change-o-db-defined_first_clones-IGE.txt 2>&1 + Rscript $dir/change_o/select_first_in_clone.r \ + $outdir/change_o/change-o-db-defined_clones-IGE.txt \ + $outdir/change_o/change-o-db-defined_first_clones-IGE.txt - python $dir/split_imgt_file.py --outdir $outdir --prefix new_IMGT_IGE_first_seq_of_clone \ - $outdir/new_IMGT.txz $outdir/change_o/change-o-db-defined_first_clones-IGE.txt \ + python $dir/split_imgt_file.py --outdir $outdir --prefix ${NEW_IMGT_PREFIX}_IGE_first_seq_of_clone \ + $outdir/${NEW_IMGT_PREFIX}.txz $outdir/change_o/change-o-db-defined_first_clones-IGE.txt \ "-" else @@ -714,38 +762,38 @@ echo "" >> $output echo "" >> $output echo "" >> $output -echo "" >> $output +echo "" >> $output echo "" >> $output echo "" >> $output -echo "" >> $output +echo "" >> $output echo "" >> $output echo "" >> $output -echo "" >> $output +echo "" >> $output echo "" >> $output echo "" >> $output -echo "" >> $output +echo "" >> $output echo "" >> $output echo "" >> $output -echo "" >> $output +echo "" >> $output echo "" >> $output -echo "" >> $output -echo "" >> $output -echo "" >> $output -echo "" >> $output -echo "" >> $output -echo "" >> $output -echo "" >> $output -echo "" >> $output -echo "" >> $output -echo "" >> $output -echo "" >> $output -echo "" >> 
$output -echo "" >> $output +echo "" >> $output +echo "" >> $output +echo "" >> $output +echo "" >> $output +echo "" >> $output +echo "" >> $output +echo "" >> $output +echo "" >> $output +echo "" >> $output +echo "" >> $output +echo "" >> $output +echo "" >> $output +echo "" >> $output echo "
info
Sequence overlap between subclassesView
The Change-O DB file with defined clones and subclass annotationDownload
The Change-O DB defined clones summary fileDownload
An IMGT archive with just the first sequence of a cloneDownload
An IMGT archive with just the first sequence of a cloneDownload
The Change-O DB file with defined clones of IGADownload
The Change-O DB defined clones summary file of IGADownload
An IMGT archive with just the first sequence of a clone (IGA)Download
An IMGT archive with just the first sequence of a clone (IGA)Download
The Change-O DB file with defined clones of IGGDownload
The Change-O DB defined clones summary file of IGGDownload
An IMGT archive with just the first sequence of a clone (IGG)Download
An IMGT archive with just the first sequence of a clone (IGG)Download
The Change-O DB file with defined clones of IGMDownload
The Change-O DB defined clones summary file of IGMDownload
An IMGT archive with just the first sequence of a clone (IGM)Download
An IMGT archive with just the first sequence of a clone (IGM)Download
The Change-O DB file with defined clones of IGEDownload
The Change-O DB defined clones summary file of IGEDownload
An IMGT archive with just the first sequence of a clone (IGE)Download
An IMGT archive with just the first sequence of a clone (IGE)Download
Filtered IMGT output files
An IMGT archive with just the matched and filtered sequencesDownload
An IMGT archive with just the matched and filtered IGA sequencesDownload
An IMGT archive with just the matched and filtered IGA1 sequencesDownload
An IMGT archive with just the matched and filtered IGA2 sequencesDownload
An IMGT archive with just the matched and filtered IGG sequencesDownload
An IMGT archive with just the matched and filtered IGG1 sequencesDownload
An IMGT archive with just the matched and filtered IGG2 sequencesDownload
An IMGT archive with just the matched and filtered IGG3 sequencesDownload
An IMGT archive with just the matched and filtered IGG4 sequencesDownload
An IMGT archive with just the matched and filtered IGM sequencesDownload
An IMGT archive with just the matched and filtered IGE sequencesDownload
An IMGT archive with just the matched and filtered naive IGM sequences (mutations below 2%)Download
An IMGT archive with just the matched and filtered naive memory IGM sequences (mutations 2% or higher)Download
An IMGT archive with just the matched and filtered sequencesDownload
An IMGT archive with just the matched and filtered IGA sequencesDownload
An IMGT archive with just the matched and filtered IGA1 sequencesDownload
An IMGT archive with just the matched and filtered IGA2 sequencesDownload
An IMGT archive with just the matched and filtered IGG sequencesDownload
An IMGT archive with just the matched and filtered IGG1 sequencesDownload
An IMGT archive with just the matched and filtered IGG2 sequencesDownload
An IMGT archive with just the matched and filtered IGG3 sequencesDownload
An IMGT archive with just the matched and filtered IGG4 sequencesDownload
An IMGT archive with just the matched and filtered IGM sequencesDownload
An IMGT archive with just the matched and filtered IGE sequencesDownload
An IMGT archive with just the matched and filtered naive IGM sequences (mutations below 2%)Download
An IMGT archive with just the matched and filtered naive memory IGM sequences (mutations 2% or higher)Download
" >> $output echo "
" >> $output @@ -764,16 +812,16 @@ if [[ "$naive_output" == "yes" ]] then echo "output naive output" - if [[ "${class_filter}" == "101_101" ]] + if [[ "${class_filter}" == "101_101_all" ]] then - echo "copy new_IMGT.txz to ${naive_output_all}" - cp $outdir/new_IMGT.txz ${naive_output_all} + echo "copy ${NEW_IMGT_PREFIX}.txz to ${naive_output_all}" + cp $outdir/${NEW_IMGT_PREFIX}.txz ${naive_output_all} else echo "copy for classes" - cp $outdir/new_IMGT_IGA.txz ${naive_output_ca} - cp $outdir/new_IMGT_IGG.txz ${naive_output_cg} - cp $outdir/new_IMGT_IGM.txz ${naive_output_cm} - cp $outdir/new_IMGT_IGE.txz ${naive_output_ce} + cp $outdir/${NEW_IMGT_PREFIX}_IGA.txz ${naive_output_ca} + cp $outdir/${NEW_IMGT_PREFIX}_IGG.txz ${naive_output_cg} + cp $outdir/${NEW_IMGT_PREFIX}_IGM.txz ${naive_output_cm} + cp $outdir/${NEW_IMGT_PREFIX}_IGE.txz ${naive_output_ce} fi fi