Repository 'shm_csr'
hg clone https://radegast.galaxyproject.org/repos/davidvanzessen/shm_csr

Changeset 55:6cd12c71c3d3 (2017-06-14)
Previous changeset 54:ba3220f921af (2017-05-30) Next changeset 56:ee807645b224 (2017-07-17)
Commit message:
Uploaded
modified:
merge_and_filter.r
new_imgt.r
wrapper.sh
added:
change_o/select_first_in_clone.r
b
diff -r ba3220f921af -r 6cd12c71c3d3 change_o/select_first_in_clone.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/change_o/select_first_in_clone.r Wed Jun 14 11:14:00 2017 -0400
[
@@ -0,0 +1,16 @@
+# Reads a tab-separated table, keeps only the first row seen for each CLONE value, and writes it back out.
+args <- commandArgs(trailingOnly = TRUE)
+input.file = args[1]   # first argument: path of the tab-separated table to read
+output.file = args[2]  # second argument: path the reduced table is written to
+
+print("select_first_in_clone.r")
+print(input.file)
+print(output.file)
+
+input = read.table(input.file, header=TRUE, sep="\t", fill=TRUE, stringsAsFactors=FALSE, quote="")
+# duplicated() marks every repeat after the first occurrence, so negating it keeps one row per CLONE
+input = input[!duplicated(input$CLONE),]
+# the first column is renamed unconditionally, whatever its name was in the input header
+names(input)[1] = "Sequence.ID"
+
+write.table(input, output.file, quote=FALSE, sep="\t", row.names=FALSE, col.names=TRUE, na="")
b
diff -r ba3220f921af -r 6cd12c71c3d3 merge_and_filter.r
--- a/merge_and_filter.r Tue May 30 07:40:15 2017 -0400
+++ b/merge_and_filter.r Wed Jun 14 11:14:00 2017 -0400
[
@@ -97,9 +97,9 @@
 
 filtering.steps = rbind(filtering.steps, c("After functionality filter", nrow(summ)))
 
-if(FALSE){ #to speed up debugging
+if(F){ #to speed up debugging
     set.seed(1)
-    summ = summ[sample(nrow(summ), floor(nrow(summ) * 0.1)),]
+    summ = summ[sample(nrow(summ), floor(nrow(summ) * 0.05)),]
     print(paste("Number of sequences after sampling 5%:", nrow(summ)))
 
     filtering.steps = rbind(filtering.steps, c("Number of sequences after sampling 5%", nrow(summ)))
b
diff -r ba3220f921af -r 6cd12c71c3d3 new_imgt.r
--- a/new_imgt.r Tue May 30 07:40:15 2017 -0400
+++ b/new_imgt.r Wed Jun 14 11:14:00 2017 -0400
[
@@ -8,15 +8,15 @@
 
 if(gene != "-"){
  merged = merged[grepl(paste("^", gene, sep=""), merged$best_match),]
-} else {
+}
+
+if("best_match" %in% names(merged)){
  merged = merged[!grepl("unmatched", merged$best_match),]
 }
 
-merged = merged[!grepl("unmatched", merged$best_match),]
-
 for(f in list.files(imgt.dir, pattern="*.txt$")){
  #print(paste("filtering", f))
- path = paste(imgt.dir, f, sep="")
+ path = file.path(imgt.dir, f)
  dat = read.table(path, header=T, sep="\t", fill=T, quote="", stringsAsFactors=F, check.names=FALSE, comment.char="")
 
  dat = dat[dat[,"Sequence ID"] %in% merged$Sequence.ID,]
b
diff -r ba3220f921af -r 6cd12c71c3d3 wrapper.sh
--- a/wrapper.sh Tue May 30 07:40:15 2017 -0400
+++ b/wrapper.sh Wed Jun 14 11:14:00 2017 -0400
[
b'@@ -555,22 +555,58 @@\n \n \tbash $dir/change_o/makedb.sh $outdir/new_IMGT.txz false false false $outdir/change_o/change-o-db.txt\n \tbash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-defined_clones-summary.txt\n-\n+\tRscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/change_o/change-o-db-defined_first_clones.txt 2>&1\n+\t\n+\tmkdir $outdir/new_IMGT_changeo\n+\tcp $outdir/new_IMGT/* $outdir/new_IMGT_changeo\n+\t\n+\tRscript $dir/new_imgt.r $outdir/new_IMGT_changeo $outdir/change_o/change-o-db-defined_first_clones.txt "-" 2>&1\n+\t\n+\tcd $outdir/new_IMGT_changeo\n+\ttar -cJf ../new_IMGT_first_seq_of_clone.txz *\n+\tcd $outdir/change_o\n+\t\n+\trm -rf $outdir/new_IMGT_changeo\n+\t\n \tRscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" $outdir/change_o/change-o-db-defined_clones.txt 2>&1\n-\n \techo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/$outdir/merged.txt \'all\' \'Sequence.ID,best_match\' \'Sequence.ID\' \'Sequence.ID\' \'\\t\' $outdir/change_o/change-o-db-defined_clones.txt 2>&1"\n-\n+\t\n \tif [[ $(wc -l < $outdir/new_IMGT_IGA/1_Summary.txt) -gt "1" ]]; then\n \t\tbash $dir/change_o/makedb.sh $outdir/new_IMGT_IGA.txz false false false $outdir/change_o/change-o-db-IGA.txt\n \t\tbash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGA.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-defined_clones-summary-IGA.txt\n+\t\tRscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGA.txt $outdir/change_o/change-o-db-defined_first_clones-IGA.txt 2>&1\n+\t\t\n+\t\tmkdir $outdir/new_IMGT_IGA_changeo\n+\t\tcp $outdir/new_IMGT/* 
$outdir/new_IMGT_IGA_changeo\n+\t\t\n+\t\tRscript $dir/new_imgt.r $outdir/new_IMGT_IGA_changeo $outdir/change_o/change-o-db-defined_first_clones-IGA.txt "-" 2>&1\n+\t\t\n+\t\tcd $outdir/new_IMGT_IGA_changeo\n+\t\ttar -cJf ../new_IMGT_IGA_first_seq_of_clone.txz *\n+\t\t\n+\t\trm -rf $outdir/new_IMGT_IGA_changeo\n+\t\t\n+\t\tcd $outdir/change_o\n \telse\n \t\techo "No IGA sequences" > "$outdir/change_o/change-o-db-defined_clones-IGA.txt"\n \t\techo "No IGA sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGA.txt"\n \tfi\n-\n+\t\n \tif [[ $(wc -l < $outdir/new_IMGT_IGG/1_Summary.txt) -gt "1" ]]; then\n \t\tbash $dir/change_o/makedb.sh $outdir/new_IMGT_IGG.txz false false false $outdir/change_o/change-o-db-IGG.txt\n \t\tbash $dir/change_o/define_clones.sh bygroup $outdir/change_o/change-o-db-IGG.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-defined_clones-summary-IGG.txt\n+\t\tRscript $dir/change_o/select_first_in_clone.r $outdir/change_o/change-o-db-defined_clones-IGG.txt $outdir/change_o/change-o-db-defined_first_clones-IGG.txt 2>&1\n+\t\t\n+\t\tmkdir $outdir/new_IMGT_IGG_changeo\n+\t\tcp $outdir/new_IMGT/* $outdir/new_IMGT_IGG_changeo\n+\t\t\n+\t\tRscript $dir/new_imgt.r $outdir/new_IMGT_IGG_changeo $outdir/change_o/change-o-db-defined_first_clones-IGG.txt "-" 2>&1\n+\t\t\n+\t\tcd $outdir/new_IMGT_IGG_changeo\n+\t\ttar -cJf ../new_IMGT_IGG_first_seq_of_clone.txz *\n+\t\trm -rf $outdir/new_IMGT_IGG_changeo\n+\t\t\n+\t\tcd $outdir/change_o\n \telse\n \t\techo "No IGG sequences" > "$outdir/change_o/change-o-db-defined_clones-IGG.txt"\n \t\techo "No IGG sequences" > "$outdir/change_o/change-o-defined_clones-summary-IGG.txt"\n@@ -579,6 +615,19 @@\n \tif [[ $(wc -l < $outdir/new_IMGT_IGM/1_Summary.txt) -gt "1" ]]; then\n \t\tbash $dir/change_o/makedb.sh $outdir/new_IMGT_IGM.txz false false false $outdir/change_o/change-o-db-IGM.txt\n \t\tbash $dir/change_o/define_clones.sh bygroup 
$outdir/change_o/change-o-db-IGM.txt gene first ham none min complete 3.0 $outdir/change_o/change-o-db-defined_clones-IGM.txt $outdir/change_o/change-o-defined_clones-summary-IGM.txt\n+\t'..b'd</a></td></tr>" >> $output\n echo "<tr><td>The data for the IGG subclass distribution plot</td><td><a href=\'IGG_pie.txt\' download=\'IGG_pie.txt\' >Download</a></td></tr>" >> $output\n \n+\n echo "<tr><td colspan=\'2\' style=\'background-color:#E0E0E0;\'>Clonal Relation</td></tr>" >> $output\n echo "<tr><td>Sequence overlap between subclasses</td><td><a href=\'sequence_overview/index.html\'>View</a></td></tr>" >> $output\n echo "<tr><td>The Change-O DB file with defined clones and subclass annotation</td><td><a href=\'change_o/change-o-db-defined_clones.txt\' download=\'change_o/change-o-db-defined_clones.txt\' >Download</a></td></tr>" >> $output\n echo "<tr><td>The Change-O DB defined clones summary file</td><td><a href=\'change_o/change-o-defined_clones-summary.txt\' download=\'change_o/change-o-defined_clones-summary.txt\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just just the first sequence of a clone</td><td><a href=\'new_IMGT_first_seq_of_clone.txz\' download=\'new_IMGT_first_seq_of_clone.txz\' >Download</a></td></tr>" >> $output\n+\n echo "<tr><td>The Change-O DB file with defined clones of IGA</td><td><a href=\'change_o/change-o-db-defined_clones-IGA.txt\' download=\'change_o/change-o-db-defined_clones-IGA.txt\' >Download</a></td></tr>" >> $output\n echo "<tr><td>The Change-O DB defined clones summary file of IGA</td><td><a href=\'change_o/change-o-defined_clones-summary-IGA.txt\' download=\'change_o/change-o-defined_clones-summary-IGA.txt\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGA)</td><td><a href=\'new_IMGT_IGA_first_seq_of_clone.txz\' download=\'new_IMGT_IGA_first_seq_of_clone.txz\' >Download</a></td></tr>" >> $output\n+\n echo "<tr><td>The Change-O DB file 
with defined clones of IGG</td><td><a href=\'change_o/change-o-db-defined_clones-IGG.txt\' download=\'change_o/change-o-db-defined_clones-IGG.txt\' >Download</a></td></tr>" >> $output\n echo "<tr><td>The Change-O DB defined clones summary file of IGG</td><td><a href=\'change_o/change-o-defined_clones-summary-IGG.txt\' download=\'change_o/change-o-defined_clones-summary-IGG.txt\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGG)</td><td><a href=\'new_IMGT_IGG_first_seq_of_clone.txz\' download=\'new_IMGT_IGG_first_seq_of_clone.txz\' >Download</a></td></tr>" >> $output\n+\n echo "<tr><td>The Change-O DB file with defined clones of IGM</td><td><a href=\'change_o/change-o-db-defined_clones-IGM.txt\' download=\'change_o/change-o-db-defined_clones-IGM.txt\' >Download</a></td></tr>" >> $output\n echo "<tr><td>The Change-O DB defined clones summary file of IGM</td><td><a href=\'change_o/change-o-defined_clones-summary-IGM.txt\' download=\'change_o/change-o-defined_clones-summary-IGM.txt\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGM)</td><td><a href=\'new_IMGT_IGM_first_seq_of_clone.txz\' download=\'new_IMGT_IGM_first_seq_of_clone.txz\' >Download</a></td></tr>" >> $output\n+\n echo "<tr><td>The Change-O DB file with defined clones of IGE</td><td><a href=\'change_o/change-o-db-defined_clones-IGE.txt\' download=\'change_o/change-o-db-defined_clones-IGE.txt\' >Download</a></td></tr>" >> $output\n echo "<tr><td>The Change-O DB defined clones summary file of IGE</td><td><a href=\'change_o/change-o-defined_clones-summary-IGE.txt\' download=\'change_o/change-o-defined_clones-summary-IGE.txt\' >Download</a></td></tr>" >> $output\n+echo "<tr><td>An IMGT archive with just just the first sequence of a clone (IGE)</td><td><a href=\'new_IMGT_IGE_first_seq_of_clone.txz\' download=\'new_IMGT_IGE_first_seq_of_clone.txz\' >Download</a></td></tr>" >> 
$output\n \n echo "<tr><td colspan=\'2\' style=\'background-color:#E0E0E0;\'>Filtered IMGT output files</td></tr>" >> $output\n echo "<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href=\'new_IMGT.txz\' download=\'new_IMGT.txz\' >Download</a></td></tr>" >> $output\n'