# HG changeset patch
# User davidvanzessen
# Date 1478702732 18000
# Node ID 4b695ca652137fdc74b08ea87059acd727b52e69
# Parent 372ccdcf0b2d5d3c4bc78be2d3c5d5dad27fdde4
Uploaded
diff -r 372ccdcf0b2d -r 4b695ca65213 merge_and_filter.r
--- a/merge_and_filter.r Tue Nov 08 07:32:54 2016 -0500
+++ b/merge_and_filter.r Wed Nov 09 09:45:32 2016 -0500
@@ -25,7 +25,7 @@
gene_identification = read.table(gene_identification_file, header=T, sep="\t", fill=T, stringsAsFactors=F, quote="")
if(method == "blastn"){
- "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore"
+ #"qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore"
gene_identification = gene_identification[!duplicated(gene_identification$qseqid),]
ref_length = data.frame(sseqid=c("ca1", "ca2", "cg1", "cg2", "cg3", "cg4", "cm"), ref.length=c(81,81,141,141,141,141,52))
gene_identification = merge(gene_identification, ref_length, by="sseqid", all.x=T)
@@ -144,12 +144,7 @@
higher_than=(result$chunk_hit_percentage >= chunk_hit_threshold & result$nt_hit_percentage >= nt_hit_threshold)
-unmatched=result[NULL,c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")]
-
if(!all(higher_than, na.rm=T)){ #check for no unmatched
- unmatched = result[!higher_than,]
- unmatched = unmatched[,c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")]
- unmatched$best_match = paste("unmatched,", unmatched$best_match)
result[!higher_than,"best_match"] = paste("unmatched,", result[!higher_than,"best_match"])
}
@@ -200,6 +195,8 @@
filtering.steps = rbind(filtering.steps, c("After remove duplicates based on filter", nrow(result)))
+unmatched = result[grepl("^unmatched", result$best_match),c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")]
+
print(paste("Number of rows in result:", nrow(result)))
print(paste("Number of rows in unmatched:", nrow(unmatched)))
diff -r 372ccdcf0b2d -r 4b695ca65213 shm_csr.xml
--- a/shm_csr.xml Tue Nov 08 07:32:54 2016 -0500
+++ b/shm_csr.xml Wed Nov 09 09:45:32 2016 -0500
@@ -17,12 +17,12 @@
-
+
-
+
-
+
@@ -31,7 +31,7 @@
-
+