diff conc.awk @ 135:21bb464c1d53 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 78bf7abb931bf3d348837c7211cd3cff32486691
author fubar
date Sun, 15 Dec 2024 23:47:40 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/conc.awk	Sun Dec 15 23:47:40 2024 +0000
@@ -0,0 +1,57 @@
+#!/bin/awk -f
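+# Input must be in PAF format (see the column specification below).
+# Pipe the output to `sort -n -r -k2` to list the highest counts first.
+# Few duplicates were found in a 14M-row PAF converted from a paired BAM from VGP mUroPar1.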
+# Count records per (query name, query start, target name, target start) key.
+{
+    # An unset array element evaluates to 0 numerically, so ++ alone counts from 1.
+    concordance[$1,$3,$6,$8]++
+}
+# Print each key (its four fields joined by SUBSEP) and its count; order is unspecified.
+END {
+    for (key in concordance) print key, concordance[key]
+}
+
+
+
+
+## PAF specification
+## 
+## Col	Type	Description
+## 1	string	Query sequence name
+## 2	int	Query sequence length
+## 3	int	Query start (0-based)
+## 4	int	Query end (0-based)
+## 5	char	Relative strand: "+" or "-"
+## 6	string	Target sequence name
+## 7	int	Target sequence length
+## 8	int	Target start on original strand (0-based)
+## 9	int	Target end on original strand (0-based)
+## 10	int	Number of residue matches
+## 11	int	Alignment block length
+## 12	int	Mapping quality (0-255; 255 for missing)
+## 
+## 
+## chr10	135711693	0	410000	+	chr10	135912142	60	410218	18	410158	32	id:f:0.999395	kc:f:1.01221
+## chr10	135711693	415000	1305000	+	chr10	135912142	412798	1295808	17	890000	28	id:f:0.998493	kc:f:0.971423
+## chr10	135711693	1310000	2395000	+	chr10	135912142	1300732	2385827	15	1085095	30	id:f:0.998896	kc:f:1.02767
+## chr10	135711693	2400000	25440000	+	chr10	135912142	2385341	25429367	18	23044026	29	id:f:0.998716	kc:f:1.04082
+## chr10	135711693	25445000	26735000	+	chr10	135912142	25429997	26720864	13	1290867	28	id:f:0.998239	kc:f:1.05397
+## chr10	135711693	26740000	37190000	+	chr10	135912142	26722183	37175820	20	10453637	29	id:f:0.998842	kc:f:1.0341
+## chr10	135711693	37185000	37195000	-	chr10	135912142	37175523	37185020	13	10000	22	id:f:0.993765	kc:f:1.02691
+## chr10	135711693	37195000	37285000	+	chr10	135912142	37181742	37271716	18	90000	27	id:f:0.998021	kc:f:1.02883
+## chr10	135711693	37285000	38650000	+	chr10	135912142	37283684	38647751	18	1365000	33	id:f:0.999466	kc:f:1.03306
+## 
+## #E00526:120:HYYLYCCXY:8:1206:18842:33182 179 chr10:0-100000H1 23129 0 150M chr11:0-100000H2 88118 0 
+## #ATTATCTCAGCAGATGCAGAAAAGGCCTTCAATAAAAGACAACACCCCTTCAGGTTAAAAACACTCAATTAACTAGGTGTTCATGGGACATACCTCAAAATAATAAGAGCCATTTATGACAAACCCACAGACAATATCATGCTGAATGGG 
+## #<<F7---<<JJJFJJF-7<FFJAA7A--<<F<-A7AAA-<F777--F<-77<F--7A-AAJA-<-JJ-JFFA<-<<JAJFF<FF--AJF7FFJ<-JJFFJ7F--AA--J<F-A<JF7<AFFF-<<<FAFF-AFA7F-JF7FJAJJF<AA< NM:i:19 MD:Z:9A28T5T7T2C6T6A6C1A2G1A2A0T2C21C0A16C9A7A1 AS:i:58 
+## #XS:i:58 XA:Z:chr10:0-100000H2,-22753,150M,19;chr14:0-100000H1,+33583,150M,19;chr13:0-100000H1,+48625,150M,19;chr13:0-100000H2,+35689,150M,20; E00526:120:HYYLYCCXY:8:2223:11495:36241 147 chr10:0-100000H1 30113 0 150M 
+## #chr10:0-100000H2 29642 0 AGAGCGCCTCTCCTCCTCCAAAGGAACACAGTTCCTCACCAGCAACAGAACAAAGCTGGACGGAGAATGACTTTGACGAGTTGAGAGAAGAAGGCTTCAGACGATCAAACTACTCCGAGCTACAGGAGGAAATTCAAACCAAAGGCAAAC 
+## #-<A-FF7--7-<7-A-AJJJAF7JJJFAA-FF-AFFFJJJFJJJFFAAAFJAJJJFJJJJFJFJA7AAFJF<<<A<JF7FJAAAFJJ<JJJ7<JJJJAAJJJFJJFJF<<F7JAAJJFJFJJ<AJAFJJJJJJAJJ7AJF<<-AFFF<A< NM:i:13 MD:Z:26T0G17T0G13T6C11A0C28T5T7G6C18G0 AS:i:89 XS:i:89 
+## #XA:Z:chr10:0-100000H2,-29737,150M,13; ...
+## #columns: readID chr1 pos1 chr2 pos2 strand1 strand2
+## #EAS139:136:FC706VJ:2:2104:23462:197393 chr1 10000 chr1 20000 + +
+## #EAS139:136:FC706VJ:2:8762:23765:128766 chr1 50000 chr1 70000 + +
+## #EAS139:136:FC706VJ:2:2342:15343:9863 chr1 60000 chr2 10000 + +
+## #EAS139:136:FC706VJ:2:1286:25:275154 chr1 30000 chr3 40000 + -
+## 
\ No newline at end of file