cpt_compare_gbk: cpt_gbk_compare/gbk

annotate cpt_gbk_compare/gbk_compare.py @ 0:fc603e665d75 draft default tip

Uploaded

author	cpt
date	Tue, 21 Jun 2022 19:46:32 +0000
parents
children

rev	line source
0 fc603e665d75 Uploaded cpt parents: diff changeset	1 #!/usr/bin/env python3
fc603e665d75 Uploaded cpt parents: diff changeset	2 """
fc603e665d75 Uploaded cpt parents: diff changeset	3 Copyright 2019 Ryan Wick (rrwick@gmail.com)
fc603e665d75 Uploaded cpt parents: diff changeset	4 https://github.com/rrwick/Compare-annotations/
fc603e665d75 Uploaded cpt parents: diff changeset	5 This program is free software: you can redistribute it and/or modify it under the terms of the GNU
fc603e665d75 Uploaded cpt parents: diff changeset	6 General Public License as published by the Free Software Foundation, either version 3 of the
fc603e665d75 Uploaded cpt parents: diff changeset	7 License, or (at your option) any later version.
fc603e665d75 Uploaded cpt parents: diff changeset	8 This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
fc603e665d75 Uploaded cpt parents: diff changeset	9 even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
fc603e665d75 Uploaded cpt parents: diff changeset	10 General Public License for more details.
fc603e665d75 Uploaded cpt parents: diff changeset	11 You should have received a copy of the GNU General Public License along with this program. If not,
fc603e665d75 Uploaded cpt parents: diff changeset	12 see <https://www.gnu.org/licenses/>.
fc603e665d75 Uploaded cpt parents: diff changeset	13 """
fc603e665d75 Uploaded cpt parents: diff changeset	14
fc603e665d75 Uploaded cpt parents: diff changeset	15 import argparse
fc603e665d75 Uploaded cpt parents: diff changeset	16 from Bio import SeqIO
fc603e665d75 Uploaded cpt parents: diff changeset	17 from Bio import pairwise2
fc603e665d75 Uploaded cpt parents: diff changeset	18 from Bio.pairwise2 import format_alignment
fc603e665d75 Uploaded cpt parents: diff changeset	19 import itertools
fc603e665d75 Uploaded cpt parents: diff changeset	20 import sys
fc603e665d75 Uploaded cpt parents: diff changeset	21
fc603e665d75 Uploaded cpt parents: diff changeset	22
fc603e665d75 Uploaded cpt parents: diff changeset	23 def addArr(arrA, arrB):
fc603e665d75 Uploaded cpt parents: diff changeset	24 res = []
fc603e665d75 Uploaded cpt parents: diff changeset	25 for x in range(0, min(len(arrA), len(arrB))):
fc603e665d75 Uploaded cpt parents: diff changeset	26 res.append(arrA[x] + arrB[x])
fc603e665d75 Uploaded cpt parents: diff changeset	27 return res
fc603e665d75 Uploaded cpt parents: diff changeset	28
fc603e665d75 Uploaded cpt parents: diff changeset	29 def get_arguments():
fc603e665d75 Uploaded cpt parents: diff changeset	30 parser = argparse.ArgumentParser(description='Compare GenBank annotations',
fc603e665d75 Uploaded cpt parents: diff changeset	31 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
fc603e665d75 Uploaded cpt parents: diff changeset	32
fc603e665d75 Uploaded cpt parents: diff changeset	33 parser.add_argument('annotation_1', type=str,
fc603e665d75 Uploaded cpt parents: diff changeset	34 help='First annotated genome in Genbank format')
fc603e665d75 Uploaded cpt parents: diff changeset	35 parser.add_argument('annotation_2', type=str,
fc603e665d75 Uploaded cpt parents: diff changeset	36 help='Second annotated genome in Genbank format')
fc603e665d75 Uploaded cpt parents: diff changeset	37
fc603e665d75 Uploaded cpt parents: diff changeset	38
fc603e665d75 Uploaded cpt parents: diff changeset	39 parser.add_argument('--match_identity_threshold', type=float, default=0.7,
fc603e665d75 Uploaded cpt parents: diff changeset	40 help='Two genes must have at least this identity to be considerd the same (0.0 to 1.0)')
fc603e665d75 Uploaded cpt parents: diff changeset	41 parser.add_argument('--allowed_skipped_genes', type=int, default=10,
fc603e665d75 Uploaded cpt parents: diff changeset	42 help='This many missing genes are allowed when aligning the annotations')
fc603e665d75 Uploaded cpt parents: diff changeset	43 parser.add_argument("--addNotes", action="store_true", help="Add Note fields")
fc603e665d75 Uploaded cpt parents: diff changeset	44
fc603e665d75 Uploaded cpt parents: diff changeset	45 parser.add_argument(
fc603e665d75 Uploaded cpt parents: diff changeset	46 "-sumOut", type=argparse.FileType("w"), help="Summary out file"
fc603e665d75 Uploaded cpt parents: diff changeset	47 )
fc603e665d75 Uploaded cpt parents: diff changeset	48 args = parser.parse_args()
fc603e665d75 Uploaded cpt parents: diff changeset	49 return args
fc603e665d75 Uploaded cpt parents: diff changeset	50
fc603e665d75 Uploaded cpt parents: diff changeset	51
fc603e665d75 Uploaded cpt parents: diff changeset	52 def main():
fc603e665d75 Uploaded cpt parents: diff changeset	53 args = get_arguments()
fc603e665d75 Uploaded cpt parents: diff changeset	54
fc603e665d75 Uploaded cpt parents: diff changeset	55 # Load in the CDS features from the two assemblies.
fc603e665d75 Uploaded cpt parents: diff changeset	56 old = SeqIO.parse(args.annotation_1, 'genbank')
fc603e665d75 Uploaded cpt parents: diff changeset	57 new = SeqIO.parse(args.annotation_2, 'genbank')
fc603e665d75 Uploaded cpt parents: diff changeset	58 old_record = next(old)
fc603e665d75 Uploaded cpt parents: diff changeset	59 new_record = next(new)
fc603e665d75 Uploaded cpt parents: diff changeset	60 old_features, new_features = [], []
fc603e665d75 Uploaded cpt parents: diff changeset	61 for f in old_record.features:
fc603e665d75 Uploaded cpt parents: diff changeset	62 if f.type == 'CDS':
fc603e665d75 Uploaded cpt parents: diff changeset	63 old_features.append(f)
fc603e665d75 Uploaded cpt parents: diff changeset	64 for f in new_record.features:
fc603e665d75 Uploaded cpt parents: diff changeset	65 if f.type == 'CDS':
fc603e665d75 Uploaded cpt parents: diff changeset	66 new_features.append(f)
fc603e665d75 Uploaded cpt parents: diff changeset	67
fc603e665d75 Uploaded cpt parents: diff changeset	68 args.sumOut.write('Features in First Genbank\'s assembly:\t' + str(len(old_features)) + "\n")
fc603e665d75 Uploaded cpt parents: diff changeset	69 args.sumOut.write('Features in Second Genbank\'s assembly:\t' + str(len(new_features)) + "\n\n")
fc603e665d75 Uploaded cpt parents: diff changeset	70
fc603e665d75 Uploaded cpt parents: diff changeset	71 # Align the features to each other.
fc603e665d75 Uploaded cpt parents: diff changeset	72 offsets = sorted(list(itertools.product(range(args.allowed_skipped_genes + 1),
fc603e665d75 Uploaded cpt parents: diff changeset	73 range(args.allowed_skipped_genes + 1))),
fc603e665d75 Uploaded cpt parents: diff changeset	74 key=lambda x: x[0]+x[1])
fc603e665d75 Uploaded cpt parents: diff changeset	75 old_i, new_i = 0, 0
fc603e665d75 Uploaded cpt parents: diff changeset	76 exactRec = 0
fc603e665d75 Uploaded cpt parents: diff changeset	77 inexactRec = [0, 0 ,0]
fc603e665d75 Uploaded cpt parents: diff changeset	78 hypoRec = [0, 0, 0]
fc603e665d75 Uploaded cpt parents: diff changeset	79 newCount = 0
fc603e665d75 Uploaded cpt parents: diff changeset	80 oldCount = 0
fc603e665d75 Uploaded cpt parents: diff changeset	81 if args.addNotes:
fc603e665d75 Uploaded cpt parents: diff changeset	82 print("First Record CDS Product\tSimilarity\tSecond Record CDS Product\tPercent Identity\tLength Difference\tFirst Gbk's CDS Location\tSecond Gbk's CDS Location\tHypothetical Status\tFirst Record's Notes\tSecond Record's Notes\n")
fc603e665d75 Uploaded cpt parents: diff changeset	83 else:
fc603e665d75 Uploaded cpt parents: diff changeset	84 print("First Record CDS Product\tSimilarity\tSecond Record CDS Product\tPercent Identity\tLength Difference\tFirst Gbk's CDS Location\tSecond Gbk's CDS Location\tHypothetical Status\n")
fc603e665d75 Uploaded cpt parents: diff changeset	85 while True:
fc603e665d75 Uploaded cpt parents: diff changeset	86 if old_i >= len(old_features) and new_i >= len(new_features):
fc603e665d75 Uploaded cpt parents: diff changeset	87 break
fc603e665d75 Uploaded cpt parents: diff changeset	88
fc603e665d75 Uploaded cpt parents: diff changeset	89 for old_offset, new_offset in offsets:
fc603e665d75 Uploaded cpt parents: diff changeset	90 try:
fc603e665d75 Uploaded cpt parents: diff changeset	91 old_feature = old_features[old_i + old_offset]
fc603e665d75 Uploaded cpt parents: diff changeset	92 except IndexError:
fc603e665d75 Uploaded cpt parents: diff changeset	93 old_feature = None
fc603e665d75 Uploaded cpt parents: diff changeset	94 try:
fc603e665d75 Uploaded cpt parents: diff changeset	95 new_feature = new_features[new_i + new_offset]
fc603e665d75 Uploaded cpt parents: diff changeset	96 except IndexError:
fc603e665d75 Uploaded cpt parents: diff changeset	97 new_feature = None
fc603e665d75 Uploaded cpt parents: diff changeset	98 try:
fc603e665d75 Uploaded cpt parents: diff changeset	99 match, identity, length_diff = compare_features(old_feature, new_feature, old_record, new_record, args.match_identity_threshold)
fc603e665d75 Uploaded cpt parents: diff changeset	100 except TypeError:
fc603e665d75 Uploaded cpt parents: diff changeset	101 break
fc603e665d75 Uploaded cpt parents: diff changeset	102 if match:
fc603e665d75 Uploaded cpt parents: diff changeset	103 for j in range(old_offset):
fc603e665d75 Uploaded cpt parents: diff changeset	104 print_in_old_not_new(old_features[old_i + j])
fc603e665d75 Uploaded cpt parents: diff changeset	105 oldCount += 1
fc603e665d75 Uploaded cpt parents: diff changeset	106 for j in range(new_offset):
fc603e665d75 Uploaded cpt parents: diff changeset	107 print_in_new_not_old(new_features[new_i + j])
fc603e665d75 Uploaded cpt parents: diff changeset	108 newCount += 1
fc603e665d75 Uploaded cpt parents: diff changeset	109 if identity == 1.0:
fc603e665d75 Uploaded cpt parents: diff changeset	110 exactRec += 1
fc603e665d75 Uploaded cpt parents: diff changeset	111 res1, res2 = print_match(old_features[old_i + old_offset], new_features[new_i + new_offset], identity, length_diff, args.addNotes)
fc603e665d75 Uploaded cpt parents: diff changeset	112 inexactRec = addArr(inexactRec, res1)
fc603e665d75 Uploaded cpt parents: diff changeset	113 hypoRec = addArr(hypoRec, res2)
fc603e665d75 Uploaded cpt parents: diff changeset	114 old_i += old_offset
fc603e665d75 Uploaded cpt parents: diff changeset	115 new_i += new_offset
fc603e665d75 Uploaded cpt parents: diff changeset	116 break
fc603e665d75 Uploaded cpt parents: diff changeset	117 else:
fc603e665d75 Uploaded cpt parents: diff changeset	118 sys.stderr.write("Exceeded allowed number of skipped genes (" + str(args.allowed_skipped_genes) + "), unable to maintain alignment and continue comparison.\n")
fc603e665d75 Uploaded cpt parents: diff changeset	119 exit(2)
fc603e665d75 Uploaded cpt parents: diff changeset	120
fc603e665d75 Uploaded cpt parents: diff changeset	121 if old_feature is None and new_feature is None:
fc603e665d75 Uploaded cpt parents: diff changeset	122 break
fc603e665d75 Uploaded cpt parents: diff changeset	123
fc603e665d75 Uploaded cpt parents: diff changeset	124 old_i += 1
fc603e665d75 Uploaded cpt parents: diff changeset	125 new_i += 1
fc603e665d75 Uploaded cpt parents: diff changeset	126
fc603e665d75 Uploaded cpt parents: diff changeset	127 args.sumOut.write('Exact Match:\t' + str(exactRec) + "\n\n")
fc603e665d75 Uploaded cpt parents: diff changeset	128
fc603e665d75 Uploaded cpt parents: diff changeset	129 args.sumOut.write('Inexact Match:\t' + str(inexactRec[0] + inexactRec[1] + inexactRec[2]) + "\n")
fc603e665d75 Uploaded cpt parents: diff changeset	130 args.sumOut.write(' Same length:\t' + str(inexactRec[0]) + "\n")
fc603e665d75 Uploaded cpt parents: diff changeset	131 args.sumOut.write(' Second Gbk Seq longer:\t' + str(inexactRec[2]) + "\n")
fc603e665d75 Uploaded cpt parents: diff changeset	132 args.sumOut.write(' First Gbk Seq longer:\t' + str(inexactRec[1]) + "\n\n")
fc603e665d75 Uploaded cpt parents: diff changeset	133
fc603e665d75 Uploaded cpt parents: diff changeset	134 args.sumOut.write('In Second Gbk but not in first:\t' + str(newCount) + "\n")
fc603e665d75 Uploaded cpt parents: diff changeset	135 args.sumOut.write('In First Gbk but not in second:\t' + str(oldCount) + "\n\n")
fc603e665d75 Uploaded cpt parents: diff changeset	136
fc603e665d75 Uploaded cpt parents: diff changeset	137 args.sumOut.write('Hypothetical Annotation Change:\t' + str(hypoRec[1] + hypoRec[2]) + "\n")
fc603e665d75 Uploaded cpt parents: diff changeset	138 args.sumOut.write('Hypothetical:\t' + str(hypoRec[0] + hypoRec[2]) + "\n")
fc603e665d75 Uploaded cpt parents: diff changeset	139
fc603e665d75 Uploaded cpt parents: diff changeset	140
fc603e665d75 Uploaded cpt parents: diff changeset	141 def print_match(f1, f2, identity, length_diff, outNotes):
fc603e665d75 Uploaded cpt parents: diff changeset	142 #print('', flush=True)
fc603e665d75 Uploaded cpt parents: diff changeset	143 line = f1.qualifiers['product'][0] + "\t"
fc603e665d75 Uploaded cpt parents: diff changeset	144 matchArr = [0, 0, 0]
fc603e665d75 Uploaded cpt parents: diff changeset	145 hypoArr = [0, 0, 0]
fc603e665d75 Uploaded cpt parents: diff changeset	146 if identity == 1.0:
fc603e665d75 Uploaded cpt parents: diff changeset	147 # print('Exact match')
fc603e665d75 Uploaded cpt parents: diff changeset	148 line += 'Exact match\t' + f2.qualifiers['product'][0] + "\t100.0\tSame Length\t"
fc603e665d75 Uploaded cpt parents: diff changeset	149 else:
fc603e665d75 Uploaded cpt parents: diff changeset	150 # print('Inexact match (' + '%.2f' % (identity * 100.0) + '% ID, ', end='')
fc603e665d75 Uploaded cpt parents: diff changeset	151 line += 'Inexact match\t' + f2.qualifiers['product'][0] + "\t%.2f\t" % (identity * 100.0)
fc603e665d75 Uploaded cpt parents: diff changeset	152 if length_diff == 0:
fc603e665d75 Uploaded cpt parents: diff changeset	153 # print('same length)')
fc603e665d75 Uploaded cpt parents: diff changeset	154 line +="Same Length\t"
fc603e665d75 Uploaded cpt parents: diff changeset	155 matchArr[0] += 1
fc603e665d75 Uploaded cpt parents: diff changeset	156 elif length_diff > 0:
fc603e665d75 Uploaded cpt parents: diff changeset	157 # print('old seq longer)')
fc603e665d75 Uploaded cpt parents: diff changeset	158 line +="First Gbk Seq Longer\t"
fc603e665d75 Uploaded cpt parents: diff changeset	159 matchArr[1] += 1
fc603e665d75 Uploaded cpt parents: diff changeset	160 elif length_diff < 0:
fc603e665d75 Uploaded cpt parents: diff changeset	161 # print('new seq longer)')
fc603e665d75 Uploaded cpt parents: diff changeset	162 line +="Second Gbk Seq Longer\t"
fc603e665d75 Uploaded cpt parents: diff changeset	163 matchArr[2] += 1
fc603e665d75 Uploaded cpt parents: diff changeset	164 # print(' old: ', end='')
fc603e665d75 Uploaded cpt parents: diff changeset	165 # print_feature_one_line(f1)
fc603e665d75 Uploaded cpt parents: diff changeset	166 line += print_feature_one_line(f1) + "\t"
fc603e665d75 Uploaded cpt parents: diff changeset	167 # print(' new: ', end='')
fc603e665d75 Uploaded cpt parents: diff changeset	168 # print_feature_one_line(f2)
fc603e665d75 Uploaded cpt parents: diff changeset	169 line += print_feature_one_line(f2) + "\t"
fc603e665d75 Uploaded cpt parents: diff changeset	170 p1 = f1.qualifiers['product'][0].lower()
fc603e665d75 Uploaded cpt parents: diff changeset	171 p2 = f2.qualifiers['product'][0].lower()
fc603e665d75 Uploaded cpt parents: diff changeset	172 if 'hypothetical' in p1 and 'hypothetical' in p2:
fc603e665d75 Uploaded cpt parents: diff changeset	173 # print(' still hypothetical')
fc603e665d75 Uploaded cpt parents: diff changeset	174 line += "Hypothetical\t"
fc603e665d75 Uploaded cpt parents: diff changeset	175 hypoArr[0] += 1
fc603e665d75 Uploaded cpt parents: diff changeset	176 elif 'hypothetical' in p1 and 'hypothetical' not in p2:
fc603e665d75 Uploaded cpt parents: diff changeset	177 # print(' no longer hypothetical')
fc603e665d75 Uploaded cpt parents: diff changeset	178 line += "No Longer Hypothetical\t"
fc603e665d75 Uploaded cpt parents: diff changeset	179 hypoArr[1] += 1
fc603e665d75 Uploaded cpt parents: diff changeset	180 elif 'hypothetical' not in p1 and 'hypothetical' in p2:
fc603e665d75 Uploaded cpt parents: diff changeset	181 # print(' became hypothetical')
fc603e665d75 Uploaded cpt parents: diff changeset	182 line += "Became Hypothetical\t"
fc603e665d75 Uploaded cpt parents: diff changeset	183 hypoArr[2] += 1
fc603e665d75 Uploaded cpt parents: diff changeset	184 else:
fc603e665d75 Uploaded cpt parents: diff changeset	185 line += "'Hypothetical' not in second nor first Gbk's product tag"
fc603e665d75 Uploaded cpt parents: diff changeset	186
fc603e665d75 Uploaded cpt parents: diff changeset	187 if outNotes:
fc603e665d75 Uploaded cpt parents: diff changeset	188 line += "\t"
fc603e665d75 Uploaded cpt parents: diff changeset	189 if "note" in f1.qualifiers.keys():
fc603e665d75 Uploaded cpt parents: diff changeset	190 for x in f1.qualifiers["note"]:
fc603e665d75 Uploaded cpt parents: diff changeset	191 line += x
fc603e665d75 Uploaded cpt parents: diff changeset	192 line += "\t"
fc603e665d75 Uploaded cpt parents: diff changeset	193 else:
fc603e665d75 Uploaded cpt parents: diff changeset	194 line += "N/A\t"
fc603e665d75 Uploaded cpt parents: diff changeset	195 if "note" in f2.qualifiers.keys():
fc603e665d75 Uploaded cpt parents: diff changeset	196 for x in f2.qualifiers["note"]:
fc603e665d75 Uploaded cpt parents: diff changeset	197 line += x
fc603e665d75 Uploaded cpt parents: diff changeset	198 else:
fc603e665d75 Uploaded cpt parents: diff changeset	199 line += "N/A"
fc603e665d75 Uploaded cpt parents: diff changeset	200
fc603e665d75 Uploaded cpt parents: diff changeset	201 print(line)
fc603e665d75 Uploaded cpt parents: diff changeset	202 return matchArr, hypoArr
fc603e665d75 Uploaded cpt parents: diff changeset	203
fc603e665d75 Uploaded cpt parents: diff changeset	204 def print_in_old_not_new(f): # rename file outputs
fc603e665d75 Uploaded cpt parents: diff changeset	205 line = f.qualifiers['product'][0] + "\tIn First Gbk but not Second\tN/A\t0.00\t" + str(f.location.end - f.location.start) + "\t" + print_feature_one_line(f) + "\tN/A\tN/A"
fc603e665d75 Uploaded cpt parents: diff changeset	206 # print('')
fc603e665d75 Uploaded cpt parents: diff changeset	207 # print('In old but not in new:')
fc603e665d75 Uploaded cpt parents: diff changeset	208 # print(' ', end='')
fc603e665d75 Uploaded cpt parents: diff changeset	209 # print_feature_one_line(f)
fc603e665d75 Uploaded cpt parents: diff changeset	210 print(line)
fc603e665d75 Uploaded cpt parents: diff changeset	211
fc603e665d75 Uploaded cpt parents: diff changeset	212
fc603e665d75 Uploaded cpt parents: diff changeset	213 def print_in_new_not_old(f): # rename file outputs
fc603e665d75 Uploaded cpt parents: diff changeset	214 line = "N/A\tIn Second Gbk but not First\t" + f.qualifiers['product'][0] + "\t0.00\t" + str(f.location.end - f.location.start) + "\tN/A\t" + print_feature_one_line(f) + "\tN/A"
fc603e665d75 Uploaded cpt parents: diff changeset	215 #print('')
fc603e665d75 Uploaded cpt parents: diff changeset	216 #print('In new but not in old:')
fc603e665d75 Uploaded cpt parents: diff changeset	217 #print(' ', end='')
fc603e665d75 Uploaded cpt parents: diff changeset	218 #print_feature_one_line(f)
fc603e665d75 Uploaded cpt parents: diff changeset	219 print(line)
fc603e665d75 Uploaded cpt parents: diff changeset	220
fc603e665d75 Uploaded cpt parents: diff changeset	221
fc603e665d75 Uploaded cpt parents: diff changeset	222 def print_feature_one_line(f):
fc603e665d75 Uploaded cpt parents: diff changeset	223 #f_str = f.qualifiers['product'][0]
fc603e665d75 Uploaded cpt parents: diff changeset	224 f_str = ""
fc603e665d75 Uploaded cpt parents: diff changeset	225 strand = '+' if f.location.strand == 1 else '-'
fc603e665d75 Uploaded cpt parents: diff changeset	226 f_str += '(' + str(f.location.start) + '-' + str(f.location.end) + ' ' + strand + ', '
fc603e665d75 Uploaded cpt parents: diff changeset	227 f_str += str(f.location.end - f.location.start) + ' bp)'
fc603e665d75 Uploaded cpt parents: diff changeset	228 return(f_str)
fc603e665d75 Uploaded cpt parents: diff changeset	229
fc603e665d75 Uploaded cpt parents: diff changeset	230
fc603e665d75 Uploaded cpt parents: diff changeset	231 def compare_features(f1, f2, r1, r2, match_identity_threshold):
fc603e665d75 Uploaded cpt parents: diff changeset	232 if f1 is None or f2 is None:
fc603e665d75 Uploaded cpt parents: diff changeset	233 return False
fc603e665d75 Uploaded cpt parents: diff changeset	234
fc603e665d75 Uploaded cpt parents: diff changeset	235 s1 = f1.extract(r1).seq
fc603e665d75 Uploaded cpt parents: diff changeset	236 s2 = f2.extract(r2).seq
fc603e665d75 Uploaded cpt parents: diff changeset	237 score = pairwise2.align.globalms(s1, s2, 1, 0, 0, 0, score_only=True)
fc603e665d75 Uploaded cpt parents: diff changeset	238 identity = score / max(len(s1), len(s2))
fc603e665d75 Uploaded cpt parents: diff changeset	239 match = identity >= match_identity_threshold
fc603e665d75 Uploaded cpt parents: diff changeset	240 length_diff = len(s1) - len(s2)
fc603e665d75 Uploaded cpt parents: diff changeset	241 return match, identity, length_diff
fc603e665d75 Uploaded cpt parents: diff changeset	242
fc603e665d75 Uploaded cpt parents: diff changeset	243
fc603e665d75 Uploaded cpt parents: diff changeset	244 if __name__ == '__main__':
fc603e665d75 Uploaded cpt parents: diff changeset	245 main()

Mercurial > repos > cpt > cpt_compare_gbk

annotate cpt_gbk_compare/gbk_compare.py @ 0:fc603e665d75 draft default tip