59 lines
1.8 KiB
Python
59 lines
1.8 KiB
Python
import argparse
|
|
parser = argparse.ArgumentParser(description="Compare two documents line by line")
|
|
parser.add_argument('a', help="reference document")
|
|
parser.add_argument('b', help="target document")
|
|
args = parser.parse_args()
|
|
|
|
fa = open(args.a,'r')
|
|
la = fa.read().split('\n')
|
|
fb = open(args.b,'r')
|
|
lb = fb.read().split('\n')
|
|
|
|
# Solve the Longest Common Subsequence (LCS) subproblem with dynamic programming
|
|
dp = [[0] * (len(lb)+1) for _ in range(len(la)+1)]
|
|
# dp[i+1][j+1] stores the LCS length between la[i] and lb[j]
|
|
for i in range(len(la)):
|
|
for j in range(len(lb)):
|
|
if la[i]==lb[j]:
|
|
dp[i+1][j+1] = dp[i][j] + 1
|
|
else:
|
|
dp[i+1][j+1] = max(dp[i][j+1], dp[i+1][j])
|
|
|
|
# Backtrack to find one scheme to reduce both a and b to their LCS
|
|
diffs = []
|
|
i,j = len(la)-1, len(lb)-1
|
|
while i>=0 or j>=0: # current position (i+1, j+1), try to move to (0, 0)
|
|
oi, oj = i, j
|
|
while j >= 0 and dp[i+1][j+1]==dp[i+1][j]: # can safely delete lb[j]
|
|
j -= 1
|
|
while i >= 0 and dp[i+1][j+1]==dp[i][j+1]: # can safely delete la[i]
|
|
i -= 1
|
|
if i==oi and j==oj:
|
|
assert dp[i+1][j+1]==dp[i][j]+1 and la[i]==lb[j]
|
|
i,j = i-1,j-1
|
|
else:
|
|
diffs.append((oi,i,oj,j))
|
|
diffs.reverse()
|
|
|
|
def describe(oi,i,oj,j):
|
|
# by diff convension line numbering starts from 1
|
|
oi,i,oj,j = oi+1,i+1,oj+1,j+1
|
|
def intv(x,y): # simplify expression if interval is one line
|
|
return str(x) if x==y else f"{x},{y}"
|
|
if i==oi:
|
|
print(f"{i}a" + intv(j+1,oj))
|
|
elif j==oj:
|
|
print(intv(i+1,oi) + f"d{j}")
|
|
else:
|
|
print(intv(i+1,oi) + "c" + intv(j+1,oj))
|
|
|
|
for oi,i,oj,j in diffs:
|
|
# delete (i,oi], add (j,oj]
|
|
describe(oi,i,oj,j)
|
|
for p in range(i+1,oi+1):
|
|
print("< " + la[p])
|
|
if oi>i and oj>j:
|
|
print("---")
|
|
for p in range(j+1,oj+1):
|
|
print("> " + lb[p])
|