import argparse parser = argparse.ArgumentParser(description="Compare two documents line by line") parser.add_argument('a', help="reference document") parser.add_argument('b', help="target document") args = parser.parse_args() fa = open(args.a,'r') la = fa.read().split('\n') fb = open(args.b,'r') lb = fb.read().split('\n') # Solve the Longest Common Subsequence (LCS) subproblem with dynamic programming dp = [[0] * (len(lb)+1) for _ in range(len(la)+1)] # dp[i+1][j+1] stores the LCS length between la[i] and lb[j] for i in range(len(la)): for j in range(len(lb)): if la[i]==lb[j]: dp[i+1][j+1] = dp[i][j] + 1 else: dp[i+1][j+1] = max(dp[i][j+1], dp[i+1][j]) # Backtrack to find one scheme to reduce both a and b to their LCS diffs = [] i,j = len(la)-1, len(lb)-1 while i>=0 or j>=0: # current position (i+1, j+1), try to move to (0, 0) oi, oj = i, j while j >= 0 and dp[i+1][j+1]==dp[i+1][j]: # can safely delete lb[j] j -= 1 while i >= 0 and dp[i+1][j+1]==dp[i][j+1]: # can safely delete la[i] i -= 1 if i==oi and j==oj: assert dp[i+1][j+1]==dp[i][j]+1 and la[i]==lb[j] i,j = i-1,j-1 else: diffs.append((oi,i,oj,j)) diffs.reverse() def describe(oi,i,oj,j): # by diff convension line numbering starts from 1 oi,i,oj,j = oi+1,i+1,oj+1,j+1 def intv(x,y): # simplify expression if interval is one line return str(x) if x==y else f"{x},{y}" if i==oi: print(f"{i}a" + intv(j+1,oj)) elif j==oj: print(intv(i+1,oi) + f"d{j}") else: print(intv(i+1,oi) + "c" + intv(j+1,oj)) for oi,i,oj,j in diffs: # delete (i,oi], add (j,oj] describe(oi,i,oj,j) for p in range(i+1,oi+1): print("< " + la[p]) if oi>i and oj>j: print("---") for p in range(j+1,oj+1): print("> " + lb[p])