add initial files
This commit is contained in:
parent
89ae608cf8
commit
a6541962e5
11
README.md
11
README.md
@ -1,3 +1,10 @@
|
||||
# lcs-diff
|
||||
# Toy LCS-based Diff program
|
||||
|
||||
Toy Longest-Common-Subsequence (LCS) based diff and patch program. The edit script it produces minimizes the total number of line additions and line deletions.
|
||||
Toy Longest-Common-Subsequence (LCS) based diff and patch program. The edit script it produces minimizes the total number of line additions and line deletions.
|
||||
|
||||
Example usage:
|
||||
```bash
|
||||
python diff.py a1 b1 > patch1
|
||||
python patch.py a1 patch1 > c1 # patch is not in-place
|
||||
python diff.py b1 c1 # verify that b1 and c1 have the same content (prints nothing if so)
|
||||
```
|
||||
|
58
diff.py
Normal file
58
diff.py
Normal file
@ -0,0 +1,58 @@
|
||||
import argparse
|
||||
# Command-line interface: two positional paths, the reference document and the
# target document.
parser = argparse.ArgumentParser(description="Compare two documents line by line")
parser.add_argument('a', help="reference document")
parser.add_argument('b', help="target document")
args = parser.parse_args()

# Read both documents as lists of lines. Splitting on '\n' keeps a trailing
# empty string when a file ends with a newline, so a missing final newline
# shows up as a difference. The context managers close each handle as soon as
# its content has been read instead of leaving it open for the whole run.
with open(args.a, 'r') as fa:
    la = fa.read().split('\n')
with open(args.b, 'r') as fb:
    lb = fb.read().split('\n')
|
||||
|
||||
# Longest Common Subsequence (LCS) table, filled with dynamic programming.
# dp[r][c] holds the LCS length of the first r lines of la and the first c
# lines of lb; row 0 and column 0 stay 0 because an empty prefix shares
# nothing with anything.
dp = [[0 for _ in range(len(lb) + 1)] for _ in range(len(la) + 1)]
for i, line_a in enumerate(la):
    above = dp[i]        # row for the prefix without la[i]
    current = dp[i + 1]  # row being filled, for the prefix ending at la[i]
    for j, line_b in enumerate(lb):
        if line_a == line_b:
            # Matching lines extend the LCS of the two shorter prefixes.
            current[j + 1] = above[j] + 1
        else:
            # Otherwise the best result drops one line from either side.
            current[j + 1] = max(above[j + 1], current[j])
|
||||
|
||||
# Walk the LCS table back from its bottom-right corner to (0, 0), recording
# one hunk for every maximal stretch of lines that is not part of the LCS.
# Each hunk is stored as (oi, i, oj, j): lines la[i+1..oi] are deleted and
# lines lb[j+1..oj] are added (0-based, both ranges possibly empty).
diffs = []
row, col = len(la) - 1, len(lb) - 1
while not (row < 0 and col < 0):  # table cell (row+1, col+1) is the current position
    start_row, start_col = row, col
    # Dropping lb[col] does not shorten the LCS: it is an added line.
    while col >= 0 and dp[row + 1][col + 1] == dp[row + 1][col]:
        col -= 1
    # Dropping la[row] does not shorten the LCS: it is a deleted line.
    while row >= 0 and dp[row + 1][col + 1] == dp[row][col + 1]:
        row -= 1
    if (row, col) != (start_row, start_col):
        diffs.append((start_row, row, start_col, col))
        continue
    # Nothing could be dropped, so the two current lines must be a common
    # line of the LCS; step diagonally past it.
    assert dp[row + 1][col + 1] == dp[row][col] + 1 and la[row] == lb[col]
    row -= 1
    col -= 1
# Hunks were collected from the end of the documents; emit them top-down.
diffs.reverse()
|
||||
|
||||
def describe(oi, i, oj, j):
    """Print the header line of one hunk in normal diff notation.

    The arguments are 0-based: the hunk deletes lines (i, oi] of the
    reference document and adds lines (j, oj] of the target document.
    The printed header uses the diff convention of 1-based line numbers
    and is one of "<line>a<range>", "<range>d<line>" or "<range>c<range>".
    """
    def span(first, last):
        # A one-line range is written as a single number.
        return str(first) if first == last else f"{first},{last}"

    # Convert to 1-based numbering: the first affected line of each side is
    # index + 2, the last one is the old upper bound + 1.
    deleted = span(i + 2, oi + 1)
    added = span(j + 2, oj + 1)

    if i == oi:
        # Nothing deleted: pure addition after reference line i + 1.
        header = f"{i + 1}a" + added
    elif j == oj:
        # Nothing added: pure deletion, positioned after target line j + 1.
        header = deleted + f"d{j + 1}"
    else:
        header = deleted + "c" + added
    print(header)
|
||||
|
||||
# Emit every hunk: its header, the deleted reference lines prefixed with
# "< ", and the added target lines prefixed with "> ". A "---" separator is
# printed only when the hunk both deletes and adds lines.
for hunk in diffs:
    old_hi, old_lo, new_hi, new_lo = hunk
    describe(old_hi, old_lo, new_hi, new_lo)
    removed = la[old_lo + 1:old_hi + 1]  # la lines in (old_lo, old_hi]
    added = lb[new_lo + 1:new_hi + 1]    # lb lines in (new_lo, new_hi]
    for text in removed:
        print("< " + text)
    if removed and added:
        print("---")
    for text in added:
        print("> " + text)
|
65
patch.py
Normal file
65
patch.py
Normal file
@ -0,0 +1,65 @@
|
||||
import argparse
|
||||
# Command-line interface: the document to patch and the patch file to apply.
parser = argparse.ArgumentParser(description="Prints to stdout the patched document")
parser.add_argument('original', help='original document')
parser.add_argument('patch', help='patch file')
args = parser.parse_args()

# Read the original document; the context manager closes the handle right
# after reading instead of leaving it open for the rest of the run.
with open(args.original, 'r') as fa:
    la = fa.read().split('\n')

# Workspace to edit: one list per line of the original document. Each list
# 1) initially contains just the corresponding line, 2) is emptied when that
# line is removed, and 3) is where new content for that position is inserted.
ws = [[text] for text in la]

# Read the patch file as a list of lines.
with open(args.patch, 'r') as fp:
    lp = fp.read().split('\n')
|
||||
|
||||
def str2intv(s):
    """Convert a 1-based line range from a hunk header to a 0-based interval.

    `s` is either a single line number ("7") or an inclusive range
    "first,last" ("3,5"). The result is the pair (first - 1, last), which
    is the matching half-open interval of 0-based indices, ready for
    range().
    """
    # A lone number denotes the one-line range "n,n".
    bounds = s.split(',') if "," in s else (s, s)
    first, last = bounds
    return int(first) - 1, int(last)
|
||||
|
||||
def ws_rm(desc, lp, nb):
    """Remove the lines of a delete/change hunk from the workspace `ws`.

    Args:
        desc: 1-based line number or "first,last" range of the original
            document, i.e. the text before the 'd' or 'c' of a hunk header.
        lp: the patch file as a list of lines.
        nb: index in `lp` of the hunk header; the "< " lines follow it.

    Returns:
        A pair (next_nb, first): `next_nb` is the index in `lp` just past
        the last "< " line of the hunk, `first` is the 0-based workspace
        index of the first removed line.

    Raises:
        ValueError: if the patch is truncated, a removal line does not
            start with "< ", the range lies outside the document, or the
            text to delete does not match the original line. Explicit
            exceptions are used instead of `assert` so that the checks
            still run under `python -O`.
    """
    l, r = str2intv(desc)
    for p in range(l, r):
        nb += 1
        if nb >= len(lp) or lp[nb][:2] != "< ":
            raise ValueError(
                f"Expected a '< ' line at patch line {nb + 1} for hunk '{desc}'")
        expected = lp[nb][2:]
        # An empty slot means the line is already gone, so there is nothing
        # left to compare against.
        if not 0 <= p < len(ws) or not ws[p]:
            raise ValueError(
                f"Patch file wants to delete '{expected}', "
                f"but line {p + 1} does not exist in the original")
        if ws[p][0] != expected:
            # Report the 1-based line number, matching the patch numbering.
            raise ValueError(
                f"Patch file wants to delete '{expected}', "
                f"but originally line {p + 1} is '{ws[p][0]}'")
        ws[p] = []
    return nb + 1, l
|
||||
|
||||
def ws_insert(p, desc, lp, nb):
    """Insert the "> " lines of an add/change hunk into the workspace `ws`.

    Args:
        p: 0-based workspace index after which the new lines belong. The
            value -1 (from a "0a..." header) means "before the first line".
        desc: 1-based line number or "first,last" range in the target
            document (the text after the 'a' or 'c' of a hunk header); only
            its length is used, to know how many "> " lines to read.
        lp: the patch file as a list of lines.
        nb: index in `lp` of the line just before the first "> " line
            (the hunk header for 'a', the "---" separator for 'c').

    Returns:
        The index in `lp` just past the last "> " line of the hunk.

    Raises:
        ValueError: if `p` lies outside the document, the patch is
            truncated, or an insertion line does not start with "> ".
            Explicit exceptions are used instead of `assert` so that the
            checks still run under `python -O`.
    """
    l, r = str2intv(desc)
    if not -1 <= p < len(ws):
        raise ValueError(
            f"Hunk '{desc}' inserts after line {p + 1}, which is outside the original")

    new_lines = []
    for _ in range(l, r):
        nb += 1
        if nb >= len(lp) or lp[nb][:2] != "> ":
            raise ValueError(
                f"Expected a '> ' line at patch line {nb + 1} for hunk '{desc}'")
        new_lines.append(lp[nb][2:])

    if p < 0:
        # "0a...": the lines go before the first original line. Indexing
        # ws[-1] here would wrongly place them after the LAST line, so put
        # them at the front of the first slot instead.
        ws[0][:0] = new_lines
    else:
        ws[p].extend(new_lines)
    return nb + 1
|
||||
|
||||
# Apply the patch hunk by hunk. `nb` is the index of the patch line being
# examined; each helper returns the index of the first line it did not consume.
# Malformed input raises ValueError rather than relying on `assert`, which is
# stripped under `python -O`.
nb = 0
while nb < len(lp):
    line = lp[nb]
    if not line:
        # Blank lines between hunks (e.g. the one produced by a trailing
        # newline in the patch file) carry no information.
        nb += 1
        continue
    if line[0] in '<>':
        raise ValueError(f"Expected description at line {nb}, but got {line}")

    idx_d = line.find('d')
    idx_a = line.find('a')
    idx_c = line.find('c')
    if idx_d != -1:
        # "<range>d<line>": delete only.
        nb, _ = ws_rm(line[:idx_d], lp, nb)
    elif idx_a != -1:
        # "<line>a<range>": add after the given (1-based) original line.
        p = int(line[:idx_a]) - 1
        nb = ws_insert(p, line[idx_a+1:], lp, nb)
    elif idx_c != -1:
        # "<range>c<range>": delete, then insert at the first deleted slot.
        nb, p = ws_rm(line[:idx_c], lp, nb)
        # The removed and added parts of a change hunk are separated by
        # "---"; ws_insert skips that line, so make sure it really is one.
        if nb >= len(lp) or lp[nb] != "---":
            raise ValueError(f"Expected '---' separator at line {nb} for hunk {line}")
        nb = ws_insert(p, line[idx_c+1:], lp, nb)
    else:
        # Reachable with a malformed header, so report it as bad input.
        raise ValueError(f"Cannot parse line {nb}: {line}")

# Flatten the workspace back into a list of lines and print the result.
flatws = [text for slot in ws for text in slot]
print('\n'.join(flatws), end="")  # Don't want to print additional new line
|
Loading…
x
Reference in New Issue
Block a user