[85ab89]: / Allura / allura / lib / diff.py  Maximize  Restore  History

Download this file

221 lines (181 with data), 8.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
import html
import contextlib
import logging
from collections.abc import Iterable, Generator
import sxsdiff
from diff_match_patch import diff_match_patch
import six
from markupsafe import Markup
from sxsdiff.calculator import LineChange, ElementsHolder, PlainElement, AdditionElement, DeletionElement
# Module-level logger named after this module.
log = logging.getLogger(__name__)
# There are a few cases where we get empty trailing '' elements, possibly due to a bug within sxsdiff.calculator.
# We work around them by comparing against these constants in a few places below
# (is_single_chg and sxsdiff_cleanup_trailing).
emptyUnchangedElem = PlainElement('')  # unchanged element with empty text
spaceUnchangedElem = PlainElement(' ')  # unchanged element holding a single space
emptyAddElem = AdditionElement('')  # addition element with empty text
emptyDelElem = DeletionElement('')  # deletion element with empty text
def is_single_chg(chg_parts: ElementsHolder) -> bool:
    """Tell whether one side of a line change consists of a single piece.

    A side counts as "single" when it holds at most one element, or when it
    holds exactly two elements and the last one is just an empty or
    single-space unchanged element (a known artifact that is ignored).
    """
    count = len(chg_parts)
    if count <= 1:
        return True
    if count != 2:
        return False
    # Exactly two elements: only "single" if the trailing one is an ignorable filler.
    return chg_parts.elements[-1] in [emptyUnchangedElem, spaceUnchangedElem]
class SxsOutputGenerator(sxsdiff.BaseGenerator):
    """Render a sequence of sxsdiff line changes as an HTML side-by-side table.

    Based on sxsdiff.generators.github.GitHubStyledGenerator.

    The generated markup is returned wrapped in ``Markup`` (i.e. flagged as
    safe), so every piece of caller-supplied text written into it is escaped
    here: the code on each side and the two column descriptions.
    """

    table_tmpl_start = '''
<table class="side-by-side-diff">
<thead>
<th class="lineno"></th>
<th>%s</th>
<th class="lineno"></th>
<th>%s</th>
</thead>
'''.strip()
    table_tmpl_end = '</table>'

    def __init__(self, adesc: str, bdesc: str):
        """Store the header descriptions for the left (a) and right (b) columns."""
        self.adesc = adesc
        self.bdesc = bdesc

    def _spit(self, content):
        """Append one line of markup to the output buffer."""
        self.out += content + '\n'

    def run(self, diff_result: Iterable[LineChange | None]):
        """Render ``diff_result`` and return the table as ``Markup``.

        ``None`` items in ``diff_result`` are rendered as gap ("...") rows.
        """
        self.out = ''
        super().run(diff_result)
        # "safe" because everything user-controlled is escaped before being spat out
        return Markup(self.out)

    def visit_row(self, line_change: LineChange | None):
        """Emit the left and right cells for one row of the diff."""
        if line_change is None:
            # gap between two context chunks: one marker per side
            self._spit_context_marker()
            self._spit_context_marker()
        elif not line_change.changed:
            self._spit_unchanged_side(line_change.left_no, line_change.left)
            self._spit_unchanged_side(line_change.right_no, line_change.right)
        else:
            # When both sides are a single piece, the whole line is styled as
            # removed/added; otherwise the line is "changed" and the individual
            # pieces get their own highlighting.
            whole_line_changed = is_single_chg(line_change.left) and is_single_chg(line_change.right)
            self._spit_changed_side('diff-rem' if whole_line_changed else 'diff-chg',
                                    line_change.left_no, line_change.left)
            self._spit_changed_side('diff-add' if whole_line_changed else 'diff-chg',
                                    line_change.right_no, line_change.right)

    @contextlib.contextmanager
    def wrap_row(self, line_change):
        """Wrap the cells of one row in a ``<tr>`` element."""
        self._spit('<tr>')
        yield
        self._spit('</tr>')

    @contextlib.contextmanager
    def wrap_result(self, sxs_result):
        """Wrap all rows in the table, with the descriptions as column headers."""
        # The descriptions (typically file names) end up inside output that is
        # marked safe, so they must be escaped. Markup.escape leaves values that
        # are already Markup untouched, so pre-escaped descriptions still work.
        self._spit(self.table_tmpl_start % (Markup.escape(self.adesc), Markup.escape(self.bdesc)))
        yield
        self._spit(self.table_tmpl_end)

    def _spit_context_marker(self):
        """Emit one side of a gap row ("...") standing in for skipped lines."""
        context = {
            'mode': 'diff-gap',
            'lineno': '',
            'code': '...',
        }
        self._spit_side_from_context(context)

    def _spit_unchanged_side(self, lineno, holder):
        """Emit one side of an unchanged line, with its text escaped."""
        context = {
            'mode': 'context',
            'lineno': lineno,
            'code': html.escape(str(holder)),
        }
        self._spit_side_from_context(context)

    def _spit_changed_side(self, mode, lineno, holder):
        """Emit one side of a changed line.

        ``mode`` is the CSS class for the whole cell. When the side is made of
        several pieces, each changed piece is additionally wrapped in a span
        marking it as added or removed.
        """
        if not holder:
            # nothing on this side: emit an empty cell pair
            self._spit_side_from_context({'lineno': '', 'code': '', 'mode': ''})
            return
        # Same answer for every element of this holder, so compute it once.
        highlight_pieces = not is_single_chg(holder)
        bits = []
        for elem in holder.elements:
            piece = html.escape(str(elem))
            if elem.is_changed and highlight_pieces:
                if elem.flag == diff_match_patch.DIFF_INSERT:
                    clss = 'diff-add'
                elif elem.flag == diff_match_patch.DIFF_DELETE:
                    clss = 'diff-rem'
                else:
                    clss = ''
                bits.append(f'<span class="{clss}">{piece}</span>')
            else:
                bits.append(piece)
        code = ''.join(bits)
        context = {
            'mode': mode,
            'lineno': lineno,
            'code': code,
        }
        self._spit_side_from_context(context)

    def _spit_side_from_context(self, context):
        """Emit the line-number cell and the code cell for one side.

        ``context`` needs the keys ``lineno``, ``mode`` and ``code``; ``code``
        must already be escaped. No class attribute is written for the
        ``'context'`` and ``''`` modes.
        """
        self._spit(f' <td class="lineno">{context["lineno"]}</td>')
        mode = context['mode']
        clss = (' class="%s"' % mode) if mode not in ['context', ''] else ''
        self._spit(f' <td{clss}><pre>{context["code"]}</pre></td>')
def sxsdiff_cleanup_trailing(input_lines: Iterable[LineChange]) -> Iterable[LineChange]:
    """Return the line changes as a list, minus a spurious empty final line.

    The last line is dropped when it is one of the known empty artifacts:
    empty unchanged text on both sides, nothing on the left with an empty
    addition on the right, or an empty deletion on the left with nothing on
    the right.
    """
    result = list(input_lines)
    if not result:
        return []

    # (left elements, right elements) combinations that mark a bogus trailing line
    bogus_tails = (
        ([emptyUnchangedElem], [emptyUnchangedElem]),
        ([], [emptyAddElem]),
        ([emptyDelElem], []),
    )
    tail = result[-1]
    is_bogus = any(
        tail.left.elements == left_elems and tail.right.elements == right_elems
        for left_elems, right_elems in bogus_tails
    )
    if is_bogus:
        result.pop()
    return result
def update_with_context_chunks(input_lines: Iterable[LineChange], num_context: int) -> Iterable[LineChange | None]:
    """
    Remove identical lines, except several around any changed lines

    Every changed line is kept, together with up to ``num_context`` lines
    before and after it. Each run of dropped lines that is followed by a kept
    line is replaced by a single ``None`` marker; dropped lines at the very
    end leave no marker.

    :param input_lines: line changes; only their ``changed`` attribute is read
    :param num_context: number of lines to keep on each side of a changed line
    :return: list of the kept lines, with ``None`` marking each gap
    """
    lines = list(input_lines)
    total = len(lines)
    shown = [False] * total
    for idx, line in enumerate(lines):
        if not line.changed:
            continue
        # Clamp the context window to the list. A negative index would not
        # raise IndexError: it would wrap around and wrongly mark lines at the
        # END of the input as context for a change near the start.
        first = max(0, idx - num_context)
        stop = min(total, idx + num_context + 1)
        for context_idx in range(first, stop):
            shown[context_idx] = True

    output_lines: list[LineChange | None] = []
    prev_was_shown = True
    for line, show in zip(lines, shown):
        if show:
            if not prev_was_shown:
                # first kept line after a gap: mark the gap
                output_lines.append(None)
            output_lines.append(line)
        prev_was_shown = show
    return output_lines
class HtmlSideBySideDiff:
    """Build an HTML side-by-side diff table from two lists of text lines."""

    def make_table(self, a: list[str], b: list[str], adesc=None, bdesc=None, context=5, tabsize=4) -> str:
        """Make html table that displays side-by-side diff

        Arguments:
         - a -- list of newline-terminated text lines, to be compared to b
         - b -- list of newline-terminated text lines, to be compared to a
         - adesc -- description of the 'a' lines (e.g. filename); text, bytes or None
         - bdesc -- description of the 'b' lines (e.g. filename); text, bytes or None
         - context -- number of context lines to display
         - tabsize -- tab stop width used when expanding tabs before diffing

        Uses sxsdiff/diff_match_patch which has much better performance than stdlib htmldiff/_mdiff
        https://github.com/python/cpython/issues/51180
        """
        # six.ensure_text raises TypeError for None, which is the default for
        # both descriptions, so None has to be handled before converting.
        adesc = six.ensure_text(adesc) if adesc is not None else ''
        bdesc = six.ensure_text(bdesc) if bdesc is not None else ''
        atext = ''.join(a).expandtabs(tabsize)
        btext = ''.join(b).expandtabs(tabsize)
        sxsdiff_result = sxsdiff.DiffCalculator().run(atext, btext)
        sxsdiff_cleaned = sxsdiff_cleanup_trailing(sxsdiff_result)
        sxsdiff_with_contexts = update_with_context_chunks(sxsdiff_cleaned, context)
        return SxsOutputGenerator(adesc, bdesc).run(sxsdiff_with_contexts)