# coding: utf-8
# Copyright (c) Jupyter Development Team.
# Distributed under the terms of the Modified BSD License.
from collections import defaultdict
from itertools import chain
import nbformat
import nbdime.log
from .chunks import chunk_typename
from .decisions import MergeDecisionBuilder, push_patch_decision
from ..diff_format import (
DiffOp, ParentDeleted,
op_patch, op_addrange, op_removerange, op_add, op_replace)
from ..patching import patch
from ..prettyprint import merge_render
from ..utils import join_path, resolve_path
# =============================================================================
#
# Utility code follows
#
# =============================================================================
def combine_patches(diffs):
"""Rewrite diffs in canonical form where only one patch
applies to one key and diff entries are sorted by key."""
patches = {}
newdiffs = []
for d in diffs:
if d.op == DiffOp.PATCH:
p = patches.get(d.key)
if p is None:
p = op_patch(d.key, combine_patches(d.diff))
newdiffs.append(p)
patches[d.key] = p
else:
p.diff = combine_patches(p.diff + d.diff)
else:
newdiffs.append(d)
return sorted(newdiffs, key=lambda x: x.key)
def adjust_patch_level(target_path, common_path, diff):
n = len(target_path)
assert common_path[:n] == target_path
if n == len(target_path):
return diff
remainder_path = tuple(reversed(common_path[n:]))
newdiff = []
for d in diff:
nd = d
assert nd is not None
for key in remainder_path:
nd = op_patch(key, nd)
newdiff.append(nd)
return newdiff
def collect_diffs(path, decisions):
local_diff = []
remote_diff = []
for d in decisions:
ld = adjust_patch_level(path, d.common_path, d.local_diff)
rd = adjust_patch_level(path, d.common_path, d.remote_diff)
local_diff.extend(ld)
remote_diff.extend(rd)
local_diff = combine_patches(local_diff)
remote_diff = combine_patches(remote_diff)
return local_diff, remote_diff
def collect_conflicting_diffs(path, decisions):
local_conflict_diffs = []
remote_conflict_diffs = []
for d in decisions:
if d.conflict:
ld = adjust_patch_level(path, d.common_path, d.local_diff)
rd = adjust_patch_level(path, d.common_path, d.remote_diff)
local_conflict_diffs.extend(ld)
remote_conflict_diffs.extend(rd)
return local_conflict_diffs, remote_conflict_diffs
def collect_unresolved_diffs(base_path, unresolved_conflicts):
"""Collect local and remote diffs from conflict tuples.
Assumed to be on the same path."""
# Collect diffs
local_conflict_diffs = []
remote_conflict_diffs = []
for conf in unresolved_conflicts:
(path, ld, rd, strategy) = conf
assert base_path == path
#assert base_path == path[:len(base_path)]
#relative_path = path[len(base_path):]
#assert not relative_path
assert isinstance(ld, list)
assert isinstance(rd, list)
for d in ld:
local_conflict_diffs.append(d)
for d in rd:
remote_conflict_diffs.append(d)
# Combine patches into canonical tree again
local_conflict_diffs = combine_patches(local_conflict_diffs)
remote_conflict_diffs = combine_patches(remote_conflict_diffs)
return local_conflict_diffs, remote_conflict_diffs
def bundle_decisions_by_index(base_path, decisions):
""""""
decisions_by_index = defaultdict(list)
level = len(base_path)
for d in decisions:
assert base_path == d.common_path[:level], (
'decision has incorrect base path: %r vs %r' % (d.common_path, base_path))
if len(d.common_path) > level:
# At least patch/patch will have common_path on a particular item
key = d.common_path[level]
# Wrap decision diffs in patches so common_path points to list
prefix = d.common_path[level:]
d = push_patch_decision(d, prefix)
else:
# Removerange or addrange will have common_path
# on list and key only in the diff entries
keys = set(e.key for e in chain(d.local_diff, d.remote_diff, d.get("custom_diff", ())))
assert len(keys) == 1
key, = keys
decisions_by_index[key].append(d)
return decisions_by_index
# =============================================================================
#
# Autoresolve code follows
#
# =============================================================================
# Thinking through conflict resolution:
# - when conflict first encountered, it's registered as dec.conflict(path, ..., strategy)
# - strategy is used in tryresolve and eventually dropped if a conflict is registered
# - when recursing and subdecisions are returned, they're added to decisions at this level
# - i.e. decisions at this point contains all conflicts not resolved for subdocument,
# as well as unresolved conflicts for this level
# - if we have a strategy at this level, we can try to resolve all
# conflicts of subdocument as well as those at this level
def resolve_strategy_inline_attachments(base_path, attachments, decisions):
strategy = "inline-attachments"
local_conflict_diffs, remote_conflict_diffs = collect_conflicting_diffs(base_path, decisions)
# Drop conflict decisions
decisions.decisions = [d for d in decisions if not d.conflict]
# FIXME: Review this code.
ldiffs_by_key = {d.key: d for d in local_conflict_diffs}
rdiffs_by_key = {d.key: d for d in remote_conflict_diffs}
conflict_keys = sorted(set(ldiffs_by_key) | set(rdiffs_by_key))
for key in conflict_keys:
# key is the attachment filename
ld = ldiffs_by_key[key]
rd = rdiffs_by_key[key]
if ld.op == DiffOp.REMOVE:
# If one side is removing and we have a conflict,
# the other side did an edit and we keep that
# but flag a conflict (TODO: Or don't flag conflict?)
assert rd.op != DiffOp.REMOVE
decisions.remote(base_path, ld, rd, conflict=True, strategy=strategy)
elif rd.op == DiffOp.REMOVE:
decisions.local(base_path, ld, rd, conflict=True, strategy=strategy)
else:
# Not merging attachment contents, but adding attachments
# with new names LOCAL_oldname and REMOTE_oldname instead.
base = attachments[key]
if ld.op == DiffOp.ADD:
assert rd.op == DiffOp.ADD
local = ld.value
elif ld.op == DiffOp.REPLACE:
local = ld.value
else:
assert ld.op == DiffOp.PATCH
local = patch(base, ld.diff)
if rd.op == DiffOp.ADD:
remote = rd.value
elif rd.op == DiffOp.REPLACE:
remote = rd.value
else:
assert rd.op == DiffOp.PATCH
remote = patch(base, rd.diff)
local_name = "LOCAL_" + key
remote_name = "REMOTE_" + key
custom_diff = []
if local_name in attachments:
nbdime.log.warning(
"Replacing previous conflicted attachment with filename %r", local_name)
custom_diff += [op_replace(local_name, local)]
else:
custom_diff += [op_add(local_name, local)]
if remote_name in attachments:
nbdime.log.warning(
"Replacing previous conflicted attachment with filename %r", remote_name)
custom_diff += [op_replace(remote_name, remote)]
else:
custom_diff += [op_add(remote_name, remote)]
decisions.custom(base_path, ld, rd, custom_diff, conflict=True, strategy=strategy)
def output_marker(text):
return nbformat.v4.new_output("stream", name="stderr", text=text)
def _cell_marker_format(text):
return '{0}'.format(text)
def cell_marker(text):
return nbformat.v4.new_markdown_cell(source=_cell_marker_format(text))
def get_outputs_and_note(base, removes, patches):
if removes:
note = " "
suboutputs = []
elif patches:
e, = patches # 0 or 1 item
# Collect which mime types are modified
mkeys = set()
keys = set()
for d in e.diff:
if d.key == "data":
assert d.op == DiffOp.PATCH
for f in d.diff:
mkeys.add(f.key)
else:
keys.add(d.key)
data = base.get("data")
if data:
ukeys = set(data.keys()) - mkeys
else:
ukeys = ()
notes = []
if mkeys or keys:
notes.append("modified: {}".format(", ".join(sorted(mkeys))))
if ukeys:
notes.append("unchanged: {}".format(", ".join(sorted(ukeys))))
if notes:
note = " <" + "; ".join(notes) + ">"
else:
note = ""
suboutputs = [patch(base, e.diff)]
else:
note = " "
suboutputs = [base]
return suboutputs, note
def make_inline_output_conflict(base_output, local_diff, remote_diff):
"""Make a list of outputs with conflict markers from conflicting
local and remote diffs applying to a single base_output"""
# Styling details
local_title = "local"
remote_title = "remote"
marker_size = 7 # default in git
m0 = "<"*marker_size
m1 = "="*marker_size
m2 = ">"*marker_size
# Split diffs by type
d0 = local_diff
d1 = remote_diff
lpatches = [e for e in d0 if e.op == DiffOp.PATCH]
rpatches = [e for e in d1 if e.op == DiffOp.PATCH]
linserts = [e for e in d0 if e.op == DiffOp.ADDRANGE]
rinserts = [e for e in d1 if e.op == DiffOp.ADDRANGE]
lremoves = [e for e in d0 if e.op == DiffOp.REMOVERANGE]
rremoves = [e for e in d1 if e.op == DiffOp.REMOVERANGE]
assert len(lpatches) + len(linserts) + len(lremoves) == len(d0)
assert len(rpatches) + len(rinserts) + len(rremoves) == len(d1)
# Collect new outputs with surrounding markers
outputs = []
# Collect inserts from both sides separately
if linserts or rinserts:
lnote = ""
loutputs = []
for e in linserts: # 0 or 1 item
loutputs.extend(e.valuelist)
rnote = ""
routputs = []
for e in rinserts: # 0 or 1 item
routputs.extend(e.valuelist)
outputs.append(output_marker("%s %s%s\n" % (m0, local_title, lnote)))
outputs.extend(loutputs)
outputs.append(output_marker("%s\n" % (m1,)))
outputs.extend(routputs)
outputs.append(output_marker("%s %s%s\n" % (m2, remote_title, rnote)))
# Keep base output if untouched (only inserts)
keep_base = not (lremoves or rremoves or lpatches or rpatches)
if lremoves and rremoves:
# Don't add anything
pass
elif not keep_base:
assert not (lremoves and lpatches)
assert not (rremoves and rpatches)
lnote = ""
rnote = ""
# Insert changed output with surrounding markers
loutputs, lnote = get_outputs_and_note(base_output, lremoves, lpatches)
routputs, rnote = get_outputs_and_note(base_output, rremoves, rpatches)
outputs.append(output_marker("%s %s%s\n" % (m0, local_title, lnote)))
outputs.extend(loutputs)
outputs.append(output_marker("%s\n" % (m1,)))
outputs.extend(routputs)
outputs.append(output_marker("%s %s%s\n" % (m2, remote_title, rnote)))
# Return marked up output
return outputs, keep_base
def make_inline_cell_conflict(base_cells, local_diff, remote_diff):
"""Make a list of outputs with conflict markers from conflicting
local and remote diffs.
base_cells should be the entire cells array of the base notebook.
"""
# Note: This is currently only used for conflicting, *non-similar*
# insertions/replacements.
assert 1 <= len(local_diff) <= 2
assert 1 <= len(remote_diff) <= 2
assert local_diff[0].op == DiffOp.ADDRANGE and remote_diff[0].op == DiffOp.ADDRANGE
assert len(local_diff) == 1 or local_diff[1].op == DiffOp.REMOVERANGE
assert len(remote_diff) == 1 or remote_diff[1].op == DiffOp.REMOVERANGE
# Styling details
local_title = "local"
remote_title = "remote"
marker_size = 7 # default in git
m0 = "<"*marker_size
m1 = "="*marker_size
m2 = ">"*marker_size
lremove = local_diff[1].length if len(local_diff) > 1 else 0
rremove = remote_diff[1].length if len(remote_diff) > 1 else 0
start = local_diff[0].key
lkeep = max(0, lremove - rremove)
rkeep = max(0, rremove - lremove)
lcells = local_diff[0].valuelist + base_cells[start : start + lkeep]
rcells = remote_diff[0].valuelist + base_cells[start : start + rkeep]
cells = []
cells.append(cell_marker("%s %s" % (m0, local_title)))
cells.extend(lcells)
cells.append(cell_marker("%s" % (m1,)))
cells.extend(rcells)
cells.append(cell_marker("%s %s" % (m2, remote_title)))
# Return marked up cells
return cells
def resolve_strategy_remove_outputs(base_path, outputs, decisions):
strategy = "remove"
decisions_by_index = bundle_decisions_by_index(base_path, decisions)
decisions.decisions = []
for key, decs in sorted(decisions_by_index.items()):
if not any(d.conflict for d in decs):
decisions.decisions.extend(decs)
else:
# Replace all decisions affecting key with resolution
local_diff, remote_diff = collect_diffs(base_path, decs)
if (
len(local_diff) == len(remote_diff) == 1 and
local_diff[0].op == remote_diff[0].op == DiffOp.ADDRANGE
):
# remove in add vs add is a no-op
custom_diff = []
else:
custom_diff = [op_removerange(key, 1)]
decisions.custom(base_path, local_diff, remote_diff,
custom_diff, conflict=False, strategy=strategy)
def resolve_strategy_inline_outputs(base_path, outputs, decisions):
strategy = "inline-outputs"
decisions_by_index = bundle_decisions_by_index(base_path, decisions)
decisions.decisions = []
for key, decs in sorted(decisions_by_index.items()):
if not any(d.conflict for d in decs):
decisions.decisions.extend(decs)
else:
# Replace all decisions affecting key with resolution
local_diff, remote_diff = collect_diffs(base_path, decs)
inlined_conflict, keep_base = make_inline_output_conflict(
outputs[key] if outputs else None, local_diff, remote_diff)
custom_diff = []
custom_diff += [op_addrange(key, inlined_conflict)]
if not keep_base:
custom_diff += [op_removerange(key, 1)]
decisions.custom(base_path, local_diff, remote_diff, custom_diff, conflict=True, strategy=strategy)
def resolve_strategy_record_conflicts(base_path, base, decisions):
strategy = "record-conflict"
decisions.decisions = [push_patch_decision(d, d.common_path[len(base_path):]) for d in decisions]
local_conflict_diffs, remote_conflict_diffs = collect_conflicting_diffs(base_path, decisions)
#local_diff, remote_diff = collect_diffs(base_path, decisions)
local_conflict_diffs = combine_patches(local_conflict_diffs)
remote_conflict_diffs = combine_patches(remote_conflict_diffs)
# Drop conflict decisions
#conflict_decisions = [d for d in decisions if d.conflict]
decisions.decisions = [d for d in decisions if not d.conflict]
# Record remaining conflicts in field nbdime-conflicts
conflicts_dict = {
"local_diff": local_conflict_diffs,
"remote_diff": remote_conflict_diffs,
# TODO: Record local and remote versions of full metadata, easier to manually select one?
#"base_metadata": base,
#"local_metadata": patch(base, local_diff),
#"remote_metadata": patch(base, remote_diff),
}
if "nbdime-conflicts" in base:
nbdime.log.warning("Replacing previous nbdime-conflicts field from base notebook.")
op = op_replace("nbdime-conflicts", conflicts_dict)
else:
nbdime.log.warning(
"Recording unresolved conflicts in %s/nbdime-conflicts.", join_path(base_path))
op = op_add("nbdime-conflicts", conflicts_dict)
custom_diff = [op]
decisions.custom(
base_path, local_conflict_diffs, remote_conflict_diffs, custom_diff,
conflict=True, strategy=strategy)
def resolve_strategy_inline_source(path, base, local_diff, remote_diff):
strategy = "inline-source"
decisions = MergeDecisionBuilder()
if local_diff is ParentDeleted:
# Add marker at top of cell, easier to clean up manually
local_diff = [op_addrange(0, ["<<<<<<< LOCAL CELL DELETED >>>>>>>\n"])]
decisions.local_then_remote(path, local_diff, remote_diff, conflict=True, strategy=strategy)
elif remote_diff is ParentDeleted:
# Add marker at top of cell, easier to clean up manually
remote_diff = [op_addrange(0, ["<<<<<<< REMOTE CELL DELETED >>>>>>>\n"])]
decisions.remote_then_local(path, local_diff, remote_diff, conflict=True, strategy=strategy)
else:
# This is another approach, replacing content with markers
# if local_diff is ParentDeleted:
# local = ""
# else:
# local = patch(base, local_diff)
# if remote_diff is ParentDeleted:
# remote = ""
# else:
# remote = patch(base, remote_diff)
# Run merge renderer on full sources
local = patch(base, local_diff)
remote = patch(base, remote_diff)
merged, status = merge_render(base, local, remote, None)
conflict = status != 0
assert path[-1] == "source"
custom_diff = [op_replace(path[-1], merged)]
decisions.custom(path[:-1],
[op_patch(path[-1], local_diff)],
[op_patch(path[-1], remote_diff)],
custom_diff, conflict=conflict, strategy=strategy)
return decisions
def resolve_strategy_inline_recurse(path, base, decisions):
strategy = "inline-cells"
old_decisions = decisions.decisions
decisions.decisions = []
for d in old_decisions:
if not d.conflict:
decisions.decisions.append(d)
continue
assert d.local_diff and d.remote_diff
laname, lpname = chunk_typename(d.local_diff)
raname, rpname = chunk_typename(d.remote_diff)
chunktype = laname + lpname + "/" + raname + rpname
if (chunktype not in ('AR/A', 'A/AR', 'A/A', 'AR/AR') or
d.common_path != ('cells',)):
decisions.decisions.append(d)
continue
if d.get('similar_insert', None) is None:
# Inserts not similar, cannot recurse. Markup block
cells = make_inline_cell_conflict(base, d.local_diff, d.remote_diff)
rdiff = []
if len(d.local_diff) > 1:
rdiff.append(d.local_diff[1])
elif len(d.remote_diff) > 1:
rdiff.append(d.remote_diff[1])
decisions.custom(path,
d.local_diff,
d.remote_diff,
[op_addrange(d.local_diff[0].key, cells)] + rdiff,
conflict=True,
strategy=strategy)
continue
# Should only be insert range vs insertion range here now:
# FIXME: Remove asserts when sure there are no missed corner cases:
assert chunktype == 'A/A', 'Unexpected chunk type: %r' % chunktype
# We have conflicting, similar inserts, should only be one cell per decision
if not (len(d.local_diff[0].valuelist) == len(d.remote_diff[0].valuelist) == 1):
raise AssertionError('Unexpected diff length. Expected both local and remote '
'inserts to have length 1, as they are assumed similar.')
custom_diff = []
# Diff of local to remote:
lr_diff = d.similar_insert
assert lr_diff[0].op == 'patch'
lcell = d.local_diff[0].valuelist[0]
rcell = d.remote_diff[0].valuelist[0]
assert lcell['cell_type'] == rcell['cell_type'], 'cell types cannot differ'
cell = {}
keys = [e.key for e in lr_diff[0].diff]
for k in lcell.keys():
if k not in keys:
# either identical, or one-way from local
cell[k] = lcell[k]
for k in rcell.keys():
if k not in keys and k not in lcell:
# one-way from remote
cell[k] = rcell[k]
for k in keys:
if k == 'source':
cell[k] = merge_render('', lcell[k], rcell[k], None)[0]
elif k == 'metadata':
cell[k] = {
"local_metadata": lcell[k],
"remote_metadata": rcell[k],
}
elif k == 'id':
cell[k] = {
"local_id": lcell[k],
"remote_id": rcell[k],
}
elif k == 'execution_count':
cell[k] = None # Clear
elif k == 'outputs':
cell[k] = []
# TODO: Do inline merge
pass
else:
raise ValueError('Conflict on unrecognized key: %r' % (k,))
custom_diff = [op_addrange(d.local_diff[0].key, [cell])]
decisions.custom(path,
d.local_diff,
d.remote_diff,
custom_diff,
conflict=True,
strategy=strategy)
def resolve_strategy_generic(path, decisions, strategy):
if not (strategy and strategy != "mergetool" and decisions.has_conflicted()):
return
if strategy.startswith("use-"):
# Example: /cells strategy use-local,
# patch/remove conflict on /cells/3,
# decision is changed to action=local
action = strategy.replace("use-", "")
for d in decisions:
# Resolve conflicts that aren't marked with an
# already applied strategy. This applies to
# at least the inline conflict strategies.
if d.conflict and not d.get("strategy"):
d.action = action
d.conflict = False
else:
msg = "Unexpected strategy {} on {}.".format(
strategy, join_path(path))
nbdime.log.error(msg)
def resolve_conflicted_decisions_list(path, base, decisions, strategy):
if not (strategy and strategy != "mergetool" and decisions.has_conflicted()):
return
if strategy == "inline-outputs":
# Affects conflicts on output dicts at /cells/*/outputs
resolve_strategy_inline_outputs(path, base, decisions)
elif strategy == "inline-cells":
# Affects conflicts on cell dicts at /cells/*
resolve_strategy_inline_recurse(path, base, decisions)
elif strategy == "remove":
# Affects conflicts on output dicts at /cells/*/outputs
resolve_strategy_remove_outputs(path, base, decisions)
elif strategy == "union":
action = "local_then_remote"
for d in decisions:
if d.conflict:
# do not to apply to subdecisions on dicts
if not isinstance(resolve_path(base, d.common_path[len(path):]), dict):
d.action = action
d.conflict = False
elif strategy == "clear-all":
# Old approach that relies on special handling in apply_decisions
# to deal with clear-all overriding other decisions:
# Collect local diffs and remote diffs from unresolved_conflicts
#local_conflict_diffs, remote_conflict_diffs = collect_conflicting_diffs(path, decisions)
#decisions.add_decision(path, "clear_all", local_conflict_diffs, remote_conflict_diffs, conflict=False)
# Just drop all decisions and add a decision to remove the entire range
local_diff, remote_diff = collect_diffs(path, decisions)
custom_diff = [op_removerange(0, len(base))]
decisions.decisions = []
decisions.custom(path, local_diff, remote_diff, custom_diff, conflict=False, strategy=strategy)
elif strategy == "clear":
# FIXME: Only getting here when base is a list of the lines in a string,
# actually resolved in resolve_conflicted_decisions_strings
# Avoid error message in generic
pass
else:
resolve_strategy_generic(path, decisions, strategy)
def resolve_conflicted_decisions_dict(path, base, decisions, strategy):
if not (strategy and strategy != "mergetool" and decisions.has_conflicted()):
return
if strategy == "record-conflict":
# affects conflicts on dicts at /***/metadata or below
resolve_strategy_record_conflicts(path, base, decisions)
elif strategy == "inline-attachments":
# affects conflicts on string at /cells/*/attachments or below
resolve_strategy_inline_attachments(path, base, decisions)
elif strategy == "union":
# union ops on dicts doesn't really work if they are decided to be conflicting
for d in decisions:
if d.conflict:
msg = "Unexpected strategy {} for dict on {}.".format(
strategy, join_path(path))
nbdime.log.error(msg)
else:
resolve_strategy_generic(path, decisions, strategy)
def resolve_conflicted_decisions_strings(path, decisions, strategy):
if not (strategy and strategy != "mergetool" and decisions.has_conflicted()):
return
if strategy == "clear":
for d in decisions:
if d.conflict and not d.get("strategy"):
d.action = "clear"
d.conflict = False
elif strategy == "inline-source":
# Avoid error message in generic
pass
else:
resolve_strategy_generic(path, decisions, strategy)
| |