Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
247 changes: 247 additions & 0 deletions flocker/control/_diffing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
# Copyright ClusterHQ Inc. See LICENSE file for details.
# -*- test-case-name: flocker.control.test.test_diffing -*-

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a module docstring.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

from pyrsistent import (
PClass,
PMap,
PVector,
PSet,
field,
freeze,
pvector,
)

from zope.interface import Interface, implementer


class _DiffChange(Interface):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The naming convention for interfaces is IDiffChange

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Chose _IDiffChange as this is a private interface.

"""
Interface for a diff change.

This is simply something that can be applied to an object to create a new
object.

This interface is created more to simply documentation than for any of the
actual zope.interface mechanisms.
"""
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Typo "more to simply documentation"
And maybe drop the word "simply" from the line above while you're here.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


def apply(obj):
"""
Apply this diff change to the passed in object and return a new object
that is obj with the ``self`` diff applied.

:param object obj: The object to apply the diff to.

:returns: A new object that is the passed in object with the diff
applied.
"""
pass
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need for pass if there's a docstring.



@implementer(_DiffChange)
class _Remove(PClass):
"""
A ``_DiffChange`` that removes an object from a ``PSet`` or a key from a
``PMap`` inside a nested object tree.

:ivar path: The path in the nested object tree of the object to be removed
from the import set.
"""

path = field(
type=PVector,
factory=freeze
)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A little funky because I don't care what the type of the members of the field are... but that might express what this is better.


def apply(self, obj):
obj_path = self.path[:-1]
removal_path = self.path[-1]
return obj.transform(obj_path, lambda o: o.remove(removal_path))


@implementer(_DiffChange)
class _Set(PClass):
"""
A ``_DiffChange`` that sets a field in a ``PClass`` or sets a key in a
``PMap``.

:ivar path: The path in the nested object to the field/key to be set to a
new value.

:ivar val: The value to set the field/key to.
"""
path = field(
type=PVector,
factory=freeze
)
val = field()

def apply(self, obj):
return obj.transform(self.path, self.val)


@implementer(_DiffChange)
class _Add(PClass):
"""
A ``_DiffChange`` that adds an item to a ``PSet``.

:ivar path: The path to the set to which the item will be added.

:ivar item: The item to be added to the set.
"""
path = field(
type=PVector,
factory=freeze
)
item = field()

def apply(self, obj):
return obj.transform(self.path, lambda x: x.add(self.item))


@implementer(_DiffChange)
class _Diff(PClass):
"""
A ``_DiffChange`` that is simply the serial application of other diff
changes.

This is the object that external modules get and use to apply diffs to
objects.

:ivar changes: A vector of ``_DiffChange`` s that represent a diff between
two objects.
"""

changes = field(
type=PVector,
factory=freeze
)

def apply(self, obj):
for c in self.changes:
obj = c.apply(obj)
return obj
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❓ This looks like a situation where it'd be faster to operate on obj.evolver() rather than the immutable object.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Noted.

Turns out Evolvers don't actually have a .transform method, so this change is a bit non-trivial.

Something similar would be to aggregate all of the transforms, and then apply them all in a single call to obj.transform.

But... That seems like the sort of change that can be done in a subsequent review, and can be done on-demand if required by performance analysis.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sarum90 's comment here is related to the issue described in tobgu/pyrsistent#89 and offers a possible alternative implementation than #2839



def _create_diffs_for_sets(current_path, set_a, set_b):
"""
Computes a series of ``_DiffChange`` s to turn ``set_a`` into ``set_b``
assuming that these sets are at ``current_path`` inside a nested pyrsistent
object.

:param current_path: And iterable of pyrsistent object describing the path
inside the root pyrsistent object where the other arguments are
located. See ``PMap.transform`` for the format of this sort of path.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo "And iterable of pyrsistent object"

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.


:param set_a: The desired input set.

:param set_b: The desired output set.

:returns: An iterable of ``_DiffChange`` s that will turn ``set_a`` into
``set_b``.
"""
resulting_diffs = pvector([]).evolver()
for item in set_a.difference(set_b):
resulting_diffs.append(
_Remove(path=current_path.append(item))
)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❓ Might be neat if _Remove and _Add had the same signature.
_Remove(path=current_path, item=item) should work I think.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea that's much cleaner. Nice catch! Not sure what I was thinking.

for item in set_b.difference(set_a):
resulting_diffs.append(
_Add(path=current_path, item=item)
)
return resulting_diffs.persistent()


def _create_diffs_for_mappings(current_path, mapping_a, mapping_b):
"""
Computes a series of ``_DiffChange`` s to turn ``mapping_a`` into
``mapping_b`` assuming that these mappings are at ``current_path`` inside a
nested pyrsistent object.

:param current_path: And iterable of pyrsistent object describing the path
inside the root pyrsistent object where the other arguments are
located. See ``PMap.transform`` for the format of this sort of path.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same typo here.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.


:param mapping_a: The desired input mapping.

:param mapping_b: The desired output mapping.

:returns: An iterable of ``_DiffChange`` s that will turn ``mapping_a``
into ``mapping_b``.
"""
resulting_diffs = pvector([]).evolver()
a_keys = frozenset(x for x in mapping_a.iterkeys())
b_keys = frozenset(x for x in mapping_b.iterkeys())
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❓ Why not frozenset(mapping_a.keys()) ?

Copy link
Copy Markdown
Contributor Author

@sarum90 sarum90 Jun 2, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea not sure what I was doing here. much better your way.

for key in a_keys.intersection(b_keys):
if mapping_a[key] != mapping_b[key]:
resulting_diffs.extend(
_create_diffs_for(
current_path.append(key),
mapping_a[key],
mapping_b[key]
)
)
for key in b_keys.difference(a_keys):
resulting_diffs.append(
_Set(path=current_path.append(key), val=mapping_b[key])
)
for key in a_keys.difference(b_keys):
resulting_diffs.append(
_Remove(path=current_path.append(key))
)
return resulting_diffs.persistent()


def _create_diffs_for(current_path, subobj_a, subobj_b):
"""
Computes a series of ``_DiffChange`` s to turn ``subobj_a`` into
``subobj_b`` assuming that these subobjs are at ``current_path`` inside a
nested pyrsistent object.

:param current_path: And iterable of pyrsistent object describing the path
inside the root pyrsistent object where the other arguments are
located. See ``PMap.transform`` for the format of this sort of path.

:param subobj_a: The desired input sub object.

:param subobj_b: The desired output sub object.

:returns: An iterable of ``_DiffChange`` s that will turn ``subobj_a``
into ``subobj_b``.
"""
if subobj_a == subobj_b:
return pvector([])
elif type(subobj_a) != type(subobj_b):
return pvector([_Set(path=current_path, val=subobj_b)])
elif isinstance(subobj_a, PClass) and isinstance(subobj_b, PClass):
a_dict = subobj_a._to_dict()
b_dict = subobj_b._to_dict()
return _create_diffs_for_mappings(current_path, a_dict, b_dict)
elif isinstance(subobj_a, PMap) and isinstance(subobj_b, PMap):
return _create_diffs_for_mappings(
current_path, subobj_a, subobj_b)
elif isinstance(subobj_a, PSet) and isinstance(subobj_b, PSet):
return _create_diffs_for_sets(
current_path, subobj_a, subobj_b)
return pvector([_Set(path=current_path, val=subobj_b)])
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❓ I'm having to think about this default. Maybe add an explanatory comment.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And coverage shows some missing coverage in this function.

coverage run --source flocker.control._diffing --branch $(type -p trial) flocker.control.test.test_diffing
(4421) (FLOC-4421-flocker-diffing ? =)[~/.../HybridLogic/flocker]$ coverage report
Name                          Stmts   Miss Branch BrPart  Cover
---------------------------------------------------------------
flocker/control/_diffing.py      68      4     40      3    94%

>     if subobj_a == subobj_b:
!         return pvector([])
>     elif type(subobj_a) != type(subobj_b):
!         return pvector([_Set(path=current_path, val=subobj_b)])
>     elif isinstance(subobj_a, PClass) and isinstance(subobj_b, PClass):
>         a_dict = subobj_a._to_dict()
>         b_dict = subobj_b._to_dict()
>         return _create_diffs_for_mappings(current_path, a_dict, b_dict)
>     elif isinstance(subobj_a, PMap) and isinstance(subobj_b, PMap):
>         return _create_diffs_for_mappings(
>             current_path, subobj_a, subobj_b)
>     elif isinstance(subobj_a, PSet) and isinstance(subobj_b, PSet):
>         return _create_diffs_for_sets(
>             current_path, subobj_a, subobj_b)
!     return pvector([_Set(path=current_path, val=subobj_b)])

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh! Thanks for that idea. I was trying to get away with just using hypothesis... but I should absolutely add smaller unit tests, and I'll use coverage to verify line coverage with those tests.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added some unit tests:

Name                          Stmts   Miss Branch BrPart  Cover
---------------------------------------------------------------
flocker/control/_diffing.py      67      0     36      0   100%

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And a comment



def create_diff(object_a, object_b):
"""
Constructs a diff from ``object_a`` to ``object_b``

:param object_a: The desired input object.

:param object_b: The desired output object.

:returns: A ``_Diff`` that will convert ``object_a`` into ``object_b``
when applied.
"""
changes = _create_diffs_for(pvector([]), object_a, object_b)
return _Diff(changes=changes)


# Ensure that the representation of a ``_Diff`` is entirely serializable:
DIFF_SERIALIZABLE_CLASSES = [
_Set, _Remove, _Add, _Diff
]
6 changes: 4 additions & 2 deletions flocker/control/_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@

from zope.interface import Interface, implementer

from ._diffing import DIFF_SERIALIZABLE_CLASSES


def _sequence_field(checked_class, suffix, item_type, optional, initial):
"""
Expand Down Expand Up @@ -1235,5 +1237,5 @@ def get_information_wipe(self):
Deployment, Node, DockerImage, Port, Link, RestartNever, RestartAlways,
RestartOnFailure, Application, Dataset, Manifestation, AttachedVolume,
NodeState, DeploymentState, NonManifestDatasets, Configuration,
Lease, Leases, PersistentState,
]
Lease, Leases, PersistentState
] + DIFF_SERIALIZABLE_CLASSES
123 changes: 123 additions & 0 deletions flocker/control/test/test_diffing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Copyright ClusterHQ Inc. See LICENSE file for details.

"""
Tests for ``flocker.node._diffing``.
"""

from uuid import uuid4

from hypothesis import given

from .._diffing import create_diff
from .._persistence import wire_encode, wire_decode
from .._model import Node, Port
from ..testtools import (
application_strategy,
deployment_strategy
)

from ...testtools import TestCase

from testtools.matchers import Equals, LessThan


class DeploymentDiffTest(TestCase):
"""
Tests for creating and applying diffs between deployments.
"""

@given(
deployment_strategy(),
deployment_strategy(),
)
def test_deployment_diffing(self, deployment_a, deployment_b):
"""
Diffing two arbitrary deployments, then applying the diff to the first
deployment yields the second even after the diff has been serialized
and re-created.
"""
diff = create_diff(deployment_a, deployment_b)
serialized_diff = wire_encode(diff)
newdiff = wire_decode(serialized_diff)
should_b_b = newdiff.apply(deployment_a)
self.assertThat(
should_b_b,
Equals(deployment_b)
)

def test_deployment_diffing_smart(self):
"""
Small modifications to a deployment have diffs that are small. Their
reverse is also small.
"""
# Any large deployment will do, just use hypothesis for convenience of
# generating a large deployment.
deployment = deployment_strategy(min_number_of_nodes=90).example()

new_nodes = list(Node(uuid=uuid4()) for _ in xrange(4))
d = reduce(lambda x, y: x.update_node(y), new_nodes, deployment)
encoded_deployment = wire_encode(deployment)

diff = create_diff(deployment, d)
encoded_diff = wire_encode(diff)
self.assertThat(
len(encoded_diff),
LessThan(len(encoded_deployment)/2)
)
self.assertThat(
wire_decode(encoded_diff).apply(deployment),
Equals(d)
)

removal_diff = create_diff(d, deployment)
encoded_removal_diff = wire_encode(removal_diff)
self.assertThat(
len(encoded_removal_diff),
LessThan(len(encoded_deployment)/2)
)
self.assertThat(
wire_decode(encoded_removal_diff).apply(d),
Equals(deployment)
)

def test_set_diffing_smart(self):
"""
Small modifications to a sets have diffs that are small. Their reverse
is also small.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

typo "to a sets"

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

"""
# Any Application with a large set of ports will do, just use
# hypothesis for convenience of generating a large number of ports on
# an application.
application = application_strategy(min_number_of_ports=1000).example()

new_ports = list(
Port(internal_port=i, external_port=i) for i in xrange(4)
)
a = reduce(
lambda x, y: x.transform(['ports'], lambda x: x.add(y)),
new_ports,
application
)
encoded_application = wire_encode(application)

diff = create_diff(application, a)
encoded_diff = wire_encode(diff)
self.assertThat(
len(encoded_diff),
LessThan(len(encoded_application)/2)
)
self.assertThat(
wire_decode(encoded_diff).apply(application),
Equals(a)
)

removal_diff = create_diff(a, application)
encoded_removal_diff = wire_encode(removal_diff)
self.assertThat(
len(encoded_removal_diff),
LessThan(len(encoded_application)/2)
)
self.assertThat(
wire_decode(encoded_removal_diff).apply(a),
Equals(application)
)
Loading