RAMification + pattern matching: put typing information straight into the Vertices, as a Python attribute (don't put it in separate Vertices+Edges).

2024-09-10 13:18:14 +02:00 · 2024-09-10 13:18:14 +02:00 · ae5eaedb4b
commit ae5eaedb4b
parent 700a4d103f
8 changed files with 284 additions and 129 deletions
--- a/pattern_matching/benchmark.py
+++ b/pattern_matching/benchmark.py
@ -29,7 +29,7 @@ def run_benchmark(jhost, jguest, shost, sguest, expected=None):

    # benchmark Joeri
    m = j.MatcherVF2(host, guest,
-        lambda g_val, h_val: g_val == h_val) # all vertices can be matched
+        lambda g_vtx, h_vtx: g_vtx.value == h_vtx.value) # all vertices can be matched
    iterations = 50
    print(" Patience (joeri)...")
    for n in range(iterations):
--- a/pattern_matching/matcher.py
+++ b/pattern_matching/matcher.py
@ -57,9 +57,9 @@ class Edge:

    def __repr__(self):
        if self.label != None:
-            return f"E({self.src}--{self.label}->{self.tgt})"
+            return f"({self.src}--{self.label}->{self.tgt})"
        else:
-            return f"E({self.src}->{self.tgt})"
+            return f"({self.src}->{self.tgt})"

 class MatcherState:
    def __init__(self):
@ -133,12 +133,9 @@ class MatcherVF2:
        self.guest = guest
        self.compare_fn = compare_fn

-        with Timer("find_connected_components - host"):
-            self.host_vtx_to_component, self.host_component_to_vtxs = find_connected_components(host)
        with Timer("find_connected_components - guest"):
            self.guest_vtx_to_component, self.guest_component_to_vtxs = find_connected_components(guest)

-        print("number of host connected components:", len(self.host_component_to_vtxs))
        print("number of guest connected components:", len(self.guest_component_to_vtxs))

    def match(self):
@ -201,9 +198,9 @@ class MatcherVF2:
                        if h_candidate_edge in state.r_mapping_edges:
                            print_debug("  skip, host edge already matched")
                            continue # skip already matched host edge
-                        h_candidate_vtx = read_edge(h_candidate_edge, direction)
                        print_debug('grow edge', g_candidate_edge, ':', h_candidate_edge, id(g_candidate_edge), id(h_candidate_edge))
                        new_state = state.grow_edge(h_candidate_edge, g_candidate_edge)
+                        h_candidate_vtx = read_edge(h_candidate_edge, direction)
                        yield from attempt_match_vtxs(
                            new_state,
                            g_candidate_vtx,
@ -231,7 +228,7 @@ class MatcherVF2:
            if g_indegree > h_indegree:
                print_debug("  nope, indegree")
                return
-            if not self.compare_fn(g_candidate_vtx.value, h_candidate_vtx.value):
+            if not self.compare_fn(g_candidate_vtx, h_candidate_vtx):
                print_debug("  nope, bad compare")
                return
            new_state = state.grow_vtx(
@ -288,7 +285,7 @@ if __name__ == "__main__":
        # Edge(guest.vtxs[1], guest.vtxs[0]),
    ]

-    m = MatcherVF2(host, guest, lambda g_val, h_val: eval(g_val, {}, {'v':h_val}))
+    m = MatcherVF2(host, guest, lambda g_vtx, h_vtx: eval(g_vtx.value, {}, {'v':h_vtx.value}))
    import time
    durations = 0
    iterations = 1
@ -332,7 +329,7 @@ if __name__ == "__main__":
        Edge(guest.vtxs[1], guest.vtxs[0]),
    ]

-    m = MatcherVF2(host, guest, lambda g_val, h_val: g_val == h_val)
+    m = MatcherVF2(host, guest, lambda g_vtx, h_vtx: g_vtx.value == h_vtx.value)
    import time
    durations = 0
    iterations = 1
--- a/pattern_matching/mvs_adapter.py
+++ b/pattern_matching/mvs_adapter.py
@ -2,63 +2,117 @@ from state.base import State
 from uuid import UUID
 from services.bottom.V0 import Bottom
 from services.scd import SCD
+from services.od import OD
 from pattern_matching.matcher import Graph, Edge, Vertex
 import itertools
 import re
+import functools

 from util.timer import Timer

+from services.primitives.integer_type import Integer
+
 class _is_edge:
    def __repr__(self):
        return "EDGE"
+    def to_json(self):
+        return "EDGE"
 # just a unique symbol that is only equal to itself
 IS_EDGE = _is_edge()

+class _is_modelref:
+    def __repr__(self):
+        return "REF"
+    def to_json(self):
+        return "REF"
+IS_MODELREF = _is_modelref()
+
 class IS_TYPE:
    def __init__(self, type):
        # mvs-node of the type
        self.type = type
-
    def __repr__(self):
        return f"TYPE({str(self.type)[-4:]})"

-    # def __eq__(self, other):
-    #     if not isinstance(other, IS_TYPE):
-    #         return False
-    #     return other.type == self.type
+class NamedNode(Vertex):
+    def __init__(self, value, name):
+        super().__init__(value)
+        # the name of the node in the context of the model
+        # the matcher by default ignores this value
+        self.name = name

-    # def __hash__(self):
-    #     return self.type.__hash__()
+# MVS-nodes become vertices
+class MVSNode(NamedNode):
+    def __init__(self, value, node_id, name):
+        super().__init__(value, name)
+        # useful for debugging
+        self.node_id = node_id
+    def __repr__(self):
+        if self.value == None:
+            return f"N({self.name})"
+        if isinstance(self.value, str):
+            return f"N({self.name}=\"{self.value}\")"
+        return f"N({self.name}={self.value})"
+        # if isinstance(self.value, str):
+        #     return f"N({self.name}=\"{self.value}\",{str(self.node_id)[-4:]})"
+        # return f"N({self.name}={self.value},{str(self.node_id)[-4:]})"

+# MVS-edges become vertices.
+class MVSEdge(NamedNode):
+    def __init__(self, node_id, name):
+        super().__init__(IS_EDGE, name)
+        # useful for debugging
+        self.node_id = node_id
+    def __repr__(self):
+        return f"E({self.name})"
+        # return f"E({self.name}{str(self.node_id)[-4:]})"

+# dirty way of detecting whether a node is a ModelRef
 UUID_REGEX = re.compile(r"[0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z]-[0-9a-z][0-9a-z][0-9a-z][0-9a-z]-[0-9a-z][0-9a-z][0-9a-z][0-9a-z]-[0-9a-z][0-9a-z][0-9a-z][0-9a-z]-[0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z]")

-
 # Converts an object/class diagram in MVS state to the pattern matcher graph type
 # ModelRefs are flattened
-def model_to_graph(state: State, model: UUID):
+def model_to_graph(state: State, model: UUID, metamodel: UUID):
    with Timer("model_to_graph"):
+        od = OD(model, metamodel, state)
+        scd = SCD(model, state)
+        scd_mm = SCD(metamodel, state)
+
        bottom = Bottom(state)

        graph = Graph()

        mvs_edges = []
        modelrefs = {}
-        def extract_modelref(el):
-            value = bottom.read_value(el)
-            # If the value of the el is a ModelRef (only way to detect this is to match a regex - not very clean), then extract it. We'll create a link to the referred model later.
+        # constraints = {}
+
+        def to_vtx(el, name):
+            # print("name:", name)
            if bottom.is_edge(el):
+                # if filter_constraint:
+                #     try:
+                #         supposed_obj = bottom.read_edge_source(el)
+                #         slot_node = od.get_slot(supposed_obj, "constraint")
+                #         if el == slot_node:
+                #             # `el` is the constraint-slot
+                #             constraints[supposed_obj] = el
+                #             return
+                #     except:
+                #         pass
                mvs_edges.append(el)
-                return IS_EDGE
+                return MVSEdge(el, name)
+            # If the value of the el is a ModelRef (only way to detect this is to match a regex - not very clean), then extract it. We'll create a link to the referred model later.
+            value = bottom.read_value(el)
            if isinstance(value, str):
                if UUID_REGEX.match(value) != None:
                    # side-effect
                    modelrefs[el] = UUID(value)
-                    return None
-            return value
+                    return MVSNode(IS_MODELREF, el, name)
+            return MVSNode(value, el, name)

        # MVS-Nodes become vertices
-        uuid_to_vtx = { node: Vertex(value=extract_modelref(node)) for node in bottom.read_outgoing_elements(model) }
+        uuid_to_vtx = { node: to_vtx(node, key) for key in bottom.read_keys(model) for node in bottom.read_outgoing_elements(model, key) }
+        uuid_to_vtx = { key: val for key,val in uuid_to_vtx.items() if val != None }
        graph.vtxs = [ vtx for vtx in uuid_to_vtx.values() ]

        # For every MSV-Edge, two edges are created (for src and tgt)
@ -72,14 +126,18 @@ def model_to_graph(state: State, model: UUID):
            mvs_tgt = bottom.read_edge_target(mvs_edge)
            if mvs_tgt in uuid_to_vtx:
                graph.edges.append(Edge(
-                    src=uuid_to_vtx[mvs_tgt],
-                    tgt=uuid_to_vtx[mvs_edge],
+                    src=uuid_to_vtx[mvs_edge],
+                    tgt=uuid_to_vtx[mvs_tgt],
                    label="tgt"))


        for node, ref in modelrefs.items():
+            # Get MM of ref'ed model
+            type_node, = bottom.read_outgoing_elements(node, "Morphism")
+            print("modelref type node:", type_node)
+
            # Recursively convert ref'ed model to graph
-            ref_model = model_to_graph(state, ref)
+            ref_model = model_to_graph(state, ref, type_node)

            # Flatten and create link to ref'ed model
            graph.vtxs += ref_model.vtxs
@ -91,44 +149,61 @@ def model_to_graph(state: State, model: UUID):

        def add_types(node):
            type_node, = bottom.read_outgoing_elements(node, "Morphism")
-            print('node', node, 'has type', type_node)
+
+            # Put the type straight into the Vertex-object
+            uuid_to_vtx[node].typ = type_node
+
+            # We used to put the types in separate nodes, but we no longer do this:
+
+            # print('node', node, 'has type', type_node)
            # We create a Vertex storing the type
-            type_vertex = Vertex(value=IS_TYPE(type_node))
-            graph.vtxs.append(type_vertex)
-            type_edge = Edge(
-                src=uuid_to_vtx[node],
-                tgt=type_vertex,
-                label="type")
-            print(type_edge)
-            graph.edges.append(type_edge)
+            # type_vertex = Vertex(value=IS_TYPE(type_node))
+            # graph.vtxs.append(type_vertex)
+            # type_edge = Edge(
+            #     src=uuid_to_vtx[node],
+            #     tgt=type_vertex,
+            #     label="type")
+            # # print(type_edge)
+            # graph.edges.append(type_edge)


-        # Add typing information of classes, attributes, and associations
-        scd = SCD(model, state)
-        for name,node in scd.get_classes().items():
-            add_types(node)
-            for attr_name,attr_node in scd.get_attributes(name):
-                add_types(attr_node)
-        for _,node in scd.get_associations().items():
-            add_types(node)
+        # Add typing information for:
+        #   - classes
+        #   - attributes
+        #   - associations
+        for class_name, class_node in scd_mm.get_classes().items():
+            objects = scd.get_typed_by(class_node)
+            # print("typed by:", class_name, objects)
+            for obj_name, obj_node in objects.items():
+                add_types(obj_node)
+            for attr_name, attr_node in scd_mm.get_attributes(class_name).items():
+                attrs = scd.get_typed_by(attr_node)
+                for slot_name, slot_node in attrs.items():
+                    add_types(slot_node)
+        for assoc_name, assoc_node in scd_mm.get_associations().items():
+            objects = scd.get_typed_by(assoc_node)
+            # print("typed by:", assoc_name, objects)
+            for link_name, link_node in objects.items():
+                add_types(link_node)

        return graph

 # Function object for pattern matching. Decides whether to match host and guest vertices, where guest is a RAMified instance (e.g., the attributes are all strings with Python expressions), and the host is an instance (=object diagram) of the original model (=class diagram)
 class RAMCompare:
-    def __init__(self, bottom):
+    def __init__(self, bottom, host_od):
        self.bottom = bottom
+        self.host_od = host_od

        type_model_id = bottom.state.read_dict(bottom.state.read_root(), "SCD")
        self.scd_model = UUID(bottom.state.read_value(type_model_id))

    def is_subtype_of(self, supposed_subtype: UUID, supposed_supertype: UUID):
-        inheritance_node, = self.bottom.read_outgoing_elements(self.scd_model, "Inheritance")
-
        if supposed_subtype == supposed_supertype:
            # reflexive:
            return True

+        inheritance_node, = self.bottom.read_outgoing_elements(self.scd_model, "Inheritance")
+
        for outgoing in self.bottom.read_outgoing_edges(supposed_subtype):
            if inheritance_node in self.bottom.read_outgoing_elements(outgoing, "Morphism"):
                # 'outgoing' is an inheritance link
@ -139,32 +214,71 @@ class RAMCompare:

        return False

-    def __call__(self, g_val, h_val):
-        if g_val == None:
-            return h_val == None
+    def has_subtype(self, g_vtx_type, h_vtx_type):
+        # Decides whether a host vertex typed by `h_vtx_type` may be matched by a guest vertex typed by `g_vtx_type`.
+        # The guest type is resolved through its outgoing "RAMifies" elements; returns True as soon as
+        # `h_vtx_type` is a subtype (see `is_subtype_of`) of one of those elements, False otherwise.
+        g_vtx_original_types = self.bottom.read_outgoing_elements(g_vtx_type, "RAMifies")
+        for typ in g_vtx_original_types:
+            # every "RAMifies" target is a candidate, not only the first one
+            if self.is_subtype_of(h_vtx_type, typ):
+                return True
+        # no "RAMifies" element at all (guest type is not ramified), or none of them is a supertype of the host type
+        return False
+
+
+    # Memoizing the result of comparison gives a huge performance boost!
+    # Especially `is_subtype_of` is very slow, and will be performed many times over on the same pair of nodes during the matching process.
+    @functools.cache
+    def __call__(self, g_vtx, h_vtx):
+        # First check if the types match (if we have type-information)
+        if hasattr(g_vtx, 'typ'):
+            if not hasattr(h_vtx, 'typ'):
+                return False
+            return self.has_subtype(g_vtx.typ, h_vtx.typ)
+
+        # Then, match by value
+
+        if g_vtx.value == None:
+            return h_vtx.value == None

        # mvs-edges (which are converted to vertices) only match with mvs-edges
-        if g_val == IS_EDGE:
-            return h_val == IS_EDGE
+        if g_vtx.value == IS_EDGE:
+            return h_vtx.value == IS_EDGE

-        if h_val == IS_EDGE:
+        if h_vtx.value == IS_EDGE:
            return False

-        # types only match with their supertypes
-        # we assume that 'RAMifies'-traceability links have been created between guest and host types
-        # we need these links, because the guest types are different types (RAMified)
-        if isinstance(g_val, IS_TYPE):
-            if not isinstance(h_val, IS_TYPE):
-                return False
-            g_val_original_types = self.bottom.read_outgoing_elements(g_val.type, "RAMifies")
-            if len(g_val_original_types) > 0:
-                result = self.is_subtype_of(h_val.type, g_val_original_types[0])
-                return result
-            else:
-                return False
+        if g_vtx.value == IS_MODELREF:
+            return h_vtx.value == IS_MODELREF

-        if isinstance(h_val, IS_TYPE):
+        if h_vtx.value == IS_MODELREF:
            return False

-        # print(g_val, h_val)
-        return eval(g_val, {}, {'v': h_val})
+        # # types only match with their supertypes
+        # # we assume that 'RAMifies'-traceability links have been created between guest and host types
+        # # we need these links, because the guest types are different types (RAMified)
+        # if isinstance(g_vtx.value, IS_TYPE):
+        #     if not isinstance(h_vtx.value, IS_TYPE):
+        #         return False
+        #     return self.has_subtype(g_vtx.value.type, h_vtx.value.type)
+
+        # if isinstance(h_vtx.value, IS_TYPE):
+        #     return False
+
+        # print(g_vtx.value, h_vtx.value)
+        def get_slot(h_vtx, slot_name: str):
+            slot_node = self.host_od.get_slot(h_vtx.node_id, slot_name)
+            return slot_node
+
+        def read_int(slot: UUID):
+            i = Integer(slot, self.bottom.state)
+            return i.read()
+
+        try:
+            return eval(g_vtx.value, {}, {
+                'v': h_vtx.value,
+                'get_slot': functools.partial(get_slot, h_vtx),
+                'read_int': read_int,
+            })
+        except Exception as e:
+            return False