Rename matcher modules to make more sense

2024-11-13 11:10:31 +01:00 · 2024-11-13 11:10:31 +01:00 · 1845c3deb9
commit 1845c3deb9
parent 42757ddc4f
6 changed files with 15 additions and 10 deletions
--- a/transformation/matcher.py
+++ b/transformation/matcher.py
@ -0,0 +1,371 @@
+from api.cd import CDAPI
+from api.od import ODAPI, bind_api_readonly
+from util.eval import exec_then_eval
+from state.base import State
+from uuid import UUID
+from services.bottom.V0 import Bottom
+from services.scd import SCD
+from services import od as services_od
+from transformation.vf2 import Graph, Edge, Vertex, MatcherVF2
+from transformation import ramify
+import itertools
+import re
+import functools
+
+from util.timer import Timer
+
+class _is_edge:
+    def __repr__(self):
+        return "EDGE"
+    def to_json(self):
+        return "EDGE"
+# just a unique symbol that is only equal to itself
+IS_EDGE = _is_edge()
+
+class _is_modelref:
+    def __repr__(self):
+        return "REF"
+    def to_json(self):
+        return "REF"
+IS_MODELREF = _is_modelref()
+
+# class IS_TYPE:
+#     def __init__(self, type):
+#         # mvs-node of the type
+#         self.type = type
+#     def __repr__(self):
+#         return f"TYPE({str(self.type)[-4:]})"
+
+class NamedNode(Vertex):
+    def __init__(self, value, name):
+        super().__init__(value)
+        # the name of the node in the context of the model
+        # the matcher by default ignores this value
+        self.name = name
+
+# MVS-nodes become vertices
+class MVSNode(NamedNode):
+    def __init__(self, value, node_id, name):
+        super().__init__(value, name)
+        # useful for debugging
+        self.node_id = node_id
+    def __repr__(self):
+        if self.value == None:
+            return f"N({self.name})"
+        if isinstance(self.value, str):
+            return f"N({self.name}=\"{self.value}\")"
+        return f"N({self.name}={self.value})"
+        # if isinstance(self.value, str):
+        #     return f"N({self.name}=\"{self.value}\",{str(self.node_id)[-4:]})"
+        # return f"N({self.name}={self.value},{str(self.node_id)[-4:]})"
+
+# MVS-edges become vertices.
+class MVSEdge(NamedNode):
+    def __init__(self, node_id, name):
+        super().__init__(IS_EDGE, name)
+        # useful for debugging
+        self.node_id = node_id
+    def __repr__(self):
+        return f"E({self.name})"
+        # return f"E({self.name}{str(self.node_id)[-4:]})"
+
+# dirty way of detecting whether a node is a ModelRef
+UUID_REGEX = re.compile(r"[0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z]-[0-9a-z][0-9a-z][0-9a-z][0-9a-z]-[0-9a-z][0-9a-z][0-9a-z][0-9a-z]-[0-9a-z][0-9a-z][0-9a-z][0-9a-z]-[0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z][0-9a-z]")
+
+# Converts an object diagram in MVS state to the pattern matcher graph type
+# ModelRefs are flattened
+def model_to_graph(state: State, model: UUID, metamodel: UUID,
+    _filter=lambda node: True, prefix=""):
+    # with Timer("model_to_graph"):
+        od = services_od.OD(model, metamodel, state)
+        scd = SCD(model, state)
+        scd_mm = SCD(metamodel, state)
+
+        bottom = Bottom(state)
+
+        graph = Graph()
+
+        mvs_edges = []
+        modelrefs = {}
+        # constraints = {}
+
+        names = {}
+
+        def to_vtx(el, name):
+            # print("name:", name)
+            if bottom.is_edge(el):
+                # if filter_constraint:
+                #     try:
+                #         supposed_obj = bottom.read_edge_source(el)
+                #         slot_node = od.get_slot(supposed_obj, "constraint")
+                #         if el == slot_node:
+                #             # `el` is the constraint-slot
+                #             constraints[supposed_obj] = el
+                #             return
+                #     except:
+                #         pass
+                mvs_edges.append(el)
+                edge = MVSEdge(el, name)
+                names[name] = edge
+                return edge
+            # If the value of the el is a ModelRef (only way to detect this is to match a regex - not very clean), then extract it. We'll create a link to the referred model later.
+            value = bottom.read_value(el)
+            if isinstance(value, str):
+                if UUID_REGEX.match(value) != None:
+                    # side-effect
+                    modelrefs[el] = (UUID(value), name)
+                    return MVSNode(IS_MODELREF, el, name)
+            node = MVSNode(value, el, name)
+            names[name] = node
+            return node
+
+        # Objects and Links become vertices
+        uuid_to_vtx = { node: to_vtx(node, prefix+key) for key in bottom.read_keys(model) for node in bottom.read_outgoing_elements(model, key) if _filter(node) }
+        graph.vtxs = [ vtx for vtx in uuid_to_vtx.values() ]
+
+        # For every Link, two edges are created (for src and tgt)
+        for mvs_edge in mvs_edges:
+            mvs_src = bottom.read_edge_source(mvs_edge)
+            if mvs_src in uuid_to_vtx:
+                graph.edges.append(Edge(
+                    src=uuid_to_vtx[mvs_src],
+                    tgt=uuid_to_vtx[mvs_edge],
+                    label="outgoing"))
+            mvs_tgt = bottom.read_edge_target(mvs_edge)
+            if mvs_tgt in uuid_to_vtx:
+                graph.edges.append(Edge(
+                    src=uuid_to_vtx[mvs_edge],
+                    tgt=uuid_to_vtx[mvs_tgt],
+                    label="tgt"))
+
+
+        for node, (ref_m, name) in modelrefs.items():
+            vtx = uuid_to_vtx[node]
+
+            # Get MM of ref'ed model
+            ref_mm, = bottom.read_outgoing_elements(node, "Morphism")
+            # print("modelref type node:", type_node)
+
+            # Recursively convert ref'ed model to graph
+            # ref_graph = model_to_graph(state, ref_m, ref_mm, prefix=name+'/')
+
+            vtx.modelref = (ref_m, ref_mm)
+
+            # We no longer flatten:
+
+            # # Flatten and create link to ref'ed model
+            # graph.vtxs += ref_model.vtxs
+            # graph.edges += ref_model.edges
+            # graph.edges.append(Edge(
+            #     src=uuid_to_vtx[node],
+            #     tgt=ref_model.vtxs[0], # which node to link to?? dirty
+            #     label="modelref"))
+
+        def add_types(node):
+            vtx = uuid_to_vtx[node]
+            type_node, = bottom.read_outgoing_elements(node, "Morphism")
+
+            # Put the type straight into the Vertex-object
+            # The benefit is that our Vertex-matching callback can then be coded cleverly, look at the types first, resulting in better performance
+            vtx.typ = type_node
+
+            # The old approach (creating special vertices containing the types), commented out:
+
+            # print('node', node, 'has type', type_node)
+            # We create a Vertex storing the type
+            # type_vertex = Vertex(value=IS_TYPE(type_node))
+            # graph.vtxs.append(type_vertex)
+            # type_edge = Edge(
+            #     src=uuid_to_vtx[node],
+            #     tgt=type_vertex,
+            #     label="type")
+            # # print(type_edge)
+            # graph.edges.append(type_edge)
+
+        # Add typing information for:
+        #   - classes
+        #   - attributes
+        #   - associations
+        for class_name, class_node in scd_mm.get_classes().items():
+            objects = scd.get_typed_by(class_node)
+            # print("typed by:", class_name, objects)
+            for obj_name, obj_node in objects.items():
+                if _filter(obj_node):
+                    add_types(obj_node)
+            for attr_name, attr_node in scd_mm.get_attributes(class_name).items():
+                attrs = scd.get_typed_by(attr_node)
+                for slot_name, slot_node in attrs.items():
+                    if _filter(slot_node):
+                        add_types(slot_node)
+        for assoc_name, assoc_node in scd_mm.get_associations().items():
+            objects = scd.get_typed_by(assoc_node)
+            # print("typed by:", assoc_name, objects)
+            for link_name, link_node in objects.items():
+                if _filter(link_node):
+                    add_types(link_node)
+
+        return names, graph
+
+# This function returns a Generator of matches.
+# The idea is that the user can iterate over the match set, lazily generating it: if only interested in the first match, the entire match set doesn't have to be generated.
+def match_od(state, host_m, host_mm, pattern_m, pattern_mm, pivot={}):
+    bottom = Bottom(state)
+
+    # compute subtype relations and such:
+    cdapi = CDAPI(state, host_mm)
+    odapi = ODAPI(state, host_m, host_mm)
+    pattern_odapi = ODAPI(state, pattern_m, pattern_mm)
+    pattern_mm_odapi = ODAPI(state, pattern_mm, cdapi.mm)
+
+    # Function object for pattern matching. Decides whether to match host and guest vertices, where guest is a RAMified instance (e.g., the attributes are all strings with Python expressions), and the host is an instance (=object diagram) of the original model (=class diagram)
+    class RAMCompare:
+        def __init__(self, bottom, host_od):
+            self.bottom = bottom
+            self.host_od = host_od
+
+            type_model_id = bottom.state.read_dict(bottom.state.read_root(), "SCD")
+            self.scd_model = UUID(bottom.state.read_value(type_model_id))
+
+            # constraints need to be checked at the very end, after a complete match is established, because constraint code may refer to matched elements by their name
+            self.conditions_to_check = {}
+
+        def match_types(self, g_vtx_type, h_vtx_type):
+            # types only match with their supertypes
+            # we assume that 'RAMifies'-traceability links have been created between guest and host types
+            try:
+                g_vtx_unramified_type = ramify.get_original_type(self.bottom, g_vtx_type)
+            except:
+                return False
+
+            try:
+                host_type_name = cdapi.type_model_names[h_vtx_type]
+                guest_type_name_unramified = cdapi.type_model_names[g_vtx_unramified_type]
+            except KeyError:
+                return False
+
+            return cdapi.is_subtype(
+                super_type_name=guest_type_name_unramified,
+                sub_type_name=host_type_name)
+
+        # Memoizing the result of comparison gives a huge performance boost!
+        # Especially `is_subtype_of` is very slow, and will be performed many times over on the same pair of nodes during the matching process.
+        # Assuming the model is not altered *during* matching, this is safe.
+        @functools.cache
+        def __call__(self, g_vtx, h_vtx):
+            # First check if the types match (if we have type-information)
+            if hasattr(g_vtx, 'typ'):
+                if not hasattr(h_vtx, 'typ'):
+                    # if guest has a type, host must have a type
+                    return False
+                return self.match_types(g_vtx.typ, h_vtx.typ)
+
+            if hasattr(g_vtx, 'modelref'):
+                if not hasattr(h_vtx, 'modelref'):
+                    return False
+
+                python_code = services_od.read_primitive_value(self.bottom, g_vtx.node_id, pattern_mm)[0]
+
+                self.conditions_to_check[g_vtx.name] = python_code
+                # self.conditions_to_check.append((python_code, h_vtx.name, g_vtx.name))
+
+                return True # do be determined later, if it's actually a match
+
+            if g_vtx.value == None:
+                return h_vtx.value == None
+
+            # mvs-edges (which are converted to vertices) only match with mvs-edges
+            if g_vtx.value == IS_EDGE:
+                return h_vtx.value == IS_EDGE
+
+            if h_vtx.value == IS_EDGE:
+                return False
+
+            if g_vtx.value == IS_MODELREF:
+                return h_vtx.value == IS_MODELREF
+
+            if h_vtx.value == IS_MODELREF:
+                return False
+
+            return True
+
+    # Convert to format understood by matching algorithm
+    h_names, host = model_to_graph(state, host_m, host_mm)
+
+    # Only match matchable pattern elements
+    # E.g., the 'condition'-attribute that is added to every class, cannot be matched with anything
+    def is_matchable(pattern_el):
+        pattern_el_name = pattern_odapi.get_name(pattern_el)
+        if pattern_odapi.get_type_name(pattern_el) == "GlobalCondition":
+            return False
+        # Super-cheap and unreliable way of filtering out the 'condition'-attribute, added to every class:
+        return not (pattern_el_name.endswith("condition")
+            # as an extra safety measure, if the user defined her own 'condition' attribute, RAMification turned this into 'RAM_condition', and we can detect this
+            # of course this breaks if the class name already ended with 'RAM', but let's hope that never happens
+            # also, we are assuming the default "RAM_" prefix is used, but the user can change this...
+            and not pattern_el_name.endswith("RAM_condition"))
+
+    g_names, guest = model_to_graph(state, pattern_m, pattern_mm,
+        _filter=is_matchable)
+
+    graph_pivot = {
+        g_names[guest_name] : h_names[host_name]
+            for guest_name, host_name in pivot.items()
+                if guest_name in g_names
+    }
+
+    obj_conditions = []
+    for class_name, class_node in pattern_mm_odapi.get_all_instances("Class"):
+        for obj_name, obj_node in pattern_odapi.get_all_instances(class_name):
+            python_code = pattern_odapi.get_slot_value_default(obj_node, "condition", 'True')
+            if class_name == "GlobalCondition":
+                obj_conditions.append((python_code, None))
+            else:
+                obj_conditions.append((python_code, obj_name))
+
+
+    def check_conditions(name_mapping):
+        def check(python_code: str, loc):
+            return exec_then_eval(python_code,
+                _globals={
+                    **bind_api_readonly(odapi),
+                    'matched': lambda name: bottom.read_outgoing_elements(host_m, name_mapping[name])[0],
+                },
+                _locals=loc)
+
+        # Attribute conditions
+        for pattern_name, host_name in name_mapping.items():
+            try:
+                python_code = compare.conditions_to_check[pattern_name]
+            except KeyError:
+                continue
+            host_node = odapi.get(host_name)
+            if not check(python_code, {'this': host_node}):
+                return False
+
+        for python_code, pattern_el_name in obj_conditions:
+            if pattern_el_name == None:
+                # GlobalCondition
+                if not check(python_code, {}):
+                    return False
+            else:
+                # object-lvl condition
+                host_el_name = name_mapping[pattern_el_name]
+                host_node = odapi.get(host_el_name)
+                if not check(python_code, {'this': host_node}):
+                    return False
+        return True
+
+
+    compare = RAMCompare(bottom, services_od.OD(host_mm, host_m, state))
+    matcher = MatcherVF2(host, guest, compare)
+    for m in matcher.match(graph_pivot):
+        # Convert mapping
+        name_mapping = {}
+        for guest_vtx, host_vtx in m.mapping_vtxs.items():
+            if isinstance(guest_vtx, NamedNode) and isinstance(host_vtx, NamedNode):
+                name_mapping[guest_vtx.name] = host_vtx.name
+
+        if not check_conditions(name_mapping):
+            continue # not a match after all...
+
+        yield name_mapping