RAMification + pattern matching: put typing information straight into the Vertices, as a Python attribute (don't put it in separate Vertices+Edges).

This commit is contained in:
Joeri Exelmans 2024-09-10 13:18:14 +02:00
parent 700a4d103f
commit ae5eaedb4b
8 changed files with 284 additions and 129 deletions

View file

@ -29,7 +29,7 @@ def run_benchmark(jhost, jguest, shost, sguest, expected=None):
# benchmark Joeri
m = j.MatcherVF2(host, guest,
lambda g_val, h_val: g_val == h_val) # all vertices can be matched
lambda g_vtx, h_vtx: g_vtx.value == h_vtx.value) # all vertices can be matched
iterations = 50
print(" Patience (joeri)...")
for n in range(iterations):

View file

@ -57,9 +57,9 @@ class Edge:
def __repr__(self):
if self.label != None:
return f"E({self.src}--{self.label}->{self.tgt})"
return f"({self.src}--{self.label}->{self.tgt})"
else:
return f"E({self.src}->{self.tgt})"
return f"({self.src}->{self.tgt})"
class MatcherState:
def __init__(self):
@ -133,12 +133,9 @@ class MatcherVF2:
self.guest = guest
self.compare_fn = compare_fn
with Timer("find_connected_components - host"):
self.host_vtx_to_component, self.host_component_to_vtxs = find_connected_components(host)
with Timer("find_connected_components - guest"):
self.guest_vtx_to_component, self.guest_component_to_vtxs = find_connected_components(guest)
print("number of host connected components:", len(self.host_component_to_vtxs))
print("number of guest connected components:", len(self.guest_component_to_vtxs))
def match(self):
@ -201,9 +198,9 @@ class MatcherVF2:
if h_candidate_edge in state.r_mapping_edges:
print_debug(" skip, host edge already matched")
continue # skip already matched host edge
h_candidate_vtx = read_edge(h_candidate_edge, direction)
print_debug('grow edge', g_candidate_edge, ':', h_candidate_edge, id(g_candidate_edge), id(h_candidate_edge))
new_state = state.grow_edge(h_candidate_edge, g_candidate_edge)
h_candidate_vtx = read_edge(h_candidate_edge, direction)
yield from attempt_match_vtxs(
new_state,
g_candidate_vtx,
@ -231,7 +228,7 @@ class MatcherVF2:
if g_indegree > h_indegree:
print_debug(" nope, indegree")
return
if not self.compare_fn(g_candidate_vtx.value, h_candidate_vtx.value):
if not self.compare_fn(g_candidate_vtx, h_candidate_vtx):
print_debug(" nope, bad compare")
return
new_state = state.grow_vtx(
@ -288,7 +285,7 @@ if __name__ == "__main__":
# Edge(guest.vtxs[1], guest.vtxs[0]),
]
m = MatcherVF2(host, guest, lambda g_val, h_val: eval(g_val, {}, {'v':h_val}))
m = MatcherVF2(host, guest, lambda g_vtx, h_vtx: eval(g_vtx.value, {}, {'v':h_vtx.value}))
import time
durations = 0
iterations = 1
@ -332,7 +329,7 @@ if __name__ == "__main__":
Edge(guest.vtxs[1], guest.vtxs[0]),
]
m = MatcherVF2(host, guest, lambda g_val, h_val: g_val == h_val)
m = MatcherVF2(host, guest, lambda g_vtx, h_vtx: g_vtx.value == h_vtx.value)
import time
durations = 0
iterations = 1

View file

@ -2,63 +2,117 @@ from state.base import State
from uuid import UUID
from services.bottom.V0 import Bottom
from services.scd import SCD
from services.od import OD
from pattern_matching.matcher import Graph, Edge, Vertex
import itertools
import re
import functools
from util.timer import Timer
from services.primitives.integer_type import Integer
class _is_edge:
    """Marker type whose single instance tags a vertex as a converted MVS-edge."""

    def to_json(self):
        # Serialised form uses the same tag that repr() shows.
        return "EDGE"

    def __repr__(self):
        return "EDGE"


# Just a unique symbol: the class defines no __eq__, so the instance is only
# equal to itself.
IS_EDGE = _is_edge()
class _is_modelref:
    """Marker type whose single instance tags a vertex as a ModelRef."""

    def to_json(self):
        # Serialised form uses the same tag that repr() shows.
        return "REF"

    def __repr__(self):
        return "REF"


# Unique symbol: the class defines no __eq__, so the instance is only equal
# to itself.
IS_MODELREF = _is_modelref()
class IS_TYPE:
def __init__(self, type):
# mvs-node of the type
self.type = type
def __repr__(self):
return f"TYPE({str(self.type)[-4:]})"
# def __eq__(self, other):
# if not isinstance(other, IS_TYPE):
# return False
# return other.type == self.type
class NamedNode(Vertex):
    """A matcher ``Vertex`` that also carries a name.

    Args:
        value: forwarded unchanged to ``Vertex.__init__``.
        name: the name of the node in the context of the model.
    """

    def __init__(self, value, name):
        super().__init__(value)
        # The matcher ignores this attribute by default; it is extra
        # information attached to the vertex.
        self.name = name
# def __hash__(self):
# return self.type.__hash__()
# MVS-nodes become vertices
class MVSNode(NamedNode):
    """Vertex created for an MVS-node.

    Args:
        value: forwarded to ``NamedNode.__init__``.
        node_id: identifier of the originating MVS-node; stored for debugging.
        name: the name of the node in the context of the model.
    """

    def __init__(self, value, node_id, name):
        super().__init__(value, name)
        # Useful for debugging only.
        self.node_id = node_id

    def __repr__(self):
        # NOTE(review): `self.value` is presumably set by Vertex.__init__ --
        # confirm against pattern_matching.matcher.
        # Identity test: `None` is a singleton, so `is` is the correct check.
        if self.value is None:
            return f"N({self.name})"
        # Strings are quoted so they can be told apart from other values.
        if isinstance(self.value, str):
            return f"N({self.name}=\"{self.value}\")"
        # When debugging, str(self.node_id)[-4:] can be appended to tell
        # equally named nodes apart.
        return f"N({self.name}={self.value})"
# MVS-edges become vertices.
class MVSEdge(NamedNode):
    """Vertex created for an MVS-edge; its value is always ``IS_EDGE``.

    Args:
        node_id: identifier of the originating MVS-edge; stored for debugging.
        name: the name of the edge in the context of the model.
    """

    def __init__(self, node_id, name):
        super().__init__(IS_EDGE, name)
        # Useful for debugging only.
        self.node_id = node_id

    def __repr__(self):
        # When debugging, str(self.node_id)[-4:] can be appended to tell
        # equally named edges apart.
        return f"E({self.name})"
# Dirty way of detecting whether a node is a ModelRef: its string value has the
# textual shape of a UUID, i.e. 8-4-4-4-12 groups separated by dashes.
# The character class is deliberately kept as [0-9a-z] (not just hex digits),
# so exactly the same strings match as before.
UUID_REGEX = re.compile(
    r"[0-9a-z]{8}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{12}"
)
# Converts an object/class diagram in MVS state to the pattern matcher graph type
# ModelRefs are flattened
def model_to_graph(state: State, model: UUID):
def model_to_graph(state: State, model: UUID, metamodel: UUID):
with Timer("model_to_graph"):
od = OD(model, metamodel, state)
scd = SCD(model, state)
scd_mm = SCD(metamodel, state)
bottom = Bottom(state)
graph = Graph()
mvs_edges = []
modelrefs = {}
def extract_modelref(el):
value = bottom.read_value(el)
# If the value of the el is a ModelRef (only way to detect this is to match a regex - not very clean), then extract it. We'll create a link to the referred model later.
# constraints = {}
def to_vtx(el, name):
# print("name:", name)
if bottom.is_edge(el):
# if filter_constraint:
# try:
# supposed_obj = bottom.read_edge_source(el)
# slot_node = od.get_slot(supposed_obj, "constraint")
# if el == slot_node:
# # `el` is the constraint-slot
# constraints[supposed_obj] = el
# return
# except:
# pass
mvs_edges.append(el)
return IS_EDGE
return MVSEdge(el, name)
# If the value of the el is a ModelRef (only way to detect this is to match a regex - not very clean), then extract it. We'll create a link to the referred model later.
value = bottom.read_value(el)
if isinstance(value, str):
if UUID_REGEX.match(value) != None:
# side-effect
modelrefs[el] = UUID(value)
return None
return value
return MVSNode(IS_MODELREF, el, name)
return MVSNode(value, el, name)
# MVS-Nodes become vertices
uuid_to_vtx = { node: Vertex(value=extract_modelref(node)) for node in bottom.read_outgoing_elements(model) }
uuid_to_vtx = { node: to_vtx(node, key) for key in bottom.read_keys(model) for node in bottom.read_outgoing_elements(model, key) }
uuid_to_vtx = { key: val for key,val in uuid_to_vtx.items() if val != None }
graph.vtxs = [ vtx for vtx in uuid_to_vtx.values() ]
# For every MVS-Edge, two edges are created (for src and tgt)
@ -72,14 +126,18 @@ def model_to_graph(state: State, model: UUID):
mvs_tgt = bottom.read_edge_target(mvs_edge)
if mvs_tgt in uuid_to_vtx:
graph.edges.append(Edge(
src=uuid_to_vtx[mvs_tgt],
tgt=uuid_to_vtx[mvs_edge],
src=uuid_to_vtx[mvs_edge],
tgt=uuid_to_vtx[mvs_tgt],
label="tgt"))
for node, ref in modelrefs.items():
# Get MM of ref'ed model
type_node, = bottom.read_outgoing_elements(node, "Morphism")
print("modelref type node:", type_node)
# Recursively convert ref'ed model to graph
ref_model = model_to_graph(state, ref)
ref_model = model_to_graph(state, ref, type_node)
# Flatten and create link to ref'ed model
graph.vtxs += ref_model.vtxs
@ -91,44 +149,61 @@ def model_to_graph(state: State, model: UUID):
def add_types(node):
type_node, = bottom.read_outgoing_elements(node, "Morphism")
print('node', node, 'has type', type_node)
# Put the type straight into the Vertex-object
uuid_to_vtx[node].typ = type_node
# We used to put the types in separate nodes, but we no longer do this:
# print('node', node, 'has type', type_node)
# We create a Vertex storing the type
type_vertex = Vertex(value=IS_TYPE(type_node))
graph.vtxs.append(type_vertex)
type_edge = Edge(
src=uuid_to_vtx[node],
tgt=type_vertex,
label="type")
print(type_edge)
graph.edges.append(type_edge)
# type_vertex = Vertex(value=IS_TYPE(type_node))
# graph.vtxs.append(type_vertex)
# type_edge = Edge(
# src=uuid_to_vtx[node],
# tgt=type_vertex,
# label="type")
# # print(type_edge)
# graph.edges.append(type_edge)
# Add typing information of classes, attributes, and associations
scd = SCD(model, state)
for name,node in scd.get_classes().items():
add_types(node)
for attr_name,attr_node in scd.get_attributes(name):
add_types(attr_node)
for _,node in scd.get_associations().items():
add_types(node)
# Add typing information for:
# - classes
# - attributes
# - associations
for class_name, class_node in scd_mm.get_classes().items():
objects = scd.get_typed_by(class_node)
# print("typed by:", class_name, objects)
for obj_name, obj_node in objects.items():
add_types(obj_node)
for attr_name, attr_node in scd_mm.get_attributes(class_name).items():
attrs = scd.get_typed_by(attr_node)
for slot_name, slot_node in attrs.items():
add_types(slot_node)
for assoc_name, assoc_node in scd_mm.get_associations().items():
objects = scd.get_typed_by(assoc_node)
# print("typed by:", assoc_name, objects)
for link_name, link_node in objects.items():
add_types(link_node)
return graph
# Function object for pattern matching. Decides whether to match host and guest vertices, where guest is a RAMified instance (e.g., the attributes are all strings with Python expressions), and the host is an instance (=object diagram) of the original model (=class diagram)
class RAMCompare:
def __init__(self, bottom):
def __init__(self, bottom, host_od):
self.bottom = bottom
self.host_od = host_od
type_model_id = bottom.state.read_dict(bottom.state.read_root(), "SCD")
self.scd_model = UUID(bottom.state.read_value(type_model_id))
def is_subtype_of(self, supposed_subtype: UUID, supposed_supertype: UUID):
inheritance_node, = self.bottom.read_outgoing_elements(self.scd_model, "Inheritance")
if supposed_subtype == supposed_supertype:
# reflexive:
return True
inheritance_node, = self.bottom.read_outgoing_elements(self.scd_model, "Inheritance")
for outgoing in self.bottom.read_outgoing_edges(supposed_subtype):
if inheritance_node in self.bottom.read_outgoing_elements(outgoing, "Morphism"):
# 'outgoing' is an inheritance link
@ -139,32 +214,71 @@ class RAMCompare:
return False
def __call__(self, g_val, h_val):
if g_val == None:
return h_val == None
def has_subtype(self, g_vtx_type, h_vtx_type):
    """Tell whether a host type conforms to a guest (RAMified) type.

    The guest type is followed along its outgoing "RAMifies" links to the
    original type(s) it was derived from; the host type matches when it is a
    subtype (per ``is_subtype_of``) of at least one of them.

    Args:
        g_vtx_type: type node of the guest vertex.
        h_vtx_type: type node of the host vertex.

    Returns:
        True if ``h_vtx_type`` is a subtype of any original type of
        ``g_vtx_type``; False otherwise, including when the guest type has no
        "RAMifies" link at all (i.e. it is not ramified).
    """
    g_vtx_original_types = self.bottom.read_outgoing_elements(g_vtx_type, "RAMifies")
    # Test every original type. The previous loop always compared against
    # element [0], so any further "RAMifies" targets were never considered.
    return any(
        self.is_subtype_of(h_vtx_type, typ)
        for typ in g_vtx_original_types
    )
# Memoizing the result of comparison gives a huge performance boost!
# Especially `is_subtype_of` is very slow, and will be performed many times over on the same pair of nodes during the matching process.
@functools.cache
def __call__(self, g_vtx, h_vtx):
# First check if the types match (if we have type-information)
if hasattr(g_vtx, 'typ'):
if not hasattr(h_vtx, 'typ'):
return False
return self.has_subtype(g_vtx.typ, h_vtx.typ)
# Then, match by value
if g_vtx.value == None:
return h_vtx.value == None
# mvs-edges (which are converted to vertices) only match with mvs-edges
if g_val == IS_EDGE:
return h_val == IS_EDGE
if g_vtx.value == IS_EDGE:
return h_vtx.value == IS_EDGE
if h_val == IS_EDGE:
if h_vtx.value == IS_EDGE:
return False
# types only match with their supertypes
# we assume that 'RAMifies'-traceability links have been created between guest and host types
# we need these links, because the guest types are different types (RAMified)
if isinstance(g_val, IS_TYPE):
if not isinstance(h_val, IS_TYPE):
return False
g_val_original_types = self.bottom.read_outgoing_elements(g_val.type, "RAMifies")
if len(g_val_original_types) > 0:
result = self.is_subtype_of(h_val.type, g_val_original_types[0])
return result
else:
return False
if g_vtx.value == IS_MODELREF:
return h_vtx.value == IS_MODELREF
if isinstance(h_val, IS_TYPE):
if h_vtx.value == IS_MODELREF:
return False
# print(g_val, h_val)
return eval(g_val, {}, {'v': h_val})
# # types only match with their supertypes
# # we assume that 'RAMifies'-traceability links have been created between guest and host types
# # we need these links, because the guest types are different types (RAMified)
# if isinstance(g_vtx.value, IS_TYPE):
# if not isinstance(h_vtx.value, IS_TYPE):
# return False
# return self.has_subtype(g_vtx.value.type, h_vtx.value.type)
# if isinstance(h_vtx.value, IS_TYPE):
# return False
# print(g_vtx.value, h_vtx.value)
def get_slot(h_vtx, slot_name: str):
slot_node = self.host_od.get_slot(h_vtx.node_id, slot_name)
return slot_node
def read_int(slot: UUID):
i = Integer(slot, self.bottom.state)
return i.read()
try:
return eval(g_vtx.value, {}, {
'v': h_vtx.value,
'get_slot': functools.partial(get_slot, h_vtx),
'read_int': read_int,
})
except Exception as e:
return False