(Re)Move some files

This commit is contained in:
Joeri Exelmans 2024-10-03 17:19:25 +02:00
parent 175edb64d9
commit 9faa5770a8
10 changed files with 4 additions and 1419 deletions

View file

@ -0,0 +1,349 @@
# This module contains a VF2-inspired graph matching algorithm
# Author: Joeri Exelmans
import itertools
from util.timer import Timer
# Like finding the 'strongly connected components', but edges are navigable
# in any direction.
def find_connected_components(graph):
    """Partition the vertices of `graph` into connected components.

    Edge direction is ignored: two vertices end up in the same component
    when one can be reached from the other by following edges either way.

    Returns a tuple `(vtx_to_component, component_to_vtxs)`:
    - `vtx_to_component` maps every discovered vertex to its component index,
    - `component_to_vtxs[i]` lists the vertices of component `i` in the
      order they were discovered.
    """
    vtx_to_component = {}
    component_to_vtxs = []
    # One visited-set per call. Relying on the default argument of
    # `add_recursively` would share visited vertices between calls, so a
    # second run over the same graph would report empty components.
    visited = set()
    for vtx in graph.vtxs:
        if vtx in vtx_to_component:
            continue
        component = len(component_to_vtxs)
        vtx_to_component[vtx] = component
        vtxs = []
        component_to_vtxs.append(vtxs)
        add_recursively(vtx, vtxs, vtx_to_component, component, visited)
    return (vtx_to_component, component_to_vtxs)
def add_recursively(vtx, vtxs: list, d: dict, component: int, already_visited: set = None):
    """Collect every vertex connected to `vtx`, ignoring edge direction.

    Each newly discovered vertex is appended to `vtxs`, recorded in `d` as
    `d[vertex] = component`, and added to `already_visited`. Vertices are
    discovered depth-first, following a vertex's outgoing edges before its
    incoming edges.

    `already_visited` may be passed in to share bookkeeping between calls;
    when omitted, a fresh set is used for this call only.
    """
    if already_visited is None:
        # A `set()` default would be created once and silently shared by
        # every call that omits the argument.
        already_visited = set()
    if vtx in already_visited:
        return

    def neighbours(v):
        for edge in v.outgoing:
            yield edge.tgt
        for edge in v.incoming:
            yield edge.src

    def visit(v):
        already_visited.add(v)
        vtxs.append(v)
        d[v] = component

    # Explicit stack of neighbour iterators: same discovery order as the
    # straightforward recursion, but not limited by the interpreter's
    # recursion depth on large components.
    visit(vtx)
    stack = [neighbours(vtx)]
    while stack:
        for candidate in stack[-1]:
            if candidate not in already_visited:
                visit(candidate)
                stack.append(neighbours(candidate))
                break
        else:
            stack.pop()
class Graph:
    """Plain container holding a list of vertices and a list of edges."""

    def __init__(self):
        # Both lists start empty; they are filled in from the outside.
        self.vtxs, self.edges = [], []
class Vertex:
    """A graph vertex carrying an arbitrary `value`.

    `incoming` and `outgoing` hold the edges that end at / start from this
    vertex; both start out empty.
    """

    def __init__(self, value):
        self.incoming, self.outgoing = [], []
        self.value = value

    def __repr__(self):
        return "V({})".format(self.value)
class Edge:
    """A directed edge from `src` to `tgt`, optionally carrying a `label`.

    Creating an edge registers it with its endpoints: it is appended to
    `src.outgoing` and to `tgt.incoming`.
    """

    # The annotations are written as forward references so the class does
    # not need `Vertex` to be resolvable at definition time.
    def __init__(self, src: "Vertex", tgt: "Vertex", label=None):
        self.src = src
        self.tgt = tgt
        self.label = label
        # Add ourselves to src/tgt vertices
        self.src.outgoing.append(self)
        self.tgt.incoming.append(self)

    def __repr__(self):
        # Identity test, so falsy labels such as 0 or "" are still shown.
        if self.label is not None:
            return f"({self.src}--{self.label}->{self.tgt})"
        return f"({self.src}->{self.tgt})"
class MatcherState:
    """One point in the matcher's search space: a partial guest->host mapping.

    States are treated as immutable: `grow_vtx` / `grow_edge` return a new
    state and copy only the dictionaries they change; everything else is
    shared with the state they were derived from.
    """

    def __init__(self):
        # Forward mappings: guest -> host
        self.mapping_vtxs = {}
        self.mapping_edges = {}
        # Reverse mappings: host -> guest
        self.r_mapping_vtxs = {}
        self.r_mapping_edges = {}
        # Vertices on either side that are not part of the mapping yet
        self.h_unmatched_vtxs = []
        self.g_unmatched_vtxs = []
        # The (guest, host) vertex pair that was added to the mapping most
        # recently, or None
        self.boundary = None

    @staticmethod
    def make_initial(host, guest):
        """Return the empty state in which every host/guest vertex is unmatched."""
        initial = MatcherState()
        initial.h_unmatched_vtxs = host.vtxs
        initial.g_unmatched_vtxs = guest.vtxs
        return initial

    def grow_edge(self, host_edge, guest_edge):
        """Return a new state that additionally maps `guest_edge` onto `host_edge`."""
        grown = MatcherState()
        # Nothing changes on the vertex side, so those containers are shared.
        grown.mapping_vtxs = self.mapping_vtxs
        grown.r_mapping_vtxs = self.r_mapping_vtxs
        grown.h_unmatched_vtxs = self.h_unmatched_vtxs
        grown.g_unmatched_vtxs = self.g_unmatched_vtxs
        # The edge mappings are copied and extended with the new pair.
        grown.mapping_edges = {**self.mapping_edges, guest_edge: host_edge}
        grown.r_mapping_edges = {**self.r_mapping_edges, host_edge: guest_edge}
        return grown

    def grow_vtx(self, host_vtx, guest_vtx):
        """Return a new state that additionally maps `guest_vtx` onto `host_vtx`.

        The pair also becomes the new state's `boundary`.
        """
        grown = MatcherState()
        # Nothing changes on the edge side, so those containers are shared.
        grown.mapping_edges = self.mapping_edges
        grown.r_mapping_edges = self.r_mapping_edges
        # The vertex mappings are copied and extended with the new pair.
        grown.mapping_vtxs = {**self.mapping_vtxs, guest_vtx: host_vtx}
        grown.r_mapping_vtxs = {**self.r_mapping_vtxs, host_vtx: guest_vtx}
        grown.h_unmatched_vtxs = [
            candidate for candidate in self.h_unmatched_vtxs if candidate != host_vtx
        ]
        grown.g_unmatched_vtxs = [
            candidate for candidate in self.g_unmatched_vtxs if candidate != guest_vtx
        ]
        grown.boundary = (guest_vtx, host_vtx)
        return grown

    def make_hashable(self):
        """Return a frozenset of all (guest, host) pairs, vertices and edges alike."""
        vtx_pairs = frozenset(self.mapping_vtxs.items())
        edge_pairs = frozenset(self.mapping_edges.items())
        return vtx_pairs | edge_pairs

    def __repr__(self):
        return f"VTXS: {self.mapping_vtxs!r}\nEDGES: {self.mapping_edges!r}"
class MatcherVF2:
    """VF2-inspired backtracking matcher that looks for occurrences of `guest` in `host`.

    `host` and `guest` are graph objects exposing `vtxs` and `edges`.
    `compare_fn(guest_vtx, host_vtx)` decides whether a guest vertex may be
    mapped onto a host vertex; its result is used as a truth value.
    """

    # Guest is the pattern
    def __init__(self, host, guest, compare_fn):
        """Store the inputs and pre-compute the connected components of the guest."""
        self.host = host
        self.guest = guest
        self.compare_fn = compare_fn

        # with Timer("find_connected_components - guest"):
        self.guest_vtx_to_component, self.guest_component_to_vtxs = find_connected_components(guest)

        # print("number of guest connected components:", len(self.guest_component_to_vtxs))

    def match(self):
        """Lazily yield one `MatcherState` per complete match.

        A state is complete when it maps as many vertices and edges as the
        guest graph has.
        """
        yield from self._match(
            state=MatcherState.make_initial(self.host, self.guest),
            already_visited=set())

    def _match(self, state, already_visited, indent=0):
        """Recursively extend `state`, yielding every complete state reachable from it.

        `already_visited` holds the hashable form of every state explored so
        far (it is shared by the whole search), so a state reached along
        several paths is only expanded once. `indent` is the recursion depth
        and is only used for the (disabled) debug trace.
        """
        # input()

        def print_debug(*args):
            # Tracing is disabled; this is a no-op unless the print below is restored.
            pass
            # print(" "*indent, *args) # uncomment to see a trace of the matching process

        print_debug("match")

        # Keep track of the states in the search space that we already visited
        hashable = state.make_hashable()
        if hashable in already_visited:
            print_debug(" SKIP - ALREADY VISITED")
            # print_debug(" ", hashable)
            return
        # print_debug(" ", [hash(a) for a in already_visited])
        # print_debug(" ADD STATE")
        # print_debug(" ", hash(hashable))
        already_visited.add(hashable)

        # Complete match: all guest vertices and all guest edges are mapped.
        if len(state.mapping_vtxs) == len(self.guest.vtxs) and len(state.mapping_edges) == len(self.guest.edges):
            print_debug("GOT MATCH:")
            print_debug(" ", state.mapping_vtxs)
            print_debug(" ", state.mapping_edges)
            yield state
            return

        def read_edge(edge, direction):
            """Return the vertex at the far end of `edge` when followed in `direction`."""
            if direction == "outgoing":
                return edge.tgt
            elif direction == "incoming":
                return edge.src
            else:
                raise Exception("wtf!")

        def attempt_grow(direction, indent):
            """Extend the match along edges of already-matched vertices.

            `direction` is "outgoing" or "incoming" and doubles as the name
            of the vertex attribute holding the edges to follow. For every
            matched (guest, host) vertex pair, each unmatched guest edge is
            paired with each unmatched host edge carrying the same label,
            after which the vertices at the far ends are tried as a pair.
            """
            for g_matched_vtx, h_matched_vtx in state.mapping_vtxs.items():
                print_debug('attempt_grow', direction)
                for g_candidate_edge in getattr(g_matched_vtx, direction):
                    print_debug('g_candidate_edge:', g_candidate_edge)
                    g_candidate_vtx = read_edge(g_candidate_edge, direction)
                    # g_to_skip_vtxs.add(g_candidate_vtx)
                    if g_candidate_edge in state.mapping_edges:
                        print_debug(" skip, guest edge already matched")
                        continue # skip already matched guest edge
                    for h_candidate_edge in getattr(h_matched_vtx, direction):
                        if g_candidate_edge.label != h_candidate_edge.label:
                            print_debug(" labels differ")
                            continue
                        print_debug('h_candidate_edge:', h_candidate_edge)
                        if h_candidate_edge in state.r_mapping_edges:
                            print_debug(" skip, host edge already matched")
                            continue # skip already matched host edge
                        print_debug('grow edge', g_candidate_edge, ':', h_candidate_edge, id(g_candidate_edge), id(h_candidate_edge))
                        new_state = state.grow_edge(h_candidate_edge, g_candidate_edge)
                        h_candidate_vtx = read_edge(h_candidate_edge, direction)
                        yield from attempt_match_vtxs(
                            new_state,
                            g_candidate_vtx,
                            h_candidate_vtx,
                            indent+1)
                        print_debug('backtrack edge', g_candidate_edge, ':', h_candidate_edge, id(g_candidate_edge), id(h_candidate_edge))

        def attempt_match_vtxs(state, g_candidate_vtx, h_candidate_vtx, indent):
            """Try to map `g_candidate_vtx` onto `h_candidate_vtx` and recurse on success.

            Note that the `state` parameter shadows the enclosing `state`.
            A pair that is already mapped onto each other is not rejected:
            it falls through the checks and the search continues from the
            re-grown state.
            """
            print_debug('attempt_match_vtxs')
            if g_candidate_vtx in state.mapping_vtxs:
                if state.mapping_vtxs[g_candidate_vtx] != h_candidate_vtx:
                    print_debug(" nope, guest already mapped (mismatch)")
                    return # guest vtx is already mapped but doesn't match host vtx
            if h_candidate_vtx in state.r_mapping_vtxs:
                if state.r_mapping_vtxs[h_candidate_vtx] != g_candidate_vtx:
                    print_debug(" nope, host already mapped (mismatch)")
                    return # host vtx is already mapped but doesn't match guest vtx
            # Cheap pruning: the host vertex needs at least as many outgoing
            # and incoming edges as the guest vertex.
            g_outdegree = len(g_candidate_vtx.outgoing)
            h_outdegree = len(h_candidate_vtx.outgoing)
            if g_outdegree > h_outdegree:
                print_debug(" nope, outdegree")
                return
            g_indegree = len(g_candidate_vtx.incoming)
            h_indegree = len(h_candidate_vtx.incoming)
            if g_indegree > h_indegree:
                print_debug(" nope, indegree")
                return
            # The user-supplied comparison is consulted last.
            if not self.compare_fn(g_candidate_vtx, h_candidate_vtx):
                print_debug(" nope, bad compare")
                return
            new_state = state.grow_vtx(
                h_candidate_vtx,
                g_candidate_vtx)
            print_debug('grow vtx', g_candidate_vtx, ':', h_candidate_vtx, id(g_candidate_vtx), id(h_candidate_vtx))
            yield from self._match(new_state, already_visited, indent+1)
            print_debug('backtrack vtx', g_candidate_vtx, ':', h_candidate_vtx, id(g_candidate_vtx), id(h_candidate_vtx))

        # First try to grow along the edges of what is already matched.
        print_debug('preferred...')
        yield from attempt_grow('outgoing', indent+1)
        yield from attempt_grow('incoming', indent+1)

        # Then try pairing guest vertices with arbitrary unmatched host vertices.
        print_debug('least preferred...')
        if state.boundary != None:
            g_boundary_vtx, _ = state.boundary
            guest_boundary_component = self.guest_vtx_to_component[g_boundary_vtx]
            # only try guest vertices that are in a different component (all vertices in the same component are already discovered via 'attempt_grow')
            guest_components_to_try = (c for i,c in enumerate(self.guest_component_to_vtxs) if i != guest_boundary_component)
            # for the host vertices however, we have to try them from all components, because different connected components of our pattern (=guest) could be mapped onto the same connected component in the host
        else:
            # Nothing matched yet: every guest component is a candidate.
            guest_components_to_try = self.guest_component_to_vtxs

        for g_candidate_vtxs in guest_components_to_try:
            for g_candidate_vtx in g_candidate_vtxs:
                if g_candidate_vtx in state.mapping_vtxs:
                    print_debug("skip (already matched)", g_candidate_vtx)
                    continue
                for h_candidate_vtx in state.h_unmatched_vtxs:
                    yield from attempt_match_vtxs(state, g_candidate_vtx, h_candidate_vtx, indent+1)

        if indent == 0:
            print_debug('visited', len(already_visited), 'states total')
# demo time...
if __name__ == "__main__":
    import time

    def run_demo(matcher, iterations=1):
        """Time `iterations` complete matching runs, then print the matches of the last run."""
        durations = 0
        print("Patience...")
        for n in range(iterations):
            time_start = time.perf_counter_ns()
            matches = list(matcher.match())
            time_end = time.perf_counter_ns()
            time_duration = time_end - time_start
            durations += time_duration
        print(f'{iterations} iterations, took {durations/1000000:.3f} ms, {durations/iterations/1000000:.3f} ms per iteration')
        print("found", len(matches), "matches")
        for mm in matches:
            print("match:")
            print(" ", mm.mapping_vtxs)
            print(" ", mm.mapping_edges)

    # Demo 1: guest vertex values are Python expressions over the host value `v`.
    host = Graph()
    host.vtxs = [Vertex(0), Vertex(1), Vertex(2), Vertex(3)]
    host.edges = [
        Edge(host.vtxs[0], host.vtxs[1]),
        Edge(host.vtxs[1], host.vtxs[2]),
        Edge(host.vtxs[2], host.vtxs[0]),
        Edge(host.vtxs[2], host.vtxs[3]),
        Edge(host.vtxs[3], host.vtxs[2]),
    ]
    guest = Graph()
    guest.vtxs = [
        # cannot be matched with Vertex(3); with the reverse edge below
        # enabled, changing this to 'True' gives 2 morphisms instead of one
        Vertex('v != 3'),
        Vertex('True')]  # can be matched with any node
    guest.edges = [
        # Enable the second edge to look for a simple loop:
        Edge(guest.vtxs[0], guest.vtxs[1]),
        # Edge(guest.vtxs[1], guest.vtxs[0]),
    ]
    # NOTE: eval() is acceptable here only because the expressions are the
    # literals above; never feed it untrusted input.
    m = MatcherVF2(host, guest, lambda g_vtx, h_vtx: eval(g_vtx.value, {}, {'v':h_vtx.value}))
    run_demo(m)

    print("######################")

    # Demo 2: vertices match when their values are equal; the guest consists
    # of two connected components (a pony loop and a lone bear).
    host = Graph()
    host.vtxs = [
        Vertex('pony'), # 1
        Vertex('pony'), # 3
        Vertex('bear'),
        Vertex('bear'),
    ]
    host.edges = [
        # match:
        Edge(host.vtxs[0], host.vtxs[1]),
        Edge(host.vtxs[1], host.vtxs[0]),
    ]
    guest = Graph()
    guest.vtxs = [
        Vertex('pony'), # 0
        Vertex('pony'), # 1
        Vertex('bear')]
    guest.edges = [
        Edge(guest.vtxs[0], guest.vtxs[1]),
        Edge(guest.vtxs[1], guest.vtxs[0]),
    ]
    m = MatcherVF2(host, guest, lambda g_vtx, h_vtx: g_vtx.value == h_vtx.value)
    run_demo(m)

View file

@ -0,0 +1,313 @@
from state.base import State
from uuid import UUID
from services.bottom.V0 import Bottom
from services.scd import SCD
from services.od import OD
from transformation.matcher.matcher import Graph, Edge, Vertex, MatcherVF2
from transformation import ramify
import itertools
import re
import functools
from util.timer import Timer
from services.primitives.integer_type import Integer
class _is_edge:
    """Marker type whose textual and JSON forms are both "EDGE"."""

    def to_json(self):
        return "EDGE"

    def __repr__(self):
        return "EDGE"


# The one shared instance. No __eq__ is defined, so it compares equal only
# to itself and can serve as a unique symbol.
IS_EDGE = _is_edge()
class _is_modelref:
    """Marker type whose textual and JSON forms are both "REF"."""

    def to_json(self):
        return "REF"

    def __repr__(self):
        return "REF"


# The one shared instance; it compares equal only to itself.
IS_MODELREF = _is_modelref()
# class IS_TYPE:
# def __init__(self, type):
# # mvs-node of the type
# self.type = type
# def __repr__(self):
# return f"TYPE({str(self.type)[-4:]})"
class NamedNode(Vertex):
    """A vertex that additionally remembers the name of the node in its model.

    The matcher ignores `name` by default; it is used to report matches in
    terms of names.
    """

    def __init__(self, value, name):
        super().__init__(value)
        self.name = name
# MVS-nodes become vertices
class MVSNode(NamedNode):
    """Vertex standing for an MVS node.

    `value` is the value stored in the node, `name` its name in the model,
    and `node_id` the identifier of the underlying MVS element.
    """

    def __init__(self, value, node_id, name):
        super().__init__(value, name)
        # useful for debugging
        self.node_id = node_id

    def __repr__(self):
        # Identity test for None; string values are shown in double quotes.
        if self.value is None:
            return f"N({self.name})"
        if isinstance(self.value, str):
            return f"N({self.name}=\"{self.value}\")"
        return f"N({self.name}={self.value})"
# MVS-edges become vertices.
class MVSEdge(NamedNode):
    """Vertex standing for an MVS edge; its value is always `IS_EDGE`."""

    def __init__(self, node_id, name):
        super().__init__(IS_EDGE, name)
        # Identifier of the underlying MVS element, useful for debugging.
        self.node_id = node_id

    def __repr__(self):
        return "E({})".format(self.name)
# Dirty way of detecting whether a node is a ModelRef: its string value has
# the canonical lowercase 8-4-4-4-12 hexadecimal UUID form.
# Only hex digits are accepted and the pattern is anchored at the end, so
# that every string it matches can also be parsed by `uuid.UUID` (which
# raises ValueError on non-hex characters or on a wrong number of digits).
UUID_REGEX = re.compile(r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\Z")
# Converts an object diagram in MVS state to the pattern matcher graph type.
# ModelRefs are NOT flattened (anymore): a vertex whose value refers to
# another model only gets a `modelref` attribute (see below).
def model_to_graph(state: State, model: UUID, metamodel: UUID, prefix=""):
    """Build a matcher `Graph` from the elements of `model`.

    Every element found under a key of `model` becomes one vertex whose name
    is `prefix` + key:
    - MVS edges become `MVSEdge` vertices, connected to the vertices of
      their source and target by graph edges labelled "outgoing" and "tgt"
      respectively (only when that endpoint is itself part of the model);
    - all other elements become `MVSNode` vertices carrying the element's
      value, or `IS_MODELREF` when the value is a string that looks like a
      UUID.

    Vertices of objects, slots and links typed by the classes, attributes
    and associations of `metamodel` additionally get a `typ` attribute;
    model-reference vertices get a `modelref` attribute.

    Returns the resulting `Graph`.
    """
    # with Timer("model_to_graph"):
    # NOTE(review): `od` is only referenced by commented-out code below.
    # Also, match_od constructs OD(host_mm, host_m, state), i.e. with the
    # first two arguments the other way round - confirm which order OD expects.
    od = OD(model, metamodel, state)
    scd = SCD(model, state)
    scd_mm = SCD(metamodel, state)

    bottom = Bottom(state)

    graph = Graph()

    # Filled in as a side effect of to_vtx():
    mvs_edges = []  # every element that turned out to be an MVS edge
    modelrefs = {}  # element -> (UUID of the referred model, vertex name)

    # constraints = {}

    def to_vtx(el, name):
        """Create the vertex for element `el`, recording edges and model refs on the way."""
        # print("name:", name)
        if bottom.is_edge(el):
            # if filter_constraint:
            #     try:
            #         supposed_obj = bottom.read_edge_source(el)
            #         slot_node = od.get_slot(supposed_obj, "constraint")
            #         if el == slot_node:
            #             # `el` is the constraint-slot
            #             constraints[supposed_obj] = el
            #             return
            #     except:
            #         pass
            mvs_edges.append(el)
            return MVSEdge(el, name)
        # If the value of the el is a ModelRef (only way to detect this is to match a regex - not very clean), then extract it. We'll create a link to the referred model later.
        value = bottom.read_value(el)
        if isinstance(value, str):
            if UUID_REGEX.match(value) != None:
                # side-effect
                modelrefs[el] = (UUID(value), name)
                return MVSNode(IS_MODELREF, el, name)
        return MVSNode(value, el, name)

    # MVS-Nodes become vertices
    uuid_to_vtx = { node: to_vtx(node, prefix+key) for key in bottom.read_keys(model) for node in bottom.read_outgoing_elements(model, key) }
    graph.vtxs = [ vtx for vtx in uuid_to_vtx.values() ]

    # For every MVS-Edge, two edges are created (for src and tgt)
    for mvs_edge in mvs_edges:
        mvs_src = bottom.read_edge_source(mvs_edge)
        if mvs_src in uuid_to_vtx:
            graph.edges.append(Edge(
                src=uuid_to_vtx[mvs_src],
                tgt=uuid_to_vtx[mvs_edge],
                label="outgoing"))
        mvs_tgt = bottom.read_edge_target(mvs_edge)
        if mvs_tgt in uuid_to_vtx:
            graph.edges.append(Edge(
                src=uuid_to_vtx[mvs_edge],
                tgt=uuid_to_vtx[mvs_tgt],
                label="tgt"))

    for node, (ref_m, name) in modelrefs.items():
        vtx = uuid_to_vtx[node]
        # Get MM of ref'ed model
        # (the unpacking requires exactly one "Morphism" target)
        ref_mm, = bottom.read_outgoing_elements(node, "Morphism")
        # print("modelref type node:", type_node)

        # Recursively convert ref'ed model to graph
        # ref_graph = model_to_graph(state, ref_m, ref_mm, prefix=name+'/')

        # Remember (model, metamodel) of the referred model on the vertex
        vtx.modelref = (ref_m, ref_mm)

        # We no longer flatten:

        # # Flatten and create link to ref'ed model
        # graph.vtxs += ref_model.vtxs
        # graph.edges += ref_model.edges
        # graph.edges.append(Edge(
        #     src=uuid_to_vtx[node],
        #     tgt=ref_model.vtxs[0], # which node to link to?? dirty
        #     label="modelref"))

    def add_types(node):
        """Store the type of `node` (its single "Morphism" target) on its vertex."""
        vtx = uuid_to_vtx[node]
        type_node, = bottom.read_outgoing_elements(node, "Morphism")

        # Put the type straight into the Vertex-object
        # The benefit is that our Vertex-matching callback can then be coded cleverly, look at the types first, resulting in better performance
        vtx.typ = type_node

        # The old approach (creating special vertices containing the types), commented out:

        # print('node', node, 'has type', type_node)
        # We create a Vertex storing the type
        # type_vertex = Vertex(value=IS_TYPE(type_node))
        # graph.vtxs.append(type_vertex)
        # type_edge = Edge(
        #     src=uuid_to_vtx[node],
        #     tgt=type_vertex,
        #     label="type")
        # # print(type_edge)
        # graph.edges.append(type_edge)


    # Add typing information for:
    #  - classes
    #  - attributes
    #  - associations
    for class_name, class_node in scd_mm.get_classes().items():
        objects = scd.get_typed_by(class_node)
        # print("typed by:", class_name, objects)
        for obj_name, obj_node in objects.items():
            add_types(obj_node)
        for attr_name, attr_node in scd_mm.get_attributes(class_name).items():
            attrs = scd.get_typed_by(attr_node)
            for slot_name, slot_node in attrs.items():
                add_types(slot_node)
    for assoc_name, assoc_node in scd_mm.get_associations().items():
        objects = scd.get_typed_by(assoc_node)
        # print("typed by:", assoc_name, objects)
        for link_name, link_node in objects.items():
            add_types(link_node)

    return graph
def match_od(state, host_m, host_mm, pattern_m, pattern_mm):
    """Yield every match of the pattern model in the host model.

    Both models are converted with `model_to_graph` and matched with
    `MatcherVF2`. Each match is yielded as a dict mapping the name of a
    pattern (guest) vertex to the name of the host vertex it was matched
    with; only pairs in which both vertices are `NamedNode`s are reported.
    """

    # Function object for pattern matching. Decides whether to match host and guest vertices, where guest is a RAMified instance (e.g., the attributes are all strings with Python expressions), and the host is an instance (=object diagram) of the original model (=class diagram)
    class RAMCompare:
        def __init__(self, bottom, host_od):
            self.bottom = bottom
            self.host_od = host_od

            type_model_id = bottom.state.read_dict(bottom.state.read_root(), "SCD")
            self.scd_model = UUID(bottom.state.read_value(type_model_id))

        def is_subtype_of(self, supposed_subtype: UUID, supposed_supertype: UUID):
            """Return True if `supposed_subtype` equals or (transitively) inherits from `supposed_supertype`."""
            if supposed_subtype == supposed_supertype:
                # reflexive:
                return True

            inheritance_node, = self.bottom.read_outgoing_elements(self.scd_model, "Inheritance")

            for outgoing in self.bottom.read_outgoing_edges(supposed_subtype):
                if inheritance_node in self.bottom.read_outgoing_elements(outgoing, "Morphism"):
                    # 'outgoing' is an inheritance link
                    supertype = self.bottom.read_edge_target(outgoing)
                    # do not follow an inheritance link that points back at the type itself
                    if supertype != supposed_subtype:
                        if self.is_subtype_of(supertype, supposed_supertype):
                            return True

            return False

        def match_types(self, g_vtx_type, h_vtx_type):
            """Return True if the host type is a subtype of the guest type's original type."""
            # types only match with their supertypes
            # we assume that 'RAMifies'-traceability links have been created between guest and host types
            try:
                g_vtx_original_type = ramify.get_original_type(self.bottom, g_vtx_type)
            except Exception:
                # No original type could be determined -> no match.
                # (`Exception` rather than a bare `except`, so that
                # KeyboardInterrupt / SystemExit are not swallowed.)
                return False

            return self.is_subtype_of(h_vtx_type, g_vtx_original_type)

        # Memoizing the result of comparison gives a huge performance boost!
        # Especially `is_subtype_of` is very slow, and will be performed many times over on the same pair of nodes during the matching process.
        # Assuming the model is not altered *during* matching, this is safe.
        @functools.cache
        def __call__(self, g_vtx, h_vtx):
            """Decide whether guest vertex `g_vtx` may be mapped onto host vertex `h_vtx`."""
            # First check if the types match (if we have type-information)
            if hasattr(g_vtx, 'typ'):
                if not hasattr(h_vtx, 'typ'):
                    # if guest has a type, host must have a type
                    return False
                return self.match_types(g_vtx.typ, h_vtx.typ)

            # Model references match when the referred models match exactly once
            if hasattr(g_vtx, 'modelref'):
                if not hasattr(h_vtx, 'modelref'):
                    return False

                g_ref_m, g_ref_mm = g_vtx.modelref
                h_ref_m, h_ref_mm = h_vtx.modelref
                nested_matches = list(match_od(state, h_ref_m, h_ref_mm, g_ref_m, g_ref_mm))

                # print('nested_matches:', nested_matches)

                if len(nested_matches) == 0:
                    return False
                elif len(nested_matches) == 1:
                    return True
                else:
                    raise Exception("We have a problem: there is more than 1 match in the nested models.")

            # Then, match by value

            if g_vtx.value is None:
                return h_vtx.value is None

            # mvs-edges (which are converted to vertices) only match with mvs-edges
            if g_vtx.value == IS_EDGE:
                return h_vtx.value == IS_EDGE

            if h_vtx.value == IS_EDGE:
                return False

            if g_vtx.value == IS_MODELREF:
                return h_vtx.value == IS_MODELREF

            if h_vtx.value == IS_MODELREF:
                return False

            # The guest value is evaluated as a Python expression in which
            # `v` is the host value. A failing evaluation counts as "no match".
            # NOTE(security): eval() executes arbitrary code; pattern models
            # must come from a trusted source.
            try:
                return eval(g_vtx.value, {}, {
                    'v': h_vtx.value,
                })
            except Exception:
                return False

    # Convert to format understood by matching algorithm
    host = model_to_graph(state, host_m, host_mm)
    guest = model_to_graph(state, pattern_m, pattern_mm)

    # NOTE(review): model_to_graph constructs OD(model, metamodel, state),
    # i.e. with the first two arguments the other way round - confirm which
    # order OD expects.
    matcher = MatcherVF2(host, guest, RAMCompare(Bottom(state), OD(host_mm, host_m, state)))
    for m in matcher.match():
        # print("\nMATCH:\n", m)
        # Convert mapping: report the match in terms of vertex names
        yield {
            guest_vtx.name: host_vtx.name
            for guest_vtx, host_vtx in m.mapping_vtxs.items()
            if isinstance(guest_vtx, NamedNode) and isinstance(host_vtx, NamedNode)
        }