528 lines
No EOL
20 KiB
Python
528 lines
No EOL
20 KiB
Python
# coding: utf-8
|
|
|
|
"""
|
|
Author: Sten Vercamman
|
|
University of Antwerp
|
|
|
|
Example code for paper: Efficient model transformations for novices
|
|
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
|
|
|
|
The main goal of this code is to give an overview, and an understandable
|
|
implementation, of known techniques for pattern matching and solving the
|
|
sub-graph homomorphism problem. The presented techniques do not include
|
|
performance adaptations/optimizations. It is not optimized to be efficient
|
|
but rather for the ease of understanding the workings of the algorithms.
|
|
The paper does list some possible extensions/optimizations.
|
|
|
|
It is intended as a guideline, even for novices, and provides an in-depth look
|
|
at the workings behind various techniques for efficient pattern matching.
|
|
"""
|
|
|
|
from searchGraph import *
|
|
from enum import *
|
|
|
|
# Enum of all primitive operation types; the members are used as labels on the PlanGraph edges.
|
|
# NOTE: 'inc' stands for the primitive operation "in"; it is spelled 'inc' because `in` is a reserved keyword in Python.
|
|
PRIM_OP = Enum(['lkp', 'inc', 'out', 'src', 'tgt'])
|
|
|
|
class PlanGraph(object):
    """
    Holds the PlanGraph for a pattern.
    Can create the search plan of the pattern for a given SearchGraph.

    Every vertex and every edge of the pattern is represented by one plan
    vertex (stored in self.vertices). The plan edges (stored in self.edges)
    are labelled with the primitive operation (PRIM_OP) that binds the
    target element once the source element is bound. An extra root vertex
    (self.root, not part of self.vertices) has an lkp edge to every plan
    vertex.
    """

    def __init__(self, pattern):
        """
        Builds the PlanGraph for the given pattern.

        pattern: the pattern, must be a Graph. Its vertices and edges
                 attributes are iterated as {type: [elements]} mappings.

        Raises a TypeError if pattern is not a Graph.
        """
        if not isinstance(pattern, Graph):
            raise TypeError('PlanGraph expects the pattern to be a Graph')
        # member variables:
        self.vertices = []  # will not be searched in
        self.edges = []     # will not be searched in

        # representation map, maps vertex from pattern to element from
        # PlanGraph (no need for edges)
        repr_map = {}

        # 1.1: for every vertex in the pattern graph,
        #      create a vertex representing the pattern element
        for str_type, vertices in pattern.vertices.items():
            for vertex in vertices:
                # we only need to know the type of the vertex
                plan_vertex = Vertex(str_type)
                # and we need to know that it was a vertex
                plan_vertex.is_vertex = True
                # needed to re-link the pattern edges further on
                repr_map[vertex] = plan_vertex
                self.vertices.append(plan_vertex)

        # 1.2: for every edge in the pattern graph,
        #      create a vertex representing the pattern element
        for edges in pattern.edges.values():
            for edge in edges:
                # we only need to know the type of the edge
                plan_vertex = Vertex(edge.type)
                # and we need to know that it was an edge
                plan_vertex.is_vertex = False
                self.vertices.append(plan_vertex)

                # 4: for every element x from the PlanGraph that represents
                #    an edge e in the pattern:
                # 4.1: create an edge labelled tgt from x to the plan vertex
                #      representing the target vertex of e, and a reverted
                #      edge labelled in.
                # tgt and src cost are always 1; we use logarithmic cost
                # (=> cost = ln(1) = 0.0) so that we can minimize a sum
                # instead of a product (ln(c1...ck) = ln(c1) + ... + ln(ck))
                tgt_vertex = repr_map[edge.tgt]
                self._addPlanEdge(plan_vertex, tgt_vertex, PRIM_OP.tgt, 0.0)
                self._addPlanEdge(tgt_vertex, plan_vertex, PRIM_OP.inc)

                # 4.2: create an edge labelled src from x to the plan vertex
                #      representing the source vertex of e, and a reverted
                #      edge labelled out.
                src_vertex = repr_map[edge.src]
                self._addPlanEdge(plan_vertex, src_vertex, PRIM_OP.src, 0.0)
                self._addPlanEdge(src_vertex, plan_vertex, PRIM_OP.out)

        # 2: create a root vertex (it is not added to self.vertices)
        self.root = Vertex('root')

        # 3: for each element in the PlanGraph (that is not the root vertex),
        #    create an edge from the root to it, and label it lkp
        for vertex in self.vertices:
            self._addPlanEdge(self.root, vertex, PRIM_OP.lkp)

    def _addPlanEdge(self, src, tgt, label, cost=None):
        """
        Creates a plan edge from src to tgt labelled with label, links it to
        both plan vertices and stores it in self.edges.

        cost: when given, it is stored as the cost and the original cost of
              the edge; when None, no cost attribute is assigned here
              (updatePlanCost assigns the cost of lkp, in and out edges).

        Returns the created edge.
        """
        plan_edge = Edge(src, tgt)
        # backup src and tgt (Edmonds might override it)
        plan_edge.orig_src = plan_edge.src
        plan_edge.orig_tgt = plan_edge.tgt
        plan_edge.label = label
        # link vertices connected to this plan_edge
        plan_edge.src.addOutgoingEdge(plan_edge)
        plan_edge.tgt.addIncomingEdge(plan_edge)
        if cost is not None:
            plan_edge.cost = cost
            # backup orig cost, as Edmonds changes cost
            plan_edge.orig_cost = plan_edge.cost
        self.edges.append(plan_edge)
        return plan_edge

    def updatePlanCost(self, graph):
        """
        Updates the cost of every plan edge for the given SearchGraph.

        returns True if successfully updated cost,
        returns False if a type in the pattern is not in the graph.

        Raises a TypeError if graph is not a SearchGraph.
        """
        if not isinstance(graph, SearchGraph):
            raise TypeError('updatePlanCost expects a SearchGraph')

        # Edmonds overrides src/tgt of plan edges while contracting cycles.
        # Restore the original endpoints first, so that the costs below are
        # computed on the real plan vertices, also when the plan is computed
        # more than once for the same PlanGraph.
        for edge in self.edges:
            edge.src = edge.orig_src
            edge.tgt = edge.orig_tgt

        # update lkp, in and out (src and tgt are constant)
        for edge in self.edges:
            if edge.label == PRIM_OP.lkp:
                edge.cost = graph.getCostLkp(edge.tgt.type, edge.tgt.is_vertex)
                if edge.cost is None:
                    print('failed lkp')
                    return False
            elif edge.label == PRIM_OP.inc:
                # in(v, e), binds an incoming edge e from an already bound
                # vertex v, depends on the number of incoming edges of type e
                # for the vertex type
                edge.cost = graph.getCostInc(edge.src.type, edge.tgt.type)
                if edge.cost is None:
                    print('failed in')
                    return False
            elif edge.label == PRIM_OP.out:
                # (analogue for out(v, e))
                edge.cost = graph.getCostOut(edge.src.type, edge.tgt.type)
                if edge.cost is None:
                    print('failed out')
                    return False
            else:
                # src and tgt have a constant cost; undo the modification a
                # previous Edmonds run might have applied to it
                edge.cost = edge.orig_cost
            # backup orig cost, as Edmonds changes cost
            edge.orig_cost = edge.cost
        return True

    @staticmethod
    def _getCycle(vertex, reverse_graph, visited):
        """
        Walks from vertex towards the root. We walk in reverse order: each
        vertex has exactly one selected incoming edge in reverse_graph, so we
        move to the source of that edge.

        vertex:        vertex to start walking from
        reverse_graph: {vertex: selected incoming edge}
        visited:       set of vertices handled by earlier walks; it must
                       contain the root. All vertices walked on are added.

        Returns None when the walk reaches an already visited vertex.
        Returns the set of vertices forming the cycle when the walk reaches a
        vertex of its own path (the tail leading into the cycle is removed).
        """
        walked = []           # keeps the order in which we walked
        current_path = set()  # same vertices, for fast membership tests
        while vertex not in visited:
            if vertex in current_path:
                # We found a cycle, but the walk might have started on a tail
                # that leads into the cycle without being part of it. The
                # tail was walked first, so it is the prefix of walked up to
                # the first occurrence of the vertex closing the cycle.
                for tail_part in walked:
                    if tail_part == vertex:
                        break
                    current_path.remove(tail_part)
                visited.update(walked)
                return current_path
            current_path.add(vertex)
            walked.append(vertex)
            # vertex <--(minimal edge)-- src
            vertex = reverse_graph[vertex].src

        # no cycle found (the current path led to a visited vertex)
        visited.update(walked)
        return None

    class _SortedContainer(object):
        """
        A container that keeps elements sorted based on a given sortValue.
        Elements with the same value are returned in the order they got
        inserted.
        """

        def __init__(self):
            # member variables:
            self.keys = []    # distinct sortValues (sorted when pop gets called)
            self.sorted = {}  # {sortValue: [elems with that sortValue]}

        def add(self, sortValue, element):
            """
            Adds element with sortValue to the container.
            """
            elems = self.sorted.get(sortValue)
            if elems is None:
                self.sorted[sortValue] = [element]
                self.keys.append(sortValue)
            else:
                elems.append(element)

        def pop(self):
            """
            Removes and returns the element with the smallest sortValue; of
            the elements sharing that value, the one inserted first.
            Raises an IndexError when the container is empty.
            """
            self.keys.sort()
            smallest = self.keys[0]
            elems = self.sorted[smallest]
            # pop(0), not pop(): equal values must leave in insertion order
            elem = elems.pop(0)
            if not elems:
                del self.sorted[smallest]
                del self.keys[0]
            return elem

        def empty(self):
            """
            Returns whether or not the container is empty.
            """
            return len(self.keys) == 0

    @staticmethod
    def _createPrimOp(edge, inc_cost=True):
        """
        Returns the tuple describing the primitive operation of a plan edge:
          in / out:  (op, vertex type, actual edge type)
          lkp:       (op, vertex/edge type, is vertex or edge)
          src / tgt: (op, actual edge type)
        When inc_cost is True, the current cost of the edge is appended.
        """
        # NOTE(review): this reports edge.cost, which Edmonds may have
        # modified, not edge.orig_cost -- confirm which one is intended.
        if edge.label == PRIM_OP.inc or edge.label == PRIM_OP.out:
            data = (edge.label, edge.orig_src.type, edge.orig_tgt.type)
        elif edge.label == PRIM_OP.lkp:
            data = (edge.label, edge.orig_tgt.type, edge.orig_tgt.is_vertex)
        else:
            data = (edge.label, edge.orig_src.type)
        if inc_cost:
            data += (edge.cost,)
        return data

    def Edmonds(self, searchGraph):
        """
        Returns the minimum directed spanning tree (MDST) for the pattern
        and the provided graph, as a list of primitive operations (see
        _createPrimOp) in a best first order.
        Returns None if it is impossible to find the pattern in the Graph
        (vertex type or edge type from pattern not in Graph).
        """
        # update the cost for the PlanGraph
        # (returns False if a type in the pattern can not be found in the graph)
        if not self.updatePlanCost(searchGraph):
            print('type in pattern not found in Graph (in Edmonds)')
            return None

        # Complete Edmonds algorithm has optimization steps:
        #   a: remove edges entering the root
        #   b: merge parallel edges from same src to same tgt with min weight
        # we can ignore this as:
        #   a: the root does not have incoming edges
        #   b: the PlanGraph does not have such parallel edges

        # 1: for each node v (other than root), select the incoming edge
        #    with the lowest weight
        pi_v = {}
        for plan_vertex in self.vertices:
            min_weight = float('infinity')
            min_edge = None
            for plan_edge in plan_vertex.incoming_edges:
                if plan_edge.cost < min_weight:
                    min_weight = plan_edge.cost
                    min_edge = plan_edge
            if min_edge is None:
                raise RuntimeError('baka: no min_edge found')
            # save plan_vertex and its minimum incoming edge
            pi_v[plan_vertex] = min_edge

        class VertexGraph(Vertex):
            """
            Acts as a super vertex, holds a subgraph (that is/was once a
            cycle). Used for Edmonds' contraction step.
            The incoming edges are the edges leading to the vertices in the
            VertexGraph (they exclude edges from a vertex in the cycle to
            another vertex in the cycle). Analogue for outgoing edges.
            """

            def __init__(self, cycle, reverseGraph):
                """
                Contracts cycle (a set of vertices) into this VertexGraph
                and updates reverseGraph ({vertex: selected incoming edge})
                accordingly.
                """
                # the type is the concatenation of the contracted types
                str_type = ''
                for vertex in cycle:
                    str_type += str(vertex.type)
                Vertex.__init__(self, str_type)
                # member variables:
                self.internalMDST = {}  # {cycle vertex: selected incoming edge}

                minIntWeight = self.findMinIntWeight(cycle, reverseGraph)
                self.updateMinExtEdge(minIntWeight, reverseGraph)

            def findMinIntWeight(self, cycle, reverseGraph):
                """
                Returns the smallest cost of the internal incoming edges of
                the cycle. Also collects the external incoming and outgoing
                edges of the cycle on this VertexGraph and saves the
                internalMDST (at this point still a cycle).
                Raises a TypeError if the selected incoming edge of a cycle
                vertex does not start in the cycle.
                """
                minIntWeight = float('infinity')
                for vertex in cycle:
                    for inc_edge in vertex.incoming_edges:
                        if inc_edge.src in cycle:
                            # edge from within the cycle
                            minIntWeight = min(minIntWeight, inc_edge.cost)
                        else:
                            # edge from outside the cycle
                            self.addIncomingEdge(inc_edge)
                    for out_edge in vertex.outgoing_edges:
                        if out_edge.tgt not in cycle:
                            # edge leaves the cycle, it now starts at this
                            # VertexGraph
                            self.addOutgoingEdge(out_edge)
                            out_edge.src = self
                    # save internal MDST
                    min_edge = reverseGraph[vertex]
                    if min_edge.src not in cycle:
                        raise TypeError('how is this a cycle')
                    self.internalMDST[vertex] = min_edge
                return minIntWeight

            def updateMinExtEdge(self, minIntWeight, reverseGraph):
                """
                Modifies the cost of the plausible external incoming edges
                and selects the one with the minimum modified cost. That
                edge breaks the cycle: the internalMDST goes from a cycle to
                an MDST, and in reverseGraph the cycle vertices are replaced
                by this VertexGraph.
                Raises a RuntimeError if no external edge can break the
                cycle.
                """
                minExt = None
                minModWeight = float('infinity')

                for inc_edge in self.incoming_edges:
                    # A vertex of the cycle can itself be a contracted part
                    # of the graph (a VertexGraph). Such a vertex has
                    # incoming edges to several of its members, but only the
                    # edges entering it at the same original target as its
                    # currently selected edge may break the cycle.
                    chosen = reverseGraph[inc_edge.tgt]
                    if chosen.orig_tgt == inc_edge.orig_tgt:
                        # modified cost: cost of inc_edge - (cost of the
                        # previously chosen edge to the cycle vertex
                        # - minIntWeight)
                        inc_edge.cost -= (chosen.cost - minIntWeight)
                        if minExt is None or minModWeight > inc_edge.cost:
                            # save better edge from outside of the cycle
                            minExt = inc_edge
                            minModWeight = inc_edge.cost

                if minExt is None:
                    raise RuntimeError('no external edge found to break the cycle')

                # The cycle is contracted into this single VertexGraph:
                # remove all cycle vertices from the reverseGraph
                for int_vertex in self.internalMDST:
                    del reverseGraph[int_vertex]

                # minExt replaces the cycle edge entering its target; this
                # breaks the cycle held by the internalMDST
                del self.internalMDST[minExt.tgt]

                # all external incoming edges now lead to this VertexGraph
                for inc_edge in self.incoming_edges:
                    inc_edge.tgt = self

                # save minExt edge to this VertexGraph in the reverseGraph
                reverseGraph[self] = minExt

        while True:
            # 2: find all cycles
            cycles = []
            # the root does not have incoming edges, it can not be part of a
            # cycle (_getCycle depends on the root being in visited)
            visited = set([self.root])
            for vertex in list(pi_v):
                if vertex not in visited:
                    cycle = self._getCycle(vertex, pi_v, visited)
                    if cycle is not None:
                        cycles.append(cycle)

            # if the set of edges {pi(v), v} does not contain any cycles,
            # then we found our minimum directed spanning tree
            if not cycles:
                break

            # 3: break each cycle by contracting it into a VertexGraph:
            #   3a: find internal incoming edge with the smallest cost
            #   3b: modify the cost of each arc which enters the cycle
            #   3c: replace the smallest internal edge with the modified
            #       edge which has the smallest cost
            # The constructor changes pi_v accordingly (removes the cycle
            # vertices, adds the VertexGraph). This terminates as the graph
            # shrinks with every contracted cycle.
            for cycle in cycles:
                VertexGraph(cycle, pi_v)

        def flattenReverseGraph(vertex, inc_edge, reverseGraph):
            """
            Flattens the (contracted) reverse graph: stores the selected
            incoming edge of every original plan vertex in reverseGraph,
            descending into the internalMDST of the VertexGraphs.
            """
            if not isinstance(vertex, VertexGraph):
                reverseGraph[vertex] = inc_edge
            else:
                reverseGraph[inc_edge.orig_tgt] = inc_edge
                for vg, eg in inc_edge.tgt.internalMDST.items():
                    flattenReverseGraph(vg, eg, reverseGraph)
            # The source can be a VertexGraph that is no longer a key of any
            # internalMDST (its entry was removed to break a cycle); its
            # internal edges are reached through the edges leaving it.
            if isinstance(inc_edge.src, VertexGraph):
                for vg, eg in inc_edge.src.internalMDST.items():
                    flattenReverseGraph(vg, eg, reverseGraph)

        def createForwardGraph(vertex, inc_edge, forwardGraph):
            """
            Adds inc_edge to the outgoing edges of its original source in
            forwardGraph, keeping in mind that there can be VertexGraphs in
            the reverse graph.
            """
            forwardGraph.setdefault(inc_edge.orig_src, []).append(inc_edge)
            if isinstance(vertex, VertexGraph):
                for vg, eg in vertex.internalMDST.items():
                    createForwardGraph(vg, eg, forwardGraph)

        MDST = []
        # pi_v contains {vertex: incoming_edge}; we want to start from the
        # root and follow the outgoing edges, so we have to build the
        # forward graph for pi_v. (Except for the root (has 0), each vertex
        # has exactly one incoming edge, but might have multiple outgoing
        # edges.)
        forwardGraph = {}  # {vertex: [outgoing edge 1, ...]}
        reverseGraph = {}

        # flatten reverseGraph (for the VertexGraph elements)
        for v, e in pi_v.items():
            flattenReverseGraph(v, e, reverseGraph)

        # create the forwardGraph
        for vertex, edge in reverseGraph.items():
            createForwardGraph(vertex, edge, forwardGraph)

        # create the MDST in a best first manner (lowest value first)
        current = self._SortedContainer()  # allows easy walking through the tree
        # (an empty pattern has no edge leaving the root: the plan is empty)
        for edge in forwardGraph.get(self.root, []):
            current.add(edge.orig_cost, edge)  # use orig cost, not modified
        while not current.empty():
            p_op = current.pop()  # p_op contains an outgoing edge
            MDST.append(self._createPrimOp(p_op))
            for edge in forwardGraph.get(p_op.orig_tgt, []):
                current.add(edge.orig_cost, edge)
        return MDST