Add Sten Vercammen's pattern matching library (ported to Python 3, numpy dependency replaced by standard library)

This commit is contained in:
Joeri Exelmans 2024-09-02 15:38:30 +02:00
parent 151ffe0ff0
commit 95a8076a17
9 changed files with 2120 additions and 0 deletions

31
pattern_matching/enum.py Normal file
View file

@ -0,0 +1,31 @@
# coding: utf-8
"""
Author: Sten Vercammen
University of Antwerp
Example code for paper: Efficient model transformations for novices
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
The main goal of this code is to give an overview, and an understandable
implementation, of known techniques for pattern matching and solving the
sub-graph homomorphism problem. The presented techniques do not include
performance adaptations/optimizations. It is not optimized to be efficient
but rather for the ease of understanding the workings of the algorithms.
The paper does list some possible extensions/optimizations.
It is intended as a guideline, even for novices, and provides an in-depth look
at the workings behind various techniques for efficient pattern matching.
"""
class Enum(object):
    """
    Minimal enum replacement for interpreters without the stdlib enum module
    (enum is introduced in python 3.4).

    Every name in args becomes an instance attribute whose value is the
    position of that name in args (0, 1, 2, ...). If a name occurs more than
    once, the last position wins.

    Usage create : a = Enum(['e0', 'e1', ...])
    Usage call   : a.e0
    """
    def __init__(self, args):
        # enumerate replaces the hand-maintained counter, which used to
        # shadow the builtin next()
        for value, name in enumerate(args):
            self.__dict__[name] = value

View file

@ -0,0 +1,202 @@
# coding: utf-8
"""
Author: Sten Vercammen
University of Antwerp
Example code for paper: Efficient model transformations for novices
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
The main goal of this code is to give an overview, and an understandable
implementation, of known techniques for pattern matching and solving the
sub-graph homomorphism problem. The presented techniques do not include
performance adaptations/optimizations. It is not optimized to be efficient
but rather for the ease of understanding the workings of the algorithms.
The paper does list some possible extensions/optimizations.
It is intended as a guideline, even for novices, and provides an in-depth look
at the workings behind various techniques for efficient pattern matching.
"""
import graph
# import numpy as np
import math
import collections
import random
class GraphGenerator(object):
    """
    Builds a Graph from explicit vertex/edge descriptions and can carve a
    random connected pattern out of it.

    dv is a sequence holding the type of every vertex, de a sequence holding
    the type of every edge, dc_inc[i] is the index (into dv) of the vertex
    that edge i points to and dc_out[i] the index of the vertex edge i
    starts from.
    """
    def __init__(self, dv, de, dc_inc, dc_out, debug=False):
        """
        Create the graph described by dv, de, dc_inc and dc_out.

        Raises ValueError when de, dc_inc and dc_out differ in length.
        When debug is true, the four input sequences are printed so a run
        can be reproduced by pasting them back into the caller.
        """
        if len(de) != len(dc_inc):
            raise ValueError('de and dc_inc should be the same length.')
        if len(de) != len(dc_out):
            raise ValueError('de and dc_out should be the same length.')
        self.dv = dv
        self.de = de
        self.dc_inc = dc_inc
        self.dc_out = dc_out
        # print for debugging, so you know the used values
        if debug:
            for label, values in (('dv', dv), ('de', de),
                                  ('dc_inc', dc_inc), ('dc_out', dc_out)):
                print(label)
                print('[', ','.join(map(str, values)), ']')
                print('_____')
        self.graph = graph.Graph()
        # one vertex per entry of dv; the entry is the type of the vertex
        self.vertices = [self.graph.addCreateVertex('v' + str(v_type))
                         for v_type in self.dv]
        # one edge per entry of de; dc_out/dc_inc give its end points
        for e_type, out_index, inc_index in zip(self.de, self.dc_out, self.dc_inc):
            src = self.vertices[out_index]
            tgt = self.vertices[inc_index]
            self.graph.addCreateEdge(src, tgt, 'e' + str(e_type))

    def getRandomGraph(self):
        """
        Return the Graph that was built by the constructor.
        """
        return self.graph

    def getRandomPattern(self, max_nr_of_v, max_nr_of_e, start=0, debug=False):
        """
        Grow a connected pattern out of the generated graph and return it
        as a new Graph.

        Growth starts at self.vertices[start] and repeatedly picks a random
        edge adjacent to the vertices already in the pattern. It stops once
        the pattern holds max_nr_of_v vertices, max_nr_of_e edges were
        chosen, or no adjacent edge is left.
        When debug is true, the statements needed to rebuild the pattern are
        printed (they can be pasted into createConstantPattern).
        """
        pattern = graph.Graph()
        # map from graph vertex to the pattern vertex created for it
        graph_to_pattern = {}
        # edges adjacent to the pattern that were not chosen yet; only the
        # (insertion ordered) keys are used, the values are always None
        possible_edges = collections.OrderedDict()
        # edges that were already drawn
        chosen_edges = set()

        def addPatternVertex(g_vertex):
            # create the pattern counterpart of g_vertex and remember it
            p_vertex = pattern.addCreateVertex(g_vertex.type)
            if debug:
                print('v' + str(id(p_vertex)) + "=pattern.addCreateVertex('"
                      + str(g_vertex.type) + "')")
            graph_to_pattern[g_vertex] = p_vertex
            return p_vertex

        def addPatternEdge(p_src, p_tgt, str_type):
            # connect two pattern vertices
            pattern.addCreateEdge(p_src, p_tgt, str_type)
            if debug:
                print('pattern.addCreateEdge(v' + str(id(p_src)) + ', v'
                      + str(id(p_tgt)) + ", '" + str(str_type) + "')")

        def insertEdges(g_vertex):
            # make every not yet chosen edge of g_vertex a candidate
            for edges in (g_vertex.incoming_edges, g_vertex.outgoing_edges):
                for edge in edges:
                    if edge not in chosen_edges:
                        possible_edges[edge] = None

        start_vertex = self.vertices[start]
        addPatternVertex(start_vertex)
        insertEdges(start_vertex)

        while max_nr_of_v > len(graph_to_pattern) and max_nr_of_e > len(chosen_edges):
            if not possible_edges:
                break
            # Draw an index with a triangular distribution that favours the
            # most recently added candidates: the oldest candidates already
            # had the most chances of being chosen. (This is only an
            # approximation, candidates added in the same iteration are
            # treated as if they had different ages.)
            if len(possible_edges) == 1:
                randie = 0
            else:
                randie = int(round(random.triangular(1, len(possible_edges), len(possible_edges)))) - 1
            candidate = list(possible_edges.keys())[randie]
            del possible_edges[candidate]
            chosen_edges.add(candidate)

            src = graph_to_pattern.get(candidate.src)
            tgt = graph_to_pattern.get(candidate.tgt)
            if src is None and tgt is None:
                # candidates always touch a vertex of the pattern; this
                # check has to come first, otherwise it is unreachable
                raise RuntimeError('Bug: src or tgt of edge should be in out pattern')
            if src is not None and tgt is not None:
                # both end points are known: only an edge is added, so there
                # are no new candidate edges to collect
                addPatternEdge(src, tgt, candidate.type)
                continue
            # exactly one end point is new: create it, then the edge
            if src is None:
                new_vertex = candidate.src
                src = addPatternVertex(new_vertex)
            else:
                new_vertex = candidate.tgt
                tgt = addPatternVertex(new_vertex)
            addPatternEdge(src, tgt, candidate.type)
            # the edges of the new vertex become candidates
            insertEdges(new_vertex)
        return pattern

    @staticmethod
    def createConstantPattern():
        """
        Use this to create the same pattern over and over again.

        Declared as a staticmethod so that it can be called both on the
        class and on an instance (gg.createConstantPattern()).
        """
        # create pattern
        pattern = graph.Graph()
        # copy and paste printed pattern from debug output or create a pattern
        # below the following line:
        # ----------------------------------------------------------------------
        v4447242448=pattern.addCreateVertex('v4')
        v4457323088=pattern.addCreateVertex('v6')
        pattern.addCreateEdge(v4447242448, v4457323088, 'e4')
        v4457323216=pattern.addCreateVertex('v8')
        pattern.addCreateEdge(v4457323216, v4447242448, 'e4')
        v4457323344=pattern.addCreateVertex('v7')
        pattern.addCreateEdge(v4457323216, v4457323344, 'e3')
        v4457323472=pattern.addCreateVertex('v7')
        pattern.addCreateEdge(v4457323344, v4457323472, 'e1')
        # ----------------------------------------------------------------------
        return pattern

157
pattern_matching/graph.py Normal file
View file

@ -0,0 +1,157 @@
# coding: utf-8
"""
Author: Sten Vercammen
University of Antwerp
Example code for paper: Efficient model transformations for novices
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
The main goal of this code is to give an overview, and an understandable
implementation, of known techniques for pattern matching and solving the
sub-graph homomorphism problem. The presented techniques do not include
performance adaptations/optimizations. It is not optimized to be efficient
but rather for the ease of understanding the workings of the algorithms.
The paper does list some possible extensions/optimizations.
It is intended as a guideline, even for novices, and provides an in-depth look
at the workings behind various techniques for efficient pattern matching.
"""
class Properties(object):
    """
    Base class that gives an object a bag of named properties.
    """
    def __init__(self):
        # name -> value
        self.properties = dict()

    def addProperty(self, name, value):
        """
        Store value under name, replacing any value stored there before.
        """
        self.properties.update({name: value})

    def getProperty(self, name):
        """
        Look up the property called name; None is returned when there is
        no such property.
        """
        if name in self.properties:
            return self.properties[name]
        return None
class Edge(Properties):
    """
    A typed edge that runs from a source Node to a target Node.
    Being a Properties object, it can carry a name, a weight, etc...
    """
    def __init__(self, src, tgt, str_type=None):
        # initialise the property storage of the parent class first
        Properties.__init__(self)
        # end points and type of the edge
        self.src, self.tgt = src, tgt
        self.type = str_type
class Vertex(Properties):
    """
    A typed vertex that keeps track of the edges arriving at it and the
    edges leaving it. An undirected edge is kept in both collections.
    Being a Properties object, it can carry a name, a weight, etc...
    """
    def __init__(self, str_type):
        # initialise the property storage of the parent class first
        Properties.__init__(self)
        self.type = str_type
        # undirected edges are stored in both sets
        self.incoming_edges = set()
        self.outgoing_edges = set()

    def addIncomingEdge(self, edge):
        """
        Register edge as arriving at this vertex.
        Raises TypeError when edge is not an Edge.
        """
        if isinstance(edge, Edge):
            self.incoming_edges.add(edge)
            return
        raise TypeError('addIncomingEdge without it being an edge')

    def addOutgoingEdge(self, edge):
        """
        Register edge as leaving this vertex.
        Raises TypeError when edge is not an Edge.
        """
        if isinstance(edge, Edge):
            self.outgoing_edges.add(edge)
            return
        raise TypeError('addOutgoingEdge without it being an edge')

    def addUndirectedEdge(self, edge):
        """
        Register an undirected (or bi-directed) edge: it is stored as an
        incoming and as an outgoing edge.
        """
        # incoming first, so an invalid edge is reported by addIncomingEdge
        for register in (self.addIncomingEdge, self.addOutgoingEdge):
            register(edge)
class Graph(object):
    """
    A graph whose vertices and edges are grouped by their type.
    """
    def __init__(self):
        # the grouping per type is redundant information, it is kept to
        # iterate quickly over all elements of one type
        self.edges = {}     # {type: set(e1, e2, ...)}
        self.vertices = {}  # {type: set(v1, v2, ...)}

    def addCreateVertex(self, str_type):
        """
        Build a Vertex of str_type, store it in the graph and hand it back
        (so that properties can be added to it).
        """
        created = Vertex(str_type)
        self.addVertex(created)
        return created

    def addVertex(self, vertex):
        """
        Store an existing Vertex in the graph.
        Raises TypeError when vertex is not a Vertex.
        """
        if not isinstance(vertex, Vertex):
            raise TypeError('addVertex expects a Vertex')
        # the first vertex of a type creates the set for that type
        same_type = self.vertices.get(vertex.type)
        if same_type is None:
            same_type = self.vertices[vertex.type] = set()
        same_type.add(vertex)

    def getVerticesOfType(self, str_type):
        """
        Give the stored vertices of str_type,
        or [] when the graph has no vertex of that type.
        """
        if str_type in self.vertices:
            return self.vertices[str_type]
        return []

    def getEdgesOfType(self, str_type):
        """
        Give the stored edges of str_type,
        or [] when the graph has no edge of that type.
        """
        if str_type in self.edges:
            return self.edges[str_type]
        return []

    def addCreateEdge(self, src, tgt, str_type):
        """
        Build an Edge of str_type from src to tgt, register it with both
        vertices, store it in the graph and hand it back (so that
        properties can be added to it).
        Raises TypeError when src or tgt is not a Vertex.
        """
        if not isinstance(src, Vertex):
            raise TypeError('addCreateEdge: src is not a Vertex')
        if not isinstance(tgt, Vertex):
            raise TypeError('addCreateEdge: tgt is not a Vertex')
        created = Edge(src, tgt, str_type)
        # make both end points aware of the new edge
        created.src.addOutgoingEdge(created)
        created.tgt.addIncomingEdge(created)
        self.addEdge(created)
        return created

    def addEdge(self, edge):
        """
        Store an existing Edge in the graph.
        Raises TypeError when edge is not an Edge.
        """
        if not isinstance(edge, Edge):
            raise TypeError('addEdge expects an Edge')
        # the first edge of a type creates the set for that type
        same_type = self.edges.get(edge.type)
        if same_type is None:
            same_type = self.edges[edge.type] = set()
        same_type.add(edge)

View file

@ -0,0 +1,44 @@
# coding: utf-8
"""
Author: Sten Vercammen
University of Antwerp
Example code for paper: Efficient model transformations for novices
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
The main goal of this code is to give an overview, and an understandable
implementation, of known techniques for pattern matching and solving the
sub-graph homomorphism problem. The presented techniques do not include
performance adaptations/optimizations. It is not optimized to be efficient
but rather for the ease of understanding the workings of the algorithms.
The paper does list some possible extensions/optimizations.
It is intended as a guideline, even for novices, and provides an in-depth look
at the workings behind various techniques for efficient pattern matching.
"""
import graph as mg
def printGraph(fileName, graph, matched_v=None, matched_e=None):
    """
    Write graph to fileName in Graphviz dot format.

    matched_v and matched_e are optional dictionaries; every vertex/edge of
    graph that occurs among their values is drawn highlighted. Leaving them
    out highlights nothing.
    Raises TypeError when graph is not a Graph.
    """
    if not isinstance(graph, mg.Graph):
        raise TypeError('Can only print Graph Graphs')
    # None instead of {} as default: default values are shared between calls.
    # The value lists are built once here instead of once per vertex/edge.
    highlighted_v = [] if matched_v is None else list(matched_v.values())
    highlighted_e = [] if matched_e is None else list(matched_e.values())
    with open(fileName, 'w') as f:
        f.write('digraph randomGraph {\n\n')
        for str_type, plan_vertices in graph.vertices.items():
            for plan_vertex in plan_vertices:
                # the id of the object is used as the node name in the dot file
                vertex_str = str(id(plan_vertex)) + ' [label="'+str(str_type)+'"'
                if plan_vertex in highlighted_v:
                    vertex_str += ', style=dashed, style=filled]\n'
                else:
                    vertex_str += ']\n'
                f.write(vertex_str)
                # every edge is written once, together with its source vertex
                for out_edge in plan_vertex.outgoing_edges:
                    edge_str = str(id(plan_vertex)) + ' -> ' + str(id(out_edge.tgt)) + ' [label="'+str(out_edge.type)+'"'
                    if out_edge in highlighted_e:
                        edge_str += ', style=dashed, penwidth = 4]\n'
                    else:
                        edge_str += ']\n'
                    f.write(edge_str)
        f.write('\n}')

88
pattern_matching/main.py Normal file
View file

@ -0,0 +1,88 @@
# coding: utf-8
"""
Author: Sten Vercammen
University of Antwerp
Example code for paper: Efficient model transformations for novices
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
The main goal of this code is to give an overview, and an understandable
implementation, of known techniques for pattern matching and solving the
sub-graph homomorphism problem. The presented techniques do not include
performance adaptations/optimizations. It is not optimized to be efficient
but rather for the ease of understanding the workings of the algorithms.
The paper does list some possible extensions/optimizations.
It is intended as a guideline, even for novices, and provides an in-depth look
at the workings behind various techniques for efficient pattern matching.
"""
from generator import *
from patternMatching import *
import graphToDot
import random
debug = False
if __name__ == '__main__':
    # The main script, executed when running from the command line:
    # build a graph, extract a random pattern from it, write both to dot
    # files and search the pattern in the graph.
    nr_of_vertices = 50
    nr_of_diff_types_v = 10
    nr_of_edges = 150
    nr_of_diff_types_e = 10

    # random description of a graph (random.randint includes both bounds)
    dv = [random.randint(0, nr_of_diff_types_v) for _ in range(nr_of_vertices)]
    de = [random.randint(0, nr_of_diff_types_e) for _ in range(nr_of_edges)]
    dc_inc = [random.randint(0, nr_of_vertices-1) for _ in range(nr_of_edges)]
    dc_out = [random.randint(0, nr_of_vertices-1) for _ in range(nr_of_edges)]

    # override random graph by copy pasting output from terminal
    dv = [ 10,5,4,0,8,6,8,0,4,8,5,5,7,0,10,0,5,6,10,4,0,3,0,8,2,7,5,8,1,0,2,10,0,0,1,6,8,4,7,6,4,2,10,10,6,4,6,0,2,7 ]
    de = [ 8,10,8,1,6,7,4,3,5,2,0,0,9,6,0,3,8,3,2,7,2,3,10,8,10,8,10,2,5,5,10,6,7,5,1,2,1,2,2,3,7,7,2,1,7,2,9,10,8,1,9,4,1,3,1,1,8,2,2,9,10,9,1,9,4,10,10,10,9,3,5,3,6,6,9,1,2,6,3,2,4,10,9,6,5,6,2,4,3,2,4,10,6,2,8,8,0,5,1,7,3,4,3,8,7,3,0,8,3,3,8,5,10,5,9,3,1,10,3,2,6,3,10,0,5,10,9,10,0,1,4,7,10,3,1,9,1,2,3,7,4,3,7,8,8,4,5,10,1,4 ]
    dc_inc = [ 0,25,18,47,22,25,16,45,38,25,5,45,15,44,17,46,6,17,35,8,16,29,48,47,25,34,4,20,24,1,47,44,8,25,32,3,16,6,33,21,6,13,41,10,17,25,21,33,31,30,5,4,45,26,16,42,12,25,29,3,32,30,14,26,11,13,7,13,3,43,43,22,48,37,20,28,15,40,19,33,43,16,49,36,11,25,9,42,3,22,16,40,42,44,27,30,1,18,10,35,19,6,9,43,37,38,45,19,41,14,37,45,0,31,29,31,24,20,44,46,8,45,43,3,38,38,35,12,19,45,7,34,20,28,12,17,45,17,35,49,20,21,49,1,35,38,38,36,33,30 ]
    dc_out = [ 9,2,49,49,37,33,16,21,5,46,4,15,9,6,14,22,16,33,23,21,15,31,37,23,47,3,30,26,35,9,29,21,39,32,22,43,5,9,41,30,31,30,37,33,31,34,23,22,34,26,44,36,38,33,48,5,9,34,13,7,48,41,43,26,26,7,12,6,12,28,22,8,29,22,24,27,16,4,31,41,32,15,19,20,38,0,26,18,43,46,40,17,29,14,34,14,32,17,32,47,16,45,7,4,35,22,42,11,38,2,0,29,4,38,17,44,9,23,5,10,31,17,1,11,16,5,37,27,35,32,45,16,18,1,14,4,42,24,43,31,21,38,6,34,39,46,20,1,38,47 ]

    gg = GraphGenerator(dv, de, dc_inc, dc_out, debug)
    graph = gg.getRandomGraph()
    pattern = gg.getRandomPattern(5, 15, debug=debug)
    # override random pattern by copy pasting output from terminal to create
    # pattern, paste it in the createConstantPattern function in the generator.py
    # pattern = gg.createConstantPattern()

    # generate here to know pattern and graph before searching it
    graphToDot.printGraph('randomPattern.dot', pattern)
    graphToDot.printGraph('randomGraph.dot', graph)

    # available matchers:
    #PM = PatternMatching('naive')
    #PM = PatternMatching('SP')
    # PM = PatternMatching('Ullmann')
    PM = PatternMatching('VF2')

    result = PM.match(pattern, graph)
    if not result:
        # a matcher reports a missing occurrence with a falsy result,
        # which can not be unpacked into vertices and edges
        print('No occurrence of the pattern was found in the graph.')
    else:
        v, e = result
        # regenerate graph, to show matched pattern
        graphToDot.printGraph('randomGraph.dot', graph, v, e)
        if debug:
            print(len(v))
            print('___')
            print(v)
            for value in v.values():
                print(value.type)
            print(len(e))
            print(e)
            print('___')
            for value in e.values():
                print(value.type)

View file

@ -0,0 +1,947 @@
# coding: utf-8
"""
Author: Sten Vercammen
University of Antwerp
Example code for paper: Efficient model transformations for novices
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
The main goal of this code is to give an overview, and an understandable
implementation, of known techniques for pattern matching and solving the
sub-graph homomorphism problem. The presented techniques do not include
performance adaptations/optimizations. It is not optimized to be efficient
but rather for the ease of understanding the workings of the algorithms.
The paper does list some possible extensions/optimizations.
It is intended as a guideline, even for novices, and provides an in-depth look
at the workings behind various techniques for efficient pattern matching.
"""
from planGraph import *
import collections
import itertools
# import numpy as np
class PatternMatching(object):
"""
Returns an occurrence of a given pattern from the given Graph
"""
def __init__(self, matching_type='SP', optimize=True):
# store the type of matching we want to use
self.type = matching_type
self.bound_vertices = {} # saves the currently bound vertices
self.bound_edges = {} # saves the currently bound edges
self.result = None
self.previous = []
self.optimize = optimize
def match(self, pattern, graph):
    """
    Call this function to find an occurrence of the pattern in the (host) graph.
    The technique that is used (naive, SP, Ullmann, VF2) is the matching
    type given to the constructor (stored in self.type).

    Returns whatever the selected matcher returns.
    Raises TypeError when pattern or graph is not a SearchGraph or Graph
    and ValueError when self.type is not a known matching type.
    """
    if not isinstance(pattern, (SearchGraph, Graph)):
        raise TypeError('pattern must be a SearchGraph or Graph')
    if not isinstance(graph, (SearchGraph, Graph)):
        raise TypeError('graph must be a SearchGraph or Graph')
    self.pattern = pattern
    self.graph = graph
    try:
        if self.type == 'naive':
            return self.matchNaive(vertices=graph.vertices, edges=graph.edges)
        if self.type == 'SP':
            return self.matchSP()
        if self.type == 'Ullmann':
            return self.matchUllmann()
        if self.type == 'VF2':
            return self.matchVF2()
        raise ValueError('Unknown type for matching')
    finally:
        # cleanup, also when a matcher raised: bindings of a failed run
        # must not leak into the next call
        self.pattern = None
        self.graph = None
        self.bound_vertices = {}
        self.bound_edges = {}
        self.result = None
def matchNaive(self, pattern_vertices=None, vertices=None, edges=None):
    """
    Try to find an occurrence of the pattern in the Graph naively.

    pattern_vertices, vertices and edges are dictionaries that map a type
    onto a collection of pattern vertices, graph vertices and graph edges.
    They default to self.pattern.vertices, self.bound_vertices and
    self.bound_edges.

    Returns a tuple (vertex_binding, edge_binding), two dictionaries that
    map pattern vertices/edges onto graph vertices/edges, or None when no
    occurrence is found.
    """
    # allow call with specific arguments
    if pattern_vertices is None:
        pattern_vertices = self.pattern.vertices
    if vertices is None:
        vertices = self.bound_vertices
    if edges is None:
        edges = self.bound_edges

    def visitEdge(pattern_vertices, p_edge, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
        """
        Visit a pattern edge, and try to bind it to a graph edge.
        (If the first fails, try the second, and so on...)
        inc tells whether p_edge is an incoming edge of the pattern vertex
        that is being visited.
        """
        for g_edge in g_edges:
            # only reckon the edge if it is in edges and not visited
            # (as the graph might be a subgraph of a more complex graph)
            if g_edge not in edges.get(g_edge.type, []) or g_edge in visited_g_edges:
                continue
            if g_edge.type == p_edge.type:
                visited_p_edges[p_edge] = g_edge
                visited_g_edges.add(g_edge)
                # continue with the vertex at the other end of the edge
                if inc:
                    p_vertex = p_edge.src
                else:
                    p_vertex = p_edge.tgt
                if visitVertices(pattern_vertices, p_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
                    return True
                # remove added edges if they lead to no match, retry with others
                del visited_p_edges[p_edge]
                visited_g_edges.remove(g_edge)
        # no edge leads to a positive match
        return False

    def visitEdges(pattern_vertices, p_edges, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
        """
        Visit all edges of the pattern vertex (edges given as argument).
        We need to try visiting them for all its permutations, as matching
        v -e1-> first and v -e2-> second and v -e3-> third, might not result
        in matching an occurrence of the pattern, but matching v -e2->
        first and v -e3-> second and v -e1-> third might.
        """
        def removePrevEdge(visitedEdges, visited_p_edges, visited_g_edges):
            """
            Undo the binding of the previous edges (the current bindings do
            not lead to an occurrence of the pattern in the graph).
            """
            # NOTE(review): only edge bindings are undone here; vertices
            # that were bound while these edges were explored stay bound.
            # Confirm whether that is intended.
            for wrong_edge in visitedEdges:
                # remove binding (pattern edge to graph edge)
                wrong_g_edge = visited_p_edges.get(wrong_edge)
                del visited_p_edges[wrong_edge]
                # remove visited graph edge
                visited_g_edges.remove(wrong_g_edge)

        for it in itertools.permutations(p_edges):
            visitedEdges = []
            foundallEdges = True
            for edge in it:
                if visited_p_edges.get(edge) is None:
                    if not visitEdge(pattern_vertices, edge, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
                        # this did not work, so we have to undo all added edges
                        # (the current edge is not added, as it failed)
                        # we then can try a different permutation
                        removePrevEdge(visitedEdges, visited_p_edges, visited_g_edges)
                        foundallEdges = False
                        break # try other order
                    # add good visited (we know it succeeded)
                    visitedEdges.append(edge)
                else:
                    # we visited this pattern edge, and have the corresponding graph edge
                    # if it is an incoming pattern edge, we need to make sure that
                    # the graph target that is mapped from the pattern target
                    # (of this incoming pattern edge, which has to be bound at this point)
                    # has the graph edge as an incoming edge,
                    # otherwise the graph is not properly connected
                    if inc:
                        if not visited_p_edges[edge] in visited_p_vertices[edge.tgt].incoming_edges:
                            # did not work
                            removePrevEdge(visitedEdges, visited_p_edges, visited_g_edges)
                            foundallEdges = False
                            break # try other order
                    else:
                        # analog for an outgoing edge
                        if not visited_p_edges[edge] in visited_p_vertices[edge.src].outgoing_edges:
                            # did not work
                            removePrevEdge(visitedEdges, visited_p_edges, visited_g_edges)
                            foundallEdges = False
                            break # try other order
            # all edges are good, look no further
            if foundallEdges:
                break
        return foundallEdges

    def visitVertex(pattern_vertices, p_vertex, g_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
        """
        Visit a pattern vertex, and try to bind it to the graph vertex
        (both are given as argument). A binding is successful if all the
        incoming and outgoing edges of the pattern vertex can be bound
        (to the graph vertex).
        """
        if g_vertex in visited_g_vertices:
            return False
        # save visited graph vertex
        visited_g_vertices.add(g_vertex)
        # map pattern vertex to visited graph vertex
        visited_p_vertices[p_vertex] = g_vertex
        if visitEdges(pattern_vertices, p_vertex.incoming_edges, True, g_vertex.incoming_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
            if visitEdges(pattern_vertices, p_vertex.outgoing_edges, False, g_vertex.outgoing_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
                return True
        # cleanup, remove from visited as this does not lead to
        # an occurrence of the pattern in the graph
        visited_g_vertices.remove(g_vertex)
        del visited_p_vertices[p_vertex]
        return False

    def visitVertices(pattern_vertices, p_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
        """
        Visit a pattern vertex and try to bind a graph vertex to it.
        """
        # if already matched or if it is a vertex not in the pattern_vertices
        # (second is for when you want to match the pattern partially)
        if visited_p_vertices.get(p_vertex) is not None or p_vertex not in pattern_vertices.get(p_vertex.type, set()):
            return True
        # try visiting graph vertices of same type as pattern vertex
        for g_vertex in vertices.get(p_vertex.type, []):
            if g_vertex not in visited_g_vertices:
                if visitVertex(pattern_vertices, p_vertex, g_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
                    return True
        return False

    visited_p_vertices = {}
    visited_p_edges = {}
    visited_g_vertices = set()
    visited_g_edges = set()

    # all pattern vertices are used as starting point, which is needed when
    # the pattern consists of multiple not connected structures
    allVertices = []
    for _, p_vertices in pattern_vertices.items():
        allVertices.extend(p_vertices)

    foundIt = False
    for it_p_vertices in itertools.permutations(allVertices):
        foundIt = True
        for p_vertex in it_p_vertices:
            if not visitVertices(pattern_vertices, p_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
                foundIt = False
                # reset visited, the next order starts from scratch
                visited_p_vertices = {}
                visited_p_edges = {}
                visited_g_vertices = set()
                visited_g_edges = set()
                break
        if foundIt:
            break

    if foundIt:
        return (visited_p_vertices, visited_p_edges)
    return None
def matchSP(self):
    """
    Find an occurrence of the pattern in the Graph
    by using the generated SearchPlan.

    Returns the result of matchNaive on the bound elements when an
    occurrence is found and None otherwise.
    Raises TypeError when self.graph is neither a Graph nor a SearchGraph.
    """
    if isinstance(self.graph, Graph):
        sg = SearchGraph(self.graph)
    elif isinstance(self.graph, SearchGraph):
        sg = self.graph
    else:
        raise TypeError('Pattern matching with a SearchPlan must be given a Graph or SearchGraph')
    pg = PlanGraph(self.pattern)
    # the SearchPlan: a sequence of primitive operations
    SP = pg.Edmonds(sg)
    # not used in this method; kept because it is set on the instance
    self.fileIndex = 0

    def propConnected():
        """
        Checks if the found vertices and edges can be uniquely matched
        onto the pattern graph.
        """
        self.result = self.matchNaive()
        return self.result is not None

    def matchOP(elem, bound, ops, index):
        """
        Execute a primitive operation, return whether or not it succeeded.
        bound is the dictionary (self.bound_vertices or self.bound_edges)
        in which elem is bound under its type.
        """
        type_bound = bound.setdefault(elem.type, set())
        # if elem not yet bound, bind it, and try matching the next operations
        if elem not in type_bound:
            type_bound.add(elem)
            if matchAllOP(ops, index+1):
                return True
            # matching of the next operations failed: unbind elem, so that
            # the caller can try with a different elem
            type_bound.remove(elem)
        return False

    def matchAllOP(ops, index=0):
        """
        Try to match an occurrence of the pattern in the graph,
        by recursively matching elements that adhere to the SearchPlan.
        """
        # if we matched all elements,
        # check if the bound elements are properly connected
        if index == len(ops):
            return propConnected()
        op = ops[index]
        # The bound sets are iterated through a copy (list(...)): operations
        # further down the plan add elements to, and remove elements from,
        # these same sets while the loop is still running.
        if op[0] == PRIM_OP.lkp:    # lkp(elem)
            if op[2]:   # lookup a vertex
                # If the graph does not have a vertex of the same vertex
                # type, we'll have to return False, happens if elems == [].
                elems = self.graph.vertices.get(op[1], [])
                bound = self.bound_vertices
            else:       # lookup an edge
                # If the graph does not have an edge of the same edge
                # type, we'll have to return False, happens if elems == [].
                elems = self.graph.edges.get(op[1], [])
                bound = self.bound_edges
            # if elems == [], we'll skip the loop and return False
            for elem in elems:
                if matchOP(elem, bound, ops, index):
                    return True
            # if all not bound elems fail, backtrack
            return False
        elif op[0] == PRIM_OP.src:  # src(e): bind src of a bound edge e
            # Should always succeed, as the edge must be already bound
            # (there should be at least one elem in self.bound_edges[op[1]]).
            for edge in list(self.bound_edges[op[1]]):
                if matchOP(edge.src, self.bound_vertices, ops, index):
                    return True
            # if all not bound elems fail, backtrack
            return False
        elif op[0] == PRIM_OP.tgt:  # tgt(e): bind tgt of a bound edge e
            # Should always succeed, as the edge must be already bound
            # (there should be at least one elem in self.bound_edges[op[1]]).
            for edge in list(self.bound_edges[op[1]]):
                if matchOP(edge.tgt, self.bound_vertices, ops, index):
                    return True
            # if all not bound elems fail, backtrack
            return False
        elif op[0] == PRIM_OP.inc:  # in(v, e): bind incoming edge e of a bound vertex v
            # It's possible we will try to find a vertex of a certain type
            # in the bound_vertices which should be bound implicitly
            # (by a src/tgt op), that is not bound. Happens when implicit
            # binding bound a "wrong" vertex. We then need to return False
            # (happens by skipping the for loop by looping over [])
            for vertex in list(self.bound_vertices.get(op[1], [])):
                for edge in vertex.incoming_edges:
                    if edge.type == op[2]:
                        if matchOP(edge, self.bound_edges, ops, index):
                            return True
            # if all not bound elems fail, backtrack
            return False
        elif op[0] == PRIM_OP.out:  # out(v, e): bind outgoing edge e of a bound vertex v
            # Return False if we expect an element to be bound that is not
            # bound (for the same reason as the inc op).
            for vertex in list(self.bound_vertices.get(op[1], [])):
                for edge in vertex.outgoing_edges:
                    if edge.type == op[2]:
                        if matchOP(edge, self.bound_edges, ops, index):
                            return True
            # if all not bound elems fail, backtrack
            return False
        else:
            raise TypeError('Unknown PRIM_OP type')

    # Try and match all (primitive) operations from the SearchPlan.
    # Either nothing is found, or we found an occurrence, it is impossible
    # to have a partially matched occurrence. The outcome of matchAllOP is
    # used directly: inspecting self.bound_vertices is not reliable, as
    # backtracking leaves empty sets behind for types that were tried.
    if matchAllOP(SP):
        # We found a pattern
        return self.result
    # The pattern does not exist in the Graph
    return None
def createAdjacencyMatrixMap(self, graph):
    """
    Return adjacency matrix and the order of the vertices.

    The result is a tuple (AM, vertices_order): AM[i][j] is True when
    graph has an edge from vertices_order[i] to vertices_order[j].
    When self.optimize is set, only vertices whose type occurs in
    self.pattern are used, and False is returned (instead of the tuple)
    when the pattern has a vertex type that graph does not have.
    """
    # contains all vertices we'll use for the AdjacencyMatrix
    allVertices = []
    if self.optimize:
        # insert only the vertices from the graph which have a type
        # that is present in the pattern
        for vertex_type in self.pattern.vertices:
            graph_vertices = graph.vertices.get(vertex_type)
            if graph_vertices is None:
                # we will not be able to find the pattern
                # as the pattern contains a vertex of a certain type
                # that is not present in the host graph
                return False
            allVertices.extend(graph_vertices)
    else:
        # insert all vertices from the graph
        for vertices in graph.vertices.values():
            allVertices.extend(vertices)

    # create squared zero matrix
    matrix = collections.OrderedDict()  # { vertex: (index, [has edge from index to pos?]) }
    size = len(allVertices)
    for index, vertex in enumerate(allVertices):
        matrix[vertex] = (index, [False] * size)

    for edges in graph.edges.values():
        for edge in edges:
            if self.optimize and (edge.tgt not in matrix or edge.src not in matrix):
                # skip adding edge if the target or source type
                # is not present in the pattern
                # (and therefore not added to the matrix)
                continue
            index = matrix[edge.tgt][0]
            matrix[edge.src][1][index] = True

    AM = [row for (_, row) in matrix.values()]
    vertices_order = list(matrix)
    return AM, vertices_order
def matchUllmann(self):
    """
    Find an occurrence of the pattern in the Graph
    by using Ullmann for solving the Constraint Satisfaction Problem (CSP).

    Returns self.result. Every complete candidate assignment is handed to
    self.matchNaive and its return value is stored in self.result.
    Returns None right away when createAdjacencyMatrixMap returns False
    (a vertex type of the pattern does not occur in the graph).
    """
    def createM_star(h, p):
        """
        Create M*[v, w] = 1 if deg(v) <= deg(w), for v in V_P, w in V_H
                        = 0 otherwise
        h and p are given to ensure the correct order.
        """
        m = []  # [[..], ...]
        for p_vertex in p:
            row = []
            for g_vertex in h:
                # when optimizing, a candidate must also have the same type
                if self.optimize and p_vertex.type != g_vertex.type:
                    row.append(False)
                    continue
                # for the degree function, we choose to look at the
                # outgoing edges AND the incoming edges
                # (one might prefer to use only one of them)
                row.append(
                    len(p_vertex.incoming_edges) <= len(g_vertex.incoming_edges)
                    and len(p_vertex.outgoing_edges) <= len(g_vertex.outgoing_edges))
            m.append(row)
        return m

    def createDecreasingOrder(h):
        """
        Return the indices of h sorted on decreasing degree
        (incoming + outgoing edges).

        It turns out that the more edges a vertex has, the sooner it will
        fail in matching the pattern. For efficiency reasons, we want it
        to fail as fast as possible, so those vertices are tried first.
        Vertices with an equal degree keep their relative order.
        """
        degrees = [len(g_vertex.incoming_edges) + len(g_vertex.outgoing_edges)
                   for g_vertex in h]
        return sorted(range(len(h)), key=lambda index: degrees[index],
                      reverse=True)

    def propConnected(M, H, P, h, p):
        """
        Check whether the graph vertices selected in M form an occurrence.

        The first True of every row of M selects one graph vertex. The
        selected vertices are grouped per type and handed to
        self.matchNaive, whose return value is stored in self.result.
        Returns True when that value is not None.
        (H, P and p are not used; they are kept for a uniform signature.)
        """
        vertices = {}
        for row in M:
            for index_row, item in enumerate(row):
                # there should only be one item per row
                if item:
                    vertex = h[index_row]
                    vertices.setdefault(vertex.type, set()).add(vertex)
                    break
        self.result = self.matchNaive(vertices=vertices, edges=self.graph.edges)
        return self.result is not None

    def refineM(M, H, P, h, pp):
        """
        Refine M: for every vertex of the pattern, check for each candidate
        whether the neighbours of the pattern vertex can also be matched.
        (A row of M represents a vertex of P, a column a candidate from H.)
        If this is not possible M[i][j] is set to False, reducing the
        search space. M is changed in place until nothing changes anymore.
        """
        nr_pattern = len(P)  # P is a nxn-matrix
        nr_graph = len(H[0]) if H else 0  # an empty graph has no columns

        def neighbourMatchable(k, j, outgoing):
            """
            Is there a candidate q for pattern vertex k that is connected
            to graph vertex j (j -> q if outgoing, q -> j otherwise)?
            """
            for q in range(nr_graph):
                connected = H[j][q] if outgoing else H[q][j]
                if M[k][q] and connected:
                    # also check that the neighbour has the correct type
                    if self.optimize and pp[k].type != h[q].type:
                        continue
                    return True
            return False

        any_changes = True
        while any_changes:
            any_changes = False
            # for all vertices from the pattern
            for i in range(nr_pattern):
                # for all its possible assignments
                for j in range(nr_graph):
                    if not M[i][j]:
                        continue
                    # for all the neighbours of the pattern vertex
                    for k in range(nr_pattern):
                        # neighbour via an outgoing edge
                        if P[i][k] and not neighbourMatchable(k, j, True):
                            M[i][j] = False
                            any_changes = True
                        # neighbour via an incoming edge
                        if P[k][i] and not neighbourMatchable(k, j, False):
                            M[i][j] = False
                            any_changes = True

    def findM(M_star, M, order, H, P, h, p, index_M=0):
        """
        Find an isomorphic mapping for the vertices of P to H.
        This mapping is represented by a matrix M if,
        and only if M(MH)^T = P^T.

        Row index_M of M is assigned here, the following rows recursively.
        Returns True when propConnected accepted a complete assignment.
        """
        # When index_M == len(M) every row got assigned. Remember this,
        # but first check the rows above for a row with only False;
        # if there is one, we do not need to check propConnected.
        check_prop = False
        if index_M == len(M):
            check_prop = True
            index_M -= 1
        # we need to refer to this row
        old_row = M_star[index_M]
        # previous rows (these are sparse, 1 per row, save only its position)
        prev_pos = []
        for i in range(0, index_M):
            row = M[i]
            only_false = True
            for j in range(0, len(old_row)):
                if row[j]:
                    only_false = False
                    prev_pos.append(j)
                    break
            if only_false:
                # a row with only False occurs,
                # we will not find an occurrence
                return False
        # We are at the end, we found a candidate.
        if check_prop:
            return propConnected(M, H, P, h, p)
        M[index_M] = [False] * len(old_row)
        for index_order, index_row in enumerate(order):
            # put previous True back on False
            if index_order > 0:
                M[index_M][order[index_order - 1]] = False
            if old_row[index_row]:
                M[index_M][index_row] = True
                # 1 0 0   Assume 3rd round, and we select x:
                # 0 1 0   none of the elements above it in the same
                # 0 x 0   column may be 1. In the example one is,
                #         so try selecting another element.
                column_taken = False
                for index_column in range(0, index_M):
                    if M[index_column][index_row]:
                        column_taken = True
                        break
                if column_taken:
                    continue
                refineM(M, H, P, h, p)
                if findM(M_star, M, order, H, P, h, p, index_M + 1):
                    return True
                # refineM may have cleared entries, undo that:
                # reset the True's of the previous rows
                for prev_row, pos in enumerate(prev_pos):
                    M[prev_row][pos] = True
                # reset rows below current row
                for index_column in range(index_M + 1, len(M)):
                    # copy, we do not want to share the list with M_star
                    M[index_column] = M_star[index_column][:]
        # reset current row (the rest is already reset)
        M[index_M] = M_star[index_M][:]
        return False

    # create adjacency matrix of the graph
    graph_map = self.createAdjacencyMatrixMap(self.graph)
    if graph_map is False:
        # a vertex type of the pattern is missing in the graph
        return None
    H, h = graph_map
    # create adjacency matrix of the pattern
    pattern_map = self.createAdjacencyMatrixMap(self.pattern)
    if pattern_map is False:
        return None
    P, p = pattern_map
    # create M* binary matrix
    M_star = createM_star(h, p)
    # create the order we will use later on
    order = createDecreasingOrder(h)
    # deepcopy M_star into M
    M = [row[:] for row in M_star]
    if self.optimize:
        refineM(M, H, P, h, p)
    findM(M_star, M, order, H, P, h, p)
    return self.result
def matchVF2(self):
    """
    Find an occurrence of the pattern in the Graph with a VF2-style search.

    Returns self.result. Once every pattern vertex is paired with a graph
    vertex, the paired graph vertices are handed to self.matchNaive and its
    return value is stored in self.result.
    Returns None right away when createAdjacencyMatrixMap returns False
    (a vertex type of the pattern does not occur in the graph).
    """
    class VF2_Obj(object):
        """
        Structure for keeping the VF2 data.
        """
        def __init__(self, len_graph_vertices, len_pattern_vertices):
            # represents if the n-th element (h[n] or p[n]) is matched
            self.core_graph = [False] * len_graph_vertices
            self.core_pattern = [False] * len_pattern_vertices
            # maps a matched graph vertex onto its pattern vertex
            self.mapping = {}
            # The N_* lists hold -1, or the index_M at which the vertex
            # became adjacent to the matched vertices.
            # preference lvl 1
            # vertices adjacent to the matched graph vertices via an outgoing edge
            self.N_out_graph = [-1] * len_graph_vertices
            # vertices adjacent to the matched pattern vertices via an outgoing edge
            self.N_out_pattern = [-1] * len_pattern_vertices
            # preference lvl 2
            # vertices adjacent to the matched graph vertices via an incoming edge
            self.N_inc_graph = [-1] * len_graph_vertices
            # vertices adjacent to the matched pattern vertices via an incoming edge
            self.N_inc_pattern = [-1] * len_pattern_vertices
            # preference lvl 3
            # not in the above

    def addOutNeighbours(neighbours, N, index_M):
        """
        Given outgoing neighbours (a row from an adjacency matrix),
        label them as added by saving when they got added (index_M
        represents this, otherwise it is -1).
        """
        for neighbour_index, is_neighbour in enumerate(neighbours):
            if is_neighbour and N[neighbour_index] == -1:
                N[neighbour_index] = index_M

    def addIncNeighbours(G, j, N, index_M):
        """
        Given the adjacency matrix G and the column j, label the sources
        of the incoming edges of vertex j as added by saving when they
        got added (index_M represents this, otherwise it is -1).
        """
        for i in range(0, len(G)):
            if G[i][j] and N[i] == -1:
                N[i] = index_M

    def delNeighbours(N, index_M):
        """
        Remove neighbours that were added at index_M.
        If we call this function, we are backtracking and we want to
        remove the added neighbours from the just tried matching (n, m)
        pair (which failed).
        """
        for n in range(0, len(N)):
            if N[n] == index_M:
                N[n] = -1

    def countExtEdges(G, core, N_out, N_inc):
        """
        Count the edges x -> y of adjacency matrix G used for pruning.
        Returns the tuple (ext_edges_ca, ext_edges_an).

        NOTE(review): as written, only edges whose target y is labelled in
        N_out/N_inc AND is already matched are counted, while the original
        comment read "neighbour and not yet matched". Also, core holds
        booleans, so "core[x] != -1" is always True and ext_edges_an
        always stays 0. The behaviour is kept as it was; confirm the
        intended counting before changing it.
        """
        ext_edges_ca = 0
        ext_edges_an = 0
        for x in range(0, len(core)):
            for y in range(0, len(G)):
                if not G[x][y]:
                    continue
                if (N_out[y] != -1 or N_inc[y] != -1) and core[y]:
                    if core[x] != -1:
                        ext_edges_ca += 1
                    else:
                        ext_edges_an += 1
        return ext_edges_ca, ext_edges_an

    def feasibilityTest(H, P, h, p, VF2_obj, n, m):
        """
        Decide whether graph vertex n may be paired with pattern vertex m.

        Examine all the nodes connected to n and m; if such nodes are
        in the current partial mapping, check if each branch from or to
        n has a corresponding branch from or to m and vice versa.
        If the nodes and the branches of the graphs being matched also
        carry semantic attributes, the attributes of the nodes and of
        the branches being paired must be compatible.
        Another pruning step compares the ext_edges counted for the
        pattern with those counted for the graph (see countExtEdges):
        the pattern may not have more of them than the graph.
        """
        # Get all matched neighbours of graph node n and pattern node m
        # (including n and m)
        neighbours_graph = {h[n].type: set([h[n]])}
        neighbours_pattern = {p[m].type: set([p[m]])}
        # add all matched neighbours of pattern vertex m
        for i in range(0, len(P)):  # P is a nxn-matrix
            if (P[m][i] or P[i][m]) and VF2_obj.core_pattern[i]:
                neighbours_pattern.setdefault(p[i].type, set()).add(p[i])
        # add all matched neighbours of graph vertex n
        for i in range(0, len(H)):  # H is a nxn-matrix
            if (H[n][i] or H[i][n]) and VF2_obj.core_graph[i]:
                neighbours_graph.setdefault(h[i].type, set()).add(h[i])
        # take a coding shortcut,
        # use self.matchNaive function to see if it is feasible.
        # this way, we immediately test the semantic attributes
        if not self.matchNaive(pattern_vertices=neighbours_pattern,
                               vertices=neighbours_graph,
                               edges=self.graph.edges):
            return False
        ext_edges_graph_ca, ext_edges_graph_an = countExtEdges(
            H, VF2_obj.core_graph, VF2_obj.N_out_graph, VF2_obj.N_inc_graph)
        ext_edges_pattern_ca, ext_edges_pattern_an = countExtEdges(
            P, VF2_obj.core_pattern, VF2_obj.N_out_pattern, VF2_obj.N_inc_pattern)
        # The pattern may not have more of these edges than the graph,
        # otherwise we won't find an occurrence
        if ext_edges_pattern_ca > ext_edges_graph_ca:
            return False
        if ext_edges_pattern_an > ext_edges_graph_an:
            return False
        return True

    def matchPhase(H, P, h, p, index_M, VF2_obj, n, m):
        """
        The matching phase of the VF2 algorithm. If the chosen n, m pair
        passes the feasibilityTest, the pair gets added and we start
        to search for the next matching pair.
        Returns True when an occurrence was found with this pair.
        """
        # all candidate pair (n, m) represent graph x pattern
        if feasibilityTest(H, P, h, p, VF2_obj, n, m):
            # adapt VF2_obj
            VF2_obj.core_graph[n] = True
            VF2_obj.core_pattern[m] = True
            VF2_obj.mapping[h[n]] = p[m]
            addOutNeighbours(H[n], VF2_obj.N_out_graph, index_M)
            addIncNeighbours(H, n, VF2_obj.N_inc_graph, index_M)
            addOutNeighbours(P[m], VF2_obj.N_out_pattern, index_M)
            addIncNeighbours(P, m, VF2_obj.N_inc_pattern, index_M)
            if findM(H, P, h, p, VF2_obj, index_M + 1):
                return True
            # else, cleanup, adapt VF2_obj
            VF2_obj.core_graph[n] = False
            VF2_obj.core_pattern[m] = False
            del VF2_obj.mapping[h[n]]
            delNeighbours(VF2_obj.N_out_graph, index_M)
            delNeighbours(VF2_obj.N_inc_graph, index_M)
            delNeighbours(VF2_obj.N_out_pattern, index_M)
            delNeighbours(VF2_obj.N_inc_pattern, index_M)
        return False

    def preferred(H, P, h, p, index_M, VF2_obj, N_graph, N_pattern):
        """
        Try to match the adjacent vertices connected via outgoing
        or incoming edges. (Depending on what is given for N_graph and
        N_pattern.)
        """
        for n in range(0, len(N_graph)):
            # skip graph vertices that are not in N_graph
            # (or already matched)
            if N_graph[n] == -1 or VF2_obj.core_graph[n]:
                continue
            for m in range(0, len(N_pattern)):
                # skip pattern vertices that are not in N_pattern
                # (or already matched)
                if N_pattern[m] == -1 or VF2_obj.core_pattern[m]:
                    continue
                if matchPhase(H, P, h, p, index_M, VF2_obj, n, m):
                    return True
        return False

    def leastPreferred(H, P, h, p, index_M, VF2_obj):
        """
        Try to match the vertices that are not connected to the currently
        matched vertices.
        """
        for n in range(0, len(VF2_obj.N_out_graph)):
            # skip graph vertices that are connected to the matched ones
            # (or already matched)
            if not (VF2_obj.N_out_graph[n] == -1 and VF2_obj.N_inc_graph[n] == -1) or VF2_obj.core_graph[n]:
                continue
            for m in range(0, len(VF2_obj.N_out_pattern)):
                # skip pattern vertices that are connected to the matched ones
                # (or already matched)
                if not (VF2_obj.N_out_pattern[m] == -1 and VF2_obj.N_inc_pattern[m] == -1) or VF2_obj.core_pattern[m]:
                    continue
                if matchPhase(H, P, h, p, index_M, VF2_obj, n, m):
                    return True
        return False

    def findM(H, P, h, p, VF2_obj, index_M=0):
        """
        Find an isomorphic mapping for the vertices of P to H.
        This mapping is represented by a matrix M if,
        and only if M(MH)^T = P^T.
        This operates in a similar way as Ullmann. Ullmann has a predefined
        order for matching (sorted on most edges first). VF2's order is to
        first try to match the adjacent vertices connected via outgoing
        edges, then those connected via incoming edges and then those that
        are not connected to the currently matched vertices.
        index_M is the number of pairs matched so far.
        """
        # We are at the end, we found a candidate.
        if index_M == len(p):
            bound_graph_vertices = {}
            for vertex_bound in VF2_obj.mapping:
                bound_graph_vertices.setdefault(vertex_bound.type, set()).add(vertex_bound)
            self.result = self.matchNaive(vertices=bound_graph_vertices, edges=self.graph.edges)
            return self.result is not None
        # try the candidates in the preferred order
        # first try the adjacent vertices connected via the outgoing edges.
        if preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_out_graph, VF2_obj.N_out_pattern):
            return True
        # then try the adjacent vertices connected via the incoming edges.
        if preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_inc_graph, VF2_obj.N_inc_pattern):
            return True
        # and lastly, try the vertices not connected to the currently matched vertices
        if leastPreferred(H, P, h, p, index_M, VF2_obj):
            return True
        return False

    # create adjacency matrix of the graph
    graph_map = self.createAdjacencyMatrixMap(self.graph)
    if graph_map is False:
        # a vertex type of the pattern is missing in the graph
        return None
    H, h = graph_map
    # create adjacency matrix of the pattern
    pattern_map = self.createAdjacencyMatrixMap(self.pattern)
    if pattern_map is False:
        return None
    P, p = pattern_map
    VF2_obj = VF2_Obj(len(h), len(p))
    findM(H, P, h, p, VF2_obj)
    return self.result

View file

@ -0,0 +1,528 @@
# coding: utf-8
"""
Author: Sten Vercamman
University of Antwerp
Example code for paper: Efficient model transformations for novices
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
The main goal of this code is to give an overview, and an understandable
implementation, of known techniques for pattern matching and solving the
sub-graph homomorphism problem. The presented techniques do not include
performance adaptations/optimizations. It is not optimized to be efficient
but rather for the ease of understanding the workings of the algorithms.
The paper does list some possible extensions/optimizations.
It is intended as a guideline, even for novices, and provides an in-depth look
at the workings behind various techniques for efficient pattern matching.
"""
from searchGraph import *
from enum import *
# Enum for all primitive operation types
# note: inc represents the primitive operation "in" (in is a reserved keyword in Python)
PRIM_OP = Enum(['lkp', 'inc', 'out', 'src', 'tgt'])
class PlanGraph(object):
"""
Holds the PlanGraph for a pattern.
Can create the search plan of the pattern for a given SearchGraph.
"""
def __init__(self, pattern):
if not isinstance(pattern, Graph):
raise TypeError('PlanGraph expects the pattern to be a Graph')
# member variables:
self.vertices = [] # will not be searched in
self.edges = [] # will not be searched in
# representation map, maps vertex from pattern to element from PlanGraph
# (no need for edges)
repr_map = {}
# 1.1: for every vertex in the pattern graph,
# create a vertex representing the pattern element
for str_type, vertices in pattern.vertices.items():
for vertex in vertices:
# we only need to know the type of the vertex
plan_vertex = Vertex(str_type)
# and we need to know that is was a vertex
plan_vertex.is_vertex = True
# for re-linking the edges, we'll need to map the
# vertex of the pattern to the plan_vertex
repr_map[vertex] = plan_vertex
# save created plan_vertex
self.vertices.append(plan_vertex)
# 1.2: for every edge in the pattern graph,
# create a vertex representing the pattern elemen
for str_type, edges in pattern.edges.items():
for edge in edges:
# we only need to know the type of the edge
plan_vertex = Vertex(edge.type)
# and we need to know that is was an edge
plan_vertex.is_vertex = False
# save created plan_vertex
self.vertices.append(plan_vertex)
# 4: for every element x from the PlanGraph
# that represents an edge e in the pattern:
# 4.1: create an edge labelled tgt from x to the vertex in the PlanGraph
# representing the target vertex of e in the pattern graph,
# and a reverted edge labelled in
# 4.1.1: tgt:
plan_edge = Edge(plan_vertex, repr_map[edge.tgt])
# backup src and tgt (Edmonds might override it)
plan_edge.orig_src = plan_edge.src
plan_edge.orig_tgt = plan_edge.tgt
plan_edge.label = PRIM_OP.tgt
# link vertices connected to this plan_edge
plan_edge.src.addOutgoingEdge(plan_edge)
plan_edge.tgt.addIncomingEdge(plan_edge)
# tgt and src cost are always 1, we use logaritmic cost,
# (=> cost = ln(1) = 0.0) so that we do not need to minimaze
# a product, but can minimize a sum
# (as ln(c1...ck) = ln(c1) + ... + ln (ck))
plan_edge.cost = 0.0
# backup orig cost, as Edmonds changes cost
plan_edge.orig_cost = plan_edge.cost
# save created edge
self.edges.append(plan_edge)
# 4.1.2: in:
plan_edge = Edge(repr_map[edge.tgt], plan_vertex)
# backup src and tgt (Edmonds might override it)
plan_edge.orig_src = plan_edge.src
plan_edge.orig_tgt = plan_edge.tgt
plan_edge.label = PRIM_OP.inc
# link vertices connected to this plan_edge
plan_edge.src.addOutgoingEdge(plan_edge)
plan_edge.tgt.addIncomingEdge(plan_edge)
# save created edge
self.edges.append(plan_edge)
# 4.2: create an edge labelled src from x to the vertex in the PlanGraph
# representing the source vertex of e in the pattern graph
# and a reverted edge labelled out
# 4.2.1: src
plan_edge = Edge(plan_vertex, repr_map[edge.src])
# backup src and tgt (Edmonds might override it)
plan_edge.orig_src = plan_edge.src
plan_edge.orig_tgt = plan_edge.tgt
plan_edge.label = PRIM_OP.src
# link vertices connected to this plan_edge
plan_edge.src.addOutgoingEdge(plan_edge)
plan_edge.tgt.addIncomingEdge(plan_edge)
# tgt and src cost are always 1, we use logaritmic cost,
# (=> cost = ln(1) = 0.0) so that we do not need to minimaze
# a product, but can minimize a sum
# (as ln(c1...ck) = ln(c1) + ... + ln (ck))
plan_edge.cost = 0.0
# backup orig cost, as Edmonds changes cost
plan_edge.orig_cost = plan_edge.cost
# save created edge
self.edges.append(plan_edge)
# 4.2.2: out
plan_edge = Edge(repr_map[edge.src], plan_vertex)
# backup src and tgt (Edmonds might override it)
plan_edge.orig_src = plan_edge.src
plan_edge.orig_tgt = plan_edge.tgt
plan_edge.label = PRIM_OP.out
# link vertices connected to this plan_edge
plan_edge.src.addOutgoingEdge(plan_edge)
plan_edge.tgt.addIncomingEdge(plan_edge)
# save created edge
self.edges.append(plan_edge)
# 2: create a root vertex
self.root = Vertex('root')
# don't add it to the vertices
# 3: for each element in the PlanGraph (that is not the root vertex),
# create an edge from the root to it, and label it lkp
for vertex in self.vertices:
plan_edge = Edge(self.root, vertex)
# backup src and tgt (Edmonds might override it)
plan_edge.orig_src = plan_edge.src
plan_edge.orig_tgt = plan_edge.tgt
plan_edge.label = PRIM_OP.lkp
# link vertices connected to this plan_edge
plan_edge.src.addOutgoingEdge(plan_edge)
plan_edge.tgt.addIncomingEdge(plan_edge)
# save created edge
self.edges.append(plan_edge)
def updatePlanCost(self, graph):
"""
returns True if sucessfully updated cost,
returns False if a type in the pattern is not in the graph.
"""
if not isinstance(graph, SearchGraph):
raise TypeError('updatePlanCost expects a SearchGraph')
# update, lkp, in and out (not src and tgt as they are constant)
for edge in self.edges:
if edge.label == PRIM_OP.lkp:
edge.cost = graph.getCostLkp(edge.tgt.type, edge.tgt.is_vertex)
if edge.cost == None:
print('failed lkp')
return False
elif edge.label == PRIM_OP.inc:
# in(v, e), binds an incoming edge e from an already bound vertex v,
# depends on the number of incoming edges of type e for the vertex type
edge.cost = graph.getCostInc(edge.src.type, edge.tgt.type)
if edge.cost == None:
print('failed in')
return False
elif edge.label == PRIM_OP.out:
# (analogue for out(v, e))
edge.cost = graph.getCostOut(edge.src.type, edge.tgt.type)
if edge.cost == None:
print('failed out')
return False
# else: ignore src and tgt
# backup orig cost, as Edmonds changes cost
edge.orig_cost = edge.cost
return True
def Edmonds(self, searchGraph):
"""
Returns the minimum directed spanning tree (MDST)
for the pattern and the provided graph.
Returns None if it is impossible to find the pattern in the Graph
(vertex type of edge type from pattern not in Graph).
"""
# update the cost for the PlanGraph
if not self.updatePlanCost(searchGraph):
print('type in pattern not found in Graph (in Edmonds)')
# (returns False if a type in the pattern can not be found in the graph)
return None
# Complete Edmonds algorithm has optimization steps:
# a: remove edges entering the root
# b: merge parallel edges from same src to same tgt with mim weight
# we can ignore this as:
# a: the root does not have incoming edges
# b: the PlanGraph does not have such paralllel edges
# 1: for each node v (other than root), find incoming edge with lowest weight
# insert those
pi_v = {}
for plan_vertex in self.vertices:
min_weight = float('infinity')
min_edge = None
for plan_edge in plan_vertex.incoming_edges:
if plan_edge.cost < min_weight:
min_weight = plan_edge.cost
min_edge = plan_edge
# save plan_vertex and it's minimum incoming edge
pi_v[plan_vertex] = min_edge
if min_edge == None:
raise RuntimeError('baka: no min_edge found')
def getCycle(vertex, reverse_graph, visited):
"""
Walk from vertex to root, we walk in a reverse order, as each vertex
only has one incoming edge, so we walk to the source of that incoming
edge. We stop when we already visited a vertex we walked on.
In both cases we return None.
When we visit a vertex from our current path, we return that cycle,
by first removing its tail.
"""
def addToVisited(walked, visited):
for vertex in walked:
visited.add(vertex)
walked = [] # we could only save it once, but we need order
current_path = set() # and lookup in an array is slower than in set
# we asume root is in visited (it must be in it)
while vertex not in visited:
if vertex in current_path:
# we found a cycle, the cycle however might look like a: O--,
# g f e where we first visited a, then b, c, d,...
# h d c b a k points back to d, completing a cycle,
# i j k but c b a is the tail that does not belong
# in the cycle, removing this is "easy" as we know that
# we first visited the tail, so they are the first elements
# in our walked path
for tail_part in walked:
if tail_part != vertex:
current_path.remove(tail_part)
else:
break
addToVisited(walked, visited)
return current_path
current_path.add(vertex)
walked.append(vertex)
# by definition, an MDST only has one incoming edge per vertex
# so we follow it upwards
# vertex <--(minimal edge)-- src
vertex = reverse_graph[vertex].src
# no cycle found (the current path let to a visited vertex)
addToVisited(walked, visited) # add walked to visited
return None
class VertexGraph(Vertex):
"""
Acts as a super vertex, holds a subgraph (that is/was once a cyle).
Uses for Edmonds contractions step.
The incoming edges are the edges leading to the vertices in the
VertexGraph (they exclude edges from a vertex in the cycle to
another vertex in the cycle).
Analogue for outgoing edges.
"""
def __init__(self, cycle, reverseGraph):
# Call parent class constructor
str_type = ''
for vertex in cycle:
str_type += str(vertex.type)
Vertex.__init__(self, str_type)
# member variables:
self.internalMDST = {}
minIntWeight = self.findMinIntWeight(cycle, reverseGraph)
self.updateMinExtEdge(minIntWeight, reverseGraph)
def findMinIntWeight(self, cycle, reverseGraph):
"""
Find the the smallest cost of the cycle his internal incoming edges.
(Also save its internalMDST (currently a cycle).)
(The VertexGraph formed by the cycle will be added to the
reverseGraph by calling findMinExtEdge.)
"""
minIntWeight = float('infinity')
cycleEdges = []
origTgts = []
for cyclePart in cycle:
cycleEdges.append(reverseGraph[cyclePart])
origTgts.append(reverseGraph[cyclePart].orig_tgt)
for vertex in cycle:
# add incoming edges to this VertexGraph
for inc_edge in vertex.incoming_edges:
# edge from within the cycle
if inc_edge.src in cycle:
minIntWeight = min(minIntWeight, inc_edge.cost)
else:
# edge from outside the cycle
self.addIncomingEdge(inc_edge)
# add outgoing edges to this VertexGraph
for out_edge in vertex.outgoing_edges:
if out_edge.tgt not in cycle:
# edge leaves the cycle
self.addOutgoingEdge(out_edge)
# update src to this VertexGraph
out_edge.src = self
# save internal MDST
min_edge = reverseGraph[vertex]
if min_edge.src in cycle:
self.internalMDST[vertex] = min_edge
else:
raise TypeError('how is this a cycle')
return minIntWeight
def updateMinExtEdge(self, minIntWeight, reverseGraph):
"""
Modifies all external incoming edges their cost and finds the
minimum external incoming edge with this modified weight.
This found edge will break the cycle, update the internalMDST
from a cycle to an MDST, updates the reverseGraph to include
the vertexGraph.
"""
minExt = None
minModWeight = -float('infinity')
# Find incoming edge from outside of the circle with minimal
# modified cost. This edge will break the cycle.
for inc_edge in self.incoming_edges:
# An incoming edge (with src from within the cycle), can be
# from a contracted part of the graph. Assume bc is a
# contracted part (VertexGraph) a, bc is a newly formed
# cycle (due to the breaking of the previous cycle bc). bc
# has at least lkp incoming edges to b and c, but we should
# not consider the lkp of c to break the cycle.
# If we want to break a, bc, select plausable edges,
# /<--\
# a bc bc's MDST b <-- c
# \-->/
# by looking at their original targets.
# (if cycle inc_edge.orig_tgt == external inc_edge.orig_tgt)
if reverseGraph[inc_edge.tgt].orig_tgt == inc_edge.orig_tgt:
# modify costL cost of inc_edge -
# (cost of previously choosen minimum edge to cycle vertex - minIntWeight)
inc_edge.cost -= (reverseGraph[inc_edge.tgt].cost - minIntWeight)
if minExt is None or minModWeight > inc_edge.cost:
# save better edge from outside of the cycle
minExt = inc_edge
minModWeight = inc_edge.cost
# Example: a, b is a cycle (we know that there are no other
# incoming edges to a and/or b, as there is on;y exactly one
# incoming edge per vertex), and the arow from c to b represents
# the minExt edge. We will remove the bottem arrow (from a to b)
# /<--\ and save the minExt edge in the reverseGraph.
# a b <-- c This breaks the cycle. As the internalMDST
# \-->/ saves the intenal MDST, and currently still
# holds a cycle, we have to remove it from the internalMDST.
# We have to remove all vertex bindings of the cycle from the
# reverseGraph (as it is contracted into a single VertexGraph),
# and store the minExt edge to this VertexGraph in it.
for int_vertex, _ in self.internalMDST.items():
del reverseGraph[int_vertex] # remove cycle from reverseGraph
del self.internalMDST[minExt.tgt] # remove/break cycle
for inc_edge in self.incoming_edges:
# update inc_edge's target to this VertexGraph
inc_edge.tgt = self
# save minExt edge to this VertexGraph in the reverseGraph
reverseGraph[self] = minExt
while True:
# 2: find all cycles:
cycles = []
visited = set([self.root]) # root does not have incoming edges,
for vertex in list(pi_v.keys()): # it can not be part of a cycle
if vertex not in visited: # getCycle depends on root being in visited
cycle = getCycle(vertex, pi_v, visited)
if cycle != None:
cycles.append(cycle)
# 2: if the set of edges {pi(v), v} does not contain any cycles,
# Then we found our minimum directed spanning tree
# otherwise, we'll have to resolve the cycles
if len(cycles) == 0:
break
# 3: For each formed cycle:
# 3a: find internal incoming edge with the smallest cost
# 3b: modify the cost of each arc which enters the cycle
# 3c: replace smallert internal edge with the modified edge which has the smallest cost
for cycle in cycles:
# Breaks a cycle by:
# - contracting cycle into VertexGraph
# - finding the internal incoming edge with the smallest cost
# - modify the cost of each arc which enters the cycle
# - replacing the smallest internal edge with the modified edge which has the smallest cost
# - changing reverseGraph accordingly (removes elements from cycle, ads vertexGraph)
# (This will find a solution as the graph keeps shrinking with every cycle,
# in the worst case the same amount as there are vertices, until
# onlty the root and one vertexGraph remains)
vertexGraph = VertexGraph(cycle, pi_v)
class SortedContainer(object):
"""
A container that keeps elemets sorted based on a given sortValue.
Elements with the same value, will be returned in the order they got inserted.
"""
def __init__(self):
# member variables:
self.keys = [] # stores key in sorted order (sorted when pop gets called)
self.sorted = {} # {key, [elems with same key]}
def add(self, sortValue, element):
"""
Adds element with sortValue to the SortedContainer.
"""
elems = self.sorted.get(sortValue)
if elems == None:
self.sorted[sortValue] = [element]
self.keys.append(sortValue)
else:
elems.append(element)
def pop(self):
"""
Sorts the SortedContainer, returns element with smallest sortValue.
"""
self.keys.sort()
elems = self.sorted[self.keys[0]]
elem = elems.pop()
if len(elems) == 0:
del self.sorted[self.keys[0]]
del self.keys[0]
return elem
def empty(self):
"""
Returns whether or not the sorted container is empty.
"""
return (len(self.keys) == 0)
def createPRIM_OP(edge, inc_cost=True):
"""
Helper function to keep argument list short,
return contracted data for a PRIM_OP.
"""
if edge.label == PRIM_OP.inc or edge.label == PRIM_OP.out:
if inc_cost: # op # vertex type # actual edge type
return (edge.label, edge.orig_src.type, edge.orig_tgt.type, edge.cost)
else:
return (edge.label, edge.orig_src.type, edge.orig_tgt.type)
elif edge.label == PRIM_OP.lkp:
if inc_cost: # op # vertex/edge type # is vertex or edge
return (edge.label, edge.orig_tgt.type, edge.orig_tgt.is_vertex, edge.cost)
else:
return (edge.label, edge.orig_tgt.type, edge.orig_tgt.is_vertex)
else: # src, tgt operation
if inc_cost: # op # actual edge type
return (edge.label, edge.orig_src.type, edge.cost)
else:
return (edge.label, edge.orig_src.type)
def flattenReverseGraph(vertex, inc_edge, reverseGraph):
"""
Flattens the reverseGraph, so that the vertexGraph nodes can get
processed to create a forwardGraph.
vertex: a plain vertex or a VertexGraph.
inc_edge: the incoming edge recorded for vertex.
reverseGraph: dict that is filled in place with {vertex: incoming edge};
nothing is returned.
"""
# a plain vertex is stored directly with its incoming edge
if not isinstance(vertex, VertexGraph):
reverseGraph[vertex] = inc_edge
else:
# a VertexGraph is replaced by the original target of its incoming edge
reverseGraph[inc_edge.orig_tgt] = inc_edge
# the entries of the VertexGraph's internalMDST are flattened recursively
# (presumably inc_edge.tgt is the same VertexGraph as vertex -- TODO confirm)
for vg, eg in inc_edge.tgt.internalMDST.items():
flattenReverseGraph(vg, eg, reverseGraph)
# NOTE(review): the check below looks like it is meant to apply to both
# branches above (the source of the edge can itself be a VertexGraph) --
# confirm its nesting level against the original file
if isinstance(inc_edge.src, VertexGraph):
for vg, eg in inc_edge.src.internalMDST.items():
flattenReverseGraph(vg, eg, reverseGraph)
def createForwardGraph(vertex, inc_edge, forwardGraph):
    """
    Registers inc_edge as an outgoing edge of its original source vertex in
    forwardGraph ({vertex: [outgoing edges]}, filled in place).
    When vertex is a VertexGraph, the entries of its internalMDST are
    registered as well (recursively).
    """
    # every edge is filed under its original source, whatever vertex is
    forwardGraph.setdefault(inc_edge.orig_src, []).append(inc_edge)
    if isinstance(vertex, VertexGraph):
        for inner_vertex, inner_edge in vertex.internalMDST.items():
            createForwardGraph(inner_vertex, inner_edge, forwardGraph)
MDST = []
# pi_v contains {vertex, incoming_edge}
# we want to start from root and follow the outgoing edges
# so we have to build the forwardGraph graph for pi_v
# (Except for the root (has 0), each vertex has exactly one incoming edge,
# but might have multiple outgoing edges)
forwardGraph = {} # {vertex, [outgoing edge 1, ... ] }
reverseGraph = {}
# flatten reverseGraph (for the vertexGraph elements)
for v, e in pi_v.items():
flattenReverseGraph(v, e, reverseGraph)
# create the forwardGraph
for vertex, edge in reverseGraph.items():
createForwardGraph(vertex, edge, forwardGraph)
# create the MDST in a best first manner (lowest value first)
current = SortedContainer() # allows easy walking true tree
for edge in forwardGraph[self.root]:
current.add(edge.orig_cost, edge) # use orig cost, not modified
while current.empty() != True:
p_op = current.pop() # p_op contains an outgoing edge
MDST.append(createPRIM_OP(p_op))
for edge in forwardGraph.get(p_op.orig_tgt, []):
current.add(edge.orig_cost, edge)
return MDST

8
pattern_matching/run.sh Executable file
View file

@ -0,0 +1,8 @@
#!/bin/sh
# Runs main.py, renders randomGraph.dot and randomPattern.dot to SVG with
# Graphviz and opens both results in Firefox.
# (main.py is expected to produce the two .dot files.)

# stop at the first failing step instead of rendering/opening stale files
set -e

# the library targets Python 3; a bare "python" may be missing or be Python 2
python3 main.py
dot randomGraph.dot -Tsvg > randomGraph.svg
dot randomPattern.dot -Tsvg > randomPattern.svg
firefox randomGraph.svg
firefox randomPattern.svg

View file

@ -0,0 +1,115 @@
# coding: utf-8
"""
Author: Sten Vercammen
University of Antwerp
Example code for paper: Efficient model transformations for novices
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
The main goal of this code is to give an overview, and an understandable
implementation, of known techniques for pattern matching and solving the
sub-graph homomorphism problem. The presented techniques do not include
performance adaptations/optimizations. It is not optimized to be efficient
but rather for the ease of understanding the workings of the algorithms.
The paper does list some possible extensions/optimizations.
It is intended as a guideline, even for novices, and provides an in-depth look
at the workings behind various techniques for efficient pattern matching.
"""
from graph import *
import math
class SearchGraph(Graph):
"""
A SearchGraph is an extended Graph; it keeps track of statistics
for creating the cost model when generating a search plan.
It stores the number of edges for each edge.type per vertex.type.
"""
def __init__(self, orig=None, deepCopy=False):
    """
    Creates a SearchGraph, optionally on top of an existing graph.

    orig:     optional Graph (or SearchGraph). Its vertices and edges are
              taken over by reference (they are shared, not copied) and
              the edge counters are filled from its edges.
    deepCopy: when True, nothing is taken over from orig (deep copying is
              not implemented).

    Raises TypeError if orig is given but is not a Graph.
    """
    Graph.__init__(self)
    # member variables:
    self.nr_of_inc_edges = {}   # {vertex_type, {edge_type, nr of incoming edges of edge_type for vertex_type } }
    self.nr_of_out_edges = {}   # {vertex_type, {edge_type, nr of outgoing edges of edge_type for vertex_type } }
    if orig is None:
        return
    # SearchGraph is a subclass of Graph, so one check covers both types
    if not isinstance(orig, Graph):
        raise TypeError('Can only create SearchGraph from Graph and SearchGraph types')
    if deepCopy:
        # TODO: deepcopy (not really needed)
        return
    # copy all member elements:
    self.vertices = orig.vertices   # this is a reference
    self.edges = orig.edges         # this is a reference
    # update the edge counters for each edge
    for edges in self.edges.values():
        for edge in edges:
            self.addToEdgeCounters(edge)
def addCreateEdge(self, src, tgt, str_type):
    """
    Creates edge of str_type from src to tgt, and returns it,
    so that properties can be added to the edge.
    This also adds the Edge to the Edge counters.
    """
    # call parent function, this function is an extension
    edge = Graph.addCreateEdge(self, src, tgt, str_type)
    # the counter method of this class is addToEdgeCounters
    # (the previously called updateEdgeCounters is not defined here)
    self.addToEdgeCounters(edge)
    return edge
def addToEdgeCounters(self, edge):
    """
    Counts the Edge once as an incoming edge for the type of its target
    vertex and once as an outgoing edge for the type of its source vertex.
    """
    bookkeeping = (
        (self.nr_of_inc_edges, edge.tgt),   # incoming: keyed on the target's type
        (self.nr_of_out_edges, edge.src),   # outgoing: keyed on the source's type
    )
    for counters, endpoint in bookkeeping:
        # get {edge.type, counter} for this vertex type (or create it)
        per_edge_type = counters.setdefault(endpoint.type, {})
        # increase counter of edge.type by 1
        per_edge_type[edge.type] = per_edge_type.get(edge.type, 0) + 1
def getCostLkp(self, type, is_vertex):
    """
    Returns the cost of a lkp primitive operation (of a vertex or edge):
    the natural logarithm of the number of vertices (is_vertex is true)
    or edges (is_vertex is false) of the given type.
    Returns None if no vertex/edge of that type is found.
    """
    candidates = self.getVerticesOfType(type) if is_vertex else self.getEdgesOfType(type)
    count = len(candidates)
    # the logarithm of 0 is undefined, an absent type is reported as None
    return math.log(count) if count != 0 else None
def getCostInc(self, vertex_type, edge_type):
    """
    Returns the cost of an in primitive operation: the natural logarithm
    of the number of incoming edges of edge_type counted for vertex_type,
    divided by the number of vertices of vertex_type.
    Returns None if no such edge was counted for vertex_type.
    """
    nr_of_edges = self.nr_of_inc_edges.get(vertex_type, {}).get(edge_type)
    if nr_of_edges is None:
        # must be tested before converting: float(None) raises a TypeError
        return None
    cost = float(nr_of_edges)
    nr_of_vertices_with_type = len(self.getVerticesOfType(vertex_type))
    if nr_of_vertices_with_type != 0:
        # average number of such edges per vertex of this type
        cost /= nr_of_vertices_with_type
    # we use a logarithmic cost
    return math.log(cost)
def getCostOut(self, vertex_type, edge_type):
    """
    Returns the cost of an out primitive operation: the natural logarithm
    of the number of outgoing edges of edge_type counted for vertex_type,
    divided by the number of vertices of vertex_type.
    Returns None if no such edge was counted for vertex_type.
    """
    nr_of_edges = self.nr_of_out_edges.get(vertex_type, {}).get(edge_type)
    if nr_of_edges is None:
        # must be tested before converting: float(None) raises a TypeError
        return None
    cost = float(nr_of_edges)
    nr_of_vertices_with_type = len(self.getVerticesOfType(vertex_type))
    if nr_of_vertices_with_type != 0:
        # average number of such edges per vertex of this type
        cost /= nr_of_vertices_with_type
    # we use a logarithmic cost
    return math.log(cost)