Add Sten Vercammen's pattern matching library (ported to Python 3, numpy dependency replaced by standard library)
This commit is contained in:
parent
151ffe0ff0
commit
95a8076a17
9 changed files with 2120 additions and 0 deletions
31
pattern_matching/enum.py
Normal file
31
pattern_matching/enum.py
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
"""
|
||||||
|
Author: Sten Vercammen
|
||||||
|
University of Antwerp
|
||||||
|
|
||||||
|
Example code for paper: Efficient model transformations for novices
|
||||||
|
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
|
||||||
|
|
||||||
|
The main goal of this code is to give an overview, and an understandable
|
||||||
|
implementation, of known techniques for pattern matching and solving the
|
||||||
|
sub-graph homomorphism problem. The presented techniques do not include
|
||||||
|
performance adaptations/optimizations. It is not optimized to be efficient
|
||||||
|
but rather for the ease of understanding the workings of the algorithms.
|
||||||
|
The paper does list some possible extensions/optimizations.
|
||||||
|
|
||||||
|
It is intended as a guideline, even for novices, and provides an in-depth look
|
||||||
|
at the workings behind various techniques for efficient pattern matching.
|
||||||
|
"""
|
||||||
|
|
||||||
|
class Enum(object):
    """
    Custom Enum object for compatibility (the standard enum module was only
    introduced in python 3.4).

    Every name in the given sequence becomes an instance attribute whose
    value is the zero-based position of that name in the sequence.

    Usage create    : a = Enum(['e0', 'e1', ...])
    Usage call      : a.e0
    """
    def __init__(self, args):
        # enumerate() supplies the running counter, so no local variable has
        # to shadow the builtin next(). The names are written straight into
        # the instance dictionary, one entry per name.
        for value, name in enumerate(args):
            self.__dict__[name] = value
|
||||||
202
pattern_matching/generator.py
Normal file
202
pattern_matching/generator.py
Normal file
|
|
@ -0,0 +1,202 @@
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
"""
|
||||||
|
Author: Sten Vercammen
|
||||||
|
University of Antwerp
|
||||||
|
|
||||||
|
Example code for paper: Efficient model transformations for novices
|
||||||
|
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
|
||||||
|
|
||||||
|
The main goal of this code is to give an overview, and an understandable
|
||||||
|
implementation, of known techniques for pattern matching and solving the
|
||||||
|
sub-graph homomorphism problem. The presented techniques do not include
|
||||||
|
performance adaptations/optimizations. It is not optimized to be efficient
|
||||||
|
but rather for the ease of understanding the workings of the algorithms.
|
||||||
|
The paper does list some possible extensions/optimizations.
|
||||||
|
|
||||||
|
It is intended as a guideline, even for novices, and provides an in-depth look
|
||||||
|
at the workings behind various techniques for efficient pattern matching.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import graph
|
||||||
|
# import numpy as np
|
||||||
|
import math
|
||||||
|
import collections
|
||||||
|
import random
|
||||||
|
|
||||||
|
class GraphGenerator(object):
    """
    Builds a Graph from explicit vertex/edge descriptions and derives random
    patterns from it.

    dv is a sequence with one entry per vertex (its type), de a sequence with
    one entry per edge (its type). dc_inc[i] is the index (into dv) of the
    vertex that edge i enters, dc_out[i] the index of the vertex it leaves.
    """
    def __init__(self, dv, de, dc_inc, dc_out, debug=False):
        """
        Create the graph described by dv, de, dc_inc and dc_out.

        Raises ValueError when dc_inc or dc_out does not have exactly one
        entry per edge in de. With debug set, the four sequences are printed
        so that the same graph can be recreated later.
        """
        if len(de) != len(dc_inc):
            raise ValueError('de and dc_inc should be the same length.')
        if len(de) != len(dc_out):
            raise ValueError('de and dc_out should be the same length.')

        self.dv = dv
        self.de = de
        self.dc_inc = dc_inc
        self.dc_out = dc_out

        # print for debugging, so you know the used values
        if debug:
            for label, values in (('dv', dv), ('de', de),
                                  ('dc_inc', dc_inc), ('dc_out', dc_out)):
                print(label)
                print('[', ','.join(map(str, values)), ']')
                print('_____')

        self.graph = graph.Graph()
        self.vertices = []
        # create all the vertices; the vertex type is 'v' + the dv entry
        for v_type in self.dv:
            self.vertices.append(self.graph.addCreateVertex('v' + str(v_type)))

        # create all edges; the edge type is 'e' + the de entry
        for index, e_type in enumerate(self.de):
            src = self.vertices[self.dc_out[index]]    # vertex the edge leaves
            tgt = self.vertices[self.dc_inc[index]]    # vertex the edge enters
            self.graph.addCreateEdge(src, tgt, 'e' + str(e_type))

    def getRandomGraph(self):
        """
        Returns the graph that was built by the constructor.
        """
        return self.graph

    def getRandomPattern(self, max_nr_of_v, max_nr_of_e, start=0, debug=False):
        """
        Grows a random pattern out of the generated graph.

        Starting from the vertex at index start, an edge touching the
        vertices copied so far is picked at random and copied into a new
        Graph, together with its missing end vertex (if any). Growing stops
        once the pattern holds max_nr_of_v vertices, once max_nr_of_e edges
        were chosen, or when no candidate edge is left.

        With debug set, the construction calls are printed in a form that
        can be pasted into createConstantPattern.

        Returns the new pattern Graph. Raises RuntimeError if a chosen edge
        touches no vertex of the pattern (this would be an internal bug).
        """
        pattern = graph.Graph()

        # map from graph vertex to its copy in the new pattern
        graph_to_pattern = {}

        # candidate edges in insertion order; only the keys are used
        # (an OrderedDict stands in for an ordered set)
        possible_edges = collections.OrderedDict()

        # set of chosen edges
        chosen_edges = set()

        def copyVertex(g_vertex):
            # copy a graph vertex into the pattern and save the correlation
            p_vertex = pattern.addCreateVertex(g_vertex.type)
            if debug:
                print('v' + str(id(p_vertex)) + "=pattern.addCreateVertex('"
                      + str(g_vertex.type) + "')")
            graph_to_pattern[g_vertex] = p_vertex
            return p_vertex

        def copyEdge(p_src, p_tgt, g_edge):
            # create the pattern edge between two pattern vertices
            pattern.addCreateEdge(p_src, p_tgt, g_edge.type)
            if debug:
                print('pattern.addCreateEdge(v' + str(id(p_src)) + ', v'
                      + str(id(p_tgt)) + ", '" + str(g_edge.type) + "')")

        def insertEdges(g_vertex):
            # every edge of the vertex that was not chosen yet becomes a
            # candidate (the stored value is irrelevant, hence None)
            for edges in (g_vertex.incoming_edges, g_vertex.outgoing_edges):
                for edge in edges:
                    if edge not in chosen_edges:
                        possible_edges[edge] = None

        # start node from graph
        g_node = self.vertices[start]
        copyVertex(g_node)
        insertEdges(g_node)

        while max_nr_of_v > len(graph_to_pattern) and max_nr_of_e > len(chosen_edges):
            if not possible_edges:
                break
            # Pick a position in possible_edges. A triangular distribution
            # whose mode is the last position is used, so edges that were
            # added later are favoured over the ones at the front, which
            # already had chances of being chosen in earlier iterations.
            # (This is an approximation: doing it exactly is computationally
            # expensive.)
            if len(possible_edges) == 1:
                randie = 0
            else:
                randie = int(round(random.triangular(1, len(possible_edges), len(possible_edges)))) - 1
            candidate = list(possible_edges.keys())[randie]
            del possible_edges[candidate]
            chosen_edges.add(candidate)

            src = graph_to_pattern.get(candidate.src)
            tgt = graph_to_pattern.get(candidate.tgt)

            # Checked first: in an if/elif chain on "src is None" this case
            # would never reach its own error branch.
            if src is None and tgt is None:
                raise RuntimeError('Bug: src or tgt of edge should be in out pattern')

            if src is not None and tgt is not None:
                # both ends are already part of the pattern: only the edge
                # is new, so there are no new candidate edges to add
                copyEdge(src, tgt, candidate)
                continue

            # exactly one end is not yet part of the pattern: copy it
            if src is None:
                new_vertex = candidate.src
                src = copyVertex(new_vertex)
            else:
                new_vertex = candidate.tgt
                tgt = copyVertex(new_vertex)
            copyEdge(src, tgt, candidate)

            # insert all edges from the new vertex
            insertEdges(new_vertex)

        return pattern

    @staticmethod
    def createConstantPattern():
        """
        Use this to create the same pattern over and over again.

        It takes no instance state, so it is a static method: it can be
        called both as GraphGenerator.createConstantPattern() and on an
        instance (gg.createConstantPattern()).
        """
        # create pattern
        pattern = graph.Graph()

        # copy and paste printed pattern from debug output or create a pattern
        # below the following line:
        # ----------------------------------------------------------------------
        v4447242448 = pattern.addCreateVertex('v4')
        v4457323088 = pattern.addCreateVertex('v6')
        pattern.addCreateEdge(v4447242448, v4457323088, 'e4')
        v4457323216 = pattern.addCreateVertex('v8')
        pattern.addCreateEdge(v4457323216, v4447242448, 'e4')
        v4457323344 = pattern.addCreateVertex('v7')
        pattern.addCreateEdge(v4457323216, v4457323344, 'e3')
        v4457323472 = pattern.addCreateVertex('v7')
        pattern.addCreateEdge(v4457323344, v4457323472, 'e1')

        # ----------------------------------------------------------------------
        return pattern
|
||||||
157
pattern_matching/graph.py
Normal file
157
pattern_matching/graph.py
Normal file
|
|
@ -0,0 +1,157 @@
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
"""
|
||||||
|
Author: Sten Vercammen
|
||||||
|
University of Antwerp
|
||||||
|
|
||||||
|
Example code for paper: Efficient model transformations for novices
|
||||||
|
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
|
||||||
|
|
||||||
|
The main goal of this code is to give an overview, and an understandable
|
||||||
|
implementation, of known techniques for pattern matching and solving the
|
||||||
|
sub-graph homomorphism problem. The presented techniques do not include
|
||||||
|
performance adaptations/optimizations. It is not optimized to be efficient
|
||||||
|
but rather for the ease of understanding the workings of the algorithms.
|
||||||
|
The paper does list some possible extensions/optimizations.
|
||||||
|
|
||||||
|
It is intended as a guideline, even for novices, and provides an in-depth look
|
||||||
|
at the workings behind various techniques for efficient pattern matching.
|
||||||
|
"""
|
||||||
|
|
||||||
|
class Properties(object):
    """
    Base class that gives an object a dictionary of named properties.
    """
    def __init__(self):
        # property name -> property value
        self.properties = dict()

    def addProperty(self, name, value):
        """
        Stores value under name; a value already stored under that name is
        replaced.
        """
        self.properties[name] = value

    def getProperty(self, name):
        """
        Looks up the property called name.
        Returns its value, or None when no such property was added.
        """
        return self.properties.get(name, None)
|
||||||
|
|
||||||
|
class Edge(Properties):
    """
    A directed Edge that runs from a source Vertex (src) to a target Vertex
    (tgt) and carries an optional type.
    Like every Properties object it can hold extra properties, such as a
    name or a weight.
    """
    def __init__(self, src, tgt, str_type=None):
        # set up the (empty) property dictionary of the parent class
        Properties.__init__(self)
        # the type of the edge and the two vertices it connects
        self.type = str_type
        self.src = src
        self.tgt = tgt
|
||||||
|
|
||||||
|
class Vertex(Properties):
    """
    A typed Vertex that keeps track of the edges entering and leaving it.
    Like every Properties object it can hold extra properties, such as a
    name or a weight.
    """
    def __init__(self, str_type):
        # set up the (empty) property dictionary of the parent class
        Properties.__init__(self)
        self.type = str_type
        # an undirected edge is stored in both sets
        self.incoming_edges = set()
        self.outgoing_edges = set()

    def addIncomingEdge(self, edge):
        """
        Registers edge as entering this Vertex.
        Raises TypeError when edge is not an Edge.
        """
        if isinstance(edge, Edge):
            self.incoming_edges.add(edge)
        else:
            raise TypeError('addIncomingEdge without it being an edge')

    def addOutgoingEdge(self, edge):
        """
        Registers edge as leaving this Vertex.
        Raises TypeError when edge is not an Edge.
        """
        if isinstance(edge, Edge):
            self.outgoing_edges.add(edge)
        else:
            raise TypeError('addOutgoingEdge without it being an edge')

    def addUndirectedEdge(self, edge):
        """
        Registers an undirected (or bi-directed) Edge: it is stored both as
        incoming and as outgoing edge.
        """
        for register in (self.addIncomingEdge, self.addOutgoingEdge):
            register(edge)
|
||||||
|
|
||||||
|
class Graph(object):
    """
    A Graph: all its vertices and edges, grouped by their type.
    """
    def __init__(self):
        # Both dictionaries map a type to the set of elements of that type.
        # Keeping the type as key is redundant (every element knows its own
        # type), but it allows iterating over one specific type quickly.
        self.edges = {}        # {type: set(e1, e2, ...)}
        self.vertices = {}     # {type: set(v1, v2, ...)}

    def addCreateVertex(self, str_type):
        """
        Creates a new Vertex of type str_type and stores it in the Graph.
        The Vertex is returned, so that properties can be added to it.
        """
        created = Vertex(str_type)
        self.addVertex(created)
        return created

    def addVertex(self, vertex):
        """
        Stores an existing Vertex in the Graph.
        Raises TypeError when vertex is not a Vertex.
        """
        if not isinstance(vertex, Vertex):
            raise TypeError('addVertex expects a Vertex')
        # fetch the set for this vertex type (created on first use)
        same_type = self.vertices.setdefault(vertex.type, set())
        same_type.add(vertex)

    def getVerticesOfType(self, str_type):
        """
        Returns the stored set of vertices of type str_type,
        or [] when the Graph has no vertex of that type.
        """
        if str_type in self.vertices:
            return self.vertices[str_type]
        return []

    def getEdgesOfType(self, str_type):
        """
        Returns the stored set of edges of type str_type,
        or [] when the Graph has no edge of that type.
        """
        if str_type in self.edges:
            return self.edges[str_type]
        return []

    def addCreateEdge(self, src, tgt, str_type):
        """
        Creates a new Edge of type str_type running from src to tgt, links
        it to both vertices and stores it in the Graph.
        The Edge is returned, so that properties can be added to it.
        Raises TypeError when src or tgt is not a Vertex.
        """
        if not isinstance(src, Vertex):
            raise TypeError('addCreateEdge: src is not a Vertex')
        if not isinstance(tgt, Vertex):
            raise TypeError('addCreateEdge: tgt is not a Vertex')
        created = Edge(src, tgt, str_type)
        # let both end vertices know about the edge
        src.addOutgoingEdge(created)
        tgt.addIncomingEdge(created)
        self.addEdge(created)
        return created

    def addEdge(self, edge):
        """
        Stores an existing Edge in the Graph (its vertices are not touched).
        Raises TypeError when edge is not an Edge.
        """
        if not isinstance(edge, Edge):
            raise TypeError('addEdge expects an Edge')
        # fetch the set for this edge type (created on first use)
        same_type = self.edges.setdefault(edge.type, set())
        same_type.add(edge)
|
||||||
44
pattern_matching/graphToDot.py
Normal file
44
pattern_matching/graphToDot.py
Normal file
|
|
@ -0,0 +1,44 @@
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
"""
|
||||||
|
Author: Sten Vercammen
|
||||||
|
University of Antwerp
|
||||||
|
|
||||||
|
Example code for paper: Efficient model transformations for novices
|
||||||
|
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
|
||||||
|
|
||||||
|
The main goal of this code is to give an overview, and an understandable
|
||||||
|
implementation, of known techniques for pattern matching and solving the
|
||||||
|
sub-graph homomorphism problem. The presented techniques do not include
|
||||||
|
performance adaptations/optimizations. It is not optimized to be efficient
|
||||||
|
but rather for the ease of understanding the workings of the algorithms.
|
||||||
|
The paper does list some possible extensions/optimizations.
|
||||||
|
|
||||||
|
It is intended as a guideline, even for novices, and provides an in-depth look
|
||||||
|
at the workings behind various techniques for efficient pattern matching.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import graph as mg
|
||||||
|
|
||||||
|
def printGraph(fileName, graph, matched_v=None, matched_e=None):
    """
    Writes graph to the file fileName in the Graphviz DOT format.

    Every vertex becomes a node (named after its id() and labelled with its
    type), every outgoing edge of a vertex becomes an arc labelled with the
    edge type. Vertices that occur as a value in matched_v and edges that
    occur as a value in matched_e get extra style attributes, so that a
    found match stands out in the drawing.

    matched_v and matched_e are optional dictionaries; leaving them out (or
    passing None) highlights nothing.

    Raises TypeError when graph is not a Graph.
    """
    if not isinstance(graph, mg.Graph):
        raise TypeError('Can only print Graph Graphs')

    # Collect the matched elements once, instead of rebuilding the lists for
    # every single vertex and edge. They stay lists (not sets) so that the
    # membership test keeps working for any kind of value.
    matched_vertices = list(matched_v.values()) if matched_v else []
    matched_edges = list(matched_e.values()) if matched_e else []

    with open(fileName, 'w') as f:
        f.write('digraph randomGraph {\n\n')
        for str_type, plan_vertices in graph.vertices.items():
            for plan_vertex in plan_vertices:
                vertex_str = str(id(plan_vertex)) + ' [label="' + str(str_type) + '"'
                if plan_vertex in matched_vertices:
                    # NOTE(review): "style" is given twice here; if both
                    # dashed and filled are wanted this presumably has to be
                    # a single style="dashed,filled" - confirm against the
                    # Graphviz documentation before changing the output.
                    vertex_str += ', style=dashed, style=filled]\n'
                else:
                    vertex_str += ']\n'
                f.write(vertex_str)
                for out_edge in plan_vertex.outgoing_edges:
                    edge_str = str(id(plan_vertex)) + ' -> ' + str(id(out_edge.tgt)) + ' [label="' + str(out_edge.type) + '"'
                    if out_edge in matched_edges:
                        edge_str += ', style=dashed, penwidth = 4]\n'
                    else:
                        edge_str += ']\n'
                    f.write(edge_str)
        f.write('\n}')
|
||||||
88
pattern_matching/main.py
Normal file
88
pattern_matching/main.py
Normal file
|
|
@ -0,0 +1,88 @@
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
"""
|
||||||
|
Author: Sten Vercammen
|
||||||
|
University of Antwerp
|
||||||
|
|
||||||
|
Example code for paper: Efficient model transformations for novices
|
||||||
|
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
|
||||||
|
|
||||||
|
The main goal of this code is to give an overview, and an understandable
|
||||||
|
implementation, of known techniques for pattern matching and solving the
|
||||||
|
sub-graph homomorphism problem. The presented techniques do not include
|
||||||
|
performance adaptations/optimizations. It is not optimized to be efficient
|
||||||
|
but rather for the ease of understanding the workings of the algorithms.
|
||||||
|
The paper does list some possible extensions/optimizations.
|
||||||
|
|
||||||
|
It is intended as a guideline, even for novices, and provides an in-depth look
|
||||||
|
at the workings behind various techniques for efficient pattern matching.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from generator import *
|
||||||
|
from patternMatching import *
|
||||||
|
|
||||||
|
import graphToDot
|
||||||
|
|
||||||
|
import random
|
||||||
|
|
||||||
|
debug = False

if __name__ == '__main__':
    # Entry point when running from the command line: describe a graph,
    # take a random pattern out of it, write both to DOT files, search the
    # pattern in the graph and write the graph again with the match marked.
    nr_of_vertices = 50
    nr_of_diff_types_v = 10
    nr_of_edges = 150
    nr_of_diff_types_e = 10

    # Random description of the graph. random.randint includes both bounds,
    # just like the np.random.random_integers calls this code was ported
    # from.
    dv = [random.randint(0, nr_of_diff_types_v) for _ in range(nr_of_vertices)]
    de = [random.randint(0, nr_of_diff_types_e) for _ in range(nr_of_edges)]
    dc_inc = [random.randint(0, nr_of_vertices-1) for _ in range(nr_of_edges)]
    dc_out = [random.randint(0, nr_of_vertices-1) for _ in range(nr_of_edges)]

    # override random graph by copy pasting output from terminal
    dv = [ 10,5,4,0,8,6,8,0,4,8,5,5,7,0,10,0,5,6,10,4,0,3,0,8,2,7,5,8,1,0,2,10,0,0,1,6,8,4,7,6,4,2,10,10,6,4,6,0,2,7 ]
    de = [ 8,10,8,1,6,7,4,3,5,2,0,0,9,6,0,3,8,3,2,7,2,3,10,8,10,8,10,2,5,5,10,6,7,5,1,2,1,2,2,3,7,7,2,1,7,2,9,10,8,1,9,4,1,3,1,1,8,2,2,9,10,9,1,9,4,10,10,10,9,3,5,3,6,6,9,1,2,6,3,2,4,10,9,6,5,6,2,4,3,2,4,10,6,2,8,8,0,5,1,7,3,4,3,8,7,3,0,8,3,3,8,5,10,5,9,3,1,10,3,2,6,3,10,0,5,10,9,10,0,1,4,7,10,3,1,9,1,2,3,7,4,3,7,8,8,4,5,10,1,4 ]
    dc_inc = [ 0,25,18,47,22,25,16,45,38,25,5,45,15,44,17,46,6,17,35,8,16,29,48,47,25,34,4,20,24,1,47,44,8,25,32,3,16,6,33,21,6,13,41,10,17,25,21,33,31,30,5,4,45,26,16,42,12,25,29,3,32,30,14,26,11,13,7,13,3,43,43,22,48,37,20,28,15,40,19,33,43,16,49,36,11,25,9,42,3,22,16,40,42,44,27,30,1,18,10,35,19,6,9,43,37,38,45,19,41,14,37,45,0,31,29,31,24,20,44,46,8,45,43,3,38,38,35,12,19,45,7,34,20,28,12,17,45,17,35,49,20,21,49,1,35,38,38,36,33,30 ]
    dc_out = [ 9,2,49,49,37,33,16,21,5,46,4,15,9,6,14,22,16,33,23,21,15,31,37,23,47,3,30,26,35,9,29,21,39,32,22,43,5,9,41,30,31,30,37,33,31,34,23,22,34,26,44,36,38,33,48,5,9,34,13,7,48,41,43,26,26,7,12,6,12,28,22,8,29,22,24,27,16,4,31,41,32,15,19,20,38,0,26,18,43,46,40,17,29,14,34,14,32,17,32,47,16,45,7,4,35,22,42,11,38,2,0,29,4,38,17,44,9,23,5,10,31,17,1,11,16,5,37,27,35,32,45,16,18,1,14,4,42,24,43,31,21,38,6,34,39,46,20,1,38,47 ]

    gg = GraphGenerator(dv, de, dc_inc, dc_out, debug)

    graph = gg.getRandomGraph()
    pattern = gg.getRandomPattern(5, 15, debug=debug)

    # override random pattern by copy pasting output from terminal to create
    # pattern, paste it in the createConstantPattern function in the generator.py
    # pattern = gg.createConstantPattern()

    # write both files before searching, so that pattern and graph are
    # known even when the search itself does not come back
    graphToDot.printGraph('randomPattern.dot', pattern)
    graphToDot.printGraph('randomGraph.dot', graph)

    # other matching types that can be selected: 'naive', 'SP', 'Ullmann'
    PM = PatternMatching('VF2')
    v, e = PM.match(pattern, graph)

    # regenerate graph, to show matched pattern
    graphToDot.printGraph('randomGraph.dot', graph, v, e)

    if debug:
        print(len(v))
        print('___')
        print(v)
        for value in v.values():
            print(value.type)
        print(len(e))
        print(e)
        print('___')
        for value in e.values():
            print(value.type)
|
||||||
947
pattern_matching/patternMatching.py
Normal file
947
pattern_matching/patternMatching.py
Normal file
|
|
@ -0,0 +1,947 @@
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
"""
|
||||||
|
Author: Sten Vercammen
|
||||||
|
University of Antwerp
|
||||||
|
|
||||||
|
Example code for paper: Efficient model transformations for novices
|
||||||
|
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
|
||||||
|
|
||||||
|
The main goal of this code is to give an overview, and an understandable
|
||||||
|
implementation, of known techniques for pattern matching and solving the
|
||||||
|
sub-graph homomorphism problem. The presented techniques do not include
|
||||||
|
performance adaptations/optimizations. It is not optimized to be efficient
|
||||||
|
but rather for the ease of understanding the workings of the algorithms.
|
||||||
|
The paper does list some possible extensions/optimizations.
|
||||||
|
|
||||||
|
It is intended as a guideline, even for novices, and provides an in-depth look
|
||||||
|
at the workings behind various techniques for efficient pattern matching.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from planGraph import *
|
||||||
|
|
||||||
|
import collections
|
||||||
|
import itertools
|
||||||
|
# import numpy as np
|
||||||
|
|
||||||
|
class PatternMatching(object):
|
||||||
|
"""
|
||||||
|
Returns an occurrence of a given pattern from the given Graph
|
||||||
|
"""
|
||||||
|
def __init__(self, matching_type='SP', optimize=True):
|
||||||
|
# store the type of matching we want to use
|
||||||
|
self.type = matching_type
|
||||||
|
self.bound_vertices = {} # saves the currently bound vertices
|
||||||
|
self.bound_edges = {} # saves the currently bound edges
|
||||||
|
self.result = None
|
||||||
|
self.previous = []
|
||||||
|
self.optimize = optimize
|
||||||
|
|
||||||
|
def match(self, pattern, graph):
    """
    Call this function to find an occurrence of the pattern in the (host) graph.

    The technique that is used (naive, SP, Ullmann, VF2) is the one stored
    in self.type, i.e. the matching_type that was given to the constructor.

    Returns whatever the selected matching function returns (main.py
    unpacks it as a pair of matched vertices and matched edges).

    Raises TypeError when pattern or graph is neither a SearchGraph nor a
    Graph, and ValueError when self.type is not a known matching type.
    """
    if not isinstance(pattern, (SearchGraph, Graph)):
        raise TypeError('pattern must be a SearchGraph or Graph')
    if not isinstance(graph, (SearchGraph, Graph)):
        raise TypeError('graph must be a SearchGraph or Graph')

    self.pattern = pattern
    self.graph = graph

    try:
        if self.type == 'naive':
            result = self.matchNaive(vertices=graph.vertices, edges=graph.edges)
        elif self.type == 'SP':
            result = self.matchSP()
        elif self.type == 'Ullmann':
            result = self.matchUllmann()
        elif self.type == 'VF2':
            result = self.matchVF2()
        else:
            raise ValueError('Unknown type for matching')
    finally:
        # cleanup; done in a finally block so that a matching function that
        # raises does not leave the pattern, the graph and half-finished
        # bindings behind for the next call
        self.pattern = None
        self.graph = None
        self.bound_vertices = {}
        self.bound_edges = {}
        self.result = None

    return result
|
||||||
|
|
||||||
|
def matchNaive(self, pattern_vertices=None, vertices=None, edges=None):
|
||||||
|
"""
|
||||||
|
Try to find an occurrence of the pattern in the Graph naively.
|
||||||
|
"""
|
||||||
|
# allow call with specific arguments
|
||||||
|
if pattern_vertices == None:
|
||||||
|
pattern_vertices = self.pattern.vertices
|
||||||
|
if vertices == None:
|
||||||
|
vertices = self.bound_vertices
|
||||||
|
if edges == None:
|
||||||
|
edges = self.bound_edges
|
||||||
|
|
||||||
|
def visitEdge(pattern_vertices, p_edge, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
|
||||||
|
"""
|
||||||
|
Visit a pattern edge, and try to bind it to a graph edge.
|
||||||
|
(If the first fails, try the second, and so on...)
|
||||||
|
"""
|
||||||
|
for g_edge in g_edges:
|
||||||
|
# only reckon the edge if its in edges and not visited
|
||||||
|
# (as the graph might be a subgraph of a more complex graph)
|
||||||
|
if g_edge not in edges.get(g_edge.type, []) or g_edge in visited_g_edges:
|
||||||
|
continue
|
||||||
|
if g_edge.type == p_edge.type and g_edge not in visited_g_edges:
|
||||||
|
visited_p_edges[p_edge] = g_edge
|
||||||
|
visited_g_edges.add(g_edge)
|
||||||
|
if inc:
|
||||||
|
p_vertex = p_edge.src
|
||||||
|
else:
|
||||||
|
p_vertex = p_edge.tgt
|
||||||
|
if visitVertices(pattern_vertices, p_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
|
||||||
|
return True
|
||||||
|
# remove added edges if they lead to no match, retry with others
|
||||||
|
del visited_p_edges[p_edge]
|
||||||
|
visited_g_edges.remove(g_edge)
|
||||||
|
# no edge leads to a possitive match
|
||||||
|
return False
|
||||||
|
|
||||||
|
def visitEdges(pattern_vertices, p_edges, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
|
||||||
|
"""
|
||||||
|
Visit all edges of the pattern vertex (edges given as argument).
|
||||||
|
We need to try visiting them for all its permutations, as matching
|
||||||
|
v -e1-> first and v -e2-> second and v -e3-> third, might not result
|
||||||
|
in a matching an occurrence of the pattern, but matching v -e2->
|
||||||
|
first and v -e3-> second and v -e1-> third might.
|
||||||
|
"""
|
||||||
|
def removePrevEdge(visitedEdges, visited_p_edges, visited_g_edges):
|
||||||
|
"""
|
||||||
|
Undo the binding of the brevious edge, (the current bindinds do
|
||||||
|
not lead to an occurrence of the pattern in the graph).
|
||||||
|
"""
|
||||||
|
for wrong_edge in visitedEdges:
|
||||||
|
# remove binding (pattern edge to graph edge)
|
||||||
|
wrong_g_edge = visited_p_edges.get(wrong_edge)
|
||||||
|
del visited_p_edges[wrong_edge]
|
||||||
|
# remove visited graph edge
|
||||||
|
visited_g_edges.remove(wrong_g_edge)
|
||||||
|
|
||||||
|
for it in itertools.permutations(p_edges):
|
||||||
|
visitedEdges = []
|
||||||
|
foundallEdges = True
|
||||||
|
for edge in it:
|
||||||
|
if visited_p_edges.get(edge) == None:
|
||||||
|
if not visitEdge(pattern_vertices, edge, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
|
||||||
|
# this did not work, so we have to undo all added edges
|
||||||
|
# (the current edge is not added, as it failed)
|
||||||
|
# we then can try a different permutation
|
||||||
|
removePrevEdge(visitedEdges, visited_p_edges, visited_g_edges)
|
||||||
|
foundallEdges = False
|
||||||
|
break # try other order
|
||||||
|
# add good visited (we know it succeeded)
|
||||||
|
visitedEdges.append(edge)
|
||||||
|
else:
|
||||||
|
# we visited this pattern edge, and have the coressponding graph edge
|
||||||
|
# if it is an incoming pattern edge, we need to make sure that
|
||||||
|
# the graph target that is map from the pattern target
|
||||||
|
# (of this incoming pattern edge, which has to be bound at this point)
|
||||||
|
# has the graph adge as an incoming edge,
|
||||||
|
# otherwise the graph is not properly connected
|
||||||
|
if inc:
|
||||||
|
if not visited_p_edges[edge] in visited_p_vertices[edge.tgt].incoming_edges:
|
||||||
|
# did not work
|
||||||
|
removePrevEdge(visitedEdges, visited_p_edges, visited_g_edges)
|
||||||
|
foundallEdges = False
|
||||||
|
break # try other order
|
||||||
|
else:
|
||||||
|
# analog for an outgoing edge
|
||||||
|
if not visited_p_edges[edge] in visited_p_vertices[edge.src].outgoing_edges:
|
||||||
|
# did not work
|
||||||
|
removePrevEdge(visitedEdges, visited_p_edges, visited_g_edges)
|
||||||
|
foundallEdges = False
|
||||||
|
break # try other order
|
||||||
|
|
||||||
|
# all edges are good, look no further
|
||||||
|
if foundallEdges:
|
||||||
|
break
|
||||||
|
return foundallEdges
|
||||||
|
|
||||||
|
def visitVertex(pattern_vertices, p_vertex, g_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
    """
    Visit a pattern vertex, and try to bind it to the graph vertex
    (both are given as argument). A binding is successful if all
    incoming and all outgoing edges of the pattern vertex can be bound
    (to edges of the graph vertex) by visitEdges.

    Returns True when the binding succeeded; every binding made along the
    way is kept in the four visited_* containers.
    Returns False when the graph vertex is already bound or when the edges
    cannot be bound; in that case the four visited_* containers are put
    back (in place) into the state they had on entry.
    """
    if g_vertex in visited_g_vertices:
        return False

    # Remember the bookkeeping as it is now. When the incoming edges bind
    # but the outgoing edges do not, the edge bindings made by the first
    # visitEdges call would otherwise stay behind and make later candidate
    # graph vertices fail on stale entries.
    # (copies are not cheap, but this matcher favours clarity over speed)
    saved_p_vertices = dict(visited_p_vertices)
    saved_p_edges = dict(visited_p_edges)
    saved_g_vertices = set(visited_g_vertices)
    saved_g_edges = set(visited_g_edges)

    # save visited graph vertex
    visited_g_vertices.add(g_vertex)
    # map pattern vertex to visited graph vertex
    visited_p_vertices[p_vertex] = g_vertex

    # incoming edges first, outgoing edges only if the incoming ones succeeded
    if (visitEdges(pattern_vertices, p_vertex.incoming_edges, True, g_vertex.incoming_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges) and
            visitEdges(pattern_vertices, p_vertex.outgoing_edges, False, g_vertex.outgoing_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges)):
        return True

    # cleanup: this does not lead to an occurrence of the pattern in the
    # graph, so roll everything back. The containers are shared with the
    # callers, hence they are restored in place instead of being rebound.
    visited_p_vertices.clear()
    visited_p_vertices.update(saved_p_vertices)
    visited_p_edges.clear()
    visited_p_edges.update(saved_p_edges)
    visited_g_vertices.clear()
    visited_g_vertices.update(saved_g_vertices)
    visited_g_edges.clear()
    visited_g_edges.update(saved_g_edges)
    return False
|
||||||
|
|
||||||
|
def visitVertices(pattern_vertices, p_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
    """
    Visit a pattern vertex and try to bind a graph vertex to it.

    Returns True when the pattern vertex is already bound, when it is not
    part of pattern_vertices (which happens when the pattern is matched
    partially), or when visitVertex manages to bind it to one of the not
    yet visited graph vertices of the same type.
    Returns False when no graph vertex of that type can be bound.
    """
    # already matched
    if visited_p_vertices.get(p_vertex) is not None:
        return True
    # not a vertex of the (partial) pattern we are matching
    if p_vertex not in pattern_vertices.get(p_vertex.type, set()):
        return True

    # try visiting graph vertices of same type as pattern vertex
    for g_vertex in vertices.get(p_vertex.type, []):
        if g_vertex in visited_g_vertices:
            continue
        if visitVertex(pattern_vertices, p_vertex, g_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
            return True

    return False
|
||||||
|
|
||||||
|
visited_p_vertices = {}
|
||||||
|
visited_p_edges = {}
|
||||||
|
visited_g_vertices = set()
|
||||||
|
visited_g_edges = set()
|
||||||
|
|
||||||
|
# the for loop is needed when the pattern consists of multiple disconnected structures
|
||||||
|
allVertices = []
|
||||||
|
for _, p_vertices in pattern_vertices.items():
|
||||||
|
allVertices.extend(p_vertices)
|
||||||
|
foundIt = False
|
||||||
|
for it_p_vertices in itertools.permutations(allVertices):
|
||||||
|
foundIt = True
|
||||||
|
for p_vertex in it_p_vertices:
|
||||||
|
if not visitVertices(pattern_vertices, p_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
|
||||||
|
foundIt = False
|
||||||
|
# reset visited
|
||||||
|
visited_p_vertices = {}
|
||||||
|
visited_p_edges = {}
|
||||||
|
visited_g_vertices = set()
|
||||||
|
visited_g_edges = set()
|
||||||
|
break
|
||||||
|
if foundIt:
|
||||||
|
break
|
||||||
|
if foundIt:
|
||||||
|
return (visited_p_vertices, visited_p_edges)
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
def matchSP(self):
    """
    Find an occurrence of the pattern in the Graph
    by using the generated SearchPlan.

    The plan is obtained from PlanGraph(self.pattern).Edmonds(sg) and is
    executed as a list of primitive operations with backtracking. Elements
    bound along the way are recorded, per type, in self.bound_vertices and
    self.bound_edges. Once every operation has bound an element,
    self.matchNaive() is used to check that the bound elements are
    properly connected.

    Layout of an operation, as read by this method:
        (PRIM_OP.lkp, type, is_vertex)
        (PRIM_OP.src, edge_type) and (PRIM_OP.tgt, edge_type)
        (PRIM_OP.inc, vertex_type, edge_type) and (PRIM_OP.out, vertex_type, edge_type)

    Returns the result of self.matchNaive() (also stored in self.result)
    when an occurrence is found, None otherwise.
    Raises TypeError when self.graph is neither a Graph nor a SearchGraph,
    or when the plan contains an unknown primitive operation.
    """
    if isinstance(self.graph, Graph):
        sg = SearchGraph(self.graph)
    elif isinstance(self.graph, SearchGraph):
        sg = self.graph
    else:
        raise TypeError('Pattern matching with a SearchPlan must be given a Graph or SearchGraph')

    pg = PlanGraph(self.pattern)
    SP = pg.Edmonds(sg)

    self.fileIndex = 0
    # forget the outcome of an earlier search
    self.result = None
    # NOTE(review): self.bound_vertices / self.bound_edges are not reset
    # here; they are presumably initialised by the constructor - confirm
    # whether a second search on the same object is meant to be supported.

    def propConnected():
        """
        Checks if the found vertices and edges can be uniquely matched
        onto the pattern graph.
        """
        self.result = self.matchNaive()
        return self.result is not None

    def matchOP(elem, bound, ops, index):
        """
        Execute a primitive operation, return whether or not it succeeded.

        The element is bound (stored in bound[elem.type]) and the remaining
        operations are tried; the binding is undone again when they fail.
        """
        type_bound = bound.setdefault(elem.type, set())
        # an element can only be bound once
        if elem in type_bound:
            return False
        type_bound.add(elem)
        if matchAllOP(ops, index + 1):
            return True
        # matching of the next operations failed, the caller will try
        # a different elem
        type_bound.remove(elem)
        return False

    def matchAllOP(ops, index=0):
        """
        Try to match an occurrence of the pattern in the graph,
        by recursively matching elements that adhere to the SearchPlan.
        """
        # if we matched all elements,
        # check if the bound elements are properly connected
        if index == len(ops):
            return propConnected()

        op = ops[index]
        kind = op[0]

        if kind == PRIM_OP.lkp:     # lkp(elem)
            if op[2]:   # lookup a vertex
                elems = self.graph.vertices.get(op[1], [])
                bound = self.bound_vertices
            else:       # lookup an edge
                elems = self.graph.edges.get(op[1], [])
                bound = self.bound_edges
            # If the graph has no element of the requested type, elems == []
            # and we fall through to returning False (backtrack).
            for elem in elems:
                if matchOP(elem, bound, ops, index):
                    return True
            return False

        if kind == PRIM_OP.src or kind == PRIM_OP.tgt:
            # src(e) / tgt(e): bind the source / target of a bound edge e.
            # The edge must already be bound, so there should be at least
            # one elem in self.bound_edges[op[1]].
            for edge in self.bound_edges[op[1]]:
                endpoint = edge.src if kind == PRIM_OP.src else edge.tgt
                if matchOP(endpoint, self.bound_vertices, ops, index):
                    return True
            return False

        if kind == PRIM_OP.inc or kind == PRIM_OP.out:
            # in(v, e) / out(v, e): bind an incoming / outgoing edge e of a
            # bound vertex v.
            # The vertex is expected to be bound implicitly (by a src/tgt
            # op); when that op bound a "wrong" vertex there is nothing of
            # the requested type, we loop over [] and return False.
            for vertex in self.bound_vertices.get(op[1], []):
                if kind == PRIM_OP.inc:
                    candidate_edges = vertex.incoming_edges
                else:
                    candidate_edges = vertex.outgoing_edges
                for edge in candidate_edges:
                    if edge.type == op[2]:
                        if matchOP(edge, self.bound_edges, ops, index):
                            return True
            return False

        raise TypeError('Unknown PRIM_OP type')

    # Try and match all (primitive) operations from the SearchPlan.
    # The outcome of the search itself decides what is returned: looking at
    # self.bound_vertices afterwards is not reliable, as failed attempts
    # leave empty sets behind for the types they touched.
    if matchAllOP(SP):
        # We found a pattern
        return self.result
    # The pattern does not exist in the Graph
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def createAdjacencyMatrixMap(self, graph):
    """
    Return adjacency matrix and the order of the vertices.

    graph is expected to offer .vertices and .edges, both mapping a type
    to a collection of elements; edges expose .src and .tgt.

    Without self.optimize every vertex of graph gets a row/column.
    With self.optimize only the graph vertices whose type occurs in
    self.pattern.vertices are used, and edges touching any other vertex
    are ignored.

    Returns (AM, vertices_order): AM[i][j] is True when there is an edge
    from vertices_order[i] to vertices_order[j].
    Returns False (not a tuple!) when self.optimize is set and graph has
    no entry for a vertex type of the pattern: the pattern can then not be
    found in that graph. Callers must check for this before unpacking.
    """
    # contains all vertices we'll use for the AdjacencyMatrix
    all_vertices = []

    if self.optimize:
        # insert only the vertices from the graph which have a type
        # that is present in the pattern
        for vertex_type in self.pattern.vertices:
            graph_vertices = graph.vertices.get(vertex_type)
            if graph_vertices is None:
                # the pattern contains a vertex of a certain type
                # that is not present in the host graph
                return False
            all_vertices.extend(graph_vertices)
    else:
        # insert all vertices from the graph
        for vertices in graph.vertices.values():
            all_vertices.extend(vertices)

    # create squared zero matrix: { vertex: (index, [has edge from index to pos?]) }
    size = len(all_vertices)
    matrix = collections.OrderedDict()
    for index, vertex in enumerate(all_vertices):
        matrix[vertex] = (index, [False] * size)

    for edges in graph.edges.values():
        for edge in edges:
            if self.optimize and (edge.tgt not in matrix or edge.src not in matrix):
                # skip the edge if the target or source type is not
                # present in the pattern (and therefore not in the matrix)
                continue
            column = matrix[edge.tgt][0]
            matrix[edge.src][1][column] = True

    AM = []
    vertices_order = []
    for vertex, (_, row) in matrix.items():
        AM.append(row)
        vertices_order.append(vertex)

    return AM, vertices_order
|
||||||
|
|
||||||
|
def matchUllmann(self):
    """
    Find an occurrence of the pattern in the Graph
    by using Ullmann for solving the Constraint Satisfaction Problem (CSP).

    A boolean matrix M (one row per pattern vertex, one column per graph
    vertex) holds the candidate assignments. Rows are fixed one by one
    with backtracking; every complete assignment is handed to
    self.matchNaive, restricted to the selected graph vertices.

    Returns the result of self.matchNaive (also stored in self.result)
    for the first assignment it accepts, None when there is none.
    """

    def createM_star(h, p):
        """
        Create M*[v, w] = 1 if deg(v) <= deg(w), for v in V_P, w in V_H
                        = 0 otherwise

        h and p are given to ensure correct order.
        """
        m = []  # [[..], ...]
        for p_vertex in p:
            row = []
            for g_vertex in h:
                # also check if type matches
                if self.optimize and p_vertex.type != g_vertex.type:
                    row.append(False)
                    continue
                # for the degree function, we choose to look at the
                # outgoing edges AND the incoming edges
                # (one might prefer to use only one of them)
                row.append(len(p_vertex.incoming_edges) <= len(g_vertex.incoming_edges) and
                           len(p_vertex.outgoing_edges) <= len(g_vertex.outgoing_edges))
            m.append(row)
        return m

    def createDecreasingOrder(h):
        """
        It turns out that the more edges a vertex has, the sooner it will
        fail in matching the pattern. For efficiency reasons, we want it
        to fail as fast as possible.

        Returns the indices of h, sorted on decreasing degree
        (incoming + outgoing edges); ties keep their original order.
        """
        order = [(len(g_vertex.incoming_edges) + len(g_vertex.outgoing_edges), index)
                 for index, g_vertex in enumerate(h)]
        # highest degree first
        order.sort(key=lambda elem: elem[0], reverse=True)
        # only return the indices (which specify the order)
        return [index for (_, index) in order]

    def propConnected(M, H, P, h, p):
        """
        Checks if the graph vertices selected in M can be matched onto
        the pattern graph, by running self.matchNaive on just those
        vertices. Stores the outcome in self.result.
        """
        vertices = {}
        for row in M:
            for index, item in enumerate(row):
                # only the first True of a row is used
                # (there should only be one item per row)
                if item:
                    vertex = h[index]
                    vertices.setdefault(vertex.type, set()).add(vertex)
                    break

        self.result = self.matchNaive(vertices=vertices, edges=self.graph.edges)
        return self.result is not None

    def refineM(M, H, P, h, pp):
        """
        Refine M: for every candidate assignment M[i][j] (pattern vertex i,
        graph vertex j), check that each neighbour of i in the pattern
        still has a candidate among the corresponding neighbours of j in
        the graph. If not, M[i][j] is set to False, reducing the search
        space. This is repeated until nothing changes anymore.
        M is modified in place.
        """
        pattern_size = len(P)   # P is a nxn-matrix
        graph_size = len(H[0])

        def neighbourMatchable(k, j, outgoing):
            """
            Is there a candidate c for pattern vertex k that is connected
            to graph vertex j (j -> c if outgoing, c -> j otherwise)?
            """
            for c in range(graph_size):
                connected = H[j][c] if outgoing else H[c][j]
                if M[k][c] and connected:
                    # also check correct type
                    if self.optimize and pp[k].type != h[c].type:
                        continue
                    return True
            return False

        any_changes = True
        while any_changes:
            any_changes = False
            # for all vertices from the pattern
            for i in range(pattern_size):
                # for all its possible assignments
                for j in range(graph_size):
                    if not M[i][j]:
                        continue
                    # for all neighbours of the pattern vertex
                    for k in range(pattern_size):
                        # neighbour via an outgoing edge / an incoming edge
                        if ((P[i][k] and not neighbourMatchable(k, j, True)) or
                                (P[k][i] and not neighbourMatchable(k, j, False))):
                            M[i][j] = False
                            any_changes = True
                            # one unmatchable neighbour is enough
                            break

    def findM(M_star, M, order, H, P, h, p, index_M=0):
        """
        Find an isomorphic mapping for the vertices of P to H.
        This mapping is represented by a matrix M if,
        and only if M(MH)^T = P^T.

        Row index_M of M is fixed to one candidate at a time (in the given
        order), M is refined and the next row is handled recursively.
        Returns True as soon as propConnected accepts a complete M.
        """
        # We are at the end when every row has been fixed. Before checking
        # the candidate, first look for a row with only False; if there is
        # one we do not need to check if it is properly connected.
        at_end = index_M == len(M)
        if at_end:
            # step back onto the last row: it is only used for its length,
            # the rows before it are the ones that get inspected
            index_M -= 1

        # we need to refer to this row
        old_row = M_star[index_M]
        # previous rows (these are sparse, 1 per row, save only its position)
        prev_pos = []
        for i in range(index_M):
            row = M[i]
            for j in range(len(old_row)):
                if row[j]:
                    prev_pos.append(j)
                    break
            else:
                # a row with only False occurs, we will not find an occurrence
                return False

        # We are at the end, we found a candidate.
        if at_end:
            return propConnected(M, H, P, h, p)

        M[index_M] = [False] * len(old_row)
        for index_order, index_row in enumerate(order):
            # put previous True back on False
            if index_order > 0:
                M[index_M][order[index_order - 1]] = False

            if not old_row[index_row]:
                continue
            M[index_M][index_row] = True

            # 1 0 0      Assume 3rd round, and we select x:
            # 0 1 0      none of the elements above x, in the same column,
            # 0 x 0      may be 1. In the example one is, so try
            #            selecting another element.
            if any(M[row_above][index_row] for row_above in range(index_M)):
                continue

            refineM(M, H, P, h, p)

            if findM(M_star, M, order, H, P, h, p, index_M + 1):
                return True

            # refineM may have cleared them: restore the True of every
            # previous row
            for prev_row, pos in enumerate(prev_pos):
                M[prev_row][pos] = True
            # reset rows below current row
            # (copy the row, we do not want to share the list with M_star)
            for row_below in range(index_M + 1, len(M)):
                M[row_below] = M_star[row_below][:]

        # reset current row (the rest is already reset)
        M[index_M] = M_star[index_M][:]

        return False

    # forget the outcome of an earlier search
    self.result = None

    # create adjacency matrix of the graph
    graph_map = self.createAdjacencyMatrixMap(self.graph)
    # create adjacency matrix of the pattern
    pattern_map = self.createAdjacencyMatrixMap(self.pattern)
    if graph_map is False or pattern_map is False:
        # createAdjacencyMatrixMap reports False when a vertex type of the
        # pattern does not occur at all: there is nothing to find
        return None
    H, h = graph_map
    P, p = pattern_map
    if not h:
        # not a single candidate graph vertex
        return None

    # create M* binary matrix
    M_star = createM_star(h, p)

    # create the order we will use later on
    order = createDecreasingOrder(h)
    # deepcopy M_s into M
    M = [row[:] for row in M_star]

    if self.optimize:
        refineM(M, H, P, h, p)

    if findM(M_star, M, order, H, P, h, p):
        return self.result
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def matchVF2(self):
    """
    Find an occurrence of the pattern in the Graph
    by using a VF2 style search.

    Pairs (n, m) of a graph vertex index and a pattern vertex index are
    added one by one with backtracking. Candidates adjacent to the already
    matched vertices via outgoing edges are tried first, then those
    adjacent via incoming edges, then the remaining ones. Once every
    pattern vertex is paired, self.matchNaive is run on the bound graph
    vertices.

    Returns the result of self.matchNaive (also stored in self.result)
    when an occurrence is found, None otherwise.
    """

    class VF2_Obj(object):
        """
        Structure for keeping the VF2 data.
        """
        def __init__(self, len_graph_vertices, len_pattern_vertices):
            # represents if n-the element (h[n] or p[n]) matched
            self.core_graph = [False] * len_graph_vertices
            self.core_pattern = [False] * len_pattern_vertices

            # maps a bound graph vertex onto the pattern vertex it is
            # paired with
            self.mapping = {}

            # For the four lists below: -1 means "not adjacent", any other
            # value is the depth (index_M) at which the vertex became
            # adjacent to a matched vertex.

            # preference lvl 1
            # vertices adjacent to M_graph connected via an outgoing edge
            self.N_out_graph = [-1] * len_graph_vertices
            # vertices adjacent to M_pattern connected via an outgoing edge
            self.N_out_pattern = [-1] * len_pattern_vertices

            # preference lvl 2
            # vertices adjacent to M_graph connected via an incoming edge
            self.N_inc_graph = [-1] * len_graph_vertices
            # vertices adjacent to M_pattern connected via an incoming edge
            self.N_inc_pattern = [-1] * len_pattern_vertices

            # preference lvl 3
            # not in the above

    # The helpers below only use their arguments, self and each other, so
    # they are created once here instead of on every recursive findM call.

    def addOutNeighbours(neighbours, N, index_M):
        """
        Given outgoing neighbours (a row from an adjacency matrix),
        label them as added by saving when they got added (index_M
        represents this, otherwise it is -1)
        """
        for neighbour_index, is_neighbour in enumerate(neighbours):
            if is_neighbour and N[neighbour_index] == -1:
                N[neighbour_index] = index_M

    def addIncNeighbours(G, j, N, index_M):
        """
        Given the adjacency matrix, and the column j, representing that
        we want to add the incoming edges to vertex j,
        label them as added by saving when they got added (index_M
        represents this, otherwise it is -1)
        """
        for i in range(len(G)):
            if G[i][j] and N[i] == -1:
                N[i] = index_M

    def delNeighbours(N, index_M):
        """
        Remove neighbours that were added at index_M.
        If we call this function, we are backtracking and we want to
        remove the added neighbours from the just tried matching (n, m)
        pair (which failed).
        """
        for n in range(len(N)):
            if N[n] == index_M:
                N[n] = -1

    def countExtEdges(adjacency, core, N_out, N_inc):
        """
        Count the edges x -> y that end in an adjacent vertex y (listed in
        N_out or N_inc) that is not matched yet.
        Returns (edges starting in a matched vertex x,
                 edges starting in a not matched vertex x).
        """
        from_core = 0
        from_rest = 0
        for x in range(len(core)):
            for y in range(len(adjacency)):
                if not adjacency[x][y]:
                    continue
                # y has to be a neighbour and not yet matched
                if (N_out[y] != -1 or N_inc[y] != -1) and not core[y]:
                    # core holds booleans: True if we matched x
                    if core[x]:
                        from_core += 1
                    else:
                        from_rest += 1
        return from_core, from_rest

    def feasibilityTest(H, P, h, p, VF2_obj, n, m):
        """
        Examine all the nodes connected to n and m; if such nodes are
        in the current partial mapping, check if each branch from or to
        n has a corresponding branch from or to m and vice versa.

        If the nodes and the branches of the graphs being matched also
        carry semantic attributes, another condition must also hold for
        F(s, n, m) to be true; namely the attributes of the nodes and of
        the branches being paired must be compatible.

        Another pruning step is to check if the nr of ext_edges between
        the matched_vertices from the pattern and its adjacent vertices
        are less than or equal to the nr of ext_edges between
        matched_vertices from the graph and its adjacent vertices.

        And if the nr of ext_edges between those adjacent vertices from
        the pattern and the not connected vertices are less than or
        equal to the nr of ext_edges between those adjacent vertices from
        the graph and its adjacent vertices.
        """
        # Get all matched neighbours from graph node n and pattern node m
        # (including n and m)
        neighbours_graph = {}
        neighbours_graph[h[n].type] = set([h[n]])

        neighbours_pattern = {}
        neighbours_pattern[p[m].type] = set([p[m]])

        # add all neighbours of pattern vertex m
        for i in range(len(P)):     # P is a nxn-matrix
            if (P[m][i] or P[i][m]) and VF2_obj.core_pattern[i]:
                neighbours_pattern.setdefault(p[i].type, set()).add(p[i])

        # add all neighbours of graph vertex n
        for i in range(len(H)):     # H is a nxn-matrix
            if (H[n][i] or H[i][n]) and VF2_obj.core_graph[i]:
                neighbours_graph.setdefault(h[i].type, set()).add(h[i])

        # take a coding shortcut,
        # use self.matchNaive function to see if it is feasible.
        # this way, we immediately test the semantic attributes
        if not self.matchNaive(pattern_vertices=neighbours_pattern, vertices=neighbours_graph, edges=self.graph.edges):
            return False

        # count ext_edges from matched vertices to adjacent vertices and
        # ext_edges from not matched vertices to adjacent vertices
        ext_edges_graph_ca, ext_edges_graph_an = countExtEdges(
            H, VF2_obj.core_graph, VF2_obj.N_out_graph, VF2_obj.N_inc_graph)
        ext_edges_pattern_ca, ext_edges_pattern_an = countExtEdges(
            P, VF2_obj.core_pattern, VF2_obj.N_out_pattern, VF2_obj.N_inc_pattern)

        # The nr of ext_edges between matched_vertices from the pattern
        # and its adjacent vertices must be less than or equal to the nr
        # of ext_edges between matched_vertices from the graph and its
        # adjacent vertices, otherwise we won't find an occurrence
        if ext_edges_pattern_ca > ext_edges_graph_ca:
            return False

        # The nr of ext_edges between those adjacent vertices from the
        # pattern and its not connected vertices must be less than or
        # equal to the nr of ext_edges between those adjacent vertices
        # from the graph and its not connected vertices,
        # otherwise we won't find an occurrence
        if ext_edges_pattern_an > ext_edges_graph_an:
            return False

        return True

    def matchPhase(H, P, h, p, index_M, VF2_obj, n, m):
        """
        The matching phase of the VF2 algorithm. If the chosen n, m pair
        passes the feasibilityTest, the pair gets added and we start
        to search for the next matching pair.
        """
        # all candidate pair (n, m) represent graph x pattern
        if not feasibilityTest(H, P, h, p, VF2_obj, n, m):
            return False

        # adapt VF2_obj
        VF2_obj.core_graph[n] = True
        VF2_obj.core_pattern[m] = True
        VF2_obj.mapping[h[n]] = p[m]
        addOutNeighbours(H[n], VF2_obj.N_out_graph, index_M)
        addIncNeighbours(H, n, VF2_obj.N_inc_graph, index_M)
        addOutNeighbours(P[m], VF2_obj.N_out_pattern, index_M)
        addIncNeighbours(P, m, VF2_obj.N_inc_pattern, index_M)

        if findM(H, P, h, p, VF2_obj, index_M + 1):
            return True

        # else, cleanup, adapt VF2_obj
        VF2_obj.core_graph[n] = False
        VF2_obj.core_pattern[m] = False
        del VF2_obj.mapping[h[n]]
        delNeighbours(VF2_obj.N_out_graph, index_M)
        delNeighbours(VF2_obj.N_inc_graph, index_M)
        delNeighbours(VF2_obj.N_out_pattern, index_M)
        delNeighbours(VF2_obj.N_inc_pattern, index_M)

        return False

    def preferred(H, P, h, p, index_M, VF2_obj, N_graph, N_pattern):
        """
        Try to match the adjacency vertices connected via outgoing
        or incoming edges. (Depending on what is given for N_graph and
        N_pattern.)
        """
        for n in range(len(N_graph)):
            # skip graph vertices that are not in N_graph
            # (or already matched)
            if N_graph[n] == -1 or VF2_obj.core_graph[n]:
                continue
            for m in range(len(N_pattern)):
                # skip pattern vertices that are not in N_pattern
                # (or already matched)
                if N_pattern[m] == -1 or VF2_obj.core_pattern[m]:
                    continue
                if matchPhase(H, P, h, p, index_M, VF2_obj, n, m):
                    return True

        return False

    def leastPreferred(H, P, h, p, index_M, VF2_obj):
        """
        Try to match the vertices that are not connected to the currently
        matched vertices.
        """
        for n in range(len(VF2_obj.N_out_graph)):
            # skip vertices that are connected to the matched graph vertices
            # (or already matched)
            if VF2_obj.N_out_graph[n] != -1 or VF2_obj.N_inc_graph[n] != -1 or VF2_obj.core_graph[n]:
                continue
            for m in range(len(VF2_obj.N_out_pattern)):
                # skip vertices that are connected to the matched pattern
                # vertices (or already matched)
                if VF2_obj.N_out_pattern[m] != -1 or VF2_obj.N_inc_pattern[m] != -1 or VF2_obj.core_pattern[m]:
                    continue
                if matchPhase(H, P, h, p, index_M, VF2_obj, n, m):
                    return True

        return False

    def findM(H, P, h, p, VF2_obj, index_M=0):
        """
        Find an isomorphic mapping for the vertices of P to H.

        This operates in a similar way as Ullmann. Ullmann has a predefined
        order for matching (sorted on most edges first). VF2's order is to
        first try to match the adjacency vertices connected via outgoing
        edges, then those connected via incoming edges and then those that
        are not connected to the currently matched vertices.

        index_M is the number of pairs matched so far.
        """
        # We are at the end, we found a candidate.
        if index_M == len(p):
            bound_graph_vertices = {}
            for vertex_bound in VF2_obj.mapping:
                bound_graph_vertices.setdefault(vertex_bound.type, set()).add(vertex_bound)

            self.result = self.matchNaive(vertices=bound_graph_vertices, edges=self.graph.edges)
            return self.result is not None

        # try the candidates in the preferred order
        # first try the adjacent vertices connected via the outgoing edges.
        if preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_out_graph, VF2_obj.N_out_pattern):
            return True

        # then try the adjacent vertices connected via the incoming edges.
        if preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_inc_graph, VF2_obj.N_inc_pattern):
            return True

        # and lastly, try the vertices not connected to the currently matched vertices
        return leastPreferred(H, P, h, p, index_M, VF2_obj)

    # forget the outcome of an earlier search
    self.result = None

    # create adjacency matrix of the graph
    graph_map = self.createAdjacencyMatrixMap(self.graph)
    # create adjacency matrix of the pattern
    pattern_map = self.createAdjacencyMatrixMap(self.pattern)
    if graph_map is False or pattern_map is False:
        # createAdjacencyMatrixMap reports False when a vertex type of the
        # pattern does not occur at all: there is nothing to find
        return None
    H, h = graph_map
    P, p = pattern_map

    VF2_obj = VF2_Obj(len(h), len(p))

    if findM(H, P, h, p, VF2_obj):
        return self.result
    return None
|
||||||
528
pattern_matching/planGraph.py
Normal file
528
pattern_matching/planGraph.py
Normal file
|
|
@ -0,0 +1,528 @@
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
"""
|
||||||
|
Author: Sten Vercammen
|
||||||
|
University of Antwerp
|
||||||
|
|
||||||
|
Example code for paper: Efficient model transformations for novices
|
||||||
|
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
|
||||||
|
|
||||||
|
The main goal of this code is to give an overview, and an understandable
|
||||||
|
implementation, of known techniques for pattern matching and solving the
|
||||||
|
sub-graph homomorphism problem. The presented techniques do not include
|
||||||
|
performance adaptations/optimizations. It is not optimized to be efficient
|
||||||
|
but rather for the ease of understanding the workings of the algorithms.
|
||||||
|
The paper does list some possible extensions/optimizations.
|
||||||
|
|
||||||
|
It is intended as a guideline, even for novices, and provides an in-depth look
|
||||||
|
at the workings behind various techniques for efficient pattern matching.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from searchGraph import *
|
||||||
|
from enum import *
|
||||||
|
|
||||||
|
# Enum of all primitive operation types; the values are used as labels on
# the edges of a PlanGraph.
# Note: 'inc' represents the primitive operation "in" (`in` itself is a
# reserved keyword in Python and cannot be used as an attribute name).
PRIM_OP = Enum(['lkp', 'inc', 'out', 'src', 'tgt'])
|
||||||
|
|
||||||
|
class PlanGraph(object):
    """
    Holds the PlanGraph for a pattern.
    Can create the search plan of the pattern for a given SearchGraph.

    Every vertex and every edge of the pattern is represented by one plan
    vertex; the plan edges between them are labelled with a PRIM_OP value
    (lkp, inc, out, src, tgt) and carry a logarithmic cost.
    """
    def __init__(self, pattern):
        """
        Builds the PlanGraph for the given pattern.

        pattern -- the pattern, must be a Graph
        Raises TypeError if pattern is not a Graph.
        """
        if not isinstance(pattern, Graph):
            raise TypeError('PlanGraph expects the pattern to be a Graph')
        # member variables:
        self.vertices = []  # will not be searched in
        self.edges = []     # will not be searched in

        # representation map, maps vertex from pattern to element from
        # PlanGraph (no need for edges)
        repr_map = {}

        # 1.1: for every vertex in the pattern graph,
        # create a vertex representing the pattern element
        for str_type, vertices in pattern.vertices.items():
            for vertex in vertices:
                # we only need to know the type of the vertex
                plan_vertex = Vertex(str_type)
                # and we need to know that it was a vertex
                plan_vertex.is_vertex = True
                # for re-linking the edges, we'll need to map the
                # vertex of the pattern to the plan_vertex
                repr_map[vertex] = plan_vertex
                # save created plan_vertex
                self.vertices.append(plan_vertex)

        # 1.2: for every edge in the pattern graph,
        # create a vertex representing the pattern element
        for str_type, edges in pattern.edges.items():
            for edge in edges:
                # we only need to know the type of the edge
                plan_vertex = Vertex(edge.type)
                # and we need to know that it was an edge
                plan_vertex.is_vertex = False
                # save created plan_vertex
                self.vertices.append(plan_vertex)

                # 4: for every element x from the PlanGraph
                # that represents an edge e in the pattern:
                # 4.1: create an edge labelled tgt from x to the vertex in
                # the PlanGraph representing the target vertex of e in the
                # pattern graph, and a reverted edge labelled in.
                # tgt and src cost are always 1, we use logarithmic cost
                # (=> cost = ln(1) = 0.0) so that we do not need to
                # minimize a product, but can minimize a sum
                # (as ln(c1...ck) = ln(c1) + ... + ln(ck))
                # 4.1.1: tgt
                self._addPlanEdge(plan_vertex, repr_map[edge.tgt], PRIM_OP.tgt, 0.0)
                # 4.1.2: in (its cost is filled in by updatePlanCost)
                self._addPlanEdge(repr_map[edge.tgt], plan_vertex, PRIM_OP.inc)

                # 4.2: create an edge labelled src from x to the vertex in
                # the PlanGraph representing the source vertex of e in the
                # pattern graph, and a reverted edge labelled out
                # 4.2.1: src
                self._addPlanEdge(plan_vertex, repr_map[edge.src], PRIM_OP.src, 0.0)
                # 4.2.2: out (its cost is filled in by updatePlanCost)
                self._addPlanEdge(repr_map[edge.src], plan_vertex, PRIM_OP.out)

        # 2: create a root vertex
        self.root = Vertex('root')
        # don't add it to the vertices

        # 3: for each element in the PlanGraph (that is not the root vertex),
        # create an edge from the root to it, and label it lkp
        for vertex in self.vertices:
            self._addPlanEdge(self.root, vertex, PRIM_OP.lkp)

    def _addPlanEdge(self, src, tgt, label, cost=None):
        """
        Creates a plan edge from src to tgt with the given PRIM_OP label,
        links it to both end points and stores it in self.edges.

        cost -- optional fixed cost; when given it is stored as both cost
                and orig_cost, otherwise no cost attribute is assigned here.
        Returns the created edge.
        """
        plan_edge = Edge(src, tgt)
        # backup src and tgt (Edmonds might override it)
        plan_edge.orig_src = plan_edge.src
        plan_edge.orig_tgt = plan_edge.tgt
        plan_edge.label = label
        # link vertices connected to this plan_edge
        plan_edge.src.addOutgoingEdge(plan_edge)
        plan_edge.tgt.addIncomingEdge(plan_edge)
        if cost is not None:
            plan_edge.cost = cost
            # backup orig cost, as Edmonds changes cost
            plan_edge.orig_cost = plan_edge.cost
        # save created edge
        self.edges.append(plan_edge)
        return plan_edge

    def updatePlanCost(self, graph):
        """
        Updates the cost of every lkp, in and out edge of the PlanGraph
        from the statistics of the given SearchGraph.

        Returns True if the cost was successfully updated,
        returns False if a type in the pattern is not in the graph.
        Raises TypeError if graph is not a SearchGraph.
        """
        if not isinstance(graph, SearchGraph):
            raise TypeError('updatePlanCost expects a SearchGraph')
        # update lkp, in and out (not src and tgt as they are constant)
        for edge in self.edges:
            if edge.label == PRIM_OP.lkp:
                edge.cost = graph.getCostLkp(edge.tgt.type, edge.tgt.is_vertex)
                if edge.cost is None:
                    print('failed lkp')
                    return False
            elif edge.label == PRIM_OP.inc:
                # in(v, e), binds an incoming edge e from an already bound
                # vertex v, depends on the number of incoming edges of
                # type e for the vertex type
                edge.cost = graph.getCostInc(edge.src.type, edge.tgt.type)
                if edge.cost is None:
                    print('failed in')
                    return False
            elif edge.label == PRIM_OP.out:
                # (analogue for out(v, e))
                edge.cost = graph.getCostOut(edge.src.type, edge.tgt.type)
                if edge.cost is None:
                    print('failed out')
                    return False
            # else: ignore src and tgt
            # backup orig cost, as Edmonds changes cost
            edge.orig_cost = edge.cost
        return True

    def Edmonds(self, searchGraph):
        """
        Returns the minimum directed spanning tree (MDST)
        for the pattern and the provided graph, as a list of tuples (one
        per chosen plan edge, see createPRIM_OP below) in best-first order.
        Returns None if it is impossible to find the pattern in the Graph
        (vertex type or edge type from pattern not in Graph).
        """
        # update the cost for the PlanGraph
        if not self.updatePlanCost(searchGraph):
            print('type in pattern not found in Graph (in Edmonds)')
            # (returns False if a type in the pattern can not be found in the graph)
            return None
        # Complete Edmonds algorithm has optimization steps:
        #   a: remove edges entering the root
        #   b: merge parallel edges from same src to same tgt with min weight
        # we can ignore this as:
        #   a: the root does not have incoming edges
        #   b: the PlanGraph does not have such parallel edges

        # 1: for each node v (other than root), find incoming edge with
        # lowest weight, insert those
        pi_v = {}
        for plan_vertex in self.vertices:
            min_weight = float('infinity')
            min_edge = None
            for plan_edge in plan_vertex.incoming_edges:
                if plan_edge.cost < min_weight:
                    min_weight = plan_edge.cost
                    min_edge = plan_edge
            # save plan_vertex and its minimum incoming edge
            pi_v[plan_vertex] = min_edge
            if min_edge is None:
                raise RuntimeError('baka: no min_edge found')

        def getCycle(vertex, reverse_graph, visited):
            """
            Walk from vertex towards the root. We walk in reverse order: each
            vertex only has one incoming edge, so we walk to the source of
            that incoming edge. We stop when we reach a vertex that is
            already in visited; in that case there is no cycle and we
            return None.
            When we reach a vertex from our current path, we return that
            cycle (a set of vertices), after first removing its tail.
            All walked vertices are added to visited.
            """
            walked = []           # we could only save it once, but we need order
            current_path = set()  # and lookup in an array is slower than in set
            # we assume root is in visited (it must be in it)
            while vertex not in visited:
                if vertex in current_path:
                    # we found a cycle, the cycle however might look like a: O--,
                    #   g f e          where we first visited a, then b, c, d,...
                    #   h   d c b a    k points back to d, completing a cycle,
                    #   i j k          but c b a is the tail that does not belong
                    # in the cycle, removing this is "easy" as we know that
                    # we first visited the tail, so they are the first elements
                    # in our walked path
                    for tail_part in walked:
                        if tail_part != vertex:
                            current_path.remove(tail_part)
                        else:
                            break

                    visited.update(walked)
                    return current_path
                current_path.add(vertex)
                walked.append(vertex)
                # by definition, an MDST only has one incoming edge per vertex
                # so we follow it upwards
                # vertex <--(minimal edge)-- src
                vertex = reverse_graph[vertex].src

            # no cycle found (the current path led to a visited vertex)
            visited.update(walked)  # add walked to visited
            return None

        class VertexGraph(Vertex):
            """
            Acts as a super vertex, holds a subgraph (that is/was once a cycle).
            Used for Edmonds' contraction step.
            The incoming edges are the edges leading to the vertices in the
            VertexGraph (they exclude edges from a vertex in the cycle to
            another vertex in the cycle).
            Analogue for outgoing edges.
            """
            def __init__(self, cycle, reverseGraph):
                # Call parent class constructor; the type is the
                # concatenation of the types of the contracted vertices
                str_type = ''.join(str(vertex.type) for vertex in cycle)
                Vertex.__init__(self, str_type)
                # member variables:
                self.internalMDST = {}  # {vertex in cycle, its chosen incoming edge}

                minIntWeight = self.findMinIntWeight(cycle, reverseGraph)
                self.updateMinExtEdge(minIntWeight, reverseGraph)

            def findMinIntWeight(self, cycle, reverseGraph):
                """
                Finds the smallest cost among the internal incoming edges
                of the cycle and returns it.
                (Also saves the internalMDST (currently still a cycle) and
                collects the external incoming/outgoing edges on this
                VertexGraph; the source of each leaving edge is re-pointed
                to this VertexGraph.)
                (The VertexGraph formed by the cycle is added to the
                reverseGraph by updateMinExtEdge.)
                Raises TypeError if a chosen edge of a cycle vertex does not
                originate from within the cycle.
                """
                minIntWeight = float('infinity')

                for vertex in cycle:
                    # add incoming edges to this VertexGraph
                    for inc_edge in vertex.incoming_edges:
                        if inc_edge.src in cycle:
                            # edge from within the cycle
                            minIntWeight = min(minIntWeight, inc_edge.cost)
                        else:
                            # edge from outside the cycle
                            self.addIncomingEdge(inc_edge)
                    # add outgoing edges to this VertexGraph
                    for out_edge in vertex.outgoing_edges:
                        if out_edge.tgt not in cycle:
                            # edge leaves the cycle
                            self.addOutgoingEdge(out_edge)
                            # update src to this VertexGraph
                            out_edge.src = self
                    # save internal MDST
                    min_edge = reverseGraph[vertex]
                    if min_edge.src in cycle:
                        self.internalMDST[vertex] = min_edge
                    else:
                        raise TypeError('how is this a cycle')

                return minIntWeight

            def updateMinExtEdge(self, minIntWeight, reverseGraph):
                """
                Modifies the cost of the external incoming edges and finds the
                minimum external incoming edge with this modified weight.
                This found edge will break the cycle: the internalMDST is
                updated from a cycle to an MDST and the reverseGraph is
                updated to include this VertexGraph instead of its vertices.
                Raises RuntimeError if no external edge can break the cycle.
                """
                minExt = None
                # only meaningful once minExt is set (see condition below)
                minModWeight = -float('infinity')

                # Find incoming edge from outside of the cycle with minimal
                # modified cost. This edge will break the cycle.
                for inc_edge in self.incoming_edges:
                    # An incoming edge (with src from within the cycle), can be
                    # from a contracted part of the graph. Assume bc is a
                    # contracted part (VertexGraph); a, bc is a newly formed
                    # cycle (due to the breaking of the previous cycle bc). bc
                    # has at least lkp incoming edges to b and c, but we should
                    # not consider the lkp of c to break the cycle.
                    # If we want to break a, bc, select plausible edges,
                    #    /<--\
                    #   a     bc    bc's MDST b <-- c
                    #    \-->/
                    # by looking at their original targets.
                    # (if cycle inc_edge.orig_tgt == external inc_edge.orig_tgt)
                    if reverseGraph[inc_edge.tgt].orig_tgt == inc_edge.orig_tgt:
                        # modified cost: cost of inc_edge -
                        # (cost of previously chosen minimum edge to cycle vertex - minIntWeight)
                        inc_edge.cost -= (reverseGraph[inc_edge.tgt].cost - minIntWeight)
                        if minExt is None or minModWeight > inc_edge.cost:
                            # save better edge from outside of the cycle
                            minExt = inc_edge
                            minModWeight = inc_edge.cost

                # Without a qualifying external edge the cycle cannot be
                # broken; fail explicitly before mutating reverseGraph
                # (the code below would otherwise fail on minExt.tgt).
                if minExt is None:
                    raise RuntimeError('no external edge found to break the cycle')

                # Example: a, b is a cycle (we know that there are no other
                # incoming edges to a and/or b, as there is only exactly one
                # incoming edge per vertex), and the arrow from c to b represents
                # the minExt edge. We will remove the bottom arrow (from a to b)
                #    /<--\          and save the minExt edge in the reverseGraph.
                #   a     b <-- c   This breaks the cycle. As the internalMDST
                #    \-->/          saves the internal MDST, and currently still
                # holds a cycle, we have to remove it from the internalMDST.
                # We have to remove all vertex bindings of the cycle from the
                # reverseGraph (as it is contracted into a single VertexGraph),
                # and store the minExt edge to this VertexGraph in it.
                for int_vertex in self.internalMDST:
                    del reverseGraph[int_vertex]  # remove cycle from reverseGraph

                del self.internalMDST[minExt.tgt]  # remove/break cycle

                for inc_edge in self.incoming_edges:
                    # update inc_edge's target to this VertexGraph
                    inc_edge.tgt = self

                # save minExt edge to this VertexGraph in the reverseGraph
                reverseGraph[self] = minExt

        while True:
            # 2: find all cycles:
            cycles = []
            visited = {self.root}              # root does not have incoming edges,
            for vertex in list(pi_v.keys()):   # it can not be part of a cycle
                if vertex not in visited:      # getCycle depends on root being in visited
                    cycle = getCycle(vertex, pi_v, visited)
                    if cycle is not None:
                        cycles.append(cycle)

            # 2: if the set of edges {pi(v), v} does not contain any cycles,
            # then we found our minimum directed spanning tree
            # otherwise, we'll have to resolve the cycles
            if len(cycles) == 0:
                break

            # 3: For each formed cycle:
            # 3a: find internal incoming edge with the smallest cost
            # 3b: modify the cost of each arc which enters the cycle
            # 3c: replace smallest internal edge with the modified edge which has the smallest cost
            for cycle in cycles:
                # Breaks a cycle by:
                # - contracting cycle into VertexGraph
                # - finding the internal incoming edge with the smallest cost
                # - modifying the cost of each arc which enters the cycle
                # - replacing the smallest internal edge with the modified edge which has the smallest cost
                # - changing reverseGraph accordingly (removes elements from cycle, adds vertexGraph)
                # (This will find a solution as the graph keeps shrinking with every cycle,
                # in the worst case the same amount as there are vertices, until
                # only the root and one vertexGraph remains)
                vertexGraph = VertexGraph(cycle, pi_v)

        class SortedContainer(object):
            """
            A container that keeps elements sorted based on a given sortValue.
            NOTE(review): elements with the same sortValue are returned
            last-inserted first (pop takes from the end of the bucket).
            """
            def __init__(self):
                # member variables:
                self.keys = []    # stores key in sorted order (sorted when pop gets called)
                self.sorted = {}  # {key, [elems with same key]}

            def add(self, sortValue, element):
                """
                Adds element with sortValue to the SortedContainer.
                """
                elems = self.sorted.get(sortValue)
                if elems is None:
                    self.sorted[sortValue] = [element]
                    self.keys.append(sortValue)
                else:
                    elems.append(element)

            def pop(self):
                """
                Sorts the SortedContainer, removes and returns an element
                with the smallest sortValue.
                """
                self.keys.sort()
                elems = self.sorted[self.keys[0]]
                elem = elems.pop()
                if len(elems) == 0:
                    del self.sorted[self.keys[0]]
                    del self.keys[0]
                return elem

            def empty(self):
                """
                Returns whether or not the sorted container is empty.
                """
                return (len(self.keys) == 0)

        def createPRIM_OP(edge, inc_cost=True):
            """
            Helper function to keep argument list short,
            returns contracted data for a PRIM_OP as a tuple; the cost is
            appended as last element when inc_cost is True.
            """
            if edge.label == PRIM_OP.inc or edge.label == PRIM_OP.out:
                # (op, vertex type, actual edge type[, cost])
                if inc_cost:
                    return (edge.label, edge.orig_src.type, edge.orig_tgt.type, edge.cost)
                else:
                    return (edge.label, edge.orig_src.type, edge.orig_tgt.type)
            elif edge.label == PRIM_OP.lkp:
                # (op, vertex/edge type, is vertex or edge[, cost])
                if inc_cost:
                    return (edge.label, edge.orig_tgt.type, edge.orig_tgt.is_vertex, edge.cost)
                else:
                    return (edge.label, edge.orig_tgt.type, edge.orig_tgt.is_vertex)
            else:  # src, tgt operation
                # (op, actual edge type[, cost])
                if inc_cost:
                    return (edge.label, edge.orig_src.type, edge.cost)
                else:
                    return (edge.label, edge.orig_src.type)

        def flattenReverseGraph(vertex, inc_edge, reverseGraph):
            """
            Flattens the reverseGraph, so that the vertexGraph node can get
            processed to create a forwardGraph: every entry is keyed on a
            plain vertex, and the internal MDST of each VertexGraph that is
            reached is expanded recursively.
            """
            if not isinstance(vertex, VertexGraph):
                reverseGraph[vertex] = inc_edge
            else:
                # the edge enters the contracted cycle at its original target
                reverseGraph[inc_edge.orig_tgt] = inc_edge
                for vg, eg in inc_edge.tgt.internalMDST.items():
                    flattenReverseGraph(vg, eg, reverseGraph)
            # the edge may originate from a contracted cycle as well
            if isinstance(inc_edge.src, VertexGraph):
                for vg, eg in inc_edge.src.internalMDST.items():
                    flattenReverseGraph(vg, eg, reverseGraph)

        def createForwardGraph(vertex, inc_edge, forwardGraph):
            """
            Creates a forwardGraph, keeping in mind that there can be
            vertexGraph elements in the reverseGraph.
            """
            forwardGraph.setdefault(inc_edge.orig_src, []).append(inc_edge)
            if isinstance(vertex, VertexGraph):
                for vg, eg in vertex.internalMDST.items():
                    createForwardGraph(vg, eg, forwardGraph)

        MDST = []
        # pi_v contains {vertex, incoming_edge}
        # we want to start from root and follow the outgoing edges
        # so we have to build the forwardGraph graph for pi_v
        # (Except for the root (has 0), each vertex has exactly one incoming edge,
        # but might have multiple outgoing edges)
        forwardGraph = {}  # {vertex, [outgoing edge 1, ... ] }
        reverseGraph = {}

        # flatten reverseGraph (for the vertexGraph elements)
        for v, e in pi_v.items():
            flattenReverseGraph(v, e, reverseGraph)

        # create the forwardGraph
        for vertex, edge in reverseGraph.items():
            createForwardGraph(vertex, edge, forwardGraph)

        # create the MDST in a best first manner (lowest value first)
        current = SortedContainer()  # allows easy walking through the tree
        for edge in forwardGraph[self.root]:
            current.add(edge.orig_cost, edge)  # use orig cost, not modified
        while not current.empty():
            p_op = current.pop()  # p_op contains an outgoing edge
            MDST.append(createPRIM_OP(p_op))
            for edge in forwardGraph.get(p_op.orig_tgt, []):
                current.add(edge.orig_cost, edge)
        return MDST
|
||||||
8
pattern_matching/run.sh
Executable file
8
pattern_matching/run.sh
Executable file
|
|
@ -0,0 +1,8 @@
|
||||||
|
#!/bin/sh
# Runs the pattern matching example and shows the resulting graphs.

# Run the example (presumably this writes randomGraph.dot and
# randomPattern.dot -- confirm in main.py).
python main.py
# Render both Graphviz files to SVG.
dot randomGraph.dot -Tsvg > randomGraph.svg
dot randomPattern.dot -Tsvg > randomPattern.svg

# Open the rendered images in Firefox.
firefox randomGraph.svg
firefox randomPattern.svg
|
||||||
115
pattern_matching/searchGraph.py
Normal file
115
pattern_matching/searchGraph.py
Normal file
|
|
@ -0,0 +1,115 @@
|
||||||
|
# coding: utf-8
|
||||||
|
|
||||||
|
"""
|
||||||
|
Author: Sten Vercammen
|
||||||
|
University of Antwerp
|
||||||
|
|
||||||
|
Example code for paper: Efficient model transformations for novices
|
||||||
|
url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen
|
||||||
|
|
||||||
|
The main goal of this code is to give an overview, and an understandable
|
||||||
|
implementation, of known techniques for pattern matching and solving the
|
||||||
|
sub-graph homomorphism problem. The presented techniques do not include
|
||||||
|
performance adaptations/optimizations. It is not optimized to be efficient
|
||||||
|
but rather for the ease of understanding the workings of the algorithms.
|
||||||
|
The paper does list some possible extensions/optimizations.
|
||||||
|
|
||||||
|
It is intended as a guideline, even for novices, and provides an in-depth look
|
||||||
|
at the workings behind various techniques for efficient pattern matching.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from graph import *
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
class SearchGraph(Graph):
    """
    A SearchGraph is an extended Graph, it keeps track of statistics
    for creating the cost model when generating a search plan.
    It stores the amount of edges for each edge.type per vertex.type.
    """
    def __init__(self, orig=None, deepCopy=False):
        """
        Creates an empty SearchGraph, or one that shares the vertices and
        edges of orig.

        orig     -- optional Graph (or SearchGraph) to take the content from
        deepCopy -- when True nothing is copied (deep copy is not implemented)
        Raises TypeError if orig is given but is not a Graph.
        """
        Graph.__init__(self)
        # member variables:
        self.nr_of_inc_edges = {}  # {vertex_type, {edge_type, nr of incoming edges of edge_type for vertex_type } }
        self.nr_of_out_edges = {}  # {vertex_type, {edge_type, nr of outgoing edges of edge_type for vertex_type } }

        if orig is not None:
            if not (isinstance(orig, Graph) or isinstance(orig, SearchGraph)):
                raise TypeError('Can only create SearchGraph from Graph and SearchGraph types')
            if not deepCopy:
                # copy all member elements:
                self.vertices = orig.vertices  # this is a reference
                self.edges = orig.edges        # this is a reference
                # update the edge counters for each edge
                for _, edges in self.edges.items():
                    for edge in edges:
                        self.addToEdgeCounters(edge)
            else:  # TODO: deepcopy (not really needed)
                pass

    def addCreateEdge(self, src, tgt, str_type):
        """
        Creates edge of str_type from src to tgt, and returns it,
        so that properties can be added to the edge.
        This also adds the Edge to the Edge counters.
        """
        # call parent function, this function is an extension
        edge = Graph.addCreateEdge(self, src, tgt, str_type)
        # the counters are maintained by addToEdgeCounters (this class
        # defines no updateEdgeCounters method)
        self.addToEdgeCounters(edge)
        return edge

    def addToEdgeCounters(self, edge):
        """
        Add the Edge to the Edge counters.
        """
        # get {edge.type, counter} for tgt vertex of edge (or create it)
        edge_counters = self.nr_of_inc_edges.setdefault(edge.tgt.type, {})
        # increase counter of edge.type by 1
        edge_counters[edge.type] = edge_counters.get(edge.type, 0) + 1
        # get {edge.type, counter} for src vertex of edge (or create it)
        edge_counters = self.nr_of_out_edges.setdefault(edge.src.type, {})
        # increase counter of edge.type by 1
        edge_counters[edge.type] = edge_counters.get(edge.type, 0) + 1

    def getCostLkp(self, type, is_vertex):
        """
        Returns the cost of a lkp primitive operation (of a vertex or edge).
        Returns None if vertex type or edge type not present in Host Graph
        """
        if is_vertex:
            cost = len(self.getVerticesOfType(type))
        else:
            cost = len(self.getEdgesOfType(type))
        if cost == 0:
            return None
        # we use a logarithmic cost
        return math.log(cost)

    def _getEdgeCost(self, counters, vertex_type, edge_type):
        """
        Shared implementation of getCostInc and getCostOut.

        counters -- self.nr_of_inc_edges or self.nr_of_out_edges
        Returns the logarithm of the average number of edges of edge_type
        per vertex of vertex_type, or None if no such edge was counted.
        """
        count = counters.get(vertex_type, {}).get(edge_type)
        if count is None:
            # type combination not present (float(None) would raise)
            return None
        cost = float(count)
        nr_of_vertices_with_type = len(self.getVerticesOfType(vertex_type))
        if nr_of_vertices_with_type != 0:
            cost /= nr_of_vertices_with_type
        # we use a logarithmic cost
        return math.log(cost)

    def getCostInc(self, vertex_type, edge_type):
        """
        Returns the cost of an in primitive operation.
        Returns None if vertex_type or edge_type not present in Host Graph
        """
        return self._getEdgeCost(self.nr_of_inc_edges, vertex_type, edge_type)

    def getCostOut(self, vertex_type, edge_type):
        """
        Returns the cost of an out primitive operation.
        Returns None if vertex_type or edge_type not present in Host Graph
        """
        return self._getEdgeCost(self.nr_of_out_edges, vertex_type, edge_type)
|
||||||
Loading…
Add table
Add a link
Reference in a new issue