Add a performance comparison between Sten's and Joeri's matchers. Note, however, that Sten's matcher appears to be broken: it returns a different match set on every run.

This commit is contained in:
Joeri Exelmans 2024-09-05 11:42:32 +02:00
parent b4f41cc090
commit bed3529676
2 changed files with 354 additions and 58 deletions

View file

@ -0,0 +1,286 @@
import time
import matcher as j # joeri's matcher
import graph as sgraph # sten's graph
import patternMatching as s # sten's matcher
import generator
def j_to_s(j):
    """Convert one of Joeri's graphs into one of Sten's graphs.

    Every vertex of ``j`` is re-created in the new graph with its ``value``
    used as the vertex type, and every edge is re-created between the
    corresponding new vertices with the single edge type ``"e"``.

    Note: the parameter ``j`` shadows the module alias ``j`` inside this
    function; only the graph argument is used here.
    """
    sten_graph = sgraph.Graph()

    # Joeri vertex -> Sten vertex; the vertex value becomes the Sten type.
    to_sten = {
        vertex: sten_graph.addCreateVertex(vertex.value)
        for vertex in j.vtxs
    }

    # Joeri's edges are untyped, so all Sten edges share one type.
    for edge in j.edges:
        sten_graph.addCreateEdge(to_sten[edge.src], to_sten[edge.tgt], "e")

    return sten_graph
def s_to_j(s):
    """Convert one of Sten's graphs into one of Joeri's graphs.

    ``s.vertices`` is iterated as a mapping from vertex type to a collection
    of vertices, and ``s.edges`` as a mapping whose values are collections of
    edges exposing ``src`` and ``tgt``. Each Sten vertex becomes a Joeri
    vertex whose value is the Sten type; edge types are dropped.

    Fixes relative to the previous version:
    * the converted graph is returned (previously the imported matcher
      module ``j`` was returned by mistake);
    * every Sten vertex is mapped to the Joeri vertex created for it
      (previously the index restarted at 0 for each type group, so edges
      were attached to the wrong vertices when there was more than one
      group).

    Note: the parameter ``s`` shadows the module alias ``s`` inside this
    function; only the graph argument is used here.
    """
    jg = j.Graph()
    jg.vtxs = []
    m = {}  # Sten vertex -> the Joeri vertex created for it
    for typ, svs in s.vertices.items():
        for sv in svs:
            jv = j.Vertex(typ)
            jg.vtxs.append(jv)
            m[sv] = jv
    jg.edges = [
        j.Edge(m[se.src], m[se.tgt])
        for ses in s.edges.values()
        for se in ses
    ]
    return jg
def _time_matcher(label, find_matches, iterations, expected):
    """Time one matcher, print its report and return the total time in ns.

    ``find_matches`` is called with no arguments ``iterations`` times; the
    iterable it returns is fully consumed inside the timed region. The number
    of matches from the last iteration is printed, or compared against
    ``expected`` when that is not ``None``.
    """
    print(f" Patience ({label})...")
    total_ns = 0
    matches = []
    for _ in range(iterations):
        time_start = time.perf_counter_ns()
        matches = list(find_matches())
        total_ns += time.perf_counter_ns() - time_start
    print(f' {iterations} iterations, took {total_ns/1000000:.3f} ms, {total_ns/iterations/1000000:.3f} ms per iteration')
    if expected is None:
        print(f" {len(matches)} matches")
    elif len(matches) == expected:
        # Only the number of matches is checked, not their contents.
        print(" correct (probably)")
    else:
        print(f" WRONG! expected: {expected}, got: {len(matches)}")
    return total_ns


def run_benchmark(jhost, jguest, shost, sguest, expected=None):
    """Benchmark Joeri's and Sten's matchers on the same host/guest pair.

    ``jhost``/``jguest`` are the host and guest (pattern) graphs in Joeri's
    representation; ``shost``/``sguest`` are the same graphs in Sten's
    representation. When ``expected`` is given, the number of matches found
    by each matcher is checked against it; otherwise the count is printed.

    Fix: Joeri's matcher is now built from the ``jhost``/``jguest``
    arguments. Previously it read the module-level ``host``/``guest``
    variables, so callers passing other graphs benchmarked stale data.
    """
    iterations = 50

    # benchmark Joeri: vertices are compatible when their values are equal
    j_matcher = j.MatcherVF2(jhost, jguest,
        lambda g_val, h_val: g_val == h_val)
    j_durations = _time_matcher("joeri", j_matcher.match, iterations, expected)

    # benchmark Sten (note the argument order: pattern first, then host)
    s_matcher = s.PatternMatching()
    s_durations = _time_matcher(
        "sten", lambda: s_matcher.matchVF2(sguest, shost), iterations, expected)

    print(f" joeri is {s_durations/j_durations:.2f} times faster")
if __name__ == "__main__":
    def _make_graph(vertex_values, edge_pairs):
        """Build a Joeri graph from vertex values and (src, tgt) index pairs."""
        graph = j.Graph()
        graph.vtxs = [j.Vertex(value) for value in vertex_values]
        graph.edges = [
            j.Edge(graph.vtxs[src], graph.vtxs[tgt])
            for src, tgt in edge_pairs
        ]
        return graph

    # A simple two-vertex loop, used as (part of) most patterns.
    _LOOP = [(0, 1), (1, 0)]

    # Vertex values of the "shapes" host graph (index in trailing comment).
    _SHAPES = [
        'triangle',  # 0
        'square',    # 1
        'square',    # 2
        'circle',    # 3
        'circle',    # 4
        'circle',    # 5
    ]

    # Edges of the "shapes" host graph, shared by three benchmarks.
    _SHAPE_EDGES = [
        # not a match:
        (0, 5), (5, 0),
        # will be a match:
        (1, 5), (5, 1),
        # noise:
        (1, 2),
        # will be a match:
        (2, 4), (4, 2),
        # noise:
        (0, 1), (0, 3), (0, 0), (1, 1),
        # will be a match:
        (3, 2), (2, 3),
    ]

    print("\nBENCHMARK: small graph, simple pattern")
    host = _make_graph(
        [0, 0, 0, 0],
        [(0, 1), (1, 2), (2, 0), (2, 3), (3, 2)])
    # Look for a simple loop:
    guest = _make_graph([0, 0], _LOOP)
    # because of the symmetry in our pattern, there will be 2 matches
    run_benchmark(host, guest, j_to_s(host), j_to_s(guest), expected=2)

    #######################################################################
    print("\nBENCHMARK: larger graph, simple pattern")
    host = _make_graph(_SHAPES, _SHAPE_EDGES)
    guest = _make_graph(['square', 'circle'], _LOOP)
    # should give 3 matches
    run_benchmark(host, guest, j_to_s(host), j_to_s(guest), expected=3)

    #######################################################################
    print("\nBENCHMARK: same as before, but with larger pattern")
    host = _make_graph(_SHAPES, _SHAPE_EDGES)
    guest = _make_graph(['square', 'circle', 'square'], _LOOP + [(2, 0)])
    # this time, only 2 matches
    run_benchmark(host, guest, j_to_s(host), j_to_s(guest), expected=2)

    #######################################################################
    print("\nBENCHMARK: disconnected pattern")
    host = _make_graph(_SHAPES + ['bear', 'bear'], _SHAPE_EDGES)
    guest = _make_graph(['square', 'circle', 'bear'], _LOOP)
    # the 'bear' in our pattern can be matched with any of the two bears in
    # the graph, effectively doubling the number of matches
    run_benchmark(host, guest, j_to_s(host), j_to_s(guest), expected=6)

    #######################################################################
    print("\nBENCHMARK: larger graph")
    shost, sguest = generator.get_large_host_and_guest()
    run_benchmark(s_to_j(shost), s_to_j(sguest), shost, sguest)

    #######################################################################
    print("\nBENCHMARK: large random graph")
    import random
    # Seed set before calling the generator; presumably it draws from
    # `random`, which would make the run reproducible -- verify.
    random.seed(0)
    shost, sguest = generator.get_random_host_and_guest(
        nr_vtxs=10,
        nr_vtx_types=0,
        nr_edges=20,
        nr_edge_types=0,
    )
    run_benchmark(s_to_j(shost), s_to_j(sguest), shost, sguest)

View file

@ -26,7 +26,6 @@ class PatternMatching(object):
Returns an occurrence of a given pattern from the given Graph
"""
def __init__(self, optimize=True):
# store the type of matching we want to use
self.optimize = optimize
def matchNaive(self, pattern, vertices, edges, pattern_vertices=None):
@ -34,17 +33,19 @@ class PatternMatching(object):
Try to find an occurrence of the pattern in the Graph naively.
"""
print('matchNaive...')
print('pattern:', pattern)
print('vertices:', vertices)
print('edges:', edges)
print('pattern_vertices:', pattern_vertices)
# print('matchNaive...')
# print('pattern.vertices:', pattern.vertices)
# print('pattern.edges:', pattern.edges)
# print('vertices:', vertices)
# print('edges:', edges)
# print('pattern_vertices:', pattern_vertices)
# allow call with specific arguments
if pattern_vertices == None:
pattern_vertices = pattern.vertices
def visitEdge(pattern_vertices, p_edge, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
# print('visitEdge')
"""
Visit a pattern edge, and try to bind it to a graph edge.
(If the first fails, try the second, and so on...)
@ -70,6 +71,7 @@ class PatternMatching(object):
return False
def visitEdges(pattern_vertices, p_edges, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
# print('visitEdges')
"""
Visit all edges of the pattern vertex (edges given as argument).
We need to try visiting them for all its permutations, as matching
@ -130,6 +132,7 @@ class PatternMatching(object):
return foundallEdges
def visitVertex(pattern_vertices, p_vertex, g_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
# print('visitVertex')
"""
Visit a pattern vertex, and try to bind it to the graph vertex
(both are given as argument). A binding is successful if all the
@ -153,6 +156,7 @@ class PatternMatching(object):
return False
def visitVertices(pattern_vertices, p_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
# print('visitVertices')
"""
Visit a pattern vertex and try to bind a graph vertex to it.
"""
@ -201,9 +205,9 @@ class PatternMatching(object):
"""
Return adjacency matrix and the order of the vertices.
"""
print('createAdjacencyMatrixMap...')
print('graph:', graph)
print('pattern:', pattern)
# print('createAdjacencyMatrixMap...')
# print('graph:', graph)
# print('pattern:', pattern)
matrix = collections.OrderedDict() # { vertex, (index, [has edge from index to pos?]) }
@ -253,9 +257,9 @@ class PatternMatching(object):
return AM, vertices_order
def matchVF2(self, pattern, graph):
print('matchVF2...')
print('pattern:', pattern)
print('graph:', graph)
# print('matchVF2...')
# print('pattern:', pattern)
# print('graph:', graph)
class VF2_Obj(object):
"""
@ -263,11 +267,12 @@ class PatternMatching(object):
"""
def __init__(self, len_graph_vertices, len_pattern_vertices):
# represents if n-the element (h[n] or p[n]) matched
self.core_graph = [False]*len_graph_vertices
self.core_pattern = [False]*len_pattern_vertices
self.host_vtx_is_matched = [False]*len_graph_vertices
self.pattern_vtx_is_matched = [False]*len_pattern_vertices
# save mapping from pattern to graph
self.mapping = {}
self.edge_mapping = {}
# preference lvl 1
# ordered set of vertices adjecent to M_graph connected via an outgoing edge
@ -361,51 +366,53 @@ class PatternMatching(object):
# add all neihgbours of pattern vertex m
for i in range(0, len(P)): # P is a nxn-matrix
if (P[m][i] or P[i][m]) and VF2_obj.core_pattern[i]:
if (P[m][i] or P[i][m]) and VF2_obj.pattern_vtx_is_matched[i]:
neighbours_pattern.setdefault(p[i].type, set()).add(p[i])
# add all neihgbours of graph vertex n
for i in range(0, len(H)): # P is a nxn-matrix
if (H[n][i] or H[i][n]) and VF2_obj.core_graph[i]:
if (H[n][i] or H[i][n]) and VF2_obj.host_vtx_is_matched[i]:
neighbours_graph.setdefault(h[i].type, set()).add(h[i])
# take a coding shortcut,
# use self.matchNaive function to see if it is feasable.
# this way, we immidiatly test the semantic attributes
if not self.matchNaive(pattern, pattern_vertices=neighbours_pattern, vertices=neighbours_graph, edges=graph.edges):
# print('pattern.vertices', pattern.vertices)
matched = self.matchNaive(pattern, pattern_vertices=neighbours_pattern, vertices=neighbours_graph, edges=graph.edges)
if matched == None:
return False
# count ext_edges from core_graph to a adjecent vertices and
# count ext_edges from host_vtx_is_matched to a adjecent vertices and
# cuotn ext_edges for adjecent vertices and not matched vertices
# connected via the ext_edges
ext_edges_graph_ca = 0
ext_edges_graph_an = 0
# for all core vertices
for x in range(0, len(VF2_obj.core_graph)):
for x in range(0, len(VF2_obj.host_vtx_is_matched)):
# for all its neighbours
for y in range(0, len(H)):
if H[x][y]:
# if it is a neighbor and not yet matched
if (VF2_obj.N_out_graph[y] != -1 or VF2_obj.N_inc_graph[y] != -1) and VF2_obj.core_graph[y]:
if (VF2_obj.N_out_graph[y] != -1 or VF2_obj.N_inc_graph[y] != -1) and VF2_obj.host_vtx_is_matched[y]:
# if we matched it
if VF2_obj.core_graph[x] != -1:
if VF2_obj.host_vtx_is_matched[x] != -1:
ext_edges_graph_ca += 1
else:
ext_edges_graph_an += 1
# count ext_edges from core_pattern to a adjecent vertices
# count ext_edges from pattern_vtx_is_matched to a adjecent vertices
# connected via the ext_edges
ext_edges_pattern_ca = 0
ext_edges_pattern_an = 0
# for all core vertices
for x in range(0, len(VF2_obj.core_pattern)):
for x in range(0, len(VF2_obj.pattern_vtx_is_matched)):
# for all its neighbours
for y in range(0, len(P)):
if P[x][y]:
# if it is a neighbor and not yet matched
if (VF2_obj.N_out_pattern[y] != -1 or VF2_obj.N_inc_pattern[y] != -1) and VF2_obj.core_pattern[y]:
if (VF2_obj.N_out_pattern[y] != -1 or VF2_obj.N_inc_pattern[y] != -1) and VF2_obj.pattern_vtx_is_matched[y]:
# if we matched it
if VF2_obj.core_pattern[x] != -1:
if VF2_obj.pattern_vtx_is_matched[x] != -1:
ext_edges_pattern_ca += 1
else:
ext_edges_pattern_an += 1
@ -425,9 +432,9 @@ class PatternMatching(object):
if ext_edges_pattern_an > ext_edges_graph_an:
return False
return True
return matched
def matchPhase(H, P, h, p, index_M, VF2_obj, n, m):
def matchPhase(index_M, VF2_obj, n, m):
"""
The matching fase of the VF2 algorithm. If the chosen n, m pair
passes the feasibilityTest, the pair gets added and we start
@ -448,12 +455,15 @@ class PatternMatching(object):
return False # already visited this (partial) match -> skip
if feasibilityTest(H, P, h, p, VF2_obj, n, m):
print(self.indent*" ","adding to match:", n, "->", m)
matched = feasibilityTest(H, P, h, p, VF2_obj, n, m)
if matched != False:
# print(self.indent*" ","adding to match:", n, "->", m)
# adapt VF2_obj
VF2_obj.core_graph[n] = True
VF2_obj.core_pattern[m] = True
VF2_obj.host_vtx_is_matched[n] = True
VF2_obj.pattern_vtx_is_matched[m] = True
VF2_obj.mapping[h[n]] = p[m]
# VF2_obj.edge_mapping
addOutNeighbours(H[n], VF2_obj.N_out_graph, index_M)
addIncNeighbours(H, n, VF2_obj.N_inc_graph, index_M)
addOutNeighbours(P[m], VF2_obj.N_out_pattern, index_M)
@ -475,11 +485,11 @@ class PatternMatching(object):
if True:
# else:
print(self.indent*" ","backtracking... remove", n, "->", m)
# print(self.indent*" ","backtracking... remove", n, "->", m)
# else, backtrack, adapt VF2_obj
VF2_obj.core_graph[n] = False
VF2_obj.core_pattern[m] = False
VF2_obj.host_vtx_is_matched[n] = False
VF2_obj.pattern_vtx_is_matched[m] = False
del VF2_obj.mapping[h[n]]
delNeighbours(VF2_obj.N_out_graph, index_M)
delNeighbours(VF2_obj.N_inc_graph, index_M)
@ -488,7 +498,7 @@ class PatternMatching(object):
return False
def preferred(H, P, h, p, index_M, VF2_obj, N_graph, N_pattern):
def preferred(index_M, VF2_obj, N_graph, N_pattern):
"""
Try to match the adjacency vertices connected via outgoing
or incoming edges. (Depending on what is given for N_graph and
@ -497,23 +507,23 @@ class PatternMatching(object):
for n in range(0, len(N_graph)):
# skip graph vertices that are not in VF2_obj.N_out_graph
# (or already matched)
if N_graph[n] == -1 or VF2_obj.core_graph[n]:
if N_graph[n] == -1 or VF2_obj.host_vtx_is_matched[n]:
# print(self.indent*" "," skipping")
continue
print(self.indent*" "," n:", n)
# print(self.indent*" "," n:", n)
for m in range(0, len(N_pattern)):
# skip graph vertices that are not in VF2_obj.N_out_pattern
# (or already matched)
if N_pattern[m] == -1 or VF2_obj.core_pattern[m]:
if N_pattern[m] == -1 or VF2_obj.pattern_vtx_is_matched[m]:
continue
print(self.indent*" "," m:", m)
matched = yield from matchPhase(H, P, h, p, index_M, VF2_obj, n, m)
# print(self.indent*" "," m:", m)
matched = yield from matchPhase(index_M, VF2_obj, n, m)
if matched:
return True
return False
def leastPreferred(H, P, h, p, index_M, VF2_obj):
def leastPreferred(index_M, VF2_obj):
"""
Try to match the vertices that are not connected to the curretly
matched vertices.
@ -521,28 +531,28 @@ class PatternMatching(object):
for n in range(0, len(VF2_obj.N_out_graph)):
# skip vertices that are connected to the graph
# (or already matched)
if not (VF2_obj.N_out_graph[n] == -1 and VF2_obj.N_inc_graph[n] == -1) or VF2_obj.core_graph[n]:
if not (VF2_obj.N_out_graph[n] == -1 and VF2_obj.N_inc_graph[n] == -1) or VF2_obj.host_vtx_is_matched[n]:
# print(self.indent*" "," skipping")
continue
print(" n:", n)
# print(" n:", n)
for m in range(0, len(VF2_obj.N_out_pattern)):
# skip vertices that are connected to the graph
# (or already matched)
if not (VF2_obj.N_out_pattern[m] == -1 and VF2_obj.N_inc_pattern[m] == -1) or VF2_obj.core_pattern[m]:
if not (VF2_obj.N_out_pattern[m] == -1 and VF2_obj.N_inc_pattern[m] == -1) or VF2_obj.pattern_vtx_is_matched[m]:
# print(self.indent*" "," skipping")
continue
print(self.indent*" "," m:", m)
matched = yield from matchPhase(H, P, h, p, index_M, VF2_obj, n, m)
# print(self.indent*" "," m:", m)
matched = yield from matchPhase(index_M, VF2_obj, n, m)
if matched:
return True
return False
print(self.indent*" ","index_M:", index_M)
# print(self.indent*" ","index_M:", index_M)
# We are at the end, we found an candidate.
if index_M == len(p):
print(self.indent*" ","end...")
# print(self.indent*" ","end...")
bound_graph_vertices = {}
for vertex_bound, _ in VF2_obj.mapping.items():
bound_graph_vertices.setdefault(vertex_bound.type, set()).add(vertex_bound)
@ -555,28 +565,28 @@ class PatternMatching(object):
if index_M > 0:
# try the candidates is the preffered order
# first try the adjacent vertices connected via the outgoing edges.
print(self.indent*" ","preferred L1")
matched = yield from preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_out_graph, VF2_obj.N_out_pattern)
# print(self.indent*" ","preferred L1")
matched = yield from preferred(index_M, VF2_obj, VF2_obj.N_out_graph, VF2_obj.N_out_pattern)
if matched:
return True
print(self.indent*" ","preferred L2")
# print(self.indent*" ","preferred L2")
# then try the adjacent vertices connected via the incoming edges.
matched = yield from preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_inc_graph, VF2_obj.N_inc_pattern)
matched = yield from preferred(index_M, VF2_obj, VF2_obj.N_inc_graph, VF2_obj.N_inc_pattern)
if matched:
return True
print(self.indent*" ","leastPreferred")
# print(self.indent*" ","leastPreferred")
# and lastly, try the vertices not connected to the currently matched vertices
matched = yield from leastPreferred(H, P, h, p, index_M, VF2_obj)
matched = yield from leastPreferred(index_M, VF2_obj)
if matched:
return True
return False
# create adjecency matrix of the graph
# create adjacency matrix of the graph
H, h = self.createAdjacencyMatrixMap(graph, pattern)
# create adjecency matrix of the pattern
# create adjacency matrix of the pattern
P, p = self.createAdjacencyMatrixMap(pattern, pattern)
VF2_obj = VF2_Obj(len(h), len(p))