Turn VF2 matcher into a generator that yields all matches

This commit is contained in:
Joeri Exelmans 2024-09-03 12:07:29 +02:00
parent 1320ea29e9
commit d2c996f4f7
2 changed files with 40 additions and 459 deletions

View file

@ -31,10 +31,10 @@ if __name__ == '__main__':
"""
The main function called when running from the command line.
"""
nr_of_vertices = 10
nr_of_diff_types_v = 0
nr_of_edges = 20
nr_of_diff_types_e = 0
nr_of_vertices = 50
nr_of_diff_types_v = 2
nr_of_edges = 150
nr_of_diff_types_e = 2
dv = [random.randint(0, nr_of_diff_types_v) for _ in range(nr_of_vertices)]
de = [random.randint(0, nr_of_diff_types_e) for _ in range(nr_of_edges)]
@ -47,10 +47,10 @@ if __name__ == '__main__':
# dc_inc = [ 0,25,18,47,22,25,16,45,38,25,5,45,15,44,17,46,6,17,35,8,16,29,48,47,25,34,4,20,24,1,47,44,8,25,32,3,16,6,33,21,6,13,41,10,17,25,21,33,31,30,5,4,45,26,16,42,12,25,29,3,32,30,14,26,11,13,7,13,3,43,43,22,48,37,20,28,15,40,19,33,43,16,49,36,11,25,9,42,3,22,16,40,42,44,27,30,1,18,10,35,19,6,9,43,37,38,45,19,41,14,37,45,0,31,29,31,24,20,44,46,8,45,43,3,38,38,35,12,19,45,7,34,20,28,12,17,45,17,35,49,20,21,49,1,35,38,38,36,33,30 ]
# dc_out = [ 9,2,49,49,37,33,16,21,5,46,4,15,9,6,14,22,16,33,23,21,15,31,37,23,47,3,30,26,35,9,29,21,39,32,22,43,5,9,41,30,31,30,37,33,31,34,23,22,34,26,44,36,38,33,48,5,9,34,13,7,48,41,43,26,26,7,12,6,12,28,22,8,29,22,24,27,16,4,31,41,32,15,19,20,38,0,26,18,43,46,40,17,29,14,34,14,32,17,32,47,16,45,7,4,35,22,42,11,38,2,0,29,4,38,17,44,9,23,5,10,31,17,1,11,16,5,37,27,35,32,45,16,18,1,14,4,42,24,43,31,21,38,6,34,39,46,20,1,38,47 ]
dv = [0, 1, 0, 1, 0]
de = [0, 0, 0]
dc_inc = [0, 2, 4]
dc_out = [1, 3, 3]
# dv = [0, 1, 0, 1, 0]
# de = [0, 0, 0]
# dc_inc = [0, 2, 4]
# dc_out = [1, 3, 3]
gg = GraphGenerator(dv, de, dc_inc, dc_out, debug)
@ -73,7 +73,7 @@ if __name__ == '__main__':
#PM = PatternMatching('SP')
# PM = PatternMatching('Ullmann')
PM = PatternMatching('VF2')
matches = PM.match(pattern, graph)
matches = [m for m in PM.matchVF2(pattern, graph)]
print("found", len(matches), "matches:", matches)
# regenerate graph, to show matched pattern

View file

@ -28,62 +28,17 @@ class PatternMatching(object):
"""
Returns an occurrence of a given pattern from the given Graph
"""
def __init__(self, matching_type='SP', optimize=True):
def __init__(self, optimize=True):
# store the type of matching we want to use
self.type = matching_type
self.bound_vertices = {} # saves the currently bound vertices
self.bound_edges = {} # saves the currently bound edges
self.result = None
self.previous = []
self.optimize = optimize
self.results = []
def match(self, pattern, graph):
"""
Call this function to find an occurrence of the pattern in the (host) graph.
Setting the type of matching (naive, SP, Ullmann, VF2) is done by
setting self.matching_type to its name.
"""
if not (isinstance(pattern, SearchGraph) or isinstance(pattern, Graph)):
raise TypeError('pattern must be a SearchGraph or Graph')
if not (isinstance(graph, SearchGraph) or isinstance(graph, Graph)):
raise TypeError('graph must be a SearchGraph or Graph')
self.pattern = pattern
self.graph = graph
if self.type == 'naive':
result = self.matchNaive(vertices=graph.vertices, edges=graph.edges)
elif self.type == 'SP':
result = self.matchSP()
elif self.type == 'Ullmann':
result = self.matchUllmann()
elif self.type == 'VF2':
result = self.matchVF2()
else:
raise ValueError('Unknown type for matching')
# cleanup
self.pattern = None
self.graph = None
self.bound_vertices = {}
self.bound_edges = {}
self.result = None
self.results = []
return result
def matchNaive(self, pattern_vertices=None, vertices=None, edges=None):
def matchNaive(self, pattern, vertices, edges, pattern_vertices=None):
"""
Try to find an occurrence of the pattern in the Graph naively.
"""
# allow call with specific arguments
if pattern_vertices == None:
pattern_vertices = self.pattern.vertices
if vertices == None:
vertices = self.bound_vertices
if edges == None:
edges = self.bound_edges
pattern_vertices = pattern.vertices
def visitEdge(pattern_vertices, p_edge, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
"""
@ -238,137 +193,7 @@ class PatternMatching(object):
else:
return None
def matchSP(self):
"""
Find an occurrence of the pattern in the Graph
by using the generated SearchPlan.
"""
if isinstance(self.graph, Graph):
sg = SearchGraph(self.graph)
elif isinstance(self.graph, SearchGraph):
sg = self.graph
else:
raise TypeError('Pattern matching with a SearchPlan must be given a Graph or SearchGraph')
pg = PlanGraph(self.pattern)
SP = pg.Edmonds(sg)
self.fileIndex = 0
def propConnected():
"""
Checks if the found vertices and edges can be uniquely matched
onto the pattern graph.
"""
self.result = self.matchNaive()
return self.result != None
def matchOP(elem, bound, ops, index):
"""
Execute a primitive operation, return whether or not it succeeded.
"""
type_bound = bound.setdefault(elem.type, set())
# if elem not yet bound, bind it, and try matching the next operations
if elem not in type_bound:
type_bound.add(elem)
# if matching of next operation failed, try with a different elem
if matchAllOP(ops, index+1):
return True
else:
type_bound.remove(elem)
return False
def matchAllOP(ops, index=0):
"""
Try to match an occurrence of the pattern in the graph,
by recursively matching elements that adhere to the SearchPlan
"""
# if we matched all elements,
# check if the bound elements are properly connected
if index == len(ops):
return propConnected()
op = ops[index]
if op[0] == PRIM_OP.lkp: # lkp(elem)
if op[2]: # lookup a vertex
# If the graph does not have a vertex of the same vertex
# type, we'll have to return False, happens if elems == [].
elems = self.graph.vertices.get(op[1], [])
bound = self.bound_vertices
else: # loopup an edge
# If the graph does not have an edge of the same edge
# type, we'll have to return False, happens if elems == [].
elems = self.graph.edges.get(op[1], [])
bound = self.bound_edges
# if elems == [], we'll skip the loop and return False
for elem in elems:
if matchOP(elem, bound, ops, index):
return True
# if all not bound elems fails, backtrack
return False
elif op[0] == PRIM_OP.src: # src(e): bind src of a bound edge e
# Should always succeed, as the edge must be already bound
# (there should be at least one elem in self.bound_edges[op[1]]).
for edge in self.bound_edges[op[1]]:
if matchOP(edge.src, self.bound_vertices, ops, index):
return True
# if all not bound elems fails, backtrack
return False
elif op[0] == PRIM_OP.tgt: # tgt(e): bind tgt of a bound edge e
# Should always succeed, as the edge must be already bound
# (there should be at least one elem in self.bound_edges[op[1]]).
for edge in self.bound_edges[op[1]]:
if matchOP(edge.tgt, self.bound_vertices, ops, index):
return True
# if all not bound elems fails, backtrack
return False
elif op[0] == PRIM_OP.inc: # in(v, e): bind incoming edge e of a bound vertex v
# It's possible we will try to find a vertex of a certain type
# in the bound_vertices which should be bound implicitly
# (by a src/tgt op), that is not bound. Happens when implicit
# binding bounded a "wrong" vertex. We then need to return False
# (happens by skipping the for loop when looping over [])
for vertex in self.bound_vertices.get(op[1], []):
for edge in vertex.incoming_edges:
if edge.type == op[2]:
if matchOP(edge, self.bound_edges, ops, index):
return True
# if all not bound elems fails, backtrack
return False
elif op[0] == PRIM_OP.out: # out(v, e): bind outgoing edge e of a bound vertex v
# Return False if we expect an element to be bound that is not
# bound (for the same reason as the inc op).
for vertex in self.bound_vertices.get(op[1], []):
for edge in vertex.outgoing_edges:
if edge.type == op[2]:
if matchOP(edge, self.bound_edges, ops, index):
return True
# if all not bound elems fails, backtrack
return False
else:
raise TypeError('Unknown PRIM_OP type')
# try and match all (primitive) operations from the SearchPlan
matchAllOP(SP)
# Either nothing is found, or we found an occurrence;
# it is impossible to have a partially matched occurrence
for key, bound_elems in self.bound_vertices.items():
if len(bound_elems) == 0:
# The pattern does not exist in the Graph
return None
else:
# We found a pattern
return self.result
def createAdjacencyMatrixMap(self, graph):
def createAdjacencyMatrixMap(self, graph, pattern):
"""
Return adjacency matrix and the order of the vertices.
"""
@ -380,7 +205,7 @@ class PatternMatching(object):
if self.optimize:
# insert only the vertices from the graph which have a type
# that is present in the pattern
for vertex_type, _ in self.pattern.vertices.items():
for vertex_type, _ in pattern.vertices.items():
graph_vertices = graph.vertices.get(vertex_type)
if graph_vertices != None:
allVertices.extend(graph_vertices)
@ -419,255 +244,7 @@ class PatternMatching(object):
return AM, vertices_order
def matchUllmann(self):
"""
Find an occurrence of the pattern in the Graph
by using Ullmann for solving the Constraint Satisfaction Problem (CSP).
"""
def createM_star(h, p):
"""
Create M*[v, w] = 1 if deg(v) <= deg(w), for v in V_P, w in V_H
= 0 otherwise
M and P are given to ensure correct order.
"""
m = [] # [[..], ...]
for p_vertex in p:
row = []
for g_vertex in h:
# for the degree function, we choose to look at the
# outgoing edges AND the incoming edges
# (one might prefer to use only one of them)
if self.optimize:
# also check if type matches
if p_vertex.type != g_vertex.type:
row.append(False)
continue
row.append( len(p_vertex.incoming_edges) <=
len(g_vertex.incoming_edges) and
len(p_vertex.outgoing_edges) <=
len(g_vertex.outgoing_edges))
m.append(row)
return m
def createDecreasingOrder(h):
"""
It turns out that the more edges a vertex has, the sooner it will
fail in matching the pattern. For efficiency reasons, we want it
to fail as fast as possible.
"""
order = [] # [(value, index), ...]
index = 0
for g_vertex in h:
order.append(( len(g_vertex.outgoing_edges) +
len(g_vertex.outgoing_edges), index))
index += 1
order.sort(key = lambda elem: elem[0])
# sort and only return the indices (which specify the order)
return [index for (_, index) in order]
def propConnected(M, H, P, h, p):
"""
Checks if the vertices represented in M are isomorphic to P and if
they can be matched onto the pattern graph.
"""
print(M, H, P, h, p)
# P_candi = np.dot(M, np.transpose(np.dot(M, H)))
"""
# If we do not aply the refineM function, we will want to check if
# this succeeds, as it checks for isomorphism.
# If we apply the refineM function, it is garanteed to be isomorphic.
index_column = 0
for row in P_candi:
index_row = 0
for item in row:
# for all i,j: P[i, j] = 1 : M(MH)^T [j, i] = 1
# (not the other way around)
# (return False when item is 0 and P[i,j] is 1)
if item < P[index_row][index_column]:
return False
index_row += 1
index_column += 1
"""
vertices = {}
index_column = 0
for row in M:
index_row = 0
for item in row:
# there should only be one item per row
if item:
vertex = h[index_row]
vertices.setdefault(vertex.type, set()).add(vertex)
break
index_row += 1
index_column += 1
self.result = self.matchNaive(vertices=vertices, edges=self.graph.edges)
return self.result != None
def refineM(M, H, P, h, pp):
"""
Refine M, for every vertex from the pattern, check if each possible
matching (candidate) his neighbours can also be matched. (M's column
represents vertices from P, and the row represents its candidate.)
If this is not possible set M[i,j] to false, refining/reducing the
search space.
"""
any_changes=True
while any_changes:
any_changes = False
# for all vertices from the pattern
for i in range(0, len(P)): # P is a nxn-matrix
# for all its possible assignments
for j in range(0, len(H[0])):
# if bound vertex of P, check if all neighbours are matchable
if M[i][j]:
# for all the pattern his neighbours
for k in range(0, len(P)):
# if it is a neighbour (from outgoing edges)
if P[i][k]:
match = False
for p in range(0, len(H[0])):
# check if we can match a candidate neighbour
# (from M* to the graph (H))
if M[k][p] and H[j][p]:
if self.optimize:
# also check correct type
if pp[k].type != h[p].type:
continue
match = True
break
if not match:
M[i][j] = False
any_changes = True
# if it is a neighbour (from incoming edges)
if P[k][i]:
match = False
for p in range(0, len(H[0])):
# check if we can match a candidate neighbour
# (from M* to the graph (H))
if M[k][p] and H[p][j]:
if self.optimize:
# also check correct type
if pp[i].type != h[j].type:
continue
match = True
break
if not match:
M[i][j] = False
any_changes = True
def findM(M_star, M, order, H, P, h, p, index_M=0):
"""
Find an isomorphic mapping for the vertices of P to H.
This mapping is represented by a matrix M if,
and only if M(MH)^T = P^T.
"""
# We are at the end, we found an candidate.
# Remember that we are at the end, but first check if there is
# a row with only False, if so, we do not need to check if it is
# properly connected.
check_prop = False
if index_M == len(M):
check_prop = True
index_M -= 1
# we need to refer to this row
old_row = M_star[index_M]
# previous rows (these are sparse, 1 per row, save only its position)
prev_pos = []
for i in range(0, index_M):
row = M[i]
only_false = True
for j in range(0, len(old_row)):
if row[j]:
only_false = False
prev_pos.append(j)
break
if only_false:
# check if a row with only False occurs,
# if so, we will not find an occurrence
return False
# We are at the end, we found an candidate.
if check_prop:
index_M += 1
return propConnected(M, H, P, h, p)
M[index_M] = [False] * len(old_row)
index_order = 0
for index_order in range(0, len(order)):
index_row = order[index_order]
# put previous True back on False
if index_order > 0:
M[index_M][order[index_order - 1]] = False
if old_row[index_row]:
M[index_M][index_row] = True
findMPart = True
# 1 0 0 Assume 3rd round, and we select x,
# 0 1 0 no element at the same position in the row,
# 0 x 0 of the elements above itself in the same
# column may be 1. In the example it is, then try
# selecting another element.
for index_column in range(0, index_M):
if M[index_column][index_row]:
findMPart = False
break
if not findMPart:
continue
refineM(M, H, P, h, p)
if findM(M_star, M, order, H, P, h, p, index_M + 1):
return True
# reset previous rows their True's
prev_row = 0
for pos in prev_pos:
M[prev_row][pos] = True
prev_row += 1
# reset rows below current row
for index_column in range(index_M + 1, len(M)):
# deep copy, we do not want to just copy pointer to array/list
M[index_column] = M_star[index_column][:]
# reset current row (the rest is already reset)
M[index_M] = M_star[index_M][:]
return False
# create adjacency matrix of the graph
H, h = self.createAdjacencyMatrixMap(self.graph)
# create adjacency matrix of the pattern
P, p = self.createAdjacencyMatrixMap(self.pattern)
# create M* binary matrix
M_star = createM_star(h, p)
# create the order we will use later on
order = createDecreasingOrder(h)
# deepcopy M_s into M
M = [row[:] for row in M_star]
if self.optimize:
refineM(M, H, P, h, p)
findM(M_star, M, order, H, P, h, p)
return self.result
def matchVF2(self):
def matchVF2(self, pattern, graph):
class VF2_Obj(object):
"""
@ -784,7 +361,7 @@ class PatternMatching(object):
# take a coding shortcut,
# use self.matchNaive function to see if it is feasible.
# this way, we immediately test the semantic attributes
if not self.matchNaive(pattern_vertices=neighbours_pattern, vertices=neighbours_graph, edges=self.graph.edges):
if not self.matchNaive(pattern, pattern_vertices=neighbours_pattern, vertices=neighbours_graph, edges=graph.edges):
return False
# count ext_edges from core_graph to adjacent vertices and
@ -878,9 +455,11 @@ class PatternMatching(object):
# print(self.alreadyVisited)
self.indent += 1
if findM(H, P, h, p, VF2_obj, index_M + 1):
matched = yield from findM(H, P, h, p, VF2_obj, index_M + 1)
if matched:
# return True
print(self.indent*" ","found match", len(self.results), ", continuing...")
# print(self.indent*" ","found match", len(self.results), ", continuing...")
pass
self.indent -= 1
if True:
@ -917,7 +496,8 @@ class PatternMatching(object):
if N_pattern[m] == -1 or VF2_obj.core_pattern[m]:
continue
print(self.indent*" "," m:", m)
if matchPhase(H, P, h, p, index_M, VF2_obj, n, m):
matched = yield from matchPhase(H, P, h, p, index_M, VF2_obj, n, m)
if matched:
return True
return False
@ -941,7 +521,8 @@ class PatternMatching(object):
# print(self.indent*" "," skipping")
continue
print(self.indent*" "," m:", m)
if matchPhase(H, P, h, p, index_M, VF2_obj, n, m):
matched = yield from matchPhase(H, P, h, p, index_M, VF2_obj, n, m)
if matched:
return True
return False
@ -955,43 +536,45 @@ class PatternMatching(object):
for vertex_bound, _ in VF2_obj.mapping.items():
bound_graph_vertices.setdefault(vertex_bound.type, set()).add(vertex_bound)
self.result = self.matchNaive(vertices=bound_graph_vertices, edges=self.graph.edges)
if self.result != None:
self.results.append(self.result)
return self.result != None
result = self.matchNaive(pattern, vertices=bound_graph_vertices, edges=graph.edges)
if result != None:
yield result
return result != None
if index_M > 0:
# try the candidates in the preferred order
# first try the adjacent vertices connected via the outgoing edges.
print(self.indent*" ","preferred L1")
if preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_out_graph, VF2_obj.N_out_pattern):
matched = yield from preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_out_graph, VF2_obj.N_out_pattern)
if matched:
return True
print(self.indent*" ","preferred L2")
# then try the adjacent vertices connected via the incoming edges.
if preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_inc_graph, VF2_obj.N_inc_pattern):
matched = yield from preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_inc_graph, VF2_obj.N_inc_pattern)
if matched:
return True
print(self.indent*" ","leastPreferred")
# and lastly, try the vertices not connected to the currently matched vertices
if leastPreferred(H, P, h, p, index_M, VF2_obj):
matched = yield from leastPreferred(H, P, h, p, index_M, VF2_obj)
if matched:
return True
return False
print("graph:", graph)
# create adjacency matrix of the graph
H, h = self.createAdjacencyMatrixMap(self.graph)
print("adjacency:", H)
print("h:", len(h))
H, h = self.createAdjacencyMatrixMap(graph, pattern)
# create adjacency matrix of the pattern
P, p = self.createAdjacencyMatrixMap(self.pattern)
P, p = self.createAdjacencyMatrixMap(pattern, pattern)
VF2_obj = VF2_Obj(len(h), len(p))
self.indent = 0
# Only for debugging:
self.indent = 0
self.reverseMapH = { h[i] : i for i in range(len(h))}
self.reverseMapP = { p[i] : i for i in range(len(p))}
@ -999,6 +582,4 @@ class PatternMatching(object):
# Encoded as a mapping from match size to the partial match
self.alreadyVisited = set()
findM(H, P, h, p, VF2_obj)
return self.results
yield from findM(H, P, h, p, VF2_obj)