Turn VF2 matcher into a generator that yields all matches
This commit is contained in:
parent
1320ea29e9
commit
d2c996f4f7
2 changed files with 40 additions and 459 deletions
|
|
@ -28,62 +28,17 @@ class PatternMatching(object):
|
|||
"""
|
||||
Returns an occurrence of a given pattern from the given Graph
|
||||
"""
|
||||
def __init__(self, matching_type='SP', optimize=True):
|
||||
def __init__(self, optimize=True):
|
||||
# store the type of matching we want to use
|
||||
self.type = matching_type
|
||||
self.bound_vertices = {} # saves the currently bound vertices
|
||||
self.bound_edges = {} # saves the currently bound edges
|
||||
self.result = None
|
||||
self.previous = []
|
||||
self.optimize = optimize
|
||||
self.results = []
|
||||
|
||||
def match(self, pattern, graph):
|
||||
"""
|
||||
Call this function to find an occurrence of the pattern in the (host) graph.
|
||||
Setting the type of matching (naive, SP, Ullmann, VF2) is done by
|
||||
setting self.type (the constructor's matching_type argument) to its name.
|
||||
"""
|
||||
if not (isinstance(pattern, SearchGraph) or isinstance(pattern, Graph)):
|
||||
raise TypeError('pattern must be a SearchGraph or Graph')
|
||||
if not (isinstance(graph, SearchGraph) or isinstance(graph, Graph)):
|
||||
raise TypeError('graph must be a SearchGraph or Graph')
|
||||
|
||||
self.pattern = pattern
|
||||
self.graph = graph
|
||||
|
||||
if self.type == 'naive':
|
||||
result = self.matchNaive(vertices=graph.vertices, edges=graph.edges)
|
||||
elif self.type == 'SP':
|
||||
result = self.matchSP()
|
||||
elif self.type == 'Ullmann':
|
||||
result = self.matchUllmann()
|
||||
elif self.type == 'VF2':
|
||||
result = self.matchVF2()
|
||||
else:
|
||||
raise ValueError('Unknown type for matching')
|
||||
|
||||
# cleanup
|
||||
self.pattern = None
|
||||
self.graph = None
|
||||
self.bound_vertices = {}
|
||||
self.bound_edges = {}
|
||||
self.result = None
|
||||
self.results = []
|
||||
|
||||
return result
|
||||
|
||||
def matchNaive(self, pattern_vertices=None, vertices=None, edges=None):
|
||||
def matchNaive(self, pattern, vertices, edges, pattern_vertices=None):
|
||||
"""
|
||||
Try to find an occurrence of the pattern in the Graph naively.
|
||||
"""
|
||||
# allow call with specific arguments
|
||||
if pattern_vertices == None:
|
||||
pattern_vertices = self.pattern.vertices
|
||||
if vertices == None:
|
||||
vertices = self.bound_vertices
|
||||
if edges == None:
|
||||
edges = self.bound_edges
|
||||
pattern_vertices = pattern.vertices
|
||||
|
||||
def visitEdge(pattern_vertices, p_edge, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges):
|
||||
"""
|
||||
|
|
@ -238,137 +193,7 @@ class PatternMatching(object):
|
|||
else:
|
||||
return None
|
||||
|
||||
def matchSP(self):
|
||||
"""
|
||||
Find an occurrence of the pattern in the Graph
|
||||
by using the generated SearchPlan.
|
||||
"""
|
||||
if isinstance(self.graph, Graph):
|
||||
sg = SearchGraph(self.graph)
|
||||
elif isinstance(self.graph, SearchGraph):
|
||||
sg = self.graph
|
||||
else:
|
||||
raise TypeError('Pattern matching with a SearchPlan must be given a Graph or SearchGraph')
|
||||
|
||||
pg = PlanGraph(self.pattern)
|
||||
SP = pg.Edmonds(sg)
|
||||
|
||||
self.fileIndex = 0
|
||||
|
||||
def propConnected():
|
||||
"""
|
||||
Checks if the found vertices and edges can be uniquely matched
|
||||
onto the pattern graph.
|
||||
"""
|
||||
self.result = self.matchNaive()
|
||||
return self.result != None
|
||||
|
||||
def matchOP(elem, bound, ops, index):
|
||||
"""
|
||||
Execute a primitive operation, return whether or not it succeeded.
|
||||
"""
|
||||
type_bound = bound.setdefault(elem.type, set())
|
||||
# if elem not yet bound, bind it, and try matching the next operations
|
||||
if elem not in type_bound:
|
||||
type_bound.add(elem)
|
||||
# if matching of next operation failed, try with a different elem
|
||||
if matchAllOP(ops, index+1):
|
||||
return True
|
||||
else:
|
||||
type_bound.remove(elem)
|
||||
return False
|
||||
|
||||
def matchAllOP(ops, index=0):
|
||||
"""
|
||||
Try to match an occurrence of the pattern in the graph,
|
||||
by recursively matching elements that adhere to the SearchPlan
|
||||
"""
|
||||
# if we matched all elements,
|
||||
# check if the bound elements are properly connected
|
||||
if index == len(ops):
|
||||
return propConnected()
|
||||
|
||||
op = ops[index]
|
||||
|
||||
if op[0] == PRIM_OP.lkp: # lkp(elem)
|
||||
if op[2]: # lookup a vertex
|
||||
# If the graph does not have a vertex of the same vertex
|
||||
# type, we'll have to return False, happens if elems == [].
|
||||
elems = self.graph.vertices.get(op[1], [])
|
||||
bound = self.bound_vertices
|
||||
else: # loopup an edge
|
||||
# If the graph does not have an edge of the same edge
|
||||
# type, we'll have to return False, happens if elems == [].
|
||||
elems = self.graph.edges.get(op[1], [])
|
||||
bound = self.bound_edges
|
||||
|
||||
# if elems == [], we'll skip the loop and return False
|
||||
for elem in elems:
|
||||
if matchOP(elem, bound, ops, index):
|
||||
return True
|
||||
# if every not-yet-bound elem fails, backtrack
|
||||
return False
|
||||
|
||||
elif op[0] == PRIM_OP.src: # src(e): bind src of a bound edge e
|
||||
# Should always succeed, as the edge must be already bound
|
||||
# (there should be at least one elem in self.bound_edges[op[1]]).
|
||||
for edge in self.bound_edges[op[1]]:
|
||||
if matchOP(edge.src, self.bound_vertices, ops, index):
|
||||
return True
|
||||
# if every not-yet-bound elem fails, backtrack
|
||||
return False
|
||||
|
||||
elif op[0] == PRIM_OP.tgt: # tgt(e): bind tgt of a bound edge e
|
||||
# Should always succeed, as the edge must be already bound
|
||||
# (there should be at least one elem in self.bound_edges[op[1]]).
|
||||
for edge in self.bound_edges[op[1]]:
|
||||
if matchOP(edge.tgt, self.bound_vertices, ops, index):
|
||||
return True
|
||||
# if every not-yet-bound elem fails, backtrack
|
||||
return False
|
||||
|
||||
elif op[0] == PRIM_OP.inc: # in(v, e): bind incoming edge e of a bound vertex v
|
||||
# It's possible we will try to find a vertex of a certain type
|
||||
# in the bound_vertices which should be bound implicitly
|
||||
# (by a src/tgt op), that is not bound. Happens when implicit
|
||||
# binding bounded a "wrong" vertex. We then need to return False
|
||||
# (happens by skipping the for loop, since we loop over [])
|
||||
for vertex in self.bound_vertices.get(op[1], []):
|
||||
for edge in vertex.incoming_edges:
|
||||
if edge.type == op[2]:
|
||||
if matchOP(edge, self.bound_edges, ops, index):
|
||||
return True
|
||||
# if every not-yet-bound elem fails, backtrack
|
||||
return False
|
||||
|
||||
elif op[0] == PRIM_OP.out: # out(v, e): bind outgoing edge e of a bound vertex v
|
||||
# Return False if we expect an element to be bound that is not
|
||||
# bound (for the same reason as the inc op).
|
||||
for vertex in self.bound_vertices.get(op[1], []):
|
||||
for edge in vertex.outgoing_edges:
|
||||
if edge.type == op[2]:
|
||||
if matchOP(edge, self.bound_edges, ops, index):
|
||||
return True
|
||||
# if every not-yet-bound elem fails, backtrack
|
||||
return False
|
||||
else:
|
||||
raise TypeError('Unknown PRIM_OP type')
|
||||
|
||||
# try and match all (primitive) operations from the SearchPlan
|
||||
matchAllOP(SP)
|
||||
|
||||
# Either nothing is found, or we found an occurrence,
|
||||
# it is impossible to have a partially matched occurrence
|
||||
for key, bound_elems in self.bound_vertices.items():
|
||||
if len(bound_elems) == 0:
|
||||
# The pattern does not exist in the Graph
|
||||
return None
|
||||
else:
|
||||
# We found a pattern
|
||||
return self.result
|
||||
|
||||
|
||||
def createAdjacencyMatrixMap(self, graph):
|
||||
def createAdjacencyMatrixMap(self, graph, pattern):
|
||||
"""
|
||||
Return adjacency matrix and the order of the vertices.
|
||||
"""
|
||||
|
|
@ -380,7 +205,7 @@ class PatternMatching(object):
|
|||
if self.optimize:
|
||||
# insert only the vertices from the graph which have a type
|
||||
# that is present in the pattern
|
||||
for vertex_type, _ in self.pattern.vertices.items():
|
||||
for vertex_type, _ in pattern.vertices.items():
|
||||
graph_vertices = graph.vertices.get(vertex_type)
|
||||
if graph_vertices != None:
|
||||
allVertices.extend(graph_vertices)
|
||||
|
|
@ -419,255 +244,7 @@ class PatternMatching(object):
|
|||
|
||||
return AM, vertices_order
|
||||
|
||||
def matchUllmann(self):
|
||||
"""
|
||||
Find an occurrence of the pattern in the Graph
|
||||
by using Ullmann for solving the Constraint Satisfaction Problem (CSP).
|
||||
"""
|
||||
|
||||
def createM_star(h, p):
|
||||
"""
|
||||
Create M*[v, w] = 1 if deg(v) <= deg(w), for v in V_P, w in V_H
|
||||
= 0 otherwise
|
||||
|
||||
h and p are given to ensure correct order.
|
||||
"""
|
||||
m = [] # [[..], ...]
|
||||
for p_vertex in p:
|
||||
row = []
|
||||
for g_vertex in h:
|
||||
# for the degree function, we choose to look at the
|
||||
# outgoing edges AND the incoming edges
|
||||
# (one might prefer to use only one of them)
|
||||
if self.optimize:
|
||||
# also check if type matches
|
||||
if p_vertex.type != g_vertex.type:
|
||||
row.append(False)
|
||||
continue
|
||||
row.append( len(p_vertex.incoming_edges) <=
|
||||
len(g_vertex.incoming_edges) and
|
||||
len(p_vertex.outgoing_edges) <=
|
||||
len(g_vertex.outgoing_edges))
|
||||
m.append(row)
|
||||
|
||||
return m
|
||||
|
||||
def createDecreasingOrder(h):
|
||||
"""
|
||||
It turns out that the more edges a vertex has, the sooner it will
|
||||
fail in matching the pattern. For efficiency reasons, we want it
|
||||
to fail as fast as possible.
|
||||
"""
|
||||
order = [] # [(value, index), ...]
|
||||
index = 0
|
||||
for g_vertex in h:
|
||||
order.append(( len(g_vertex.outgoing_edges) +
|
||||
len(g_vertex.outgoing_edges), index))
|
||||
index += 1
|
||||
|
||||
order.sort(key = lambda elem: elem[0])
|
||||
# sort and only return the indices (which specify the order)
|
||||
return [index for (_, index) in order]
|
||||
|
||||
def propConnected(M, H, P, h, p):
|
||||
"""
|
||||
Checks if the vertices represented in M are isomorphic to P and if
|
||||
they can be matched onto the pattern graph.
|
||||
"""
|
||||
print(M, H, P, h, p)
|
||||
# P_candi = np.dot(M, np.transpose(np.dot(M, H)))
|
||||
|
||||
|
||||
"""
|
||||
# If we do not aply the refineM function, we will want to check if
|
||||
# this succeeds, as it checks for isomorphism.
|
||||
# If we apply the refineM function, it is garanteed to be isomorphic.
|
||||
|
||||
index_column = 0
|
||||
for row in P_candi:
|
||||
index_row = 0
|
||||
for item in row:
|
||||
# for all i,j: P[i, j] = 1 : M(MH)^T [j, i] = 1
|
||||
# (not the other way around)
|
||||
# (return False when item is 0 and P[i,j] is 1)
|
||||
if item < P[index_row][index_column]:
|
||||
return False
|
||||
index_row += 1
|
||||
index_column += 1
|
||||
"""
|
||||
|
||||
vertices = {}
|
||||
index_column = 0
|
||||
for row in M:
|
||||
index_row = 0
|
||||
for item in row:
|
||||
# there should only be one item per row
|
||||
if item:
|
||||
vertex = h[index_row]
|
||||
vertices.setdefault(vertex.type, set()).add(vertex)
|
||||
break
|
||||
index_row += 1
|
||||
index_column += 1
|
||||
|
||||
self.result = self.matchNaive(vertices=vertices, edges=self.graph.edges)
|
||||
return self.result != None
|
||||
|
||||
def refineM(M, H, P, h, pp):
|
||||
"""
|
||||
Refine M, for every vertex from the pattern, check if each possible
|
||||
matching (candidate) his neighbours can also be matched. (M's column
|
||||
represents vertices from P, and the row represents its candidate.)
|
||||
If this is not possible set M[i,j] to false, refining/reducing the
|
||||
search space.
|
||||
"""
|
||||
any_changes=True
|
||||
while any_changes:
|
||||
any_changes = False
|
||||
# for all vertices from the pattern
|
||||
for i in range(0, len(P)): # P is a nxn-matrix
|
||||
# for all its possible assignments
|
||||
for j in range(0, len(H[0])):
|
||||
# if bound vertex of P, check if all neighbours are matchable
|
||||
if M[i][j]:
|
||||
# for all neighbours of this pattern vertex
|
||||
for k in range(0, len(P)):
|
||||
# if it is a neighbour (from outgoing edges)
|
||||
if P[i][k]:
|
||||
match = False
|
||||
for p in range(0, len(H[0])):
|
||||
# check if we can match a candidate neighbour
|
||||
# (from M* to the graph (H))
|
||||
if M[k][p] and H[j][p]:
|
||||
if self.optimize:
|
||||
# also check correct type
|
||||
if pp[k].type != h[p].type:
|
||||
continue
|
||||
match = True
|
||||
break
|
||||
if not match:
|
||||
M[i][j] = False
|
||||
any_changes = True
|
||||
|
||||
# if it is a neighbour (from incoming edges)
|
||||
if P[k][i]:
|
||||
match = False
|
||||
for p in range(0, len(H[0])):
|
||||
# check if we can match a candidate neighbour
|
||||
# (from M* to the graph (H))
|
||||
if M[k][p] and H[p][j]:
|
||||
if self.optimize:
|
||||
# also check correct type
|
||||
if pp[i].type != h[j].type:
|
||||
continue
|
||||
match = True
|
||||
break
|
||||
if not match:
|
||||
M[i][j] = False
|
||||
any_changes = True
|
||||
|
||||
def findM(M_star, M, order, H, P, h, p, index_M=0):
|
||||
"""
|
||||
Find an isomorphic mapping for the vertices of P to H.
|
||||
This mapping is represented by a matrix M if,
|
||||
and only if M(MH)^T = P^T.
|
||||
"""
|
||||
# We are at the end, we found a candidate.
|
||||
# Remember that we are at the end, but first check if there is
|
||||
# a row with only False, if so, we do not need to check if it is
|
||||
# properly connected.
|
||||
check_prop = False
|
||||
if index_M == len(M):
|
||||
check_prop = True
|
||||
index_M -= 1
|
||||
|
||||
# we need to refer to this row
|
||||
old_row = M_star[index_M]
|
||||
# previous rows (these are sparse, 1 per row, save only its position)
|
||||
prev_pos = []
|
||||
for i in range(0, index_M):
|
||||
row = M[i]
|
||||
only_false = True
|
||||
for j in range(0, len(old_row)):
|
||||
if row[j]:
|
||||
only_false = False
|
||||
prev_pos.append(j)
|
||||
break
|
||||
if only_false:
|
||||
# check if a row with only False occurs,
|
||||
# if so, we will not find an occurrence
|
||||
return False
|
||||
|
||||
# We are at the end, we found a candidate.
|
||||
if check_prop:
|
||||
index_M += 1
|
||||
return propConnected(M, H, P, h, p)
|
||||
|
||||
M[index_M] = [False] * len(old_row)
|
||||
index_order = 0
|
||||
for index_order in range(0, len(order)):
|
||||
index_row = order[index_order]
|
||||
# put previous True back on False
|
||||
if index_order > 0:
|
||||
M[index_M][order[index_order - 1]] = False
|
||||
|
||||
if old_row[index_row]:
|
||||
M[index_M][index_row] = True
|
||||
|
||||
findMPart = True
|
||||
# 1 0 0 Assume 3rd round, and we select x,
|
||||
# 0 1 0 no element at the same position in the row,
|
||||
# 0 x 0 of the elements above itself in the same
|
||||
# column may be 1. In the example it is, then try
|
||||
# selecting an other element.
|
||||
for index_column in range(0, index_M):
|
||||
if M[index_column][index_row]:
|
||||
findMPart = False
|
||||
break
|
||||
|
||||
if not findMPart:
|
||||
continue
|
||||
|
||||
refineM(M, H, P, h, p)
|
||||
|
||||
if findM(M_star, M, order, H, P, h, p, index_M + 1):
|
||||
return True
|
||||
|
||||
# reset previous rows their True's
|
||||
prev_row = 0
|
||||
for pos in prev_pos:
|
||||
M[prev_row][pos] = True
|
||||
prev_row += 1
|
||||
# reset rows below current row
|
||||
for index_column in range(index_M + 1, len(M)):
|
||||
# deep copy, we do not want to just copy pointer to array/list
|
||||
M[index_column] = M_star[index_column][:]
|
||||
|
||||
# reset current row (the rest is already reset)
|
||||
M[index_M] = M_star[index_M][:]
|
||||
|
||||
return False
|
||||
|
||||
# create adjacency matrix of the graph
|
||||
H, h = self.createAdjacencyMatrixMap(self.graph)
|
||||
# create adjacency matrix of the pattern
|
||||
P, p = self.createAdjacencyMatrixMap(self.pattern)
|
||||
# create M* binary matrix
|
||||
M_star = createM_star(h, p)
|
||||
|
||||
# create the order we will use later on
|
||||
order = createDecreasingOrder(h)
|
||||
# deepcopy M_s into M
|
||||
M = [row[:] for row in M_star]
|
||||
|
||||
if self.optimize:
|
||||
refineM(M, H, P, h, p)
|
||||
|
||||
findM(M_star, M, order, H, P, h, p)
|
||||
|
||||
return self.result
|
||||
|
||||
|
||||
def matchVF2(self):
|
||||
def matchVF2(self, pattern, graph):
|
||||
|
||||
class VF2_Obj(object):
|
||||
"""
|
||||
|
|
@ -784,7 +361,7 @@ class PatternMatching(object):
|
|||
# take a coding shortcut,
|
||||
# use self.matchNaive function to see if it is feasible.
|
||||
# this way, we immediately test the semantic attributes
|
||||
if not self.matchNaive(pattern_vertices=neighbours_pattern, vertices=neighbours_graph, edges=self.graph.edges):
|
||||
if not self.matchNaive(pattern, pattern_vertices=neighbours_pattern, vertices=neighbours_graph, edges=graph.edges):
|
||||
return False
|
||||
|
||||
# count ext_edges from core_graph to adjacent vertices and
|
||||
|
|
@ -878,9 +455,11 @@ class PatternMatching(object):
|
|||
# print(self.alreadyVisited)
|
||||
|
||||
self.indent += 1
|
||||
if findM(H, P, h, p, VF2_obj, index_M + 1):
|
||||
matched = yield from findM(H, P, h, p, VF2_obj, index_M + 1)
|
||||
if matched:
|
||||
# return True
|
||||
print(self.indent*" ","found match", len(self.results), ", continuing...")
|
||||
# print(self.indent*" ","found match", len(self.results), ", continuing...")
|
||||
pass
|
||||
self.indent -= 1
|
||||
|
||||
if True:
|
||||
|
|
@ -917,7 +496,8 @@ class PatternMatching(object):
|
|||
if N_pattern[m] == -1 or VF2_obj.core_pattern[m]:
|
||||
continue
|
||||
print(self.indent*" "," m:", m)
|
||||
if matchPhase(H, P, h, p, index_M, VF2_obj, n, m):
|
||||
matched = yield from matchPhase(H, P, h, p, index_M, VF2_obj, n, m)
|
||||
if matched:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
|
@ -941,7 +521,8 @@ class PatternMatching(object):
|
|||
# print(self.indent*" "," skipping")
|
||||
continue
|
||||
print(self.indent*" "," m:", m)
|
||||
if matchPhase(H, P, h, p, index_M, VF2_obj, n, m):
|
||||
matched = yield from matchPhase(H, P, h, p, index_M, VF2_obj, n, m)
|
||||
if matched:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
|
@ -955,50 +536,50 @@ class PatternMatching(object):
|
|||
for vertex_bound, _ in VF2_obj.mapping.items():
|
||||
bound_graph_vertices.setdefault(vertex_bound.type, set()).add(vertex_bound)
|
||||
|
||||
self.result = self.matchNaive(vertices=bound_graph_vertices, edges=self.graph.edges)
|
||||
if self.result != None:
|
||||
self.results.append(self.result)
|
||||
return self.result != None
|
||||
result = self.matchNaive(pattern, vertices=bound_graph_vertices, edges=graph.edges)
|
||||
if result != None:
|
||||
yield result
|
||||
return result != None
|
||||
|
||||
if index_M > 0:
|
||||
# try the candidates in the preferred order
|
||||
# first try the adjacent vertices connected via the outgoing edges.
|
||||
print(self.indent*" ","preferred L1")
|
||||
if preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_out_graph, VF2_obj.N_out_pattern):
|
||||
matched = yield from preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_out_graph, VF2_obj.N_out_pattern)
|
||||
if matched:
|
||||
return True
|
||||
|
||||
print(self.indent*" ","preferred L2")
|
||||
# then try the adjacent vertices connected via the incoming edges.
|
||||
if preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_inc_graph, VF2_obj.N_inc_pattern):
|
||||
matched = yield from preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_inc_graph, VF2_obj.N_inc_pattern)
|
||||
if matched:
|
||||
return True
|
||||
|
||||
print(self.indent*" ","leastPreferred")
|
||||
# and lastly, try the vertices not connected to the currently matched vertices
|
||||
if leastPreferred(H, P, h, p, index_M, VF2_obj):
|
||||
matched = yield from leastPreferred(H, P, h, p, index_M, VF2_obj)
|
||||
if matched:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
print("graph:", graph)
|
||||
|
||||
# create adjacency matrix of the graph
|
||||
H, h = self.createAdjacencyMatrixMap(self.graph)
|
||||
print("adjacency:", H)
|
||||
print("h:", len(h))
|
||||
H, h = self.createAdjacencyMatrixMap(graph, pattern)
|
||||
# create adjacency matrix of the pattern
|
||||
P, p = self.createAdjacencyMatrixMap(self.pattern)
|
||||
P, p = self.createAdjacencyMatrixMap(pattern, pattern)
|
||||
|
||||
VF2_obj = VF2_Obj(len(h), len(p))
|
||||
|
||||
self.indent = 0
|
||||
|
||||
# Only for debugging:
|
||||
self.indent = 0
|
||||
self.reverseMapH = { h[i] : i for i in range(len(h))}
|
||||
self.reverseMapP = { p[i] : i for i in range(len(p))}
|
||||
|
||||
# Set of partial matches already explored - prevents us from producing the same match multiple times
|
||||
# Encoded as a mapping from match size to the partial match
|
||||
self.alreadyVisited = set()
|
||||
|
||||
findM(H, P, h, p, VF2_obj)
|
||||
|
||||
return self.results
|
||||
yield from findM(H, P, h, p, VF2_obj)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue