diff --git a/pattern_matching/main.py b/pattern_matching/main.py index 604794a..8bd5e5e 100644 --- a/pattern_matching/main.py +++ b/pattern_matching/main.py @@ -31,10 +31,10 @@ if __name__ == '__main__': """ The main function called when running from the command line. """ - nr_of_vertices = 10 - nr_of_diff_types_v = 0 - nr_of_edges = 20 - nr_of_diff_types_e = 0 + nr_of_vertices = 50 + nr_of_diff_types_v = 2 + nr_of_edges = 150 + nr_of_diff_types_e = 2 dv = [random.randint(0, nr_of_diff_types_v) for _ in range(nr_of_vertices)] de = [random.randint(0, nr_of_diff_types_e) for _ in range(nr_of_edges)] @@ -47,10 +47,10 @@ if __name__ == '__main__': # dc_inc = [ 0,25,18,47,22,25,16,45,38,25,5,45,15,44,17,46,6,17,35,8,16,29,48,47,25,34,4,20,24,1,47,44,8,25,32,3,16,6,33,21,6,13,41,10,17,25,21,33,31,30,5,4,45,26,16,42,12,25,29,3,32,30,14,26,11,13,7,13,3,43,43,22,48,37,20,28,15,40,19,33,43,16,49,36,11,25,9,42,3,22,16,40,42,44,27,30,1,18,10,35,19,6,9,43,37,38,45,19,41,14,37,45,0,31,29,31,24,20,44,46,8,45,43,3,38,38,35,12,19,45,7,34,20,28,12,17,45,17,35,49,20,21,49,1,35,38,38,36,33,30 ] # dc_out = [ 9,2,49,49,37,33,16,21,5,46,4,15,9,6,14,22,16,33,23,21,15,31,37,23,47,3,30,26,35,9,29,21,39,32,22,43,5,9,41,30,31,30,37,33,31,34,23,22,34,26,44,36,38,33,48,5,9,34,13,7,48,41,43,26,26,7,12,6,12,28,22,8,29,22,24,27,16,4,31,41,32,15,19,20,38,0,26,18,43,46,40,17,29,14,34,14,32,17,32,47,16,45,7,4,35,22,42,11,38,2,0,29,4,38,17,44,9,23,5,10,31,17,1,11,16,5,37,27,35,32,45,16,18,1,14,4,42,24,43,31,21,38,6,34,39,46,20,1,38,47 ] - dv = [0, 1, 0, 1, 0] - de = [0, 0, 0] - dc_inc = [0, 2, 4] - dc_out = [1, 3, 3] + # dv = [0, 1, 0, 1, 0] + # de = [0, 0, 0] + # dc_inc = [0, 2, 4] + # dc_out = [1, 3, 3] gg = GraphGenerator(dv, de, dc_inc, dc_out, debug) @@ -73,7 +73,7 @@ if __name__ == '__main__': #PM = PatternMatching('SP') # PM = PatternMatching('Ullmann') PM = PatternMatching('VF2') - matches = PM.match(pattern, graph) + matches = [m for m in PM.matchVF2(pattern, graph)] print("found", len(matches), "matches:", matches) # regenerate graph, to show matched pattern diff --git a/pattern_matching/patternMatching.py b/pattern_matching/patternMatching.py index c168ebd..914b60c 100644 --- a/pattern_matching/patternMatching.py +++ b/pattern_matching/patternMatching.py @@ -28,62 +28,17 @@ class PatternMatching(object): """ Returns an occurrence of a given pattern from the given Graph """ - def __init__(self, matching_type='SP', optimize=True): + def __init__(self, optimize=True): # store the type of matching we want to use - self.type = matching_type - self.bound_vertices = {} # saves the currently bound vertices - self.bound_edges = {} # saves the currently bound edges - self.result = None - self.previous = [] self.optimize = optimize - self.results = [] - def match(self, pattern, graph): - """ - Call this function to find an occurrence of the pattern in the (host) graph. - Setting the type of matching (naive, SP, Ullmann, VF2) is done by - setting self.matching_type to its name. - """ - if not (isinstance(pattern, SearchGraph) or isinstance(pattern, Graph)): - raise TypeError('pattern must be a SearchGraph or Graph') - if not (isinstance(graph, SearchGraph) or isinstance(graph, Graph)): - raise TypeError('graph must be a SearchGraph or Graph') - - self.pattern = pattern - self.graph = graph - - if self.type == 'naive': - result = self.matchNaive(vertices=graph.vertices, edges=graph.edges) - elif self.type == 'SP': - result = self.matchSP() - elif self.type == 'Ullmann': - result = self.matchUllmann() - elif self.type == 'VF2': - result = self.matchVF2() - else: - raise ValueError('Unknown type for matching') - - # cleanup - self.pattern = None - self.graph = None - self.bound_vertices = {} - self.bound_edges = {} - self.result = None - self.results = [] - - return result - - def matchNaive(self, pattern_vertices=None, vertices=None, edges=None): + def matchNaive(self, pattern, vertices, edges, pattern_vertices=None): """ Try to find an occurrence of the pattern in the Graph naively. """ # allow call with specific arguments if pattern_vertices == None: - pattern_vertices = self.pattern.vertices - if vertices == None: - vertices = self.bound_vertices - if edges == None: - edges = self.bound_edges + pattern_vertices = pattern.vertices def visitEdge(pattern_vertices, p_edge, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges): """ @@ -238,137 +193,7 @@ class PatternMatching(object): else: return None - def matchSP(self): - """ - Find an occurrence of the pattern in the Graph - by using the generated SearchPlan. - """ - if isinstance(self.graph, Graph): - sg = SearchGraph(self.graph) - elif isinstance(self.graph, SearchGraph): - sg = self.graph - else: - raise TypeError('Pattern matching with a SearchPlan must be given a Graph or SearchGraph') - - pg = PlanGraph(self.pattern) - SP = pg.Edmonds(sg) - - self.fileIndex = 0 - - def propConnected(): - """ - Checks if the found vertices and edges can be uniquely matched - onto the pattern graph. - """ - self.result = self.matchNaive() - return self.result != None - - def matchOP(elem, bound, ops, index): - """ - Execute a primitive operation, return whether ot not it succeeded. - """ - type_bound = bound.setdefault(elem.type, set()) - # if elem not yet bound, bind it, and try matching the next operations - if elem not in type_bound: - type_bound.add(elem) - # if matching of next operation failed, try with a different elem - if matchAllOP(ops, index+1): - return True - else: - type_bound.remove(elem) - return False - - def matchAllOP(ops, index=0): - """ - Try to match an occurrence of the pattern in the graph, - by recursivly ,atching elements that adhere to the SearchPlan - """ - # if we matched all elements, - # check if the bound elements are properly connected - if index == len(ops): - return propConnected() - - op = ops[index] - - if op[0] == PRIM_OP.lkp: # lkp(elem) - if op[2]: # lookup a vertex - # If the graph does not have a vertex of the same vertex - # type, we'll have to return False, happens if elems == []. - elems = self.graph.vertices.get(op[1], []) - bound = self.bound_vertices - else: # loopup an edge - # If the graph does not have an edge of the same edge - # type, we'll have to return False, happens if elems == []. - elems = self.graph.edges.get(op[1], []) - bound = self.bound_edges - - # if elems == [], we'll skip the loop and return False - for elem in elems: - if matchOP(elem, bound, ops, index): - return True - # if all not bound elems fails, backtrack - return False - - elif op[0] == PRIM_OP.src: # src(e): bind src of a bound edge e - # Should always succeed, as the edge must be already bound - # (there should be at least one elem in self.bound_edges[op[1]]). - for edge in self.bound_edges[op[1]]: - if matchOP(edge.src, self.bound_vertices, ops, index): - return True - # if all not bound elems fails, backtrack - return False - - elif op[0] == PRIM_OP.tgt: # tgt(e): bind tgt of a bound edge e - # Should always succeed, as the edge must be already bound - # (there should be at least one elem in self.bound_edges[op[1]]). - for edge in self.bound_edges[op[1]]: - if matchOP(edge.tgt, self.bound_vertices, ops, index): - return True - # if all not bound elems fails, backtrack - return False - - elif op[0] == PRIM_OP.inc: # in(v, e): bind incoming edge e of a bound vertex v - # It's possible we will try to find a vertex of a certain type - # in the bound_vertices which should be bound implicitly - # (by a src/tgt op), that is not bound. Happens when implicit - # binding bounded a "wrong" vertex. We then need to return False - # (happens by skiping for loop by looping over []) - for vertex in self.bound_vertices.get(op[1], []): - for edge in vertex.incoming_edges: - if edge.type == op[2]: - if matchOP(edge, self.bound_edges, ops, index): - return True - # if all not bound elems fails, backtrack - return False - - elif op[0] == PRIM_OP.out: # out(v, e): bind outgoing edge e of a bound vertex v - # Return False if we expect an element to be bound that is not - # bound (for the same reason as the inc op). - for vertex in self.bound_vertices.get(op[1], []): - for edge in vertex.outgoing_edges: - if edge.type == op[2]: - if matchOP(edge, self.bound_edges, ops, index): - return True - # if all not bound elems fails, backtrack - return False - else: - raise TypeError('Unknown PRIM_OP type') - - # try and match all (primitive) operations from the SearchPlan - matchAllOP(SP) - - # Either nothing is found, or we found an occurrence, - # it is impossble to have a partionally matched occurrence - for key, bound_elems in self.bound_vertices.items(): - if len(bound_elems) == 0: - # The pattern does not exist in the Graph - return None - else: - # We found a pattern - return self.result - - - def createAdjacencyMatrixMap(self, graph): + def createAdjacencyMatrixMap(self, graph, pattern): """ Return adjacency matrix and the order of the vertices. """ @@ -380,7 +205,7 @@ class PatternMatching(object): if self.optimize: # insert only the vertices from the graph which have a type # that is present in the pattern - for vertex_type, _ in self.pattern.vertices.items(): + for vertex_type, _ in pattern.vertices.items(): graph_vertices = graph.vertices.get(vertex_type) if graph_vertices != None: allVertices.extend(graph_vertices) @@ -419,255 +244,7 @@ class PatternMatching(object): return AM, vertices_order - def matchUllmann(self): - """ - Find an occurrence of the pattern in the Graph - by using Ullmann for solving the Constraint Satisfaction Problem (CSP). - """ - - def createM_star(h, p): - """ - Create M*[v, w] = 1 if deg(v) <= deg(w), for v in V_P, w in V_H - = 0 otherwise - - M and P are given to ensure corect order. - """ - m = [] # [[..], ...] - for p_vertex in p: - row = [] - for g_vertex in h: - # for the degree function, we choose to look at the - # outgoing edges AND the incoming edges - # (one might prefer to use only one of them) - if self.optimize: - # also check if type matches - if p_vertex.type != g_vertex.type: - row.append(False) - continue - row.append( len(p_vertex.incoming_edges) <= - len(g_vertex.incoming_edges) and - len(p_vertex.outgoing_edges) <= - len(g_vertex.outgoing_edges)) - m.append(row) - - return m - - def createDecreasingOrder(h): - """ - It turns out that the more edges a vertex has, the sooner it will - fail in matching the pattern. For efficiency reasons, we want it - to fail as fast as possible. - """ - order = [] # [(value, index), ...] - index = 0 - for g_vertex in h: - order.append(( len(g_vertex.outgoing_edges) + - len(g_vertex.outgoing_edges), index)) - index += 1 - - order.sort(key = lambda elem: elem[0]) - # sort and only return the indices (which specify the order) - return [index for (_, index) in order] - - def propConnected(M, H, P, h, p): - """ - Checks if the vertices represented in M are isomorphic to P and if - they can be matched onto the pattern graph. - """ - print(M, H, P, h, p) - # P_candi = np.dot(M, np.transpose(np.dot(M, H))) - - - """ - # If we do not aply the refineM function, we will want to check if - # this succeeds, as it checks for isomorphism. - # If we apply the refineM function, it is garanteed to be isomorphic. - - index_column = 0 - for row in P_candi: - index_row = 0 - for item in row: - # for all i,j: P[i, j] = 1 : M(MH)^T [j, i] = 1 - # (not the other way around) - # (return False when item is 0 and P[i,j] is 1) - if item < P[index_row][index_column]: - return False - index_row += 1 - index_column += 1 - """ - - vertices = {} - index_column = 0 - for row in M: - index_row = 0 - for item in row: - # there should only be one item per row - if item: - vertex = h[index_row] - vertices.setdefault(vertex.type, set()).add(vertex) - break - index_row += 1 - index_column += 1 - - self.result = self.matchNaive(vertices=vertices, edges=self.graph.edges) - return self.result != None - - def refineM(M, H, P, h, pp): - """ - Refine M, for every vertex from the pattern, check if each possible - matching (candidate) his neighbours can also be matched. (M's column - represents vertices from P, and the row represents its candidate.) - If this is not possible set M[i,j] to false, refining/reducing the - search space. - """ - any_changes=True - while any_changes: - any_changes = False - # for all vertices from the pattern - for i in range(0, len(P)): # P is a nxn-matrix - # for all its possible assignments - for j in range(0, len(H[0])): - # if bound vertex of P, check if all neigbours are matchable - if M[i][j]: - # for all the pattern his neighbours - for k in range(0, len(P)): - # if it is a neighbour (from outgoing edges) - if P[i][k]: - match = False - for p in range(0, len(H[0])): - # check if we can match a candidate neighbour - # (from M* to to the graph (H)) - if M[k][p] and H[j][p]: - if self.optimize: - # also check correct type - if pp[k].type != h[p].type: - continue - match = True - break - if not match: - M[i][j] = False - any_changes = True - - # if it is a neighbour (from incoming edges) - if P[k][i]: - match = False - for p in range(0, len(H[0])): - # check if we can match a candidate neighbour - # (from M* to to the graph (H)) - if M[k][p] and H[p][j]: - if self.optimize: - # also check correct type - if pp[i].type != h[j].type: - continue - match = True - break - if not match: - M[i][j] = False - any_changes = True - - def findM(M_star, M, order, H, P, h, p, index_M=0): - """ - Find an isomorphic mapping for the vertices of P to H. - This mapping is represented by a matrix M if, - and only if M(MH)^T = P^T. - """ - # We are at the end, we found an candidate. - # Remember that we are at the end, bu first check if there is - # a row with ony False, if so, we do not need to check if it is - # properly connected. - check_prop = False - if index_M == len(M): - check_prop = True - index_M -= 1 - - # we need to refer to this row - old_row = M_star[index_M] - # previous rows (these are sparse, 1 per row, save only its position) - prev_pos = [] - for i in range(0, index_M): - row = M[i] - only_false = True - for j in range(0, len(old_row)): - if row[j]: - only_false = False - prev_pos.append(j) - break - if only_false: - # check if a row with only False occurs, - # if so, we will not find an occurence - return False - - # We are at the end, we found an candidate. - if check_prop: - index_M += 1 - return propConnected(M, H, P, h, p) - - M[index_M] = [False] * len(old_row) - index_order = 0 - for index_order in range(0, len(order)): - index_row = order[index_order] - # put previous True back on False - if index_order > 0: - M[index_M][order[index_order - 1]] = False - - if old_row[index_row]: - M[index_M][index_row] = True - - findMPart = True - # 1 0 0 Assume 3th round, and we select x, - # 0 1 0 no element at the same possition in the row, - # 0 x 0 of the elements above itselve in the same - # column may be 1. In the example it is, then try - # selecting an other element. - for index_column in range(0, index_M): - if M[index_column][index_row]: - findMPart = False - break - - if not findMPart: - continue - - refineM(M, H, P, h, p) - - if findM(M_star, M, order, H, P, h, p, index_M + 1): - return True - - # reset previous rows their True's - prev_row = 0 - for pos in prev_pos: - M[prev_row][pos] = True - prev_row += 1 - # reset rows below current row - for index_column in range(index_M + 1, len(M)): - # deep copy, we do not want to just copy pointer to array/list - M[index_column] = M_star[index_column][:] - - # reset current row (the rest is already reset) - M[index_M] = M_star[index_M][:] - - return False - - # create adjecency matrix of the graph - H, h = self.createAdjacencyMatrixMap(self.graph) - # create adjecency matrix of the pattern - P, p = self.createAdjacencyMatrixMap(self.pattern) - # create M* binary matrix - M_star = createM_star(h, p) - - # create the order we will use later on - order = createDecreasingOrder(h) - # deepcopy M_s into M - M = [row[:] for row in M_star] - - if self.optimize: - refineM(M, H, P, h, p) - - findM(M_star, M, order, H, P, h, p) - - return self.result - - - def matchVF2(self): + def matchVF2(self, pattern, graph): class VF2_Obj(object): """ @@ -784,7 +361,7 @@ class PatternMatching(object): # take a coding shortcut, # use self.matchNaive function to see if it is feasable. # this way, we immidiatly test the semantic attributes - if not self.matchNaive(pattern_vertices=neighbours_pattern, vertices=neighbours_graph, edges=self.graph.edges): + if not self.matchNaive(pattern, pattern_vertices=neighbours_pattern, vertices=neighbours_graph, edges=graph.edges): return False # count ext_edges from core_graph to a adjecent vertices and @@ -878,9 +455,11 @@ class PatternMatching(object): # print(self.alreadyVisited) self.indent += 1 - if findM(H, P, h, p, VF2_obj, index_M + 1): + matched = yield from findM(H, P, h, p, VF2_obj, index_M + 1) + if matched: # return True - print(self.indent*" ","found match", len(self.results), ", continuing...") + # print(self.indent*" ","found match", len(self.results), ", continuing...") + pass self.indent -= 1 if True: @@ -917,7 +496,8 @@ class PatternMatching(object): if N_pattern[m] == -1 or VF2_obj.core_pattern[m]: continue print(self.indent*" "," m:", m) - if matchPhase(H, P, h, p, index_M, VF2_obj, n, m): + matched = yield from matchPhase(H, P, h, p, index_M, VF2_obj, n, m) + if matched: return True return False @@ -941,7 +521,8 @@ class PatternMatching(object): # print(self.indent*" "," skipping") continue print(self.indent*" "," m:", m) - if matchPhase(H, P, h, p, index_M, VF2_obj, n, m): + matched = yield from matchPhase(H, P, h, p, index_M, VF2_obj, n, m) + if matched: return True return False @@ -955,50 +536,50 @@ class PatternMatching(object): for vertex_bound, _ in VF2_obj.mapping.items(): bound_graph_vertices.setdefault(vertex_bound.type, set()).add(vertex_bound) - self.result = self.matchNaive(vertices=bound_graph_vertices, edges=self.graph.edges) - if self.result != None: - self.results.append(self.result) - return self.result != None + result = self.matchNaive(pattern, vertices=bound_graph_vertices, edges=graph.edges) + if result != None: + yield result + return result != None if index_M > 0: # try the candidates is the preffered order # first try the adjacent vertices connected via the outgoing edges. print(self.indent*" ","preferred L1") - if preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_out_graph, VF2_obj.N_out_pattern): + matched = yield from preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_out_graph, VF2_obj.N_out_pattern) + if matched: return True print(self.indent*" ","preferred L2") # then try the adjacent vertices connected via the incoming edges. - if preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_inc_graph, VF2_obj.N_inc_pattern): + matched = yield from preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_inc_graph, VF2_obj.N_inc_pattern) + if matched: return True print(self.indent*" ","leastPreferred") # and lastly, try the vertices not connected to the currently matched vertices - if leastPreferred(H, P, h, p, index_M, VF2_obj): + matched = yield from leastPreferred(H, P, h, p, index_M, VF2_obj) + if matched: return True return False + print("graph:", graph) + # create adjecency matrix of the graph - H, h = self.createAdjacencyMatrixMap(self.graph) - print("adjacency:", H) - print("h:", len(h)) + H, h = self.createAdjacencyMatrixMap(graph, pattern) # create adjecency matrix of the pattern - P, p = self.createAdjacencyMatrixMap(self.pattern) + P, p = self.createAdjacencyMatrixMap(pattern, pattern) VF2_obj = VF2_Obj(len(h), len(p)) - self.indent = 0 - # Only for debugging: + self.indent = 0 self.reverseMapH = { h[i] : i for i in range(len(h))} self.reverseMapP = { p[i] : i for i in range(len(p))} # Set of partial matches already explored - prevents us from producing the same match multiple times # Encoded as a mapping from match size to the partial match self.alreadyVisited = set() - - findM(H, P, h, p, VF2_obj) - return self.results \ No newline at end of file + yield from findM(H, P, h, p, VF2_obj)