From a0ccb3f35d3a9143e26679106c39c5c3f5207efa Mon Sep 17 00:00:00 2001 From: Joeri Exelmans Date: Wed, 4 Sep 2024 17:05:24 +0200 Subject: [PATCH] work on my matcher --- pattern_matching/matcher.py | 108 ++++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 48 deletions(-) diff --git a/pattern_matching/matcher.py b/pattern_matching/matcher.py index ce9a010..8817a9f 100644 --- a/pattern_matching/matcher.py +++ b/pattern_matching/matcher.py @@ -1,3 +1,6 @@ +# This module contains a VF2-inspired graph matching algorithm +# Author: Joeri Exelmans + import itertools class Graph: @@ -39,6 +42,13 @@ class MatcherState: # will always try to grow mapping via outgoing/incoming edges of this pair before attempting other non-connected vertices self.boundary = None + @staticmethod + def make_initial(host, guest): + state = MatcherState() + state.h_unmatched_vtxs = host.vtxs + state.g_unmatched_vtxs = guest.vtxs + return state + # Grow the match set (creating a new copy) def grow_edge(self, host_edge, guest_edge): new_state = MatcherState() @@ -87,32 +97,31 @@ class MatcherVF2: self.guest = guest self.compare_fn = compare_fn - def match(self, state = None, already_visited = set(), indent=0): - print(" "*indent, "match") - if state == None: - state = MatcherState() - state.h_unmatched_vtxs = self.host.vtxs - state.g_unmatched_vtxs = self.guest.vtxs + def match(self): + yield from self._match( + state=MatcherState.make_initial(self.host, self.guest), + already_visited=set()) + + def _match(self, state, already_visited, indent=0): + # print(" "*indent, "match") def visit_for_first_time(state): hashable_state = state.make_hashable() if hashable_state in already_visited: - print(" "*indent, 'S K I P - A L R E A D Y V I S I T E D S T A T E') return False already_visited.add(hashable_state) - # print('visisted', len(already_visited), 'states') return True if len(state.mapping_edges) == len(self.guest.edges): - print(" "*indent, "GOT MATCH:") - print(" "*indent, " ", state.mapping_vtxs) - print(" "*indent, " ", state.mapping_edges) + # print(" "*indent, "GOT MATCH:") + # print(" "*indent, " ", state.mapping_vtxs) + # print(" "*indent, " ", state.mapping_edges) yield state return def attempt_grow(direction, indent): - print(" "*indent, 'attempt_grow', direction) + # print(" "*indent, 'attempt_grow', direction) if state.boundary != None: g_vtx, h_vtx = state.boundary for g_candidate_edge in getattr(g_vtx, direction): @@ -126,7 +135,7 @@ class MatcherVF2: h_candidate_vtx = h_candidate_edge.tgt new_state = state.grow_edge(h_candidate_edge, g_candidate_edge) if visit_for_first_time(new_state): - print(" "*indent, 'grow edge', g_candidate_edge, ':', h_candidate_edge) + # print(" "*indent, 'grow edge', g_candidate_edge, ':', h_candidate_edge) yield from attempt_match_vtxs( new_state, g_candidate_vtx, @@ -134,12 +143,11 @@ class MatcherVF2: indent+1) def attempt_match_vtxs(state, g_candidate_vtx, h_candidate_vtx, indent): - # It seems faster (benchmarked) to generate the new_state, even if we still have to check if it's a valid (partial) match, so we can put it in already_visited new_state = state.grow_vtx( h_candidate_vtx, g_candidate_vtx) if visit_for_first_time(new_state): - print(" "*indent, 'attempt_match_vtxs') + # print(" "*indent, 'attempt_match_vtxs') if h_candidate_vtx in state.r_mapping_vtxs: if state.r_mapping_vtxs[h_candidate_vtx] != g_candidate_vtx: return # host vtx is already mapped but doesn't match guest vtx @@ -153,48 +161,52 @@ class MatcherVF2: return if not self.compare_fn(h_candidate_vtx.value, g_candidate_vtx.value): return - print(" "*indent, 'grow vtx', g_candidate_vtx, ':', h_candidate_vtx) - yield from self.match(new_state, already_visited, indent+1) + # print(" "*indent, 'grow vtx', g_candidate_vtx, ':', h_candidate_vtx) + yield from self._match(new_state, already_visited, indent+1) - print(" "*indent, 'preferred...') + # print(" "*indent, 'preferred...') yield from attempt_grow('outgoing', indent+1) yield from attempt_grow('incoming', indent+1) - print(" "*indent, 'least preferred...') + # print(" "*indent, 'least preferred...') for g_candidate_vtx in state.g_unmatched_vtxs: for h_candidate_vtx in state.h_unmatched_vtxs: yield from attempt_match_vtxs(state, g_candidate_vtx, h_candidate_vtx, indent+1) - if indent == 0: - print('visited', len(already_visited), 'states total') + # if indent == 0: + # print('visited', len(already_visited), 'states total') -host = Graph() -host.vtxs = [Vertex(0), Vertex(1), Vertex(2)] -host.edges = [ - Edge(host.vtxs[0], host.vtxs[1]), - Edge(host.vtxs[1], host.vtxs[2]), - Edge(host.vtxs[2], host.vtxs[0]), -] -guest = Graph() -guest.vtxs = [Vertex('src'), Vertex('tgt')] -guest.edges = [ - Edge(guest.vtxs[0], guest.vtxs[1]), -] +# demo time... +if __name__ == "__main__": + host = Graph() + host.vtxs = [Vertex(0), Vertex(1), Vertex(2)] + host.edges = [ + Edge(host.vtxs[0], host.vtxs[1]), + Edge(host.vtxs[1], host.vtxs[2]), + Edge(host.vtxs[2], host.vtxs[0]), + ] -m = MatcherVF2(host, guest, lambda hv, gv: True) -import time -durations = 0 -for n in range(100000): - time_start = time.perf_counter_ns() - matches = [mm for mm in m.match()] - print("found", len(matches), "matches") - time_end = time.perf_counter_ns() - time_duration = time_end - time_start - durations += time_duration + guest = Graph() + guest.vtxs = [Vertex('src'), Vertex('tgt')] + guest.edges = [ + Edge(guest.vtxs[0], guest.vtxs[1]), + ] -print(f'Took {durations/1000000} ms') -for mm in matches: - print("match:") - print(" ", mm.mapping_vtxs) - print(" ", mm.mapping_edges) \ No newline at end of file + m = MatcherVF2(host, guest, lambda hv, gv: True) + import time + durations = 0 + iterations = 2000 + for n in range(iterations): + time_start = time.perf_counter_ns() + matches = [mm for mm in m.match()] + # print("found", len(matches), "matches") + time_end = time.perf_counter_ns() + time_duration = time_end - time_start + durations += time_duration + + print(f'{iterations} iterations, took {durations/1000000:.3f} ms, {durations/iterations/1000000:.3f} ms per iteration') + for mm in matches: + print("match:") + print(" ", mm.mapping_vtxs) + print(" ", mm.mapping_edges) \ No newline at end of file