diff --git a/examples/performance/runner.py b/examples/performance/runner.py index bf8afc6..bda2f99 100644 --- a/examples/performance/runner.py +++ b/examples/performance/runner.py @@ -1,3 +1,4 @@ +# Artificial model transformation thingy to measure performance # Model transformation experiment from state.devstate import DevState @@ -31,48 +32,77 @@ if __name__ == "__main__": Many:Class ManyB:Class Other:Class - OtherB:Class - OtherC:Class ass:Association(Many->ManyB) + ass2:Association(Rare->Many) """ dsl_mm_id = parser.parse_od(state, dsl_mm_cs, mm=scd_mmm_id) dsl_m_cs = """ rare:Rare + many0:Many + many0B:Many many1:Many + many1B:Many many2:Many + many2B:Many many3:Many + many3B:Many many4:Many + many4B:Many + many5:ManyB many6:ManyB many7:ManyB many8:ManyB + many50:ManyB + many60:ManyB + many70:ManyB + many80:ManyB + many51:ManyB + many61:ManyB + many71:ManyB + many81:ManyB + many501:ManyB + many601:ManyB + many701:ManyB + many801:ManyB + many5Z:ManyB + many6Z:ManyB + many7Z:ManyB + many8Z:ManyB + many50Z:ManyB + many60Z:ManyB + many70Z:ManyB + many80Z:ManyB + many51Z:ManyB + many61Z:ManyB + many71Z:ManyB + many81Z:ManyB + many501Z:ManyB + many601Z:ManyB + many701Z:ManyB + many801Z:ManyB + + Other0:Other + Other1:Other + Other2:Other + Other3:Other + Other0B:Other + Other1B:Other + Other2B:Other + Other3B:Other + Other0C:Other + Other1C:Other + Other2C:Other + Other3C:Other :ass (many2->many6) :ass (many3->many8) - # other0:Other - # other1:OtherC - # other2:Other - # other3:Other - # other4:Other - # other5:OtherB - # other6:OtherB - # other7:OtherB - # other8:OtherB - # other9:OtherB - # other10:OtherB - # other11:OtherC - # other12:OtherC - # other13:OtherC - # other14:OtherC - - # other1099:OtherB - # other1199:OtherC - # other1299:OtherC - # other1399:OtherC - # other1499:OtherC + :ass2 (rare -> many0) + :ass2 (rare -> many1) + :ass2 (rare -> many2) """ dsl_m_id = parser.parse_od(state, dsl_m_cs, mm=dsl_mm_id) @@ -86,17 +116,22 @@ if __name__ == "__main__": # TODO: enable more powerful constraints pattern_cs = f""" # object to match - rare:{prefix}Rare {{ + rare:RAM_Rare {{ }} - many:{prefix}Many - manyB:{prefix}ManyB - manyB2:{prefix}ManyB + many:RAM_Many + manyB:RAM_ManyB + manyB2:RAM_ManyB + + :RAM_ass (many -> manyB) + :RAM_ass (many -> manyB2) + :RAM_ass2 (rare -> many) """ pattern_id = parser.parse_od(state, pattern_cs, mm=ramified_mm_id) with Timer("find all matches"): - matches = list(match_od(state, dsl_m_id, dsl_mm_id, pattern_id, ramified_mm_id)) + for i in range(100): + matches = list(match_od(state, dsl_m_id, dsl_mm_id, pattern_id, ramified_mm_id)) for match in matches: diff --git a/transformation/matcher.py b/transformation/matcher.py index 9c5b2da..23670c3 100644 --- a/transformation/matcher.py +++ b/transformation/matcher.py @@ -12,7 +12,7 @@ import itertools import re import functools -from util.timer import Timer +from util.timer import Timer, counted class _is_edge: def __repr__(self): @@ -286,6 +286,27 @@ def match_od(state, host_m, host_mm, pattern_m, pattern_mm, pivot={}): g_names, guest = model_to_graph(state, pattern_m, pattern_mm, _filter=is_matchable) + # precompute the candidates for every guest vertex: + guest_to_host_candidate_vtxs = {} + vtxs_of_host_type = {} + + for g_vtx in guest.vtxs: + object_node = g_vtx.node_id + if hasattr(g_vtx, 'typ'): + orig_class_node = ramify.get_original_type(bottom, g_vtx.typ) + orig_class_name = odapi.get_name(orig_class_node) + if orig_class_name in vtxs_of_host_type: + cands = vtxs_of_host_type[orig_class_name] + else: + cands = vtxs_of_host_type[orig_class_name] = len(odapi.get_all_instances(orig_class_name, include_subtypes=True)) + else: + cands = len(host.vtxs) + guest_to_host_candidate_vtxs[g_vtx] = cands + + # print(guest_to_host_candidate_vtxs) + + + # transform 'pivot' into something VF2 understands graph_pivot = { g_names[guest_name] : h_names[host_name] for guest_name, host_name in pivot.items() @@ -339,7 +360,7 @@ def match_od(state, host_m, host_mm, pattern_m, pattern_mm, pivot={}): compare = RAMCompare(bottom, services_od.OD(host_mm, host_m, state)) - matcher = MatcherVF2(host, guest, compare) + matcher = MatcherVF2(host, guest, compare, guest_to_host_candidate_vtxs) for m in matcher.match(graph_pivot): # Convert mapping name_mapping = {} diff --git a/transformation/vf2.py b/transformation/vf2.py index 4d32820..e7d0ac6 100644 --- a/transformation/vf2.py +++ b/transformation/vf2.py @@ -4,7 +4,7 @@ import itertools -from util.timer import Timer +from util.timer import Timer, counted # like finding the 'strongly connected componenets', but edges are navigable in any direction def find_connected_components(graph): @@ -81,6 +81,11 @@ class MatcherState: state = MatcherState() state.h_unmatched_vtxs = [vtx for vtx in host.vtxs if vtx not in pivot.values()] state.g_unmatched_vtxs = [vtx for vtx in guest.vtxs if vtx not in pivot.keys()] + # if guest_to_host_candidates != None: + # state.g_unmatched_vtxs.sort( + # # performance thingy: + # # try to match guest vtxs with few candidates first (fail early!): + # key=lambda guest_vtx: guest_to_host_candidates.get(guest_vtx, 0)) state.mapping_vtxs = pivot state.r_mapping_vtxs = { v: k for k,v in state.mapping_vtxs.items() } return state @@ -129,27 +134,42 @@ class MatcherState: # return self.make_hashable().__repr__() return "VTXS: "+self.mapping_vtxs.__repr__()+"\nEDGES: "+self.mapping_edges.__repr__() + class MatcherVF2: # Guest is the pattern - def __init__(self, host, guest, compare_fn): + def __init__(self, host, guest, compare_fn, guest_to_host_candidates=None): self.host = host self.guest = guest self.compare_fn = compare_fn + # map guest vertex to number of candidate vertices in host graph: + if guest_to_host_candidates != None: + self.guest_to_host_candidates = guest_to_host_candidates + else: + # atttempt to match every guest vertex with every host vertex (slow!) + self.guest_to_host_candidates = { g_vtx : len(host.vtxs) for g_vtx in guest.vtxs } + # with Timer("find_connected_components - guest"): self.guest_vtx_to_component, self.guest_component_to_vtxs = find_connected_components(guest) - # print("number of guest connected components:", len(self.guest_component_to_vtxs)) + for component in self.guest_component_to_vtxs: + pass + # sort vertices in component such that the vertices of the rarest type (with the fewest element) occurs first + component.sort(key=lambda guest_vtx: guest_to_host_candidates[guest_vtx]) + if len(self.guest_component_to_vtxs) > 1: + print("warning: pattern has multiple components:", len(self.guest_component_to_vtxs)) def match(self, pivot={}): yield from self._match( state=MatcherState.make_initial(self.host, self.guest, pivot), already_visited=set()) - + # @counted def _match(self, state, already_visited, indent=0): # input() + num_matches = 0 + def print_debug(*args): pass # print(" "*indent, *args) # uncomment to see a trace of the matching process @@ -161,7 +181,7 @@ class MatcherVF2: if hashable in already_visited: print_debug(" SKIP - ALREADY VISITED") # print_debug(" ", hashable) - return + return 0 # print_debug(" ", [hash(a) for a in already_visited]) # print_debug(" ADD STATE") # print_debug(" ", hash(hashable)) @@ -173,7 +193,7 @@ class MatcherVF2: print_debug(" ", state.mapping_vtxs) print_debug(" ", state.mapping_edges) yield state - return + return 1 def read_edge(edge, direction): if direction == "outgoing": @@ -184,6 +204,7 @@ class MatcherVF2: raise Exception("wtf!") def attempt_grow(direction, indent): + num_matches = 0 for g_matched_vtx, h_matched_vtx in state.mapping_vtxs.items(): print_debug('attempt_grow', direction) for g_candidate_edge in getattr(g_matched_vtx, direction): @@ -204,69 +225,79 @@ class MatcherVF2: print_debug('grow edge', g_candidate_edge, ':', h_candidate_edge, id(g_candidate_edge), id(h_candidate_edge)) new_state = state.grow_edge(h_candidate_edge, g_candidate_edge) h_candidate_vtx = read_edge(h_candidate_edge, direction) - yield from attempt_match_vtxs( + num_matches += yield from attempt_match_vtxs( new_state, g_candidate_vtx, h_candidate_vtx, indent+1) print_debug('backtrack edge', g_candidate_edge, ':', h_candidate_edge, id(g_candidate_edge), id(h_candidate_edge)) + return num_matches def attempt_match_vtxs(state, g_candidate_vtx, h_candidate_vtx, indent): print_debug('attempt_match_vtxs') if g_candidate_vtx in state.mapping_vtxs: if state.mapping_vtxs[g_candidate_vtx] != h_candidate_vtx: print_debug(" nope, guest already mapped (mismatch)") - return # guest vtx is already mapped but doesn't match host vtx + return 0 # guest vtx is already mapped but doesn't match host vtx if h_candidate_vtx in state.r_mapping_vtxs: if state.r_mapping_vtxs[h_candidate_vtx] != g_candidate_vtx: print_debug(" nope, host already mapped (mismatch)") - return # host vtx is already mapped but doesn't match guest vtx + return 0 # host vtx is already mapped but doesn't match guest vtx g_outdegree = len(g_candidate_vtx.outgoing) h_outdegree = len(h_candidate_vtx.outgoing) if g_outdegree > h_outdegree: print_debug(" nope, outdegree") - return + return 0 g_indegree = len(g_candidate_vtx.incoming) h_indegree = len(h_candidate_vtx.incoming) if g_indegree > h_indegree: print_debug(" nope, indegree") - return + return 0 if not self.compare_fn(g_candidate_vtx, h_candidate_vtx): print_debug(" nope, bad compare") - return + return 0 new_state = state.grow_vtx( h_candidate_vtx, g_candidate_vtx) print_debug('grow vtx', g_candidate_vtx, ':', h_candidate_vtx, id(g_candidate_vtx), id(h_candidate_vtx)) - yield from self._match(new_state, already_visited, indent+1) + num_matches = yield from self._match(new_state, already_visited, indent+1) print_debug('backtrack vtx', g_candidate_vtx, ':', h_candidate_vtx, id(g_candidate_vtx), id(h_candidate_vtx)) + return num_matches print_debug('preferred...') - yield from attempt_grow('outgoing', indent+1) - yield from attempt_grow('incoming', indent+1) + num_matches += yield from attempt_grow('outgoing', indent+1) + num_matches += yield from attempt_grow('incoming', indent+1) - print_debug('least preferred...') - if state.boundary != None: - g_boundary_vtx, _ = state.boundary - guest_boundary_component = self.guest_vtx_to_component[g_boundary_vtx] - # only try guest vertices that are in a different component (all vertices in the same component are already discovered via 'attempt_grow') - guest_components_to_try = (c for i,c in enumerate(self.guest_component_to_vtxs) if i != guest_boundary_component) - # for the host vertices however, we have to try them from all components, because different connected components of our pattern (=guest) could be mapped onto the same connected component in the host - else: - guest_components_to_try = self.guest_component_to_vtxs + if num_matches == 0: + print_debug('least preferred...') + if state.boundary != None: + g_boundary_vtx, _ = state.boundary + guest_boundary_component = self.guest_vtx_to_component[g_boundary_vtx] + # only try guest vertices that are in a different component (all vertices in the same component are already discovered via 'attempt_grow') + guest_components_to_try = (c for i,c in enumerate(self.guest_component_to_vtxs) if i != guest_boundary_component) + # for the host vertices however, we have to try them from all components, because different connected components of our pattern (=guest) could be mapped onto the same connected component in the host + else: + guest_components_to_try = self.guest_component_to_vtxs - for g_component in guest_components_to_try: - # we only need to pick ONE vertex from the component - # in the future, this can be optimized further by picking the vertex of the type with the fewest instances - g_candidate_vtx = g_component[0] - if g_candidate_vtx in state.mapping_vtxs: - print_debug("skip (already matched)", g_candidate_vtx) - continue - for h_candidate_vtx in state.h_unmatched_vtxs: - yield from attempt_match_vtxs(state, g_candidate_vtx, h_candidate_vtx, indent+1) + for g_component in guest_components_to_try: + # we only need to pick ONE vertex from the component + # in the future, this can be optimized further by picking the vertex of the type with the fewest instances + g_candidate_vtx = g_component[0] + g_vtx_matches = 0 + g_vtx_max = self.guest_to_host_candidates[g_candidate_vtx] + # print(' guest vtx has', g_vtx_max, ' host candidates') + if g_candidate_vtx in state.mapping_vtxs: + print_debug("skip (already matched)", g_candidate_vtx) + continue + for h_candidate_vtx in state.h_unmatched_vtxs: + N = yield from attempt_match_vtxs(state, g_candidate_vtx, h_candidate_vtx, indent+1) + g_vtx_matches += N > 0 + num_matches += N + if g_vtx_matches == g_vtx_max: + print("EARLY STOP") + break # found all matches - if indent == 0: - print_debug('visited', len(already_visited), 'states total') + return num_matches # demo time... if __name__ == "__main__":