From 95a8076a178edcdeea34a3306a50505f46e28bfb Mon Sep 17 00:00:00 2001 From: Joeri Exelmans Date: Mon, 2 Sep 2024 15:38:30 +0200 Subject: [PATCH] Add Sten Vercammen's pattern matching library (ported to Python 3, numpy dependency replaced by standard library) --- pattern_matching/enum.py | 31 + pattern_matching/generator.py | 202 ++++++ pattern_matching/graph.py | 157 +++++ pattern_matching/graphToDot.py | 44 ++ pattern_matching/main.py | 88 +++ pattern_matching/patternMatching.py | 947 ++++++++++++++++++++++++++++ pattern_matching/planGraph.py | 528 ++++++++++++++++ pattern_matching/run.sh | 8 + pattern_matching/searchGraph.py | 115 ++++ 9 files changed, 2120 insertions(+) create mode 100644 pattern_matching/enum.py create mode 100644 pattern_matching/generator.py create mode 100644 pattern_matching/graph.py create mode 100644 pattern_matching/graphToDot.py create mode 100644 pattern_matching/main.py create mode 100644 pattern_matching/patternMatching.py create mode 100644 pattern_matching/planGraph.py create mode 100755 pattern_matching/run.sh create mode 100644 pattern_matching/searchGraph.py diff --git a/pattern_matching/enum.py b/pattern_matching/enum.py new file mode 100644 index 0000000..afde1bb --- /dev/null +++ b/pattern_matching/enum.py @@ -0,0 +1,31 @@ +# coding: utf-8 + +""" +Author: Sten Vercamman + Univeristy of Antwerp + +Example code for paper: Efficient model transformations for novices +url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen + +The main goal of this code is to give an overview, and an understandable +implementation, of known techniques for pattern matching and solving the +sub-graph homomorphism problem. The presented techniques do not include +performance adaptations/optimizations. It is not optimized to be efficient +but rather for the ease of understanding the workings of the algorithms. +The paper does list some possible extensions/optimizations. 
class Enum(object):
    """
    Minimal enumeration helper, kept for compatibility with interpreters that
    predate the standard ``enum`` module (introduced in Python 3.4).

    Every name in ``args`` becomes an instance attribute whose value is the
    zero-based position of that name in ``args``. If a name occurs more than
    once, the position of its last occurrence wins.

    Usage create : a = Enum(['e0', 'e1', ...])
    Usage call   : a.e0
    """
    def __init__(self, args):
        # enumerate() replaces a hand-maintained counter that used to be
        # called `next` and therefore shadowed the builtin of the same name.
        for value, name in enumerate(args):
            # write through __dict__ so that any hashable name is accepted,
            # exactly as before
            self.__dict__[name] = value
import graph
# import numpy as np
import math
import collections
import random


class GraphGenerator(object):
    """
    Builds a graph from explicit type and connectivity lists and derives
    random, connected patterns from that graph.

    dv      : one entry per vertex; entry t creates a vertex of type 'v' + str(t)
    de      : one entry per edge; entry t creates an edge of type 'e' + str(t)
    dc_inc  : for edge i, the index (into dv) of the vertex the edge enters
              (its target)
    dc_out  : for edge i, the index (into dv) of the vertex the edge leaves
              (its source)
    debug   : when True, the four lists are printed so a run can be reproduced

    Raises ValueError when de, dc_inc and dc_out do not all have the same
    length.
    """
    def __init__(self, dv, de, dc_inc, dc_out, debug=False):
        if len(de) != len(dc_inc):
            raise ValueError('de and dc_inc should be the same length.')
        if len(de) != len(dc_out):
            raise ValueError('de and dc_out should be the same length.')

        self.dv = dv
        self.de = de
        self.dc_inc = dc_inc
        self.dc_out = dc_out

        # print for debugging, so you know the used values
        if debug:
            for label, values in (('dv', dv), ('de', de),
                                  ('dc_inc', dc_inc), ('dc_out', dc_out)):
                print(label)
                print('[', ','.join(map(str, values)), ']')
                print('_____')

        self.graph = graph.Graph()
        # vertices are kept in creation order so that dc_inc / dc_out can
        # address them by index
        self.vertices = [self.graph.addCreateVertex('v' + str(v_type))
                         for v_type in self.dv]

        # the three edge lists were verified above to have equal length
        for e_type, src_index, tgt_index in zip(self.de, self.dc_out, self.dc_inc):
            src = self.vertices[src_index]
            tgt = self.vertices[tgt_index]
            self.graph.addCreateEdge(src, tgt, 'e' + str(e_type))

    def getRandomGraph(self):
        """
        Returns the graph that was built by the constructor.
        """
        return self.graph

    def getRandomPattern(self, max_nr_of_v, max_nr_of_e, start=0, debug=False):
        """
        Grows a connected pattern out of the generated graph.

        Starting from the vertex at index ``start``, an edge touching the
        pattern built so far is picked at random and copied into the pattern
        (together with its missing end point, if any). This is repeated while
        the pattern has fewer than ``max_nr_of_v`` vertices and fewer than
        ``max_nr_of_e`` edges have been chosen, or until no candidate edge is
        left.

        With ``debug`` set, the calls needed to rebuild the pattern are
        printed; that output can be pasted into createConstantPattern.

        Returns the pattern as a new graph.Graph.
        """
        pattern = graph.Graph()

        # map from graph vertex to the pattern vertex created for it
        graph_to_pattern = {}

        # candidate edges, in insertion order; only the keys are used
        # (an OrderedDict stands in for an ordered set)
        possible_edges = collections.OrderedDict()

        # edges that were already copied into the pattern
        chosen_edges = set()

        def addPatternVertex(g_vertex):
            # copy a graph vertex into the pattern and remember the mapping
            p_vertex = pattern.addCreateVertex(g_vertex.type)
            if debug:
                print("v{}=pattern.addCreateVertex('{}')".format(
                    id(p_vertex), g_vertex.type))
            graph_to_pattern[g_vertex] = p_vertex
            return p_vertex

        def addPatternEdge(p_src, p_tgt, str_type):
            # connect two pattern vertices
            pattern.addCreateEdge(p_src, p_tgt, str_type)
            if debug:
                print("pattern.addCreateEdge(v{}, v{}, '{}')".format(
                    id(p_src), id(p_tgt), str_type))

        def insertEdges(g_vertex):
            # offer every not yet chosen edge of g_vertex as a candidate,
            # incoming edges first
            for edges in (g_vertex.incoming_edges, g_vertex.outgoing_edges):
                for edge in edges:
                    if edge not in chosen_edges:
                        possible_edges[edge] = None

        # start node from graph
        g_node = self.vertices[start]
        addPatternVertex(g_node)
        insertEdges(g_node)

        while max_nr_of_v > len(graph_to_pattern) and max_nr_of_e > len(chosen_edges):
            if not possible_edges:
                break
            # Pick a random position in possible_edges. A triangular
            # distribution with its mode at the end of the list is used to
            # compensate for the fact that the first elements have been in
            # possible_edges the longest and already had the most chances of
            # being chosen. (This is an approximation, because the first few
            # elements were added in the same iteration; doing it exactly is
            # computationally expensive.)
            nr_of_candidates = len(possible_edges)
            if nr_of_candidates == 1:
                position = 0
            else:
                position = int(round(random.triangular(
                    1, nr_of_candidates, nr_of_candidates))) - 1
            candidate = list(possible_edges)[position]
            del possible_edges[candidate]
            chosen_edges.add(candidate)

            src = graph_to_pattern.get(candidate.src)
            tgt = graph_to_pattern.get(candidate.tgt)

            # Every candidate was offered by a vertex that is already part of
            # the pattern, so at least one end point must be mapped. Checked
            # first: previously this case fell into the "source is new"
            # branch and failed later with an unrelated TypeError.
            if src is None and tgt is None:
                raise RuntimeError('Bug: src or tgt of edge should be in out pattern')

            if src is not None and tgt is not None:
                # both end points exist already: only the edge is new, so
                # there are no new candidate edges to collect
                addPatternEdge(src, tgt, candidate.type)
                continue

            # exactly one end point is missing: create it, then the edge
            if src is None:
                new_vertex = candidate.src
                src = addPatternVertex(new_vertex)
            else:
                new_vertex = candidate.tgt
                tgt = addPatternVertex(new_vertex)
            addPatternEdge(src, tgt, candidate.type)

            # insert all edges from the new vertex
            insertEdges(new_vertex)

        return pattern

    @staticmethod
    def createConstantPattern():
        """
        Use this to create the same pattern over and over again.

        Declared as a static method so that it can be called both on the
        class and on an instance; it does not use any generator state.
        """
        # create pattern
        pattern = graph.Graph()

        # copy and paste printed pattern from debug output or create a pattern
        # below the following line:
        # ----------------------------------------------------------------------
        v4447242448 = pattern.addCreateVertex('v4')
        v4457323088 = pattern.addCreateVertex('v6')
        pattern.addCreateEdge(v4447242448, v4457323088, 'e4')
        v4457323216 = pattern.addCreateVertex('v8')
        pattern.addCreateEdge(v4457323216, v4447242448, 'e4')
        v4457323344 = pattern.addCreateVertex('v7')
        pattern.addCreateEdge(v4457323216, v4457323344, 'e3')
        v4457323472 = pattern.addCreateVertex('v7')
        pattern.addCreateEdge(v4457323344, v4457323472, 'e1')

        # ----------------------------------------------------------------------
        return pattern
class Properties(object):
    """
    Holds all Properties.
    """
    def __init__(self):
        # member variables:
        self.properties = {}

    def addProperty(self, name, value):
        """
        Adds property (overrides if name already exists).
        """
        self.properties[name] = value

    def getProperty(self, name):
        """
        Returns property with given name or None if not found.
        """
        return self.properties.get(name)


class Edge(Properties):
    """
    Describes an Edge with source and target Node.
    The Edge can have several properties, like a name, a weight, etc...
    """
    def __init__(self, src, tgt, str_type=None):
        # Call parent class constructor
        super().__init__()
        # member variables:
        self.src = src
        self.tgt = tgt
        self.type = str_type

    def __repr__(self):
        # readable form for debug output of matched edges
        return 'Edge(type={!r}, src={!r}, tgt={!r})'.format(
            self.type, self.src, self.tgt)


class Vertex(Properties):
    """
    Describes a Vertex with incoming, outgoing and undirected (both ways) edges.
    The vertex can have several properties, like a name, a weight, etc...
    """
    def __init__(self, str_type):
        # Call parent class constructor
        super().__init__()
        # member variables:
        self.incoming_edges = set()    # undirected edges should be stored both in
        self.outgoing_edges = set()    # incoming and outgoing edges
        self.type = str_type

    def __repr__(self):
        # The edges are deliberately left out: an edge prints its end points,
        # so printing edges here would recurse. id() tells apart vertices of
        # the same type.
        return 'Vertex(type={!r}, id={})'.format(self.type, id(self))

    def addIncomingEdge(self, edge):
        """
        Adds an incoming Edge.
        Raises TypeError if edge is not an Edge.
        """
        if not isinstance(edge, Edge):
            raise TypeError('addIncomingEdge without it being an edge')
        self.incoming_edges.add(edge)

    def addOutgoingEdge(self, edge):
        """
        Adds an outgoing Edge.
        Raises TypeError if edge is not an Edge.
        """
        if not isinstance(edge, Edge):
            raise TypeError('addOutgoingEdge without it being an edge')
        self.outgoing_edges.add(edge)

    def addUndirectedEdge(self, edge):
        """
        Adds an undirected (or bi-directed) Edge, by storing it both as an
        incoming and as an outgoing edge.
        """
        self.addIncomingEdge(edge)
        self.addOutgoingEdge(edge)


class Graph(object):
    """
    Holds a Graph: its vertices and edges, each grouped by type.
    """
    def __init__(self):
        # member variables:
        # redundant type keeping, "needed" for fast iterating over specific type
        self.vertices = {}    # {type, set(v1, v2, ...)}
        self.edges = {}       # {type, set(e1, e2, ...)}

    def addCreateVertex(self, str_type):
        """
        Creates a Vertex of str_type, stores it and returns it
        (so that properties can be added to it).
        """
        vertex = Vertex(str_type)
        self.addVertex(vertex)
        return vertex

    def addVertex(self, vertex):
        """
        Stores a Vertex into the Graph.
        Raises TypeError if vertex is not a Vertex.
        """
        if not isinstance(vertex, Vertex):
            raise TypeError('addVertex expects a Vertex')
        # add vertex, but it first creates a new set for the vertex type
        # if the type does not exist in the dictionary
        self.vertices.setdefault(vertex.type, set()).add(vertex)

    def getVerticesOfType(self, str_type):
        """
        Returns all vertices of a specific type (the stored set itself),
        Return [] if there are no vertices with the given type
        """
        return self.vertices.get(str_type, [])

    def getEdgesOfType(self, str_type):
        """
        Returns all edges of a specific type (the stored set itself),
        Return [] if there are no edges with the given type
        """
        return self.edges.get(str_type, [])

    def addCreateEdge(self, src, tgt, str_type):
        """
        Creates edge of str_type from src to tgt, and returns it,
        so that properties can be added to the edge.
        Raises TypeError if src or tgt is not a Vertex.
        """
        if not isinstance(src, Vertex):
            raise TypeError('addCreateEdge: src is not a Vertex')
        if not isinstance(tgt, Vertex):
            raise TypeError('addCreateEdge: tgt is not a Vertex')
        edge = Edge(src, tgt, str_type)
        # link vertices connected to this edge
        edge.src.addOutgoingEdge(edge)
        edge.tgt.addIncomingEdge(edge)
        self.addEdge(edge)
        return edge

    def addEdge(self, edge):
        """
        Stores an Edge into the Graph.
        Raises TypeError if edge is not an Edge.
        """
        if not isinstance(edge, Edge):
            raise TypeError('addEdge expects an Edge')
        # add edge, but it first creates a new set for the edge type
        # if the type does not exist in the dictionary
        self.edges.setdefault(edge.type, set()).add(edge)
import graph as mg


def printGraph(fileName, graph, matched_v=None, matched_e=None):
    """
    Writes the graph to fileName in Graphviz DOT format.

    Every vertex becomes a node named after its id() and labelled with its
    type; every edge is written once, from its source vertex, labelled with
    its type.

    matched_v : optional mapping whose *values* are the vertices of graph
                that have to be highlighted
    matched_e : optional mapping whose *values* are the edges of graph
                that have to be highlighted

    Raises TypeError if graph is not a Graph.
    """
    if not isinstance(graph, mg.Graph):
        raise TypeError('Can only print Graph Graphs')

    # None instead of {} as default, so that no mutable object is shared
    # between calls. The highlighted elements are collected once, in sets,
    # instead of rebuilding a list for every vertex and every edge.
    highlighted_v = set(matched_v.values()) if matched_v else set()
    highlighted_e = set(matched_e.values()) if matched_e else set()

    with open(fileName, 'w') as f:
        f.write('digraph randomGraph {\n\n')
        for str_type, plan_vertices in graph.vertices.items():
            for plan_vertex in plan_vertices:
                vertex_str = str(id(plan_vertex)) + ' [label="'+str(str_type)+'"'
                if plan_vertex in highlighted_v:
                    # NOTE(review): `style` is assigned twice here; Graphviz
                    # presumably keeps only the last value (filled). Confirm
                    # whether style="dashed,filled" was intended.
                    vertex_str += ', style=dashed, style=filled]\n'
                else:
                    vertex_str += ']\n'
                f.write(vertex_str)
                for out_edge in plan_vertex.outgoing_edges:
                    edge_str = str(id(plan_vertex)) + ' -> ' + str(id(out_edge.tgt)) + ' [label="'+str(out_edge.type)+'"'
                    if out_edge in highlighted_e:
                        edge_str += ', style=dashed, penwidth = 4]\n'
                    else:
                        edge_str += ']\n'
                    f.write(edge_str)
        f.write('\n}')
from generator import *
from patternMatching import *

import graphToDot

import random

debug = False


def main():
    """
    The main function called when running from the command line.

    Builds the example graph, derives a random pattern from it, writes both
    to Graphviz files ('randomPattern.dot', 'randomGraph.dot'), searches the
    pattern in the graph and, when an occurrence is found, rewrites
    'randomGraph.dot' with the occurrence highlighted.
    """
    nr_of_vertices = 50
    nr_of_diff_types_v = 10
    nr_of_edges = 150
    nr_of_diff_types_e = 10

    # random.randint includes both bounds, so types range from 0 up to and
    # including nr_of_diff_types_v / nr_of_diff_types_e
    dv = [random.randint(0, nr_of_diff_types_v) for _ in range(nr_of_vertices)]
    de = [random.randint(0, nr_of_diff_types_e) for _ in range(nr_of_edges)]
    dc_inc = [random.randint(0, nr_of_vertices-1) for _ in range(nr_of_edges)]
    dc_out = [random.randint(0, nr_of_vertices-1) for _ in range(nr_of_edges)]

    # override random graph by copy pasting output from terminal
    dv = [ 10,5,4,0,8,6,8,0,4,8,5,5,7,0,10,0,5,6,10,4,0,3,0,8,2,7,5,8,1,0,2,10,0,0,1,6,8,4,7,6,4,2,10,10,6,4,6,0,2,7 ]
    de = [ 8,10,8,1,6,7,4,3,5,2,0,0,9,6,0,3,8,3,2,7,2,3,10,8,10,8,10,2,5,5,10,6,7,5,1,2,1,2,2,3,7,7,2,1,7,2,9,10,8,1,9,4,1,3,1,1,8,2,2,9,10,9,1,9,4,10,10,10,9,3,5,3,6,6,9,1,2,6,3,2,4,10,9,6,5,6,2,4,3,2,4,10,6,2,8,8,0,5,1,7,3,4,3,8,7,3,0,8,3,3,8,5,10,5,9,3,1,10,3,2,6,3,10,0,5,10,9,10,0,1,4,7,10,3,1,9,1,2,3,7,4,3,7,8,8,4,5,10,1,4 ]
    dc_inc = [ 0,25,18,47,22,25,16,45,38,25,5,45,15,44,17,46,6,17,35,8,16,29,48,47,25,34,4,20,24,1,47,44,8,25,32,3,16,6,33,21,6,13,41,10,17,25,21,33,31,30,5,4,45,26,16,42,12,25,29,3,32,30,14,26,11,13,7,13,3,43,43,22,48,37,20,28,15,40,19,33,43,16,49,36,11,25,9,42,3,22,16,40,42,44,27,30,1,18,10,35,19,6,9,43,37,38,45,19,41,14,37,45,0,31,29,31,24,20,44,46,8,45,43,3,38,38,35,12,19,45,7,34,20,28,12,17,45,17,35,49,20,21,49,1,35,38,38,36,33,30 ]
    dc_out = [ 9,2,49,49,37,33,16,21,5,46,4,15,9,6,14,22,16,33,23,21,15,31,37,23,47,3,30,26,35,9,29,21,39,32,22,43,5,9,41,30,31,30,37,33,31,34,23,22,34,26,44,36,38,33,48,5,9,34,13,7,48,41,43,26,26,7,12,6,12,28,22,8,29,22,24,27,16,4,31,41,32,15,19,20,38,0,26,18,43,46,40,17,29,14,34,14,32,17,32,47,16,45,7,4,35,22,42,11,38,2,0,29,4,38,17,44,9,23,5,10,31,17,1,11,16,5,37,27,35,32,45,16,18,1,14,4,42,24,43,31,21,38,6,34,39,46,20,1,38,47 ]

    gg = GraphGenerator(dv, de, dc_inc, dc_out, debug)

    graph = gg.getRandomGraph()
    pattern = gg.getRandomPattern(5, 15, debug=debug)

    # override random pattern by copy pasting output from terminal to create
    # pattern, paste it in the createConstantPattern function in the generator.py
    # pattern = gg.createConstantPattern()

    # generate here to know pattern and graph before searching it
    graphToDot.printGraph('randomPattern.dot', pattern)
    graphToDot.printGraph('randomGraph.dot', graph)

    # other available matchers: 'naive', 'SP', 'Ullmann'
    PM = PatternMatching('VF2')
    result = PM.match(pattern, graph)
    if not result:
        # No occurrence was found. Unpacking the result would raise a
        # TypeError, and the graph written above needs no highlighting.
        print('No occurrence of the pattern was found in the graph.')
        return
    v, e = result

    # regenerate graph, to show matched pattern
    graphToDot.printGraph('randomGraph.dot', graph, v, e)

    if debug:
        print(len(v))
        print('___')
        print(v)
        for value in v.values():
            print(value.type)
        print(len(e))
        print(e)
        print('___')
        for value in e.values():
            print(value.type)


if __name__ == '__main__':
    main()
solving the +sub-graph homomorphism problem. The presented techniques do not include +performance adaptations/optimizations. It is not optimized to be efficient +but rather for the ease of understanding the workings of the algorithms. +The paper does list some possible extensions/optimizations. + +It is intended as a guideline, even for novices, and provides an in-depth look +at the workings behind various techniques for efficient pattern matching. +""" + +from planGraph import * + +import collections +import itertools +# import numpy as np + +class PatternMatching(object): + """ + Returns an occurrence of a given pattern from the given Graph + """ + def __init__(self, matching_type='SP', optimize=True): + # store the type of matching we want to use + self.type = matching_type + self.bound_vertices = {} # saves the currently bound vertices + self.bound_edges = {} # saves the currently bound edges + self.result = None + self.previous = [] + self.optimize = optimize + + def match(self, pattern, graph): + """ + Call this function to find an occurrence of the pattern in the (host) graph. + Setting the type of matching (naive, SP, Ullmann, VF2) is done by + setting self.matching_type to its name. 
+ """ + if not (isinstance(pattern, SearchGraph) or isinstance(pattern, Graph)): + raise TypeError('pattern must be a SearchGraph or Graph') + if not (isinstance(graph, SearchGraph) or isinstance(graph, Graph)): + raise TypeError('graph must be a SearchGraph or Graph') + + self.pattern = pattern + self.graph = graph + + if self.type == 'naive': + result = self.matchNaive(vertices=graph.vertices, edges=graph.edges) + elif self.type == 'SP': + result = self.matchSP() + elif self.type == 'Ullmann': + result = self.matchUllmann() + elif self.type == 'VF2': + result = self.matchVF2() + else: + raise ValueError('Unknown type for matching') + + # cleanup + self.pattern = None + self.graph = None + self.bound_vertices = {} + self.bound_edges = {} + self.result = None + + return result + + def matchNaive(self, pattern_vertices=None, vertices=None, edges=None): + """ + Try to find an occurrence of the pattern in the Graph naively. + """ + # allow call with specific arguments + if pattern_vertices == None: + pattern_vertices = self.pattern.vertices + if vertices == None: + vertices = self.bound_vertices + if edges == None: + edges = self.bound_edges + + def visitEdge(pattern_vertices, p_edge, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges): + """ + Visit a pattern edge, and try to bind it to a graph edge. + (If the first fails, try the second, and so on...) 
+ """ + for g_edge in g_edges: + # only reckon the edge if its in edges and not visited + # (as the graph might be a subgraph of a more complex graph) + if g_edge not in edges.get(g_edge.type, []) or g_edge in visited_g_edges: + continue + if g_edge.type == p_edge.type and g_edge not in visited_g_edges: + visited_p_edges[p_edge] = g_edge + visited_g_edges.add(g_edge) + if inc: + p_vertex = p_edge.src + else: + p_vertex = p_edge.tgt + if visitVertices(pattern_vertices, p_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges): + return True + # remove added edges if they lead to no match, retry with others + del visited_p_edges[p_edge] + visited_g_edges.remove(g_edge) + # no edge leads to a possitive match + return False + + def visitEdges(pattern_vertices, p_edges, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges): + """ + Visit all edges of the pattern vertex (edges given as argument). + We need to try visiting them for all its permutations, as matching + v -e1-> first and v -e2-> second and v -e3-> third, might not result + in a matching an occurrence of the pattern, but matching v -e2-> + first and v -e3-> second and v -e1-> third might. + """ + def removePrevEdge(visitedEdges, visited_p_edges, visited_g_edges): + """ + Undo the binding of the brevious edge, (the current bindinds do + not lead to an occurrence of the pattern in the graph). 
+ """ + for wrong_edge in visitedEdges: + # remove binding (pattern edge to graph edge) + wrong_g_edge = visited_p_edges.get(wrong_edge) + del visited_p_edges[wrong_edge] + # remove visited graph edge + visited_g_edges.remove(wrong_g_edge) + + for it in itertools.permutations(p_edges): + visitedEdges = [] + foundallEdges = True + for edge in it: + if visited_p_edges.get(edge) == None: + if not visitEdge(pattern_vertices, edge, inc, g_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges): + # this did not work, so we have to undo all added edges + # (the current edge is not added, as it failed) + # we then can try a different permutation + removePrevEdge(visitedEdges, visited_p_edges, visited_g_edges) + foundallEdges = False + break # try other order + # add good visited (we know it succeeded) + visitedEdges.append(edge) + else: + # we visited this pattern edge, and have the coressponding graph edge + # if it is an incoming pattern edge, we need to make sure that + # the graph target that is map from the pattern target + # (of this incoming pattern edge, which has to be bound at this point) + # has the graph adge as an incoming edge, + # otherwise the graph is not properly connected + if inc: + if not visited_p_edges[edge] in visited_p_vertices[edge.tgt].incoming_edges: + # did not work + removePrevEdge(visitedEdges, visited_p_edges, visited_g_edges) + foundallEdges = False + break # try other order + else: + # analog for an outgoing edge + if not visited_p_edges[edge] in visited_p_vertices[edge.src].outgoing_edges: + # did not work + removePrevEdge(visitedEdges, visited_p_edges, visited_g_edges) + foundallEdges = False + break # try other order + + # all edges are good, look no further + if foundallEdges: + break + return foundallEdges + + def visitVertex(pattern_vertices, p_vertex, g_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges): + """ + Visit a pattern vertex, and try 
to bind it to the graph vertex + (both are given as argument). A binding is successful if all the + pattern vertex his incoming and outgoing edges can be bound + (to the graph vertex). + """ + if g_vertex in visited_g_vertices: + return False + # save visited graph vertex + visited_g_vertices.add(g_vertex) + # map pattern vertex to visited graph vertex + visited_p_vertices[p_vertex] = g_vertex + + if visitEdges(pattern_vertices, p_vertex.incoming_edges, True, g_vertex.incoming_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges): + if visitEdges(pattern_vertices, p_vertex.outgoing_edges, False, g_vertex.outgoing_edges, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges): + return True + # cleanup, remove from visited as this does not lead to + # an occurrence of the pttern in the graph + visited_g_vertices.remove(g_vertex) + del visited_p_vertices[p_vertex] + return False + + def visitVertices(pattern_vertices, p_vertex, visited_p_vertices, visited_p_edges, visited_g_vertices, visited_g_edges, vertices, edges): + """ + Visit a pattern vertex and try to bind a graph vertex to it. 
def matchSP(self):
    """
    Find an occurrence of the pattern in the Graph
    by using the generated SearchPlan.

    The SearchPlan is the list of primitive operations returned by
    PlanGraph.Edmonds. Every operation is indexed as op[0] (a PRIM_OP
    value), op[1] (an element type) and op[2] (for lkp: truthy when a
    vertex is looked up; for inc/out: the type of the edge to bind).

    Returns the value stored in self.result (produced by self.matchNaive)
    when an occurrence is found, None otherwise.
    Raises TypeError if self.graph is neither a Graph nor a SearchGraph,
    or if the SearchPlan contains an unknown primitive operation.
    """
    if isinstance(self.graph, Graph):
        sg = SearchGraph(self.graph)
    elif isinstance(self.graph, SearchGraph):
        sg = self.graph
    else:
        raise TypeError('Pattern matching with a SearchPlan must be given a Graph or SearchGraph')

    pg = PlanGraph(self.pattern)
    SP = pg.Edmonds(sg)
    if SP is None:
        # Edmonds returns None when a vertex/edge type of the pattern is
        # not present in the graph: there is nothing to match.
        return None

    self.fileIndex = 0

    def propConnected():
        """
        Checks if the found vertices and edges can be uniquely matched
        onto the pattern graph.
        """
        self.result = self.matchNaive()
        return self.result is not None

    def matchOP(elem, bound, ops, index):
        """
        Execute a primitive operation, return whether or not it succeeded.
        """
        type_bound = bound.setdefault(elem.type, set())
        # an element can only be bound once
        if elem in type_bound:
            return False
        # bind it, and try matching the next operations
        type_bound.add(elem)
        if matchAllOP(ops, index + 1):
            return True
        # matching of the next operations failed, unbind (backtrack)
        type_bound.remove(elem)
        return False

    def matchAllOP(ops, index=0):
        """
        Try to match an occurrence of the pattern in the graph,
        by recursively matching elements that adhere to the SearchPlan.
        """
        # if we matched all elements,
        # check if the bound elements are properly connected
        if index == len(ops):
            return propConnected()

        op = ops[index]
        kind = op[0]

        if kind == PRIM_OP.lkp:  # lkp(elem)
            # If the graph has no element of the requested type, elems is
            # empty, the loop is skipped and we return False.
            if op[2]:  # lookup a vertex
                elems = self.graph.vertices.get(op[1], [])
                bound = self.bound_vertices
            else:  # lookup an edge
                elems = self.graph.edges.get(op[1], [])
                bound = self.bound_edges
            for elem in elems:
                if matchOP(elem, bound, ops, index):
                    return True
            # all not yet bound elems failed, backtrack
            return False

        if kind in (PRIM_OP.src, PRIM_OP.tgt):
            # src(e) / tgt(e): bind the source / target of a bound edge e.
            # The edge must already be bound (an entry for op[1] is
            # expected in self.bound_edges).
            # Iterate over a snapshot: the recursion binds and unbinds
            # elements in these very sets while we are looping.
            for edge in list(self.bound_edges[op[1]]):
                endpoint = edge.src if kind == PRIM_OP.src else edge.tgt
                if matchOP(endpoint, self.bound_vertices, ops, index):
                    return True
            return False

        if kind in (PRIM_OP.inc, PRIM_OP.out):
            # in(v, e) / out(v, e): bind an incoming / outgoing edge e of a
            # bound vertex v. The vertex should have been bound implicitly
            # (by a src/tgt op); when that implicit binding picked a
            # "wrong" vertex nothing of type op[1] is bound, the loop is
            # skipped and we return False.
            for vertex in list(self.bound_vertices.get(op[1], [])):
                if kind == PRIM_OP.inc:
                    candidates = vertex.incoming_edges
                else:
                    candidates = vertex.outgoing_edges
                for edge in candidates:
                    if edge.type == op[2] and matchOP(edge, self.bound_edges, ops, index):
                        return True
            return False

        raise TypeError('Unknown PRIM_OP type')

    # Try and match all (primitive) operations from the SearchPlan.
    # Either nothing is found, or we found an occurrence; it is impossible
    # to have a partially matched occurrence. The outcome of the search
    # itself is used: the bound sets may still hold empty leftovers of
    # backtracked attempts and therefore do not tell whether we succeeded.
    if matchAllOP(SP):
        return self.result
    return None
matrix[edge.src][1][index] = True + + AM = [] + vertices_order = [] + for vertex, row in matrix.items(): + AM.append(row[1]) + vertices_order.append(vertex) + + return AM, vertices_order + + def matchUllmann(self): + """ + Find an occurrence of the pattern in the Graph + by using Ullmann for solving the Constraint Satisfaction Problem (CSP). + """ + + def createM_star(h, p): + """ + Create M*[v, w] = 1 if deg(v) <= deg(w), for v in V_P, w in V_H + = 0 otherwise + + M and P are given to ensure corect order. + """ + m = [] # [[..], ...] + for p_vertex in p: + row = [] + for g_vertex in h: + # for the degree function, we choose to look at the + # outgoing edges AND the incoming edges + # (one might prefer to use only one of them) + if self.optimize: + # also check if type matches + if p_vertex.type != g_vertex.type: + row.append(False) + continue + row.append( len(p_vertex.incoming_edges) <= + len(g_vertex.incoming_edges) and + len(p_vertex.outgoing_edges) <= + len(g_vertex.outgoing_edges)) + m.append(row) + + return m + + def createDecreasingOrder(h): + """ + It turns out that the more edges a vertex has, the sooner it will + fail in matching the pattern. For efficiency reasons, we want it + to fail as fast as possible. + """ + order = [] # [(value, index), ...] + index = 0 + for g_vertex in h: + order.append(( len(g_vertex.outgoing_edges) + + len(g_vertex.outgoing_edges), index)) + index += 1 + + order.sort(key = lambda elem: elem[0]) + # sort and only return the indices (which specify the order) + return [index for (_, index) in order] + + def propConnected(M, H, P, h, p): + """ + Checks if the vertices represented in M are isomorphic to P and if + they can be matched onto the pattern graph. + """ + print(M, H, P, h, p) + # P_candi = np.dot(M, np.transpose(np.dot(M, H))) + + + """ + # If we do not aply the refineM function, we will want to check if + # this succeeds, as it checks for isomorphism. 
+ # If we apply the refineM function, it is garanteed to be isomorphic. + + index_column = 0 + for row in P_candi: + index_row = 0 + for item in row: + # for all i,j: P[i, j] = 1 : M(MH)^T [j, i] = 1 + # (not the other way around) + # (return False when item is 0 and P[i,j] is 1) + if item < P[index_row][index_column]: + return False + index_row += 1 + index_column += 1 + """ + + vertices = {} + index_column = 0 + for row in M: + index_row = 0 + for item in row: + # there should only be one item per row + if item: + vertex = h[index_row] + vertices.setdefault(vertex.type, set()).add(vertex) + break + index_row += 1 + index_column += 1 + + self.result = self.matchNaive(vertices=vertices, edges=self.graph.edges) + return self.result != None + + def refineM(M, H, P, h, pp): + """ + Refine M, for every vertex from the pattern, check if each possible + matching (candidate) his neighbours can also be matched. (M's column + represents vertices from P, and the row represents its candidate.) + If this is not possible set M[i,j] to false, refining/reducing the + search space. 
+ """ + any_changes=True + while any_changes: + any_changes = False + # for all vertices from the pattern + for i in range(0, len(P)): # P is a nxn-matrix + # for all its possible assignments + for j in range(0, len(H[0])): + # if bound vertex of P, check if all neigbours are matchable + if M[i][j]: + # for all the pattern his neighbours + for k in range(0, len(P)): + # if it is a neighbour (from outgoing edges) + if P[i][k]: + match = False + for p in range(0, len(H[0])): + # check if we can match a candidate neighbour + # (from M* to to the graph (H)) + if M[k][p] and H[j][p]: + if self.optimize: + # also check correct type + if pp[k].type != h[p].type: + continue + match = True + break + if not match: + M[i][j] = False + any_changes = True + + # if it is a neighbour (from incoming edges) + if P[k][i]: + match = False + for p in range(0, len(H[0])): + # check if we can match a candidate neighbour + # (from M* to to the graph (H)) + if M[k][p] and H[p][j]: + if self.optimize: + # also check correct type + if pp[i].type != h[j].type: + continue + match = True + break + if not match: + M[i][j] = False + any_changes = True + + def findM(M_star, M, order, H, P, h, p, index_M=0): + """ + Find an isomorphic mapping for the vertices of P to H. + This mapping is represented by a matrix M if, + and only if M(MH)^T = P^T. + """ + # We are at the end, we found an candidate. + # Remember that we are at the end, bu first check if there is + # a row with ony False, if so, we do not need to check if it is + # properly connected. 
+ check_prop = False + if index_M == len(M): + check_prop = True + index_M -= 1 + + # we need to refer to this row + old_row = M_star[index_M] + # previous rows (these are sparse, 1 per row, save only its position) + prev_pos = [] + for i in range(0, index_M): + row = M[i] + only_false = True + for j in range(0, len(old_row)): + if row[j]: + only_false = False + prev_pos.append(j) + break + if only_false: + # check if a row with only False occurs, + # if so, we will not find an occurence + return False + + # We are at the end, we found an candidate. + if check_prop: + index_M += 1 + return propConnected(M, H, P, h, p) + + M[index_M] = [False] * len(old_row) + index_order = 0 + for index_order in range(0, len(order)): + index_row = order[index_order] + # put previous True back on False + if index_order > 0: + M[index_M][order[index_order - 1]] = False + + if old_row[index_row]: + M[index_M][index_row] = True + + findMPart = True + # 1 0 0 Assume 3th round, and we select x, + # 0 1 0 no element at the same possition in the row, + # 0 x 0 of the elements above itselve in the same + # column may be 1. In the example it is, then try + # selecting an other element. 
+ for index_column in range(0, index_M): + if M[index_column][index_row]: + findMPart = False + break + + if not findMPart: + continue + + refineM(M, H, P, h, p) + + if findM(M_star, M, order, H, P, h, p, index_M + 1): + return True + + # reset previous rows their True's + prev_row = 0 + for pos in prev_pos: + M[prev_row][pos] = True + prev_row += 1 + # reset rows below current row + for index_column in range(index_M + 1, len(M)): + # deep copy, we do not want to just copy pointer to array/list + M[index_column] = M_star[index_column][:] + + # reset current row (the rest is already reset) + M[index_M] = M_star[index_M][:] + + return False + + # create adjecency matrix of the graph + H, h = self.createAdjacencyMatrixMap(self.graph) + # create adjecency matrix of the pattern + P, p = self.createAdjacencyMatrixMap(self.pattern) + # create M* binary matrix + M_star = createM_star(h, p) + + # create the order we will use later on + order = createDecreasingOrder(h) + # deepcopy M_s into M + M = [row[:] for row in M_star] + + if self.optimize: + refineM(M, H, P, h, p) + + findM(M_star, M, order, H, P, h, p) + + return self.result + + + def matchVF2(self): + + class VF2_Obj(object): + """ + Structor for keeping the VF2 data. 
+ """ + def __init__(self, len_graph_vertices, len_pattern_vertices): + # represents if n-the element (h[n] or p[n]) matched + self.core_graph = [False]*len_graph_vertices + self.core_pattern = [False]*len_pattern_vertices + + # save mapping from pattern to graph + self.mapping = {} + + # preference lvl 1 + # ordered set of vertices adjecent to M_graph connected via an outgoing edge + self.N_out_graph = [-1]*len_graph_vertices + # ordered set of vertices adjecent to M_pattern connected via an outgoing edge + self.N_out_pattern = [-1]*len_pattern_vertices + + # preference lvl 2 + # ordered set of vertices adjecent to M_graph connected via an incoming edge + self.N_inc_graph = [-1]*len_graph_vertices + # ordered set of vertices adjecent to M_pattern connected via an incoming edge + self.N_inc_pattern = [-1]*len_pattern_vertices + + # preference lvl 3 + # not in the above + + def findM(H, P, h, p, VF2_obj, index_M=0): + """ + Find an isomorphic mapping for the vertices of P to H. + This mapping is represented by a matrix M if, + and only if M(MH)^T = P^T. + + This operates in a simular way as Ullmann. Ullmann has a predefind + order for matching (sorted on most edges first). VF2's order is to + first try to match the adjacency vertices connected via outgoing + edges, then thos connected via incoming edges and then those that + not connected to the currently mathed vertices. 
+ """ + def addOutNeighbours(neighbours, N, index_M): + """ + Given outgoing neighbours (a row from an adjacency matrix), + label them as added by saving when they got added (index_M + represents this, otherwise it is -1) + """ + for neighbour_index in range(0, len(neighbours)): + if neighbours[neighbour_index]: + if N[neighbour_index] == -1: + N[neighbour_index] = index_M + + def addIncNeighbours(G, j, N, index_M): + """ + Given the adjacency matrix, and the colum j, representing that + we want to add the incoming edges to vertex j, + label them as added by saving when they got added (index_M + represents this, otherwise it is -1) + """ + for i in range(0, len(G)): + if G[i][j]: + if N[i] == -1: + N[i] = index_M + + def delNeighbours(N, index_M): + """ + Remove neighbours that where added at index_M. + If we call this function, we are backtracking and we want to + remove the added neighbours from the just tried matching (n, m) + pair (whiched failed). + """ + for n in range(0, len(N)): + if N[n] == index_M: + N[n] = -1 + + def feasibilityTest(H, P, h, p, VF2_obj, n, m): + """ + Examine all the nodes connected to n and m; if such nodes are + in the current partial mapping, check if each branch from or to + n has a corresponding branch from or to m and vice versa. + + If the nodes and the branches of the graphs being matched also + carry semantic attributes, another condition must also hold for + F(s, n, m) to be true; namely the attributes of the nodes and of + the branches being paired must be compatible. + + Another pruning step is to check if the nr of ext_edges between + the matched_vertices from the pattern and its adjecent vertices + are less than or equal to the nr of ext_edges between + matched_vertices from the graph and its adjecent vertices. 
+ + And if the nr of ext_edges between those adjecent vertices from + the pattern and the not connected vertices are less than or + equal to the nr of ext_edges between those adjecent vertices from + the graph and its adjecent vertices. + """ + # Get all neighbours from graph node n and pattern node m + # (including n and m) + neighbours_graph = {} + neighbours_graph[h[n].type] = set([h[n]]) + + neighbours_pattern = {} + neighbours_pattern[p[m].type] = set([p[m]]) + + # add all neihgbours of pattern vertex m + for i in range(0, len(P)): # P is a nxn-matrix + if (P[m][i] or P[i][m]) and VF2_obj.core_pattern[i]: + neighbours_pattern.setdefault(p[i].type, set()).add(p[i]) + + # add all neihgbours of graph vertex n + for i in range(0, len(H)): # P is a nxn-matrix + if (H[n][i] or H[i][n]) and VF2_obj.core_graph[i]: + neighbours_graph.setdefault(h[i].type, set()).add(h[i]) + + # take a coding shortcut, + # use self.matchNaive function to see if it is feasable. + # this way, we immidiatly test the semantic attributes + if not self.matchNaive(pattern_vertices=neighbours_pattern, vertices=neighbours_graph, edges=self.graph.edges): + return False + + # count ext_edges from core_graph to a adjecent vertices and + # cuotn ext_edges for adjecent vertices and not matched vertices + # connected via the ext_edges + ext_edges_graph_ca = 0 + ext_edges_graph_an = 0 + # for all core vertices + for x in range(0, len(VF2_obj.core_graph)): + # for all its neighbours + for y in range(0, len(H)): + if H[x][y]: + # if it is a neighbor and not yet matched + if (VF2_obj.N_out_graph[y] != -1 or VF2_obj.N_inc_graph[y] != -1) and VF2_obj.core_graph[y]: + # if we matched it + if VF2_obj.core_graph[x] != -1: + ext_edges_graph_ca += 1 + else: + ext_edges_graph_an += 1 + + # count ext_edges from core_pattern to a adjecent vertices + # connected via the ext_edges + ext_edges_pattern_ca = 0 + ext_edges_pattern_an = 0 + # for all core vertices + for x in range(0, len(VF2_obj.core_pattern)): + # for 
all its neighbours + for y in range(0, len(P)): + if P[x][y]: + # if it is a neighbor and not yet matched + if (VF2_obj.N_out_pattern[y] != -1 or VF2_obj.N_inc_pattern[y] != -1) and VF2_obj.core_pattern[y]: + # if we matched it + if VF2_obj.core_pattern[x] != -1: + ext_edges_pattern_ca += 1 + else: + ext_edges_pattern_an += 1 + + # The nr of ext_edges between matched_vertices from the pattern + # and its adjecent vertices must be less than or equal to the nr + # of ext_edges between matched_vertices from the graph and its + # adjecent vertices, otherwise we wont find an occurrence + if ext_edges_pattern_ca > ext_edges_graph_ca: + return False + + # The nr of ext_edges between those adjancent vertices from the + # pattern and its not connected vertices must be less than or + # equal to the nr of ext_edges between those adjacent vertices + # from the graph and its not connected vertices, + # otherwise we wont find an occurrence + if ext_edges_pattern_an > ext_edges_graph_an: + return False + + return True + + def matchPhase(H, P, h, p, index_M, VF2_obj, n, m): + """ + The matching fase of the VF2 algorithm. If the chosen n, m pair + passes the feasibilityTest, the pair gets added and we start + to search for the next matching pair. 
+ """ + # all candidate pair (n, m) represent graph x pattern + + if feasibilityTest(H, P, h, p, VF2_obj, n, m): + # adapt VF2_obj + VF2_obj.core_graph[n] = True + VF2_obj.core_pattern[m] = True + VF2_obj.mapping[h[n]] = p[m] + addOutNeighbours(H[n], VF2_obj.N_out_graph, index_M) + addIncNeighbours(H, n, VF2_obj.N_inc_graph, index_M) + addOutNeighbours(P[m], VF2_obj.N_out_pattern, index_M) + addIncNeighbours(P, m, VF2_obj.N_inc_pattern, index_M) + + if findM(H, P, h, p, VF2_obj, index_M + 1): + return True + + # else, cleanup, adapt VF2_obj + VF2_obj.core_graph[n] = False + VF2_obj.core_pattern[m] = False + del VF2_obj.mapping[h[n]] + delNeighbours(VF2_obj.N_out_graph, index_M) + delNeighbours(VF2_obj.N_inc_graph, index_M) + delNeighbours(VF2_obj.N_out_pattern, index_M) + delNeighbours(VF2_obj.N_inc_pattern, index_M) + + return False + + def preferred(H, P, h, p, index_M, VF2_obj, N_graph, N_pattern): + """ + Try to match the adjacency vertices connected via outgoing + or incoming edges. (Depending on what is given for N_graph and + N_pattern.) + """ + for n in range(0, len(N_graph)): + # skip graph vertices that are not in VF2_obj.N_out_graph + # (or already matched) + if N_graph[n] == -1 or VF2_obj.core_graph[n]: + continue + for m in range(0, len(N_pattern)): + # skip graph vertices that are not in VF2_obj.N_out_pattern + # (or already matched) + if N_pattern[m] == -1 or VF2_obj.core_pattern[m]: + continue + if matchPhase(H, P, h, p, index_M, VF2_obj, n, m): + return True + + return False + + def leastPreferred(H, P, h, p, index_M, VF2_obj): + """ + Try to match the vertices that are not connected to the curretly + matched vertices. 
+ """ + for n in range(0, len(VF2_obj.N_out_graph)): + # skip vertices that are connected to the graph + # (or already matched) + if not (VF2_obj.N_out_graph[n] == -1 and VF2_obj.N_inc_graph[n] == -1) or VF2_obj.core_graph[n]: + continue + for m in range(0, len(VF2_obj.N_out_pattern)): + # skip vertices that are connected to the graph + # (or already matched) + if not (VF2_obj.N_out_pattern[m] == -1 and VF2_obj.N_inc_pattern[m] == -1) or VF2_obj.core_pattern[m]: + continue + if matchPhase(H, P, h, p, index_M, VF2_obj, n, m): + return True + + return False + + # We are at the end, we found an candidate. + if index_M == len(p): + bound_graph_vertices = {} + for vertex_bound, _ in VF2_obj.mapping.items(): + bound_graph_vertices.setdefault(vertex_bound.type, set()).add(vertex_bound) + + self.result = self.matchNaive(vertices=bound_graph_vertices, edges=self.graph.edges) + return self.result != None + + # try the candidates is the preffered order + # first try the adjacent vertices connected via the outgoing edges. + if preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_out_graph, VF2_obj.N_out_pattern): + return True + + # then try the adjacent vertices connected via the incoming edges. 
+ if preferred(H, P, h, p, index_M, VF2_obj, VF2_obj.N_inc_graph, VF2_obj.N_inc_pattern): + return True + + # and lastly, try the vertices not connected to the currently matched vertices + if leastPreferred(H, P, h, p, index_M, VF2_obj): + return True + + return False + + + # create adjecency matrix of the graph + H, h = self.createAdjacencyMatrixMap(self.graph) + # create adjecency matrix of the pattern + P, p = self.createAdjacencyMatrixMap(self.pattern) + + VF2_obj = VF2_Obj(len(h), len(p)) + + findM(H, P, h, p, VF2_obj) + + return self.result \ No newline at end of file diff --git a/pattern_matching/planGraph.py b/pattern_matching/planGraph.py new file mode 100644 index 0000000..2742ac2 --- /dev/null +++ b/pattern_matching/planGraph.py @@ -0,0 +1,528 @@ +# coding: utf-8 + +""" +Author: Sten Vercamman + Univeristy of Antwerp + +Example code for paper: Efficient model transformations for novices +url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen + +The main goal of this code is to give an overview, and an understandable +implementation, of known techniques for pattern matching and solving the +sub-graph homomorphism problem. The presented techniques do not include +performance adaptations/optimizations. It is not optimized to be efficient +but rather for the ease of understanding the workings of the algorithms. +The paper does list some possible extensions/optimizations. + +It is intended as a guideline, even for novices, and provides an in-depth look +at the workings behind various techniques for efficient pattern matching. +""" + +from searchGraph import * +from enum import * + +# Enum for all primitive operation types +# note: inc represent primitive operation in (as in is a reserved keyword in python) +PRIM_OP = Enum(['lkp', 'inc', 'out', 'src', 'tgt']) + +class PlanGraph(object): + """ + Holds the PlanGraph for a pattern. + Can create the search plan of the pattern for a given SearchGraph. 
def __init__(self, pattern):
    """
    Build the PlanGraph of the given pattern.

    Every vertex and every edge of the pattern becomes a vertex of the
    PlanGraph (flagged through is_vertex). The edges of the PlanGraph are
    labelled with the primitive operation (PRIM_OP) they stand for:
    tgt/src from an edge element to its endpoints, inc/out in the reverse
    direction, and lkp from the root to every element.

    Raises TypeError if pattern is not a Graph.
    """
    if not isinstance(pattern, Graph):
        raise TypeError('PlanGraph expects the pattern to be a Graph')
    # member variables:
    self.vertices = []  # will not be searched in
    self.edges = []     # will not be searched in

    def addPlanEdge(src, tgt, label, cost=None):
        """
        Create the plan edge src -> tgt for the primitive operation label,
        link it to both plan vertices and save it in self.edges.
        A cost is only assigned when one is given.
        """
        plan_edge = Edge(src, tgt)
        # backup src and tgt (Edmonds might override it)
        plan_edge.orig_src = plan_edge.src
        plan_edge.orig_tgt = plan_edge.tgt
        plan_edge.label = label
        # link vertices connected to this plan_edge
        plan_edge.src.addOutgoingEdge(plan_edge)
        plan_edge.tgt.addIncomingEdge(plan_edge)
        if cost is not None:
            plan_edge.cost = cost
            # backup orig cost, as Edmonds changes cost
            plan_edge.orig_cost = plan_edge.cost
        # save created edge
        self.edges.append(plan_edge)

    # representation map, maps vertex from pattern to element from PlanGraph
    # (no need for edges)
    repr_map = {}

    # 1.1: for every vertex in the pattern graph,
    # create a vertex representing the pattern element
    for str_type, vertices in pattern.vertices.items():
        for vertex in vertices:
            # we only need to know the type of the vertex
            plan_vertex = Vertex(str_type)
            # and we need to know that it was a vertex
            plan_vertex.is_vertex = True
            # for re-linking the edges, we'll need to map the
            # vertex of the pattern to the plan_vertex
            repr_map[vertex] = plan_vertex
            # save created plan_vertex
            self.vertices.append(plan_vertex)

    # tgt and src cost are always 1, we use logarithmic cost
    # (=> cost = ln(1) = 0.0), so that we do not need to minimize
    # a product, but can minimize a sum
    # (as ln(c1...ck) = ln(c1) + ... + ln(ck))
    endpoint_cost = 0.0

    # 1.2: for every edge in the pattern graph,
    # create a vertex representing the pattern element
    for edges in pattern.edges.values():
        for edge in edges:
            # we only need to know the type of the edge
            plan_vertex = Vertex(edge.type)
            # and we need to know that it was an edge
            plan_vertex.is_vertex = False
            # save created plan_vertex
            self.vertices.append(plan_vertex)

            # 4: for every element x from the PlanGraph
            # that represents an edge e in the pattern:
            # 4.1: create an edge labelled tgt from x to the vertex in the
            # PlanGraph representing the target vertex of e in the pattern
            # graph, and a reverted edge labelled in
            plan_tgt = repr_map[edge.tgt]
            addPlanEdge(plan_vertex, plan_tgt, PRIM_OP.tgt, endpoint_cost)
            addPlanEdge(plan_tgt, plan_vertex, PRIM_OP.inc)

            # 4.2: create an edge labelled src from x to the vertex in the
            # PlanGraph representing the source vertex of e in the pattern
            # graph, and a reverted edge labelled out
            plan_src = repr_map[edge.src]
            addPlanEdge(plan_vertex, plan_src, PRIM_OP.src, endpoint_cost)
            addPlanEdge(plan_src, plan_vertex, PRIM_OP.out)

    # 2: create a root vertex
    self.root = Vertex('root')
    # don't add it to the vertices

    # 3: for each element in the PlanGraph (that is not the root vertex),
    # create an edge from the root to it, and label it lkp
    for vertex in self.vertices:
        addPlanEdge(self.root, vertex, PRIM_OP.lkp)
+ """ + if not isinstance(graph, SearchGraph): + raise TypeError('updatePlanCost expects a SearchGraph') + # update, lkp, in and out (not src and tgt as they are constant) + + for edge in self.edges: + if edge.label == PRIM_OP.lkp: + edge.cost = graph.getCostLkp(edge.tgt.type, edge.tgt.is_vertex) + if edge.cost == None: + print('failed lkp') + return False + elif edge.label == PRIM_OP.inc: + # in(v, e), binds an incoming edge e from an already bound vertex v, + # depends on the number of incoming edges of type e for the vertex type + edge.cost = graph.getCostInc(edge.src.type, edge.tgt.type) + if edge.cost == None: + print('failed in') + return False + elif edge.label == PRIM_OP.out: + # (analogue for out(v, e)) + edge.cost = graph.getCostOut(edge.src.type, edge.tgt.type) + if edge.cost == None: + print('failed out') + return False + # else: ignore src and tgt + # backup orig cost, as Edmonds changes cost + edge.orig_cost = edge.cost + return True + + def Edmonds(self, searchGraph): + """ + Returns the minimum directed spanning tree (MDST) + for the pattern and the provided graph. + Returns None if it is impossible to find the pattern in the Graph + (vertex type of edge type from pattern not in Graph). 
+ """ + # update the cost for the PlanGraph + if not self.updatePlanCost(searchGraph): + print('type in pattern not found in Graph (in Edmonds)') + # (returns False if a type in the pattern can not be found in the graph) + return None + # Complete Edmonds algorithm has optimization steps: + # a: remove edges entering the root + # b: merge parallel edges from same src to same tgt with mim weight + # we can ignore this as: + # a: the root does not have incoming edges + # b: the PlanGraph does not have such paralllel edges + + # 1: for each node v (other than root), find incoming edge with lowest weight + # insert those + pi_v = {} + for plan_vertex in self.vertices: + min_weight = float('infinity') + min_edge = None + for plan_edge in plan_vertex.incoming_edges: + if plan_edge.cost < min_weight: + min_weight = plan_edge.cost + min_edge = plan_edge + # save plan_vertex and it's minimum incoming edge + pi_v[plan_vertex] = min_edge + if min_edge == None: + raise RuntimeError('baka: no min_edge found') + + def getCycle(vertex, reverse_graph, visited): + """ + Walk from vertex to root, we walk in a reverse order, as each vertex + only has one incoming edge, so we walk to the source of that incoming + edge. We stop when we already visited a vertex we walked on. + In both cases we return None. + When we visit a vertex from our current path, we return that cycle, + by first removing its tail. + """ + def addToVisited(walked, visited): + for vertex in walked: + visited.add(vertex) + + walked = [] # we could only save it once, but we need order + current_path = set() # and lookup in an array is slower than in set + # we asume root is in visited (it must be in it) + while vertex not in visited: + if vertex in current_path: + # we found a cycle, the cycle however might look like a: O--, + # g f e where we first visited a, then b, c, d,... 
+ # h d c b a k points back to d, completing a cycle, + # i j k but c b a is the tail that does not belong + # in the cycle, removing this is "easy" as we know that + # we first visited the tail, so they are the first elements + # in our walked path + for tail_part in walked: + if tail_part != vertex: + current_path.remove(tail_part) + else: + break + + addToVisited(walked, visited) + return current_path + current_path.add(vertex) + walked.append(vertex) + # by definition, an MDST only has one incoming edge per vertex + # so we follow it upwards + # vertex <--(minimal edge)-- src + vertex = reverse_graph[vertex].src + + # no cycle found (the current path let to a visited vertex) + addToVisited(walked, visited) # add walked to visited + return None + + class VertexGraph(Vertex): + """ + Acts as a super vertex, holds a subgraph (that is/was once a cyle). + Uses for Edmonds contractions step. + The incoming edges are the edges leading to the vertices in the + VertexGraph (they exclude edges from a vertex in the cycle to + another vertex in the cycle). + Analogue for outgoing edges. + """ + def __init__(self, cycle, reverseGraph): + # Call parent class constructor + str_type = '' + for vertex in cycle: + str_type += str(vertex.type) + Vertex.__init__(self, str_type) + # member variables: + self.internalMDST = {} + + minIntWeight = self.findMinIntWeight(cycle, reverseGraph) + self.updateMinExtEdge(minIntWeight, reverseGraph) + + + def findMinIntWeight(self, cycle, reverseGraph): + """ + Find the the smallest cost of the cycle his internal incoming edges. + (Also save its internalMDST (currently a cycle).) + (The VertexGraph formed by the cycle will be added to the + reverseGraph by calling findMinExtEdge.) 
+ """ + minIntWeight = float('infinity') + + cycleEdges = [] + origTgts = [] + for cyclePart in cycle: + cycleEdges.append(reverseGraph[cyclePart]) + origTgts.append(reverseGraph[cyclePart].orig_tgt) + + for vertex in cycle: + # add incoming edges to this VertexGraph + for inc_edge in vertex.incoming_edges: + # edge from within the cycle + if inc_edge.src in cycle: + minIntWeight = min(minIntWeight, inc_edge.cost) + else: + # edge from outside the cycle + self.addIncomingEdge(inc_edge) + # add outgoing edges to this VertexGraph + for out_edge in vertex.outgoing_edges: + if out_edge.tgt not in cycle: + # edge leaves the cycle + self.addOutgoingEdge(out_edge) + # update src to this VertexGraph + out_edge.src = self + # save internal MDST + min_edge = reverseGraph[vertex] + if min_edge.src in cycle: + self.internalMDST[vertex] = min_edge + else: + raise TypeError('how is this a cycle') + + return minIntWeight + + def updateMinExtEdge(self, minIntWeight, reverseGraph): + """ + Modifies all external incoming edges their cost and finds the + minimum external incoming edge with this modified weight. + This found edge will break the cycle, update the internalMDST + from a cycle to an MDST, updates the reverseGraph to include + the vertexGraph. + """ + minExt = None + minModWeight = -float('infinity') + + # Find incoming edge from outside of the circle with minimal + # modified cost. This edge will break the cycle. + for inc_edge in self.incoming_edges: + # An incoming edge (with src from within the cycle), can be + # from a contracted part of the graph. Assume bc is a + # contracted part (VertexGraph) a, bc is a newly formed + # cycle (due to the breaking of the previous cycle bc). bc + # has at least lkp incoming edges to b and c, but we should + # not consider the lkp of c to break the cycle. + # If we want to break a, bc, select plausable edges, + # /<--\ + # a bc bc's MDST b <-- c + # \-->/ + # by looking at their original targets. 
+ # (if cycle inc_edge.orig_tgt == external inc_edge.orig_tgt) + if reverseGraph[inc_edge.tgt].orig_tgt == inc_edge.orig_tgt: + # modify costL cost of inc_edge - + # (cost of previously choosen minimum edge to cycle vertex - minIntWeight) + inc_edge.cost -= (reverseGraph[inc_edge.tgt].cost - minIntWeight) + if minExt is None or minModWeight > inc_edge.cost: + # save better edge from outside of the cycle + minExt = inc_edge + minModWeight = inc_edge.cost + + # Example: a, b is a cycle (we know that there are no other + # incoming edges to a and/or b, as there is on;y exactly one + # incoming edge per vertex), and the arow from c to b represents + # the minExt edge. We will remove the bottem arrow (from a to b) + # /<--\ and save the minExt edge in the reverseGraph. + # a b <-- c This breaks the cycle. As the internalMDST + # \-->/ saves the intenal MDST, and currently still + # holds a cycle, we have to remove it from the internalMDST. + # We have to remove all vertex bindings of the cycle from the + # reverseGraph (as it is contracted into a single VertexGraph), + # and store the minExt edge to this VertexGraph in it. 
class SortedContainer(object):
    """
    A container that hands out its elements ordered by their sortValue.

    Elements are grouped per sortValue. pop() always returns an element
    with the smallest sortValue currently stored; elements that share the
    same sortValue are returned in the order they got inserted (FIFO).
    """
    def __init__(self):
        # member variables:
        self.keys   = []    # distinct sortValues (put in sorted order when pop gets called)
        self.sorted = {}    # {sortValue: [elements with that sortValue, oldest first]}

    def add(self, sortValue, element):
        """
        Adds element with sortValue to the SortedContainer.
        """
        elems = self.sorted.get(sortValue)
        if elems is None:
            # first element with this sortValue: register the new key
            self.sorted[sortValue] = [element]
            self.keys.append(sortValue)
        else:
            elems.append(element)

    def pop(self):
        """
        Removes and returns the oldest element with the smallest sortValue.

        Raises an IndexError if the SortedContainer is empty.
        """
        if not self.keys:
            raise IndexError('pop from empty SortedContainer')
        # keys are only appended in add(), so (re)sort them before use
        self.keys.sort()
        smallest = self.keys[0]
        elems = self.sorted[smallest]
        # pop(0) takes the element that was inserted first; a plain pop()
        # would return the newest one and break the documented ordering
        elem = elems.pop(0)
        if not elems:
            # no elements left for this sortValue, forget the key
            del self.sorted[smallest]
            del self.keys[0]
        return elem

    def empty(self):
        """
        Returns whether or not the sorted container is empty.
        """
        return not self.keys
def createForwardGraph(vertex, inc_edge, forwardGraph):
    """
    Registers inc_edge in forwardGraph as an outgoing edge of its
    original source vertex (inc_edge.orig_src).
    If vertex is a VertexGraph, the edges stored in its internalMDST
    are registered as well (recursively).
    """
    # every vertex, VertexGraph or not, contributes its own incoming edge
    outgoing_edges = forwardGraph.setdefault(inc_edge.orig_src, [])
    outgoing_edges.append(inc_edge)

    # a VertexGraph additionally contributes the edges it contracted
    if isinstance(vertex, VertexGraph):
        for inner_vertex, inner_edge in vertex.internalMDST.items():
            createForwardGraph(inner_vertex, inner_edge, forwardGraph)
] } + reverseGraph = {} + + # flatten reverseGraph (for the vertexGraph elements) + for v, e in pi_v.items(): + flattenReverseGraph(v, e, reverseGraph) + + # create the forwardGraph + for vertex, edge in reverseGraph.items(): + createForwardGraph(vertex, edge, forwardGraph) + + # create the MDST in a best first manner (lowest value first) + current = SortedContainer() # allows easy walking true tree + for edge in forwardGraph[self.root]: + current.add(edge.orig_cost, edge) # use orig cost, not modified + while current.empty() != True: + p_op = current.pop() # p_op contains an outgoing edge + MDST.append(createPRIM_OP(p_op)) + for edge in forwardGraph.get(p_op.orig_tgt, []): + current.add(edge.orig_cost, edge) + return MDST \ No newline at end of file diff --git a/pattern_matching/run.sh b/pattern_matching/run.sh new file mode 100755 index 0000000..08e7247 --- /dev/null +++ b/pattern_matching/run.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +python main.py +dot randomGraph.dot -Tsvg > randomGraph.svg +dot randomPattern.dot -Tsvg > randomPattern.svg + +firefox randomGraph.svg +firefox randomPattern.svg \ No newline at end of file diff --git a/pattern_matching/searchGraph.py b/pattern_matching/searchGraph.py new file mode 100644 index 0000000..cb3c359 --- /dev/null +++ b/pattern_matching/searchGraph.py @@ -0,0 +1,115 @@ +# coding: utf-8 + +""" +Author: Sten Vercamman + Univeristy of Antwerp + +Example code for paper: Efficient model transformations for novices +url: http://msdl.cs.mcgill.ca/people/hv/teaching/MSBDesign/projects/Sten.Vercammen + +The main goal of this code is to give an overview, and an understandable +implementation, of known techniques for pattern matching and solving the +sub-graph homomorphism problem. The presented techniques do not include +performance adaptations/optimizations. It is not optimized to be efficient +but rather for the ease of understanding the workings of the algorithms. +The paper does list some possible extensions/optimizations. 
class SearchGraph(Graph):
    """
    A SearchGraph is an extended Graph, it keeps track of statistics
    for creating the cost model when generating a search plan.
    It stores the amount of edges for each edge.type per vertex.type.
    """
    def __init__(self, orig=None, deepCopy=False):
        """
        Creates an empty SearchGraph, or one that shares the vertices and
        edges of orig (a Graph or SearchGraph).
        Raises a TypeError if orig is given but is not a Graph.
        """
        Graph.__init__(self)
        # member variables:
        self.nr_of_inc_edges = {}   # {vertex_type: {edge_type: nr of incoming edges of edge_type for vertex_type}}
        self.nr_of_out_edges = {}   # {vertex_type: {edge_type: nr of outgoing edges of edge_type for vertex_type}}

        if orig is not None:
            # SearchGraph derives from Graph, so this single check
            # accepts both types
            if not isinstance(orig, Graph):
                raise TypeError('Can only create SearchGraph from Graph and SearchGraph types')
            if not deepCopy:
                # copy all member elements:
                self.vertices = orig.vertices   # this is a reference
                self.edges    = orig.edges      # this is a reference
                # update the edge counters for each edge
                for edges in self.edges.values():
                    for edge in edges:
                        self.addToEdgeCounters(edge)
            else:   # TODO: deepcopy (not really needed)
                # NOTE: nothing is copied from orig in this case
                pass

    def addCreateEdge(self, src, tgt, str_type):
        """
        Creates edge of str_type from src to tgt, and returns it,
        so that properties can be added to the edge.
        This also adds the Edge to the Edge counters.
        """
        # call parent function, this function is an extension
        edge = Graph.addCreateEdge(self, src, tgt, str_type)
        # the counters are maintained by addToEdgeCounters (this class
        # does not define a method named updateEdgeCounters)
        self.addToEdgeCounters(edge)
        return edge

    def addToEdgeCounters(self, edge):
        """
        Add the Edge to the Edge counters.
        """
        # get {edge.type: counter} for tgt vertex of edge (or create it)
        edge_counters = self.nr_of_inc_edges.setdefault(edge.tgt.type, {})
        # increase counter of edge.type by 1
        edge_counters[edge.type] = edge_counters.get(edge.type, 0) + 1
        # get {edge.type: counter} for src vertex of edge (or create it)
        edge_counters = self.nr_of_out_edges.setdefault(edge.src.type, {})
        # increase counter of edge.type by 1
        edge_counters[edge.type] = edge_counters.get(edge.type, 0) + 1

    def getCostLkp(self, type, is_vertex):
        """
        Returns the cost of a lkp primitive operation (of a vertex or edge).
        Returns None if vertex type or edge type not present in Host Graph
        """
        if is_vertex:
            cost = len(self.getVerticesOfType(type))
        else:
            cost = len(self.getEdgesOfType(type))
        if cost == 0:
            return None
        # we use a logarithmic cost
        return math.log(cost)

    def _getAverageEdgeCost(self, edge_counters, vertex_type, edge_type):
        """
        Shared implementation of getCostInc and getCostOut.
        edge_counters is self.nr_of_inc_edges or self.nr_of_out_edges.
        Returns the logarithm of the number of counted edge_type edges
        per vertex of vertex_type, or None if no such edge was counted.
        """
        count = edge_counters.get(vertex_type, {}).get(edge_type)
        if count is None:
            # unknown vertex_type/edge_type combination; test this before
            # converting, as float(None) raises a TypeError
            return None
        cost = float(count)
        nr_of_vertices_with_type = len(self.getVerticesOfType(vertex_type))
        if nr_of_vertices_with_type != 0:
            cost /= nr_of_vertices_with_type
        # we use a logarithmic cost
        return math.log(cost)

    def getCostInc(self, vertex_type, edge_type):
        """
        Returns the cost of an in primitive operation.
        Returns None if vertex_type or edge_type not present in Host Graph
        """
        return self._getAverageEdgeCost(self.nr_of_inc_edges, vertex_type, edge_type)

    def getCostOut(self, vertex_type, edge_type):
        """
        Returns the cost of an out primitive operation.
        Returns None if vertex_type or edge_type not present in Host Graph
        """
        return self._getAverageEdgeCost(self.nr_of_out_edges, vertex_type, edge_type)