parser prints line numbers AND gives an error if you use anonymous objects/links in LHS of a rule

2024-12-11 20:32:55 +01:00 · 2024-12-11 20:32:55 +01:00 · 6314506ac0
commit 6314506ac0
parent c7288635f8
2 changed files with 102 additions and 46 deletions
--- a/concrete_syntax/textual_od/parser.py
+++ b/concrete_syntax/textual_od/parser.py
@ -1,10 +1,10 @@
 # Parser for Object Diagrams textual concrete syntax

-from lark import Lark, logger
+from lark import Lark, logger, Transformer
 from lark.indenter import Indenter
 from api.od import ODAPI
 from services.scd import SCD
-from concrete_syntax.common import _Code, TBase
+from concrete_syntax.common import _Code
 from uuid import UUID

 grammar = r"""
@ -41,11 +41,25 @@ rev_link_spec: "(" IDENTIFIER "<-" IDENTIFIER ")"
 slot: IDENTIFIER "=" literal ";"
 """

-parser = Lark(grammar, parser='lalr')
+parser = Lark(grammar, parser='lalr', propagate_positions=True)
+
+class DefaultNameGenerator:  # callable that produces names of the form "__<type_name>_<n>" for elements that were given no name
+    def __init__(self):
+        self.counter = 0  # suffix that the next generated name will get
+
+    def __call__(self, type_name):
+        name = f"__{type_name}_{self.counter}"
+        self.counter += 1  # a single counter serves all type names, so a suffix is never reused by this instance
+        return name

 # given a concrete syntax text string, and a meta-model, parses the CS
 # Parameter 'type_transform' is useful for adding prefixes to the type names, when parsing a model and pretending it is an instance of a prefixed meta-model.
-def parse_od(state, m_text, mm, type_transform=lambda type_name: type_name):
+def parse_od(state,
+    m_text, # text to parse
+    mm, # meta-model of model that will be parsed. The meta-model must already have been parsed.
+    type_transform=lambda type_name: type_name,
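+    name_generator=DefaultNameGenerator(), # callable that takes a type name and returns the name to use for an anonymous (nameless) object/link. Pass a generator that raises an exception to forbid anonymous elements, e.g. in the LHS of transformation rules. Note: this default instance is created once, so its counter is shared by all calls that rely on the default.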
+):
    tree = parser.parse(m_text)

    m = state.create_node()
@ -56,47 +70,77 @@ def parse_od(state, m_text, mm, type_transform=lambda type_name: type_name):
            for type_name in ["Integer", "String", "Boolean", "ActionCode"]
    }

-    class T(TBase):
+    class T(Transformer):
        def __init__(self, visit_tokens):
            super().__init__(visit_tokens)
-            self.obj_counter = 0 # used for generating unique names for anonymous objects
+
+        def IDENTIFIER(self, token):  # pair the identifier text with the line it was found on
+            return (str(token), token.line)
+        
+        def INT(self, token):  # convert the token text to int and pair it with the token's line
+            return (int(token), token.line)
+
+        def BOOL(self, token):
+            return (token == "True", token.line)  # any text other than exactly "True" yields False
+
+        def STR(self, token):
+            return (str(token[1:-1]), token.line) # drop the first and last character, i.e. the surrounding "" or ''
+
+        def CODE(self, token):
+            return (_Code(str(token[1:-1])), token.line) # drop the first and last character (the surrounding ``) and wrap the rest in _Code
+
+        def INDENTED_CODE(self, token):  # un-indent a ```-fenced code block, wrap it in _Code and pair it with token.line
+            skip = 4 # strip the ``` and the following newline character
+            space_count = 0
+            while token[skip+space_count] == " ":  # count the leading spaces of the first line after the opening ```
+                space_count += 1
+            lines = token.split('\n')[1:-1]  # drop the first and the last line (presumably the opening and closing ``` lines)
+            for line in lines:
+                if len(line) >= space_count and line[0:space_count] != ' '*space_count:  # lines shorter than the indent (e.g. blank lines) are not checked
+                    raise Exception("wrong indentation of INDENTED_CODE")
+            unindented_lines = [l[space_count:] for l in lines]  # cut the first space_count characters off every line
+            return (_Code('\n'.join(unindented_lines)), token.line)
+
+        def literal(self, el):
+            return el[0]  # forward the first child unchanged; slot() unpacks it as a (value, line) pair

        def link_spec(self, el):
-            [src, tgt] = el
-            return (src, tgt)
+            [(src, src_line), (tgt, _)] = el  # both children are (name, line) pairs; the target's line is discarded
+            return (src, tgt, src_line)  # the reported line is that of the source identifier

        def rev_link_spec(self, el):
-            [tgt, src] = el # <-- reversed :)
-            return (src, tgt)
+            [(tgt, tgt_line), (src, _)] = el # <-- reversed :) the target is written first, the source second
+            return (src, tgt, tgt_line)  # same (src, tgt, line) order as link_spec; the line is that of the first identifier (the target)

        def type_name(self, el):
-            type_name = el[0]
+            type_name, line = el[0]  # el[0] is a (name, line) pair
            if type_name in primitive_types:
-                return type_name
+                return (type_name, line)  # primitive type names are returned without applying type_transform
            else:
-                return type_transform(el[0])
+                return (type_transform(type_name), line)  # every other name goes through the caller-supplied type_transform
        
        def slot(self, el):
-            [attr_name, value] = el
-            return (attr_name, value)
+            [(attr_name, line), (value, _)] = el  # both children are (value, line) pairs; the literal's own line is discarded
+            return (attr_name, value, line)  # the reported line is that of the attribute name
        
        def object(self, el):
-            [obj_name, type_name, link] = el[0:3]
+            [obj, (type_name, line), link] = el[0:3]
            slots = el[3:]
+            try:
+                if obj != None:
+                    (obj_name, _) = obj
+                else:
+                    # anonymous object - auto-generate a name
+                    obj_name = name_generator(type_name)
                if state.read_dict(m, obj_name) != None:
                    msg = f"Element '{obj_name}:{type_name}': name '{obj_name}' already in use."
-                # raise Exception(msg + " Names must be unique")
-                print(msg + " Ignoring.")
+                    raise Exception(msg + " Names must be unique")
+                    # print(msg + " Ignoring.")
                    return
-            if obj_name == None:
-                # object/link names are optional
-                #  generate a unique name if no name given
-                obj_name = f"__{type_name}_{self.obj_counter}"
-                self.obj_counter += 1
                if link == None:
                    obj_node = od.create_object(obj_name, type_name)
                else:
-                src, tgt = link
+                    (src, tgt, _) = link
                    if tgt in primitive_types:
                        if state.read_dict(m, tgt) == None:
                            scd = SCD(m, state)
@ -105,13 +149,16 @@ def parse_od(state, m_text, mm, type_transform=lambda type_name: type_name):
                    tgt_obj = od.get(tgt)
                    obj_node = od.create_link(obj_name, type_name, src_obj, tgt_obj)
                # Create slots
-            for attr_name, value in slots:
+                for attr_name, value, line in slots:
                    if isinstance(value, _Code):
                        od.set_slot_value(obj_node, attr_name, value.code, is_code=True)
                    else:
                        od.set_slot_value(obj_node, attr_name, value)

                return obj_name
+            except Exception as e:
+                # raising a *new* exception (instead of adding a note to the existing exception) because Lark will also raise a new exception, and ignore our note:
+                raise Exception(f"at line {line}:\n  " + m_text.split('\n')[line-1] + "\n"+ str(e)) from e

    t = T(visit_tokens=True).transform(tree)

--- a/util/loader.py
+++ b/util/loader.py
@ -5,13 +5,14 @@ from concrete_syntax.common import indent
 from transformation.rule import Rule

 # parse model and check conformance
-def parse_and_check(state, m_cs, mm, descr: str, check_conformance=True, type_transform=lambda type_name: type_name):
+def parse_and_check(state, m_cs, mm, descr: str, check_conformance=True, type_transform=lambda type_name: type_name, name_generator=parser.DefaultNameGenerator()):
    try:
        m = parser.parse_od(
            state,
            m_text=m_cs,
            mm=mm,
            type_transform=type_transform,
+            name_generator=name_generator,
        )
    except Exception as e:
        e.add_note("While parsing model " + descr)
@ -35,6 +36,11 @@ def read_file(filename):

 KINDS = ["nac", "lhs", "rhs"]

+# Phony name generator that raises an error if you try to use it :)
+class LHSNameGenerator:  # passed as name_generator when parsing the "lhs" model of a rule
+    def __call__(self, type_name):  # same call signature as parser.DefaultNameGenerator, but it never returns a name
+        raise Exception(f"Error: Object or link of type '{type_name}' does not have a name.\nAnonymous objects/links are not allowed in the LHS of a rule, because they can have unintended consequences. Please give all of the elements in the LHS explicit names.")
+
 # load model transformation rules
 def load_rules(state, get_filename, rt_mm_ramified, rule_names, check_conformance=True):
    rules = {}
@ -62,8 +68,11 @@ def load_rules(state, get_filename, rt_mm_ramified, rule_names, check_conformanc
                    if suffix == "":
                        print(f"Warning: rule {rule_name} has no NAC ({filename} not found)")
                return nacs
-            elif kind == "lhs" or kind == "rhs":
+            else:
                try:
+                    if kind == "lhs":
+                        m = parse_and_check(state, read_file(filename), rt_mm_ramified, descr, check_conformance, name_generator=LHSNameGenerator())
+                    elif kind == "rhs":
                        m = parse_and_check(state, read_file(filename), rt_mm_ramified, descr, check_conformance)
                    files_read.append(filename)
                    return m