From 6314506ac0441d9c44c07f2d0448c6a79a188188 Mon Sep 17 00:00:00 2001
From: Joeri Exelmans <joeri.exelmans@gmail.com>
Date: Wed, 11 Dec 2024 20:32:55 +0100
Subject: [PATCH] parser prints line numbers AND gives an error if you use
 anonymous objects/links in LHS of a rule

---
 concrete_syntax/textual_od/parser.py | 133 ++++++++++++++++++---------
 util/loader.py                       |  15 ++-
 2 files changed, 102 insertions(+), 46 deletions(-)

diff --git a/concrete_syntax/textual_od/parser.py b/concrete_syntax/textual_od/parser.py
index 054f1bf..b679210 100644
--- a/concrete_syntax/textual_od/parser.py
+++ b/concrete_syntax/textual_od/parser.py
@@ -1,10 +1,10 @@
 # Parser for Object Diagrams textual concrete syntax
 
-from lark import Lark, logger
+from lark import Lark, logger, Transformer
 from lark.indenter import Indenter
 from api.od import ODAPI
 from services.scd import SCD
-from concrete_syntax.common import _Code, TBase
+from concrete_syntax.common import _Code
 from uuid import UUID
 
 grammar = r"""
@@ -41,11 +41,25 @@ rev_link_spec: "(" IDENTIFIER "<-" IDENTIFIER ")"
 slot: IDENTIFIER "=" literal ";"
 """
 
-parser = Lark(grammar, parser='lalr')
+parser = Lark(grammar, parser='lalr', propagate_positions=True)
+
+class DefaultNameGenerator:
+    def __init__(self):
+        self.counter = 0
+
+    def __call__(self, type_name):
+        name = f"__{type_name}_{self.counter}"
+        self.counter += 1
+        return name
 
 # given a concrete syntax text string, and a meta-model, parses the CS
 # Parameter 'type_transform' is useful for adding prefixes to the type names, when parsing a model and pretending it is an instance of a prefixed meta-model.
-def parse_od(state, m_text, mm, type_transform=lambda type_name: type_name):
+def parse_od(state,
+    m_text, # text to parse
+    mm, # meta-model of model that will be parsed. The meta-model must already have been parsed.
+    type_transform=lambda type_name: type_name,
+    name_generator=DefaultNameGenerator(), # callable (type_name -> name) that produces a name for every anonymous (nameless) object/link in the model. It may raise an exception instead, e.g., to forbid anonymous elements in the LHS of transformation rules.
+):
     tree = parser.parse(m_text)
 
     m = state.create_node()
@@ -56,62 +70,95 @@ def parse_od(state, m_text, mm, type_transform=lambda type_name: type_name):
             for type_name in ["Integer", "String", "Boolean", "ActionCode"]
     }
 
-    class T(TBase):
+    class T(Transformer):
         def __init__(self, visit_tokens):
             super().__init__(visit_tokens)
-            self.obj_counter = 0 # used for generating unique names for anonymous objects
+
+        def IDENTIFIER(self, token):
+            return (str(token), token.line)
+        
+        def INT(self, token):
+            return (int(token), token.line)
+
+        def BOOL(self, token):
+            return (token == "True", token.line)
+
+        def STR(self, token):
+            return (str(token[1:-1]), token.line) # strip the "" or ''
+
+        def CODE(self, token):
+            return (_Code(str(token[1:-1])), token.line) # strip the ``
+
+        def INDENTED_CODE(self, token):
+            skip = 4 # strip the ``` and the following newline character
+            space_count = 0
+            while token[skip+space_count] == " ":
+                space_count += 1
+            lines = token.split('\n')[1:-1]
+            for line in lines:
+                if len(line) >= space_count and line[0:space_count] != ' '*space_count:
+                    raise Exception("wrong indentation of INDENTED_CODE")
+            unindented_lines = [l[space_count:] for l in lines]
+            return (_Code('\n'.join(unindented_lines)), token.line)
+
+        def literal(self, el):
+            return el[0]
 
         def link_spec(self, el):
-            [src, tgt] = el
-            return (src, tgt)
+            [(src, src_line), (tgt, _)] = el
+            return (src, tgt, src_line)
 
         def rev_link_spec(self, el):
-            [tgt, src] = el # <-- reversed :)
-            return (src, tgt)
+            [(tgt, tgt_line), (src, _)] = el # <-- reversed :)
+            return (src, tgt, tgt_line)
 
         def type_name(self, el):
-            type_name = el[0]
+            type_name, line = el[0]
             if type_name in primitive_types:
-                return type_name
+                return (type_name, line)
             else:
-                return type_transform(el[0])
+                return (type_transform(type_name), line)
         
         def slot(self, el):
-            [attr_name, value] = el
-            return (attr_name, value)
+            [(attr_name, line), (value, _)] = el
+            return (attr_name, value, line)
         
         def object(self, el):
-            [obj_name, type_name, link] = el[0:3]
+            [obj, (type_name, line), link] = el[0:3]
             slots = el[3:]
-            if state.read_dict(m, obj_name) != None:
-                msg = f"Element '{obj_name}:{type_name}': name '{obj_name}' already in use."
-                # raise Exception(msg + " Names must be unique")
-                print(msg + " Ignoring.")
-                return
-            if obj_name == None:
-                # object/link names are optional
-                #  generate a unique name if no name given
-                obj_name = f"__{type_name}_{self.obj_counter}"
-                self.obj_counter += 1
-            if link == None:
-                obj_node = od.create_object(obj_name, type_name)
-            else:
-                src, tgt = link
-                if tgt in primitive_types:
-                    if state.read_dict(m, tgt) == None:
-                        scd = SCD(m, state)
-                        scd.create_model_ref(tgt, primitive_types[tgt])
-                src_obj = od.get(src)
-                tgt_obj = od.get(tgt)
-                obj_node = od.create_link(obj_name, type_name, src_obj, tgt_obj)
-            # Create slots
-            for attr_name, value in slots:
-                if isinstance(value, _Code):
-                    od.set_slot_value(obj_node, attr_name, value.code, is_code=True)
+            try:
+                if obj != None:
+                    (obj_name, _) = obj
                 else:
-                    od.set_slot_value(obj_node, attr_name, value)
+                    # anonymous object - auto-generate a name
+                    obj_name = name_generator(type_name)
+                if state.read_dict(m, obj_name) != None:
+                    msg = f"Element '{obj_name}:{type_name}': name '{obj_name}' already in use."
+                    raise Exception(msg + " Names must be unique")
+                    # print(msg + " Ignoring.")
+                    return
+                if link == None:
+                    obj_node = od.create_object(obj_name, type_name)
+                else:
+                    (src, tgt, _) = link
+                    if tgt in primitive_types:
+                        if state.read_dict(m, tgt) == None:
+                            scd = SCD(m, state)
+                            scd.create_model_ref(tgt, primitive_types[tgt])
+                    src_obj = od.get(src)
+                    tgt_obj = od.get(tgt)
+                    obj_node = od.create_link(obj_name, type_name, src_obj, tgt_obj)
+                # Create slots
+                for attr_name, value, line in slots:
+                    if isinstance(value, _Code):
+                        od.set_slot_value(obj_node, attr_name, value.code, is_code=True)
+                    else:
+                        od.set_slot_value(obj_node, attr_name, value)
 
-            return obj_name
+                return obj_name
+            except Exception as e:
+                # raising a *new* exception (instead of adding a note to the existing exception) because Lark will also raise a new exception, and ignore our note:
+                raise Exception(f"at line {line}:\n  " + m_text.split('\n')[line-1] + "\n"+ str(e)) from e
 
     t = T(visit_tokens=True).transform(tree)
 
diff --git a/util/loader.py b/util/loader.py
index e9655c9..4a29d63 100644
--- a/util/loader.py
+++ b/util/loader.py
@@ -5,13 +5,14 @@ from concrete_syntax.common import indent
 from transformation.rule import Rule
 
 # parse model and check conformance
-def parse_and_check(state, m_cs, mm, descr: str, check_conformance=True, type_transform=lambda type_name: type_name):
+def parse_and_check(state, m_cs, mm, descr: str, check_conformance=True, type_transform=lambda type_name: type_name, name_generator=parser.DefaultNameGenerator()):
     try:
         m = parser.parse_od(
             state,
             m_text=m_cs,
             mm=mm,
             type_transform=type_transform,
+            name_generator=name_generator,
         )
     except Exception as e:
         e.add_note("While parsing model " + descr)
@@ -35,6 +36,11 @@ def read_file(filename):
 
 KINDS = ["nac", "lhs", "rhs"]
 
+# Name generator used when parsing the LHS of a rule: it never generates a name, but raises an error as soon as an anonymous object/link is encountered.
+class LHSNameGenerator:
+    def __call__(self, type_name):
+        raise Exception(f"Error: Object or link of type '{type_name}' does not have a name.\nAnonymous objects/links are not allowed in the LHS of a rule, because they can have unintended consequences. Please give all of the elements in the LHS explicit names.")
+
 # load model transformation rules
 def load_rules(state, get_filename, rt_mm_ramified, rule_names, check_conformance=True):
     rules = {}
@@ -62,9 +68,12 @@ def load_rules(state, get_filename, rt_mm_ramified, rule_names, check_conformanc
                     if suffix == "":
                         print(f"Warning: rule {rule_name} has no NAC ({filename} not found)")
                 return nacs
-            elif kind == "lhs" or kind == "rhs":
+            else:
                 try:
-                    m = parse_and_check(state, read_file(filename), rt_mm_ramified, descr, check_conformance)
+                    if kind == "lhs":
+                        m = parse_and_check(state, read_file(filename), rt_mm_ramified, descr, check_conformance, name_generator=LHSNameGenerator())
+                    elif kind == "rhs":
+                        m = parse_and_check(state, read_file(filename), rt_mm_ramified, descr, check_conformance)
                     files_read.append(filename)
                     return m
                 except FileNotFoundError as e: