From 8e6a90ef01ca747b9012daa5b61058264d33c767 Mon Sep 17 00:00:00 2001
From: Joeri Exelmans
Date: Wed, 1 Mar 2023 17:58:05 +0100
Subject: [PATCH] Added abstract syntax and parser.

---
Note: the contents of abstract_syntax.py and parser.py were revised after
this patch was generated, so the blob ids on their "index" lines are stale.

 .gitignore                     |   1 +
 README.md                      |   1 +
 src/xopp2py/__init__.py        |   0
 src/xopp2py/abstract_syntax.py |  50 +++++++++++++++
 src/xopp2py/main.py            |  12 ++++
 src/xopp2py/parser.py          | 109 +++++++++++++++++++++++++++++++++
 6 files changed, 173 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 src/xopp2py/__init__.py
 create mode 100644 src/xopp2py/abstract_syntax.py
 create mode 100644 src/xopp2py/main.py
 create mode 100644 src/xopp2py/parser.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ba0430d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__/
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..317d322
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+Python interface to .xopp (Xournal++) files.
diff --git a/src/xopp2py/__init__.py b/src/xopp2py/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/xopp2py/abstract_syntax.py b/src/xopp2py/abstract_syntax.py
new file mode 100644
index 0000000..e276ed6
--- /dev/null
+++ b/src/xopp2py/abstract_syntax.py
@@ -0,0 +1,50 @@
+# Abstract syntax of the concrete syntax of Xournal++
+
+from dataclasses import dataclass
+from decimal import Decimal
+
+@dataclass
+class Header:
+    """Attributes of the root <xournal> element."""
+    creator: str # e.g., "Xournal++ 1.1.2"
+    fileversion: int # e.g., 4
+
+@dataclass
+class Background:
+    """The <background> element of a page."""
+    type: str # could be enum
+    color: str # could be class
+    style: str # could be enum
+
+@dataclass
+class Stroke:
+    """A <stroke> element of a layer."""
+    values: str # Just the XML text in between the tags. Meaning: stroke positions.
+    attributes: dict[str,str] # just the XML attributes as encountered
+
+@dataclass
+class Text:
+    """A <text> element of a layer."""
+    text: str
+    attributes: dict[str,str] # just the XML attributes as encountered
+
+@dataclass
+class Layer:
+    """A <layer> element: its strokes and texts, in document order."""
+    elements: list[Text | Stroke]
+
+@dataclass
+class Page:
+    """A <page> element."""
+    width: Decimal
+    height: Decimal
+    background: Background
+    layers: list[Layer]
+
+@dataclass
+class XournalFile:
+    """A complete .xopp document."""
+    header: Header
+    title: str # obscure feature
+    preview: bytes # PNG-encoded preview of the (first page) of the file
+    pages: list[Page]
diff --git a/src/xopp2py/main.py b/src/xopp2py/main.py
new file mode 100644
index 0000000..2fb8b3f
--- /dev/null
+++ b/src/xopp2py/main.py
@@ -0,0 +1,12 @@
+# Command-line tool that demonstrates how to use this library.
+
+import argparse
+
+if __name__ == "__main__":
+    argparser = argparse.ArgumentParser(
+        description = "Python interface for Xournal++ (.xopp) files.")
+    argparser.add_argument('filename')
+    args = argparser.parse_args() # exits on error
+
+    from .parser import parseFile
+    print(parseFile(args.filename))
diff --git a/src/xopp2py/parser.py b/src/xopp2py/parser.py
new file mode 100644
index 0000000..defc091
--- /dev/null
+++ b/src/xopp2py/parser.py
@@ -0,0 +1,109 @@
+"""Parser that turns a Xournal++ (.xopp) file into its abstract syntax."""
+
+import base64
+import gzip
+from decimal import Decimal
+from xml.etree import ElementTree
+
+from . import abstract_syntax
+
+
+class Error(Exception):
+    """Raised when a file contains something this parser cannot handle."""
+
+
+def parseFile(path) -> abstract_syntax.XournalFile:
+    """Parse a .xopp file (gzip-compressed XML) into a XournalFile.
+
+    Raises Error on an unsupported tag, a missing <xournal> root or a page
+    without <background>. Errors from gzip and ElementTree are not caught.
+    """
+
+    # The helpers below share one iterparse() event stream. Attributes are
+    # read on "start" events; text is read on "end" events only, because
+    # iterparse() leaves element.text undefined until the element is closed.
+
+    def parseLayer(context):
+        """Consume events up to the end of the layer; return the Layer."""
+        elements = []
+        for (event, element) in context:
+            if event == "start":
+                if element.tag not in ("text", "stroke"):
+                    raise Error("Unsupported tag:" + element.tag)
+            elif element.tag == "text":
+                elements.append(
+                    abstract_syntax.Text(
+                        text=element.text or "",
+                        attributes=dict(element.attrib)))
+            elif element.tag == "stroke":
+                elements.append(
+                    abstract_syntax.Stroke(
+                        values=element.text or "",
+                        attributes=dict(element.attrib)))
+            elif element.tag == "layer":
+                return abstract_syntax.Layer(elements=elements)
+
+    def parsePage(element, context):
+        """Consume events up to the end of the page; return the Page.
+
+        `element` is the already-opened <page> element.
+        """
+        width = Decimal(element.get("width"))
+        height = Decimal(element.get("height"))
+        background = None
+        layers = []
+        for (event, element) in context:
+            if event == "start":
+                if element.tag == "background":
+                    background = abstract_syntax.Background(
+                        type=element.get("type"),
+                        color=element.get("color"),
+                        style=element.get("style"))
+                elif element.tag == "layer":
+                    layers.append(parseLayer(context))
+                else:
+                    raise Error("Unsupported tag:" + element.tag)
+            elif element.tag == "page":
+                if background is None:
+                    raise Error("Page without background")
+                return abstract_syntax.Page(
+                    width=width,
+                    height=height,
+                    background=background,
+                    layers=layers)
+
+    def parseXournal(context):
+        """Consume the whole event stream; return the XournalFile."""
+        header = None
+        title = ""
+        preview = b""
+        pages = []
+        for (event, element) in context:
+            if event == "start":
+                if element.tag == "xournal":
+                    header = abstract_syntax.Header(
+                        creator=element.get("creator"),
+                        fileversion=int(element.get("fileversion")),
+                    )
+                elif element.tag == "page":
+                    pages.append(parsePage(element, context))
+                elif element.tag not in ("title", "preview"):
+                    raise Error("Unsupported tag:" + element.tag)
+            elif element.tag == "title":
+                title = element.text or ""
+            elif element.tag == "preview":
+                preview = base64.b64decode(element.text or "")
+
+        if header is None:
+            raise Error("Missing <xournal> root element")
+        return abstract_syntax.XournalFile(
+            header=header,
+            title=title,
+            preview=preview,
+            pages=pages)
+
+    # Binary mode: the XML parser decodes the bytes itself, following the
+    # document's encoding declaration instead of the locale's encoding.
+    with gzip.open(path, mode='rb') as f:
+        context = ElementTree.iterparse(f, events=["start", "end"])
+        return parseXournal(context)