parser for types + start moving all types to separate modules

This commit is contained in:
Joeri Exelmans 2025-05-06 23:41:12 +02:00
parent 8eec5b9239
commit 1d826ea8d4
11 changed files with 277 additions and 88 deletions

174
parser/parser.js Normal file
View file

@ -0,0 +1,174 @@
import { Bool, Char, Double, Int, Unit } from "../primitives/types.js";
import { dictType, fnType, lsType, prodType, setType, sumType } from "../structures/types.js";
// Bracket table: maps an opening token to [closing token, wrapper].
// `wrapper` is either null (the brackets only group their contents) or a
// constructor imported from ../structures/types.js that parseGroup applies
// to a thunk producing the parsed contents.
const bracketOperators = new Map([
['(', [')', null]],
['[', [']', lsType]],
['{', ['}', setType]],
// '∀ ... :' is expected only at the very beginning of an expression.
// parse() treats it as a group so it can extract the type variables
// listed between '∀' and ':'.
['∀', [':', null]],
]);
// Infix operator table: maps an operator token to a curried combinator.
// __parse invokes an entry as fn(lhsThunk)(rhsThunk); each thunk parses one
// operand lazily and receives a single `self` argument from the combinator
// (presumably the type under construction, so labelled types can refer to
// themselves — confirm against ../structures/types.js).
//
// Every key must be a non-empty string: the keys are also used as tokenizer
// literals, and an empty literal matches at every position without
// consuming any input.
const infixOperators = new Map([
  ['+', sumType],
  ['|', sumType],
  // NOTE(review): this key was the empty string, which cannot be a valid
  // token. '⨯' is assumed to be the intended product glyph — confirm.
  ['⨯', prodType],
  ['*', prodType],
  ['→', fnType],
  ['->', fnType],
  ['⇒', dictType],
  ['=>', dictType],
  // only used for type variables (e.g., ∀a,b,c:)
  // Collects both operands into one flat array.
  [',', fnX => fnY => {
    const x = fnX();
    const y = fnY();
    return Array.isArray(x) ? x.concat(y) : [x].concat(y);
  }],
]);
// One unique Symbol per type-variable name; the description equals the name.
const [a, b, c, d, e] = ['a', 'b', 'c', 'd', 'e'].map(name => Symbol(name));

// Lookup table from an atomic token to the value it denotes: the primitive
// types imported from ../primitives/types.js, followed by the type-variable
// symbols. Each key is spelled exactly like the binding it maps to, and
// Object.entries preserves the listed order.
const primitives = new Map(Object.entries({
  Int,
  Double,
  Bool,
  Char,
  Unit,
  a,
  b,
  c,
  d,
  e,
}));
// Every literal the tokenizer recognises, in the order they are tried:
// opening brackets, closing brackets, infix operators, then atomic names.
const TOKENS = [
  bracketOperators.keys(),
  Array.from(bracketOperators.values(), ([closing]) => closing),
  infixOperators.keys(),
  primitives.keys(),
].flatMap(group => [...group]);
// console.log('TOKENS =', TOKENS);
/**
 * Splits a type expression into a flat array of token strings.
 *
 * Whitespace is skipped. A '#' immediately followed by decimal digits
 * becomes a label token '#<n>'; leading zeros are dropped so that '#007'
 * and '#7' name the same label. Everything else must start with one of the
 * literals in `tokenList`, tried in list order.
 *
 * @param {string} expr - the expression to tokenize
 * @param {Iterable<string>} [tokenList=TOKENS] - literal tokens to recognise
 * @returns {string[]} the tokens in source order
 * @throws {Error} if no token matches at some position
 */
const tokenize = (expr, tokenList = TOKENS) => {
  const tokens = [];
  let i = 0;
  outerloop: while (i < expr.length) {
    if (/\s/.test(expr[i])) {
      i++;
      continue; // skip whitespace
    }
    if (expr[i] === '#') {
      // Read the digits textually and advance by what was actually consumed,
      // so the cursor cannot drift from the source text.
      const digits = /^\d+/.exec(expr.slice(i + 1));
      if (digits !== null) {
        tokens.push('#' + digits[0].replace(/^0+(?=\d)/, ''));
        i += 1 + digits[0].length;
        continue;
      }
      // a '#' without digits is not a label; fall through to the error below
    }
    for (const token of tokenList) {
      // An empty literal would match everywhere without advancing.
      if (token.length > 0 && expr.startsWith(token, i)) {
        tokens.push(token);
        i += token.length;
        continue outerloop;
      }
    }
    throw new Error(`Couldn't match any token at position ${i} in\n ${expr}\n ${' '.repeat(i)}^`);
  }
  return tokens;
};
/**
 * Splits off the first operand of a token list.
 *
 * If the first token is an opening bracket, the operand is everything up to
 * the matching closing bracket (nested brackets of the same kind are
 * balanced) and the brackets themselves are dropped. Otherwise the operand
 * is just the first token.
 *
 * @param {string[]} tokens - tokens to split
 * @param {Map<string, [string, (Function|null)]>} [brackets=bracketOperators]
 *   opening token -> [closing token, wrapper]
 * @returns {[string[], (Function|null), string[]]}
 *   [tokens of the operand, wrapper for the group or null, remaining tokens]
 * @throws {Error} if an opening bracket is never closed
 */
const consumeGroup = (tokens, brackets = bracketOperators) => {
  const [opening, ...tail] = tokens;
  const bracket = brackets.get(opening);
  if (bracket === undefined) {
    // no group, just a single token:
    return [[opening], null, tail];
  }
  // find where the group ends:
  const [closing, fn] = bracket;
  let depth = 1;
  let end = 1;
  for (; end < tokens.length; end++) {
    if (tokens[end] === opening) {
      depth++;
    }
    else if (tokens[end] === closing) {
      depth--;
    }
    if (depth === 0) {
      break;
    }
  }
  if (depth !== 0) {
    // Without this check the rest of the input would be silently treated
    // as the contents of the group.
    throw new Error(`Missing closing '${closing}' for '${opening}'`);
  }
  const tokensInGroup = tokens.slice(1, end); // don't include brackets
  const rest = tokens.slice(end + 1);
  return [tokensInGroup, fn, rest];
};
/**
 * Parses the contents of one group returned by consumeGroup.
 *
 * With no wrapper (`fn === null`) the contents are parsed directly and the
 * pending label is passed on. With a wrapper, `fn` receives a thunk; when
 * the wrapper invokes it with `self`, the pending label (if any) is bound
 * to `self` before the contents are parsed.
 *
 * @param {string[]} tokensInGroup - tokens between the brackets
 * @param {Function|null} fn - wrapper for the group, or null
 * @param {Map<string, *>} labels - labels bound so far
 * @param {string|null} [label=null] - label waiting to be bound, if any.
 *   Defaults to null so that callers passing only three arguments do not
 *   hand `undefined` to extendLabels as if it were a label.
 * @returns {*} whatever __parse or the wrapper returns
 */
const parseGroup = (tokensInGroup, fn, labels, label = null) => {
  if (fn === null) {
    return __parse(tokensInGroup, labels, label);
  }
  return fn(self => __parse(tokensInGroup, extendLabels(labels, label, self)));
};
/**
 * Returns the label bindings extended with `label -> self`.
 *
 * The input map is never mutated. When there is no label to bind (`null`
 * or `undefined`), the input map itself is returned.
 *
 * @param {Map<string, *>} labels - existing bindings
 * @param {string|null|undefined} label - label to bind, if any
 * @param {*} self - value the label should resolve to
 * @returns {Map<string, *>} `labels`, or a copy with the extra binding
 */
const extendLabels = (labels, label, self) => {
  // `undefined` is treated like `null`; otherwise a caller that omits the
  // label would register `undefined` as a key.
  if (label === null || label === undefined) {
    return labels;
  }
  const extended = new Map(labels);
  extended.set(label, self);
  return extended;
};
/**
 * Recursive-descent parser over a token list.
 *
 * Grammar, as implemented: an expression is one operand (a single token or
 * a bracketed group), optionally followed by an infix operator and another
 * expression, so operators nest to the right. A leading '#n' token that is
 * not yet bound is a label definition for the expression after it; a bound
 * '#n' is a reference that resolves to the bound value.
 *
 * @param {string[]} tokens - tokens to parse
 * @param {Map<string, *>} [labels] - labels bound so far
 * @param {string|null} [label=null] - label waiting to be bound
 * @returns {*} the value built by the tables' constructors
 * @throws {Error} on empty input, an unknown atom, or an unknown operator
 */
const __parse = (tokens, labels = new Map(), label = null) => {
  if (tokens.length === 0) {
    // e.g. '', '()' or a dangling operator such as 'Int ->'
    throw new Error('unexpected end of type expression');
  }
  const [head] = tokens;
  if (head.startsWith('#')) {
    if (!labels.has(head)) {
      // label definition: remember it and parse what follows
      return __parse(tokens.slice(1), labels, head);
    }
    if (tokens.length === 1) {
      return labels.get(head);
    }
    // A bound label followed by more tokens is the left operand of an infix
    // expression; fall through so the remaining tokens are not dropped.
  }
  else if (tokens.length === 1) {
    if (!primitives.has(head)) {
      throw new Error(`unknown type: ${head}`);
    }
    return primitives.get(head);
  }
  const [lhsTokens, fnGrp, rest] = consumeGroup(tokens);
  if (rest.length === 0) {
    return parseGroup(lhsTokens, fnGrp, labels, label);
  }
  const [operator, ...rhsTokens] = rest;
  const fn = infixOperators.get(operator);
  if (fn === undefined) {
    throw new Error(`unknown operator: ${operator}`);
  }
  // The pending label names the whole infix expression, so it is bound to
  // `self` for both operands and must not be passed on to the left group
  // (hence the explicit null).
  return fn(self => {
    return parseGroup(lhsTokens, fnGrp, extendLabels(labels, label, self), null);
  })(self => {
    return __parse(rhsTokens, extendLabels(labels, label, self));
  });
};
/**
 * Parses a type expression given as a string.
 *
 * An expression starting with '∀' is generic: the tokens between '∀' and
 * ':' are parsed as the type variables and the remainder as the type.
 *
 * @param {string} expr - the type expression
 * @returns {*} the parsed type, or `{ typeVars, type }` for a generic type,
 *   where `typeVars` is always an array
 */
export const parse = expr => {
  const tokens = tokenize(expr);
  if (tokens[0] !== '∀') {
    return __parse(tokens);
  }
  // generic type: the group's wrapper slot is irrelevant here
  const [typeVarTokens, , rest] = consumeGroup(tokens);
  // a single variable parses to a bare value; concat normalises to an array
  const typeVars = [].concat(__parse(typeVarTokens));
  const type = __parse(rest);
  return { typeVars, type };
};