dope2/parser/parser.js

174 lines
4.3 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { Bool, Char, Double, Int, Unit } from "../primitives/types.js";
import { dictType, fnType, lsType, prodType, setType, sumType } from "../structures/types.js";
// Bracket table: opening token -> [closing token, group constructor].
// A `null` constructor means the brackets only delimit a sub-expression;
// otherwise the constructor is applied to the parsed contents of the group.
const bracketOperators = new Map();
bracketOperators.set('(', [')', null]);
bracketOperators.set('[', [']', lsType]);
bracketOperators.set('{', ['}', setType]);
// '∀' ... ':' can only occur at the beginning of an expression;
// it is used to extract the type variables.
bracketOperators.set('∀', [':', null]);
// Infix operator table: operator token -> curried combinator.
// Each combinator is called as `fn(lhsThunk)(rhsThunk)`; the operands are
// passed as functions so that they can be evaluated lazily by the combinator.
const infixOperators = new Map([
  ['+', sumType],
  ['|', sumType],
  // The Unicode product sign must never be the empty string: every key of
  // this table becomes a tokenizer token, and an empty token matches at
  // every position without consuming input (the tokenizer would never
  // terminate).
  // NOTE(review): the original glyph could not be recovered, so both common
  // product signs (U+2A2F and U+00D7) are accepted — confirm which one the
  // rest of the project prints.
  ['⨯', prodType],
  ['×', prodType],
  ['*', prodType],
  ['→', fnType],
  ['->', fnType],
  ['⇒', dictType],
  ['=>', dictType],
  // only used for type variables (e.g., ∀a,b,c:)
  // Collects both operands into one flat array of type variables.
  [',', fnX => fnY => {
    const x = fnX();
    const y = fnY();
    return Array.isArray(x) ? x.concat(y) : [x].concat(y);
  }],
]);
// Unique symbols standing in for the type variables a..e.
const [a, b, c, d, e] = ['a', 'b', 'c', 'd', 'e'].map(name => Symbol(name));
// Atom table: token text -> value returned when the token is parsed on its
// own. Holds the primitive types plus the type-variable symbols a..e.
const primitives = new Map()
  .set('Int', Int)
  .set('Double', Double)
  .set('Bool', Bool)
  .set('Char', Char)
  .set('Unit', Unit)
  .set('a', a)
  .set('b', b)
  .set('c', c)
  .set('d', d)
  .set('e', e);
// Every literal token the tokenizer can match, in the order it tries them:
// opening brackets, closing brackets, infix operators, then atom names.
const TOKENS = [
  bracketOperators.keys(),
  Array.from(bracketOperators.values(), ([closing]) => closing),
  infixOperators.keys(),
  primitives.keys(),
].flatMap(group => [...group]);
// console.log('TOKENS =', TOKENS);
// Splits a type expression string into an array of token strings.
//
// - Whitespace separates tokens and is otherwise ignored.
// - '#' followed by one or more ASCII digits is a label token (e.g. '#0');
//   the token is the exact matched text.
// - Anything else must start with one of the entries of TOKENS; the first
//   entry (in TOKENS order) that matches at the current position wins.
//
// Throws an Error (with a caret pointing at the offending position) when no
// token matches, or when '#' is not followed by a digit.
const tokenize = expr => {
  const tokens = [];
  let i = 0;
  while (i < expr.length) {
    if (/\s/.test(expr[i])) {
      i++; // skip whitespace
      continue;
    }
    if (expr[i] === '#') {
      // Scan the digits ourselves instead of round-tripping through
      // parseInt: that would accept signs, whitespace and hex prefixes,
      // drop leading zeros, and produce '#NaN' when no digits follow,
      // so the cursor would advance by the wrong amount.
      let end = i + 1;
      while (end < expr.length && expr[end] >= '0' && expr[end] <= '9') {
        end++;
      }
      if (end === i + 1) {
        throw new Error(`Expected digits after '#' at position ${i} in\n ${expr}\n ${' '.repeat(i)}^`);
      }
      tokens.push(expr.slice(i, end));
      i = end;
      continue;
    }
    // A zero-length token would match everywhere without consuming input,
    // so it is never considered (guards against an endless loop).
    const token = TOKENS.find(candidate => candidate.length > 0 && expr.startsWith(candidate, i));
    if (token === undefined) {
      throw new Error(`Couldn't match any token at position ${i} in\n ${expr}\n ${' '.repeat(i)}^`);
    }
    tokens.push(token);
    i += token.length;
  }
  return tokens;
};
// Splits off the first operand from a token list.
//
// Returns a triple [tokensInGroup, fn, rest]:
// - if the first token is not an opening bracket, the operand is that single
//   token: [[firstToken], null, remainingTokens];
// - otherwise the operand is everything up to the matching closing bracket
//   (brackets themselves excluded), `fn` is the constructor registered for
//   that bracket in `bracketOperators` (may be null), and `rest` is whatever
//   follows the closing bracket.
//
// Throws an Error when an opening bracket has no matching closing bracket.
const consumeGroup = (tokens) => {
  const [opening, ...afterOpening] = tokens;
  const bracket = bracketOperators.get(opening);
  if (bracket === undefined) {
    // no group, just a single token
    return [[opening], null, afterOpening];
  }
  const [closing, fn] = bracket;
  // Find where the group ends; only brackets of the same kind as the
  // opening one are counted towards the nesting depth.
  let depth = 1;
  let i = 1;
  for (; i < tokens.length; i++) {
    if (tokens[i] === opening) {
      depth++;
    }
    else if (tokens[i] === closing) {
      depth--;
    }
    if (depth === 0) {
      break;
    }
  }
  if (depth !== 0) {
    // Without this check an unterminated group silently swallowed all
    // remaining tokens.
    throw new Error(`missing closing '${closing}' for '${opening}'`);
  }
  const tokensInGroup = tokens.slice(1, i); // don't include brackets
  const rest = tokens.slice(i + 1);
  return [tokensInGroup, fn, rest];
};
// Parses the contents of one operand returned by `consumeGroup`.
//
// - `fn === null`: the operand is a plain sub-expression; parse it directly,
//   forwarding the pending label.
// - otherwise: hand `fn` a function that parses the contents on demand. That
//   function receives `self` and binds the pending label (if any) to it.
//   NOTE(review): `self` is presumably the type being constructed, which is
//   what allows a label to refer to its own definition — confirm against the
//   constructors in structures/types.js.
const parseGroup = (tokensInGroup, fn, labels, label) => {
  if (fn === null) {
    return __parse(tokensInGroup, labels, label);
  }
  const parseContents = self => __parse(tokensInGroup, extendLabels(labels, label, self));
  return fn(parseContents);
};
// Returns the label environment with `label` bound to `self`.
// When there is no pending label (`label === null`) the environment is
// returned unchanged; otherwise a new Map is built and the input is left
// untouched.
const extendLabels = (labels, label, self) => {
  if (label === null) {
    return labels;
  }
  const extended = new Map(labels);
  extended.set(label, self);
  return extended;
};
// Recursively parses a token list into a type.
//
// Parameters:
// - tokens: array of token strings (as produced by `tokenize`).
// - labels: Map from label token ('#n') to the value it is bound to.
// - label:  a label that was just read and still has to be bound to the
//           expression that follows it, or null.
//
// Grammar handled here: [label] operand [infixOperator expression]
// (operators are therefore right-associative, without precedence).
//
// Throws an Error on an empty token list, on an unknown single token and on
// an unknown infix operator.
const __parse = (tokens, labels = new Map(), label = null) => {
  if (tokens.length === 0) {
    // e.g. empty input, empty brackets or a dangling operator/label
    throw new Error("unexpected end of type expression");
  }
  const [first] = tokens;
  if (first.startsWith('#')) {
    if (!labels.has(first)) {
      // new label: remember it and parse the expression it names
      return __parse(tokens.slice(1), labels, first);
    }
    if (tokens.length === 1) {
      // reference to an already bound label
      return labels.get(first);
    }
    // A bound label followed by more tokens is the left operand of an
    // infix operator: fall through to the general case instead of
    // returning early and dropping the remaining tokens.
  }
  else if (tokens.length === 1) {
    if (!primitives.has(first)) {
      throw new Error("unknown type: " + first);
    }
    return primitives.get(first);
  }
  const [lhsTokens, fnGrp, rest] = consumeGroup(tokens);
  if (rest.length === 0) {
    // the whole expression is a single bracketed group
    return parseGroup(lhsTokens, fnGrp, labels, label);
  }
  const [operator, ...rhsTokens] = rest;
  const fn = infixOperators.get(operator);
  if (fn === undefined) {
    throw new Error("unknown operator: " + operator);
  }
  // Both operands are handed over as functions of `self`, so that the
  // pending label (if any) can be bound to `self` inside either operand.
  const parseLhs = self => parseGroup(lhsTokens, fnGrp, extendLabels(labels, label, self));
  const parseRhs = self => __parse(rhsTokens, extendLabels(labels, label, self));
  return fn(parseLhs)(parseRhs);
};
// Parses a type expression string.
//
// - A plain expression yields whatever `__parse` returns for its tokens.
// - An expression starting with '∀' (e.g. '∀a,b: ...') is a generic type and
//   yields `{ typeVars, type }`, where `typeVars` is always an array of the
//   parsed type variables and `type` is the parsed body after the ':'.
export const parse = expr => {
  const tokens = tokenize(expr);
  if (tokens[0] !== '∀') {
    return __parse(tokens);
  }
  // generic type: split into the binder ('∀' ... ':') and the body
  const [binderTokens, , bodyTokens] = consumeGroup(tokens);
  // a single variable parses to a bare value; normalise to an array
  const typeVars = [].concat(__parse(binderTokens));
  const type = __parse(bodyTokens);
  return { typeVars, type };
};