parser for types + start moving all types to separate modules
This commit is contained in:
parent 8eec5b9239
commit 1d826ea8d4
11 changed files with 277 additions and 88 deletions
174 parser/parser.js Normal file
@@ -0,0 +1,174 @@
import { Bool, Char, Double, Int, Unit } from "../primitives/types.js";
import { dictType, fnType, lsType, prodType, setType, sumType } from "../structures/types.js";

// maps each opening bracket to its closing bracket and the type constructor it introduces
const bracketOperators = new Map([
  ['(', [')', null]],
  ['[', [']', lsType]],
  ['{', ['}', setType]],

  // can only occur at the beginning;
  // we use these to extract the type variables
  ['∀', [':', null]],
]);

// maps each infix operator to the type constructor it denotes
const infixOperators = new Map([
  ['+', sumType],
  ['|', sumType],
  ['⨯', prodType],
  ['*', prodType],
  ['→', fnType],
  ['->', fnType],
  ['⇒', dictType],
  ['=>', dictType],

  // only used for type variables (e.g., ∀a,b,c:)
  [',', fnX => fnY => {
    const x = fnX();
    const y = fnY();
    return Array.isArray(x) ? x.concat(y) : [x].concat(y);
  }],
]);

const a = Symbol('a');
const b = Symbol('b');
const c = Symbol('c');
const d = Symbol('d');
const e = Symbol('e');

const primitives = new Map([
  ['Int', Int],
  ['Double', Double],
  ['Bool', Bool],
  ['Char', Char],
  ['Unit', Unit],
  ['a', a],
  ['b', b],
  ['c', c],
  ['d', d],
  ['e', e],
]);

const TOKENS = [
  ...bracketOperators.keys(),
  ...[...bracketOperators.values()].map(v => v[0]),
  ...infixOperators.keys(),
  ...primitives.keys(),
];

// console.log('TOKENS =', TOKENS);

const tokenize = expr => {
  const tokens = [];
  let i = 0;
  outerloop: while (i < expr.length) {
    if (/\s/.test(expr[i])) {
      i++;
      continue outerloop; // skip whitespace
    }
    if (expr[i] === '#') {
      // numeric label, e.g. '#0', used to name a type and refer back to it
      const label = '#' + parseInt(expr.slice(i+1));
      tokens.push(label);
      i += label.length;
      continue outerloop;
    }
    for (const token of TOKENS) {
      if (expr.startsWith(token, i)) {
        tokens.push(token);
        i += token.length;
        continue outerloop;
      }
    }
    throw new Error(`Couldn't match any token at position ${i} in\n ${expr}\n ${' '.repeat(i)}^`);
  }
  // console.log({tokens});
  return tokens;
};
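
// illustration: tokenize('[Int] → Bool') yields ['[', 'Int', ']', '→', 'Bool']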

// splits `tokens` into the leading group's tokens, that group's type constructor (or null), and the remaining tokens
const consumeGroup = (tokens) => {
  const bracket = bracketOperators.get(tokens[0]);
  if (bracket === undefined) {
    // no group, just a single token:
    const [firstToken, ...rest] = tokens;
    return [[firstToken], null, rest];
  }
  else {
    // find where the group ends:
    const [closing, fn] = bracket;
    const opening = tokens[0];
    let depth = 1;
    let i = 1;
    for (; i < tokens.length; i++) {
      if (tokens[i] === opening) {
        depth++;
      }
      else if (tokens[i] === closing) {
        depth--;
      }
      if (depth === 0) {
        break;
      }
    }
    const tokensInGroup = tokens.slice(1, i); // don't include brackets
    const rest = tokens.slice(i+1);
    return [tokensInGroup, fn, rest];
  }
};
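
// illustration: consumeGroup(['[', 'Int', ']', '→', 'Bool']) returns [['Int'], lsType, ['→', 'Bool']]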

// with no constructor the group is just a nested expression;
// otherwise the constructor receives a thunk, so the group can refer to itself via its label
const parseGroup = (tokensInGroup, fn, labels, label) => {
  // console.log('parseGroup ', tokensInGroup, fn);
  return (fn === null)
    ? __parse(tokensInGroup, labels, label)
    : fn(self => {
        return __parse(tokensInGroup, extendLabels(labels, label, self));
      });
};

// returns `labels` extended with `label` bound to `self` (used to resolve '#n' back-references)
const extendLabels = (labels, label, self) => {
  return (label === null) ? labels : new Map([...labels, [label, self]]);
};

const __parse = (tokens, labels = new Map(), label = null) => {
  // console.log('parse ', tokens);
  if (tokens[0].startsWith('#')) {
    if (labels.has(tokens[0])) {
      // back-reference to an already-labelled type
      return labels.get(tokens[0]);
    }
    else {
      // pass the label along and parse the rest
      return __parse(tokens.slice(1), labels, tokens[0]);
    }
  }
  if (tokens.length === 1) {
    return primitives.get(tokens[0]);
  }
  else {
    const [lhsTokens, fnGrp, rest] = consumeGroup(tokens);
    if (rest.length === 0) {
      return parseGroup(lhsTokens, fnGrp, labels, label);
    }
    const [operator, ...rhsTokens] = rest;
    for (const [operatorChar, fn] of infixOperators) {
      if (operator === operatorChar) {
        // the constructor gets one thunk per operand, so each side can refer back to the labelled whole
        return fn(self => {
          return parseGroup(lhsTokens, fnGrp, extendLabels(labels, label, self));
        })(self => {
          return __parse(rhsTokens, extendLabels(labels, label, self));
        });
      }
    }
    throw new Error("unknown operator: " + operator);
  }
};
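
// '#' labels allow recursive types: in '#0 [Int + #0]' the inner '#0' resolves,
// via `labels`, to the `self` value of the labelled list type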

// entry point: tokenizes the expression and, for generic types ('∀…:'),
// splits off the type variables before parsing the body
export const parse = expr => {
  const tokens = tokenize(expr);
  if (tokens[0] === '∀') {
    // generic type
    const [typeVarTokens, _, rest] = consumeGroup(tokens);
    const typeVars = [].concat(__parse(typeVarTokens));
    const type = __parse(rest);
    return { typeVars, type };
  }
  return __parse(tokens);
};
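
// usage sketch (exact return values depend on the constructors in ../structures/types.js):
//   parse('[Int] → Bool')  // fnType applied to thunks for lsType(Int) and Bool
//   parse('∀a,b: a → b')   // { typeVars: [Symbol(a), Symbol(b)], type: <result of fnType> }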