hobbes
a language, embedded compiler, and runtime for efficient dynamic expression evaluation, data storage and analysis
lalr.H
Go to the documentation of this file.
1 /*
2  * lalr : an LALR(1) parser generator for abstract terminals
3  */
4 
5 #ifndef HOBBES_PARSE_LALR_HPP_INCLUDED
6 #define HOBBES_PARSE_LALR_HPP_INCLUDED
7 
9 #include <hobbes/parse/data.H>
10 #include <map>
11 #include <set>
12 #include <stack>
13 
14 namespace hobbes {
15 
16 // a grammar is a mapping from abstract terminals to the set of rules matching those terminals
17 typedef terminals rule;
18 typedef std::vector<rule> rules;
19 typedef std::map<terminal*, rules> grammar;
20 
21 // an item represents the state of recognizing a rule
22 struct item {
23  item(terminal* agg, nat p, nat r, const terminalset& la = terminalset());
24 
25  terminal* agg; // the 'non-terminal'
26  nat p; // the read position in the indexed rule
27  nat r; // the indexed rule
28 
29  terminalset la; // filled by the LALR(1) determination for items representing lookahead reduce actions
30 
31  bool operator<(const item& rhs) const;
32  bool operator==(const item& rhs) const;
33 };
34 
35 // an itemset is a partial description of the state of a parser
36 // (with no duplicate items)
37 typedef std::set<item> itemset;
38 typedef std::vector<itemset> itemsets;
39 
40 // transitions : a set of states to transition to indexed on terminals
41 typedef std::map<terminal*, nat> state_transitions;
42 
43 // each closed itemset represents a numbered parser state
44 typedef std::map<itemset, nat> parser_states;
45 
46 // the reverse parser state map
47 typedef std::map<nat, itemset> state_definitions;
48 
49 // a set of state transitions map values to target parser states (by index)
50 typedef std::map<terminal*, nat> state_transitions;
51 
52 // each state has its own set of state transitions
53 typedef std::map<nat, state_transitions> parser_state_transitions;
54 
55 // a parserdef is an ordered sequence of states, along with the transitions defined for each state
56 struct parserdef {
57  grammar g;
59  parser_states states;
60  state_definitions state_defs;
61  parser_state_transitions transitions;
63 };
64 
65 /*
66  * parser algorithms
67  */
68 
69 // remove rules that define symbols, refer to symbols, or both
70 void removeRuleDefs(terminal* s, grammar* g);
71 void removeRuleRefs(terminal* s, grammar* g);
72 void undefineSymbol(terminal* s, grammar* g);
73 
74 grammar removeRuleDefs(terminal* s, const grammar& g);
75 grammar removeRuleRefs(terminal* s, const grammar& g);
76 grammar undefineSymbol(terminal* s, const grammar& g);
77 
78 // the set of symbols with rule definitions
79 terminalset definedSymbols(const grammar& g);
80 
81 // the set of symbols
82 terminalset topLevelSymbols(const grammar& g);
83 
84 // the set of symbols used by this symbol
85 void symbolsUsed(const grammar& g, const rule& r, terminalset* ss);
86 void symbolsUsed(const grammar& g, const rules& rs, terminalset* ss);
87 void symbolsUsed(const grammar& g, terminal* s, terminalset* ss);
88 terminalset symbolsUsed(const grammar& g, terminal* s);
89 
90 // the set of symbols that use this symbol
91 terminalset symbolsUsing(const grammar& g, terminal* s);
92 
93 // is this a 'terminal' or 'non-terminal'?
94 bool hasRule(const grammar& g, terminal* t);
95 
96 // an aggregate symbol directly derives null if one of its rules does
97 bool directlyDerivesNull(const rule& r);
98 bool directlyDerivesNull(const grammar& g, terminal* s);
99 
100 // an aggregate symbol derives null if it directly derives null or if it has a rule consisting of a path of aggregate symbols that derive null
101 bool derivesNull(const terminalset& nullAggs, const rule& r);
102 bool derivesNull(const grammar& g, const terminalset& nullAggs, terminal* s);
103 bool derivesNull(const grammar& g, const terminalset& nullAggs, const terminals& ss);
104 bool derivesNull(const parserdef& p, terminal* s);
105 
106 // determine which aggregate symbols in a grammar are nullable (can derive the empty string) either directly or transitively
107 terminalset symbolsDerivingNull(const grammar& g);
108 
109 // increment an item's read position
110 item succ(const item& i);
111 
112 // the next value to read from an item, or the next values to read from a set of items
113 // (in the case of reading from an item, the next value may be null if the item represents a completed rule)
114 terminal* next(const grammar& g, const item& i);
115 terminalset next(const grammar& g, const itemset& is);
116 
117 // the set of _non-aggregate_ (primitive terminals) following an itemset
118 terminalset nextPrim(const grammar& g, const itemset& is);
119 
120 // the set of items that follow a given value (not closed)
121 itemset follow(const grammar& g, const itemset& is, terminal* v);
122 itemsets follow(const grammar& g, const itemset& is, const terminals& vs);
123 
124 // close an itemset
125 itemset closure(const grammar& g, const itemset& is);
126 void closure(const grammar& g, itemset* is);
127 
128 // find or create a state in a parser from a (closed) itemset
129 nat parserState(const itemset& s, parserdef* p);
130 
131 // find the (closed) state transitions from a closed itemset
132 state_transitions follow(const grammar& g, const itemset& is, const terminalset& vs, parserdef* p);
133 
134 // determine the parser state graph (ie: the LR(0) parser) for a grammar, given a start symbol
135 parserdef lr0parser(const grammar& g, terminal* s);
136 
137 // from a state q and symbol t, what state do you go to?
138 nat follow(const parserdef& p, nat q, terminal* t);
139 
140 // a transition of the LR(0) parser can be described by a state and terminal
141 typedef std::pair<nat, terminal*> transition;
142 typedef std::set<transition> transitionset;
143 
144 transitionset transitions(const parserdef& p);
145 
146 // does (q, r -> w) look back to the transition t?
147 // (that is, does sym(t) == r, and does state(t) follow w to get to state q?)
148 bool lookback(const parserdef& p, nat q, terminal* r, const rule& w, const transition& t);
149 
150 // x=(s0, A) includes y=(s1, B) if B -> aAm and m derives the empty string and s1 follows a to get to s0
151 bool includes(const parserdef& p, const transition& x, const transition& y);
152 
153 // x=(s0, A) reads y=(s1, B) if s0 follows A to s1 and B derives the empty string
154 bool reads(const parserdef& p, const transition& x, const transition& y);
155 
156 // t in directReads(x(s, A)) if s follows A to s' and t is in the set of non-aggregate terminals at s'
157 terminalset directReads(const parserdef& p, const transition& x);
158 
159 // for x in X, compute a function f : X -> Y from f' : X -> Y and + : Y -> Y -> Y and R : X -> X -> bool, such that f(x) = f'(x) + sum(f(y) | y in X, R(x,y))
160 template <typename X, typename Y, typename Fp, typename A, typename R>
161  class digraphF {
162  public:
163  typedef std::set<X> Xs;
164  typedef std::map<X, Y> F;
165 
166  static F map(const Xs& xs, const Fp& fp, const A& append, const R& r) {
167  return digraphF(xs, fp, append, r).map();
168  }
169  private:
170  const Xs& xs;
171  const Fp& fp;
172  const A& append;
173  const R& r;
174 
175  digraphF(const Xs& xs, const Fp& fp, const A& append, const R& r) : xs(xs), fp(fp), append(append), r(r) {
176  }
177 
178  typedef std::stack<X> Xstack;
179  typedef std::map<X, nat> Xidxs;
180 
181  F map() const {
182  F f;
183  Xstack s;
184  Xidxs n = freshIdxs(this->xs);
185 
186  for (typename Xs::const_iterator x = this->xs.begin(); x != this->xs.end(); ++x) {
187  if (n[*x] == 0) {
188  traverse(*x, &s, &n, &f);
189  }
190  }
191 
192  return f;
193  }
194 
195  void traverse(X x, Xstack* s, Xidxs* n, F* f) const {
196  s->push(x);
197  nat d = s->size();
198 
199  // base case, this node contains its immediate value
200  (*n)[x] = d;
201  (*f)[x] = this->fp(x);
202 
203  // and the values of any nodes in the input relation
204  for (typename Xs::const_iterator y = this->xs.begin(); y != this->xs.end(); ++y) {
205  if (this->r(x, *y)) {
206  // the node y is in the relation
207  // if it hasn't been visited yet, then recurse into it to determine its value
208  if ((*n)[*y] == 0) {
209  traverse(*y, s, n, f);
210  }
211 
212  // finally, add its value to the output for x
213  (*f)[x] = this->append((*f)[x], (*f)[*y]);
214  }
215  }
216 
217  // join all the nodes in this SCC
218  if ((*n)[x] == d) {
219  while (true) {
220  X cx = s->top();
221  s->pop();
222  (*n)[cx] = inf();
223  if (cx == x) {
224  break;
225  } else {
226  (*f)[cx] = (*f)[x];
227  }
228  }
229  }
230  }
231 
232  static Xidxs freshIdxs(const Xs& xs) {
233  Xidxs r;
234  for (typename Xs::const_iterator x = xs.begin(); x != xs.end(); ++x) {
235  r[*x] = 0;
236  }
237  return r;
238  }
239 
240  static nat inf() { return (nat)-1; }
241  };
242 
243 // the across/up sets of LALR(1) determination
244 typedef std::map<transition, terminalset> transition_lookahead;
245 
246 transition_lookahead Reads(const parserdef& p, const transitionset& ts);
247 transition_lookahead Follow(const parserdef& p, const transitionset& ts);
248 
249 // determine the LALR(1) parser for a grammar, given a start symbol
250 parserdef lalr1parser(const grammar& g, terminal* s);
251 
252 // an ambiguity conflict on a particular terminal
253 class ambiguity_conflict : public std::runtime_error {
254 public:
255  ambiguity_conflict(const std::string& ex, terminal* t) throw();
256 
257  terminal* failedTerminal() const;
258 private:
260 };
261 
262 // a failure to compile a grammar to an LALR table, with cause
263 class compile_table_failure : public std::runtime_error {
264 public:
265  compile_table_failure(const std::string& msg, const grammar& g, const itemset& faileditems, terminal* t) throw();
266 
267  const grammar& failedGrammar() const;
268  const itemset& failedItems() const;
269  terminal* failedTerminal() const;
270 
271  void print(std::ostream&) const;
272 private:
273  grammar g;
274  itemset faileditems;
276 
277  void print(std::ostream&,const item&) const;
278 };
279 
280 // an LR parser action (shift, reduce, or accept)
281 class action {
282 public:
283  // go to state s
284  static action goTo(nat s);
285 
286  // shift to state s
287  static action shift(nat s);
288 
289  // for symbol x, rule r, reduce n states
290  static action reduce(terminal* x, nat r, nat n);
291 
292  // the input has been accepted
293  static action accept();
294 
295  // what type of action is this?
296  bool isGoTo() const;
297  nat goToState() const;
298 
299  bool isShift() const;
300  nat shiftState() const;
301 
302  bool isReduce() const;
303  terminal* reduceSym() const;
304  nat reduceRule() const;
305  nat reduceSize() const;
306 
307  bool isAccept() const;
308 
309  // basic ops
310  bool operator==(const action& rhs) const;
311  bool operator!=(const action& rhs) const;
312 
313  std::ostream& fmt(std::ostream& out) const;
314 private:
315  uint8_t act;
316  union {
317  // for shift
319 
320  // for reduce
321  struct {
325  };
326  } d;
327 
328  action();
329 };
330 
331 std::ostream& operator<<(std::ostream& out, const action& act);
332 
333 // a parser state maps a set of terminals to actions
334 typedef std::map<terminal*, action> lrstate;
335 
336 // an ordered sequence of states describes the whole behavior of an LR parser
337 typedef std::vector<lrstate> lrtable;
338 
339 // determine the LR table, derived from the LALR(1) parser
340 lrtable lalrTable(const parserdef& pd, const precedence& p = precedence());
341 lrtable lalrTable(const grammar& g, terminal* s, const precedence& p = precedence());
342 
343 // show an LR table (useful for debugging)
344 void show(std::ostream&, const lrtable&);
345 
346 }
347 
348 #endif
349 
Definition: lalr.H:253
nat n
Definition: lalr.H:324
item(terminal *agg, nat p, nat r, const terminalset &la=terminalset())
Definition: lalr.C:210
bool reads(const parserdef &p, const transition &x, const transition &y)
Definition: lalr.C:513
transitionset transitions(const parserdef &p)
Definition: lalr.C:407
void traverse(X x, Xstack *s, Xidxs *n, F *f) const
Definition: lalr.H:195
terminal * x
Definition: lalr.H:322
std::vector< itemset > itemsets
Definition: lalr.H:38
Definition: lalr.H:161
void undefineSymbol(terminal *s, grammar *g)
Definition: lalr.C:29
nat parserState(const itemset &s, parserdef *p)
Definition: lalr.C:348
terminalset nullable
Definition: lalr.H:62
const T * is(const S *s)
Definition: ptr.H:107
bool operator<(const item &rhs) const
Definition: lalr.C:212
terminal * agg
Definition: lalr.H:25
std::vector< terminal * > terminals
Definition: terminal.H:28
std::vector< rule > rules
Definition: lalr.H:18
std::vector< lrstate > lrtable
Definition: lalr.H:337
void print(std::ostream &out, const NFA &nfa)
Definition: regex.C:525
parserdef lalr1parser(const grammar &g, terminal *s)
Definition: lalr.C:580
void removeRuleRefs(terminal *s, grammar *g)
Definition: lalr.C:17
unsigned int nat
Definition: data.H:20
Definition: terminal.H:19
static Xidxs freshIdxs(const Xs &xs)
Definition: lalr.H:232
std::map< X, nat > Xidxs
Definition: lalr.H:179
bool directlyDerivesNull(const rule &r)
Definition: lalr.C:143
static nat inf()
Definition: lalr.H:240
std::map< X, Y > F
Definition: lalr.H:164
parser_state_transitions transitions
Definition: lalr.H:61
Definition: lalr.H:22
transition_lookahead Follow(const parserdef &p, const transitionset &ts)
Definition: lalr.C:558
itemset follow(const grammar &g, const itemset &is, terminal *v)
Definition: lalr.C:309
terminal * t
Definition: lalr.H:259
const Fp & fp
Definition: lalr.H:171
Definition: boot.H:7
terminalset definedSymbols(const grammar &g)
Definition: lalr.C:52
std::set< X > Xs
Definition: lalr.H:163
static F map(const Xs &xs, const Fp &fp, const A &append, const R &r)
Definition: lalr.H:166
void append(std::vector< T > *xs, const std::vector< T > &ys)
Definition: array.H:292
lrtable lalrTable(const parserdef &pd, const precedence &p=precedence())
Definition: lalr.C:671
bool lookback(const parserdef &p, nat q, terminal *r, const rule &w, const transition &t)
Definition: lalr.C:462
std::ostream & operator<<(std::ostream &out, const array< char > *x)
Definition: hobbes.H:38
std::set< item > itemset
Definition: lalr.H:37
itemset closure(const grammar &g, const itemset &is)
Definition: lalr.C:342
void removeRuleDefs(terminal *s, grammar *g)
Definition: lalr.C:10
item succ(const item &i)
Definition: lalr.C:279
nat r
Definition: lalr.H:323
Definition: lalr.H:56
terminalset symbolsDerivingNull(const grammar &g)
Definition: lalr.C:189
terminalset topLevelSymbols(const grammar &g)
Definition: lalr.C:57
std::stack< X > Xstack
Definition: lalr.H:178
grammar g
Definition: lalr.H:273
transition_lookahead Reads(const parserdef &p, const transitionset &ts)
Definition: lalr.C:553
const R & r
Definition: lalr.H:173
uint8_t act
Definition: lalr.H:315
digraphF(const Xs &xs, const Fp &fp, const A &append, const R &r)
Definition: lalr.H:175
std::string show(const Expr &e)
Definition: expr.C:19
std::map< transition, terminalset > transition_lookahead
Definition: lalr.H:244
parser_states states
Definition: lalr.H:59
std::map< itemset, nat > parser_states
Definition: lalr.H:44
std::map< nat, itemset > state_definitions
Definition: lalr.H:47
bool derivesNull(const terminalset &nullAggs, const rule &r)
Definition: lalr.C:157
itemset faileditems
Definition: lalr.H:274
const Xs & xs
Definition: lalr.H:170
grammar g
Definition: lalr.H:57
nat r
Definition: lalr.H:27
terminal * next(const grammar &g, const item &i)
Definition: lalr.C:283
void w(const T &x, bytes *out)
Definition: net.H:282
terminalset la
Definition: lalr.H:29
std::set< transition > transitionset
Definition: lalr.H:142
terminalset directReads(const parserdef &p, const transition &x)
Definition: lalr.C:518
std::map< nat, state_transitions > parser_state_transitions
Definition: lalr.H:53
std::map< terminal *, rules > grammar
Definition: lalr.H:19
size_t rs(const reader::MetaData &md, size_t o, size_t n, uint8_t *b)
Definition: storage.H:1720
#define out
Definition: netio.H:19
std::map< terminal *, nat > state_transitions
Definition: lalr.H:41
const A & append
Definition: lalr.H:172
Definition: lalr.H:263
std::map< terminal *, prec > precedence
Definition: terminal.H:82
state_definitions state_defs
Definition: lalr.H:60
parserdef lr0parser(const grammar &g, terminal *s)
Definition: lalr.C:379
terminalset nextPrim(const grammar &g, const itemset &is)
Definition: lalr.C:297
terminalset symbolsUsing(const grammar &g, terminal *s)
Definition: lalr.C:101
void symbolsUsed(const grammar &g, const rule &r, terminalset *ss)
Definition: lalr.C:71
bool operator==(const item &rhs) const
Definition: lalr.C:222
F map() const
Definition: lalr.H:181
nat p
Definition: lalr.H:26
terminal * t
Definition: lalr.H:275
std::pair< nat, terminal * > transition
Definition: lalr.H:141
std::map< terminal *, action > lrstate
Definition: lalr.H:334
bool hasRule(const grammar &g, terminal *t)
Definition: lalr.C:114
terminal * s
Definition: lalr.H:58
bool includes(const parserdef &p, const transition &x, const transition &y)
Definition: lalr.C:499
Definition: lalr.H:281
std::set< terminal * > terminalset
Definition: terminal.H:29
nat s
Definition: lalr.H:318
terminals rule
Definition: lalr.H:17