hobbes
a language, embedded compiler, and runtime for efficient dynamic expression evaluation, data storage and analysis
regex.H
Go to the documentation of this file.
1 /*
2  * regex : constructs functions to perform regex matches in match expressions
3  */
4 
5 #ifndef HOBBES_LANG_PAT_REGEX_HPP_INCLUDED
6 #define HOBBES_LANG_PAT_REGEX_HPP_INCLUDED
7 
9 #include <hobbes/util/str.H>
10 #include <map>
11 #include <vector>
12 #include <string>
13 #include <set>
14 
15 namespace hobbes {
16 
17 class cc;
18 
19 /*
20  * makeRegexFn - generate a function that performs an 'ordered regex match'
21  *
22  * This generated function F will be defined:
23  * F :: (Array cs char) => cs -> int
24  *
25  * Such that a 'string' (e.g. [char], <std.string>, ...) can be tested to determine which regexes
26  * it matches (the 'int' result represents an index into a table determined here that classifies
27  * states representing one or more regex matches).
28  *
29  * inputs:
30  * cc* : a compiler context within which to generate this regex function
31  * regexes : a sequence of regexes in priority order to match against later input
32  *
33  * outputs:
34  * result : the name of the generated function
35  * rstates : a mapping of F return codes to input regex indexes
36  */
/*
 * Regex - abstract base type for regex values
 *
 * 'show' is pure virtual, so only derived types can be instantiated, and values
 * are handled through base-class pointers.  The destructor is therefore virtual:
 * destroying a derived object through a Regex* without one is undefined behavior.
 */
struct Regex {
  virtual ~Regex() = default;

  // emit this regex to the given stream (presumably a printable form -- confirm against the implementations)
  virtual void show(std::ostream&) const = 0;
};
40 typedef std::shared_ptr<Regex> RegexPtr;
41 typedef std::vector<RegexPtr> Regexes;
42 RegexPtr parseRegex(const std::string&);
43 str::seq bindingNames(const RegexPtr&);
44 
// RegexIdx  : the index of a regex within the input sequence
// RegexIdxs : a set of such indexes
// RStates   : regex result state -> the set of input regex indexes it represents
using RegexIdx  = size_t;
using RegexIdxs = std::set<RegexIdx>;
using RStates   = std::map<size_t, RegexIdxs>;
48 
49 typedef std::map<size_t, str::seq> CaptureVarsAt;
50 
51 struct CRegexes {
52  std::string fname; // the low-level function that evaluates a set of regular expressions against an input
53  ExprPtr captureBuffer; // an expression producing a buffer for recording capture groups
54  CaptureVarsAt captureVarsAt; // capture variable names by regex index
55  RStates rstates; // regex result state -> set of input regex ids (to determine which outer match rows to select for a given regex match)
56 };
57 
58 CRegexes makeRegexFn(cc*, const Regexes&, const LexicalAnnotation&);
59 
60 typedef std::pair<std::string, ExprPtr> CVarDef;
61 typedef std::vector<CVarDef> CVarDefs;
62 
63 CVarDefs unpackCaptureVars(const std::string& strVar, const std::string& bufferVar, const CRegexes&, size_t state, const LexicalAnnotation&);
64 
65 }
66 
67 #endif
68 
std::vector< CVarDef > CVarDefs
Definition: regex.H:61
std::string fname
Definition: regex.H:52
std::shared_ptr< Regex > RegexPtr
Definition: regex.H:40
ExprPtr captureBuffer
Definition: regex.H:53
Definition: regex.H:37
std::map< size_t, RegexIdxs > RStates
Definition: regex.H:47
size_t RegexIdx
Definition: regex.H:45
Definition: regex.H:51
Definition: boot.H:7
str::seq bindingNames(const RegexPtr &)
Definition: regex.C:363
std::vector< RegexPtr > Regexes
Definition: regex.H:41
std::set< RegexIdx > RegexIdxs
Definition: regex.H:46
std::shared_ptr< Expr > ExprPtr
Definition: expr.H:58
Definition: cc.H:64
Definition: lannotation.H:22
std::vector< std::string > seq
Definition: str.H:19
uint32_t state
Definition: regex.C:372
CaptureVarsAt captureVarsAt
Definition: regex.H:54
virtual void show(std::ostream &) const =0
std::pair< std::string, ExprPtr > CVarDef
Definition: regex.H:60
CVarDefs unpackCaptureVars(const std::string &strVar, const std::string &bufferVar, const CRegexes &, size_t state, const LexicalAnnotation &)
Definition: regex.C:1212
CRegexes makeRegexFn(cc *, const Regexes &, const LexicalAnnotation &)
Definition: regex.C:1172
std::map< size_t, str::seq > CaptureVarsAt
Definition: regex.H:49
RegexPtr parseRegex(const std::string &)
Definition: regex.C:330
RStates rstates
Definition: regex.H:55