hobbes
a language, embedded compiler, and runtime for efficient dynamic expression evaluation, data storage and analysis
str.H
Go to the documentation of this file.
1 
2 #ifndef HOBBES_UTIL_STR_HPP_INCLUDED
3 #define HOBBES_UTIL_STR_HPP_INCLUDED
4 
5 #include <hobbes/util/array.H>
6 #include <string>
7 #include <sstream>
8 #include <vector>
9 #include <cxxabi.h>
10 #include <stdlib.h>
11 #include <set>
12 #include <map>
13 
14 namespace hobbes { namespace str {
15 
16 std::string env(const std::string& varname);
17 void env(const std::string& varname, const std::string& value);
18 
19 typedef std::vector<std::string> seq;
20 typedef std::vector<seq> seqs;
21 typedef std::vector<unsigned int> lengths;
22 
23 void repeat(unsigned int n, const std::string& s, seq* out);
24 seq repeat(unsigned int n, const std::string& s);
25 
// the largest size() found among the elements of 'vs'
// (or the default 'd' when 'vs' has no elements at all)
template <typename C>
  unsigned int maxSize(unsigned int d, const std::vector<C>& vs) {
    typename std::vector<C>::const_iterator it = vs.begin();
    if (it == vs.end()) {
      return d;
    }

    // seed with the first element, then widen as larger elements show up
    unsigned int widest = it->size();
    for (++it; it != vs.end(); ++it) {
      const unsigned int n = it->size();
      if (widest < n) {
        widest = n;
      }
    }
    return widest;
  }
38 
// the smallest size() found among the elements of 'vs'
// (or the default 'd' when 'vs' has no elements at all)
template <typename C>
  unsigned int minSize(unsigned int d, const std::vector<C>& vs) {
    typename std::vector<C>::const_iterator it = vs.begin();
    if (it == vs.end()) {
      return d;
    }

    // seed with the first element, then narrow as smaller elements show up
    unsigned int narrowest = it->size();
    for (++it; it != vs.end(); ++it) {
      const unsigned int n = it->size();
      if (n < narrowest) {
        narrowest = n;
      }
    }
    return narrowest;
  }
51 
52 unsigned int tableCols(const seqs& tbl);
53 unsigned int tableRows(const seqs& tbl);
54 unsigned int maxStrLen(const seq& col);
55 lengths maxStrLen(const seqs& tbl);
56 std::string pad(size_t n);
57 std::string leftAlign(size_t w, const std::string& x);
58 std::string rightAlign(size_t w, const std::string& x);
59 seq leftAlign(const seq& col);
60 seq rightAlign(const seq& col);
61 seqs leftAlign(const seqs& tbl);
62 seqs rightAlign(const seqs& tbl);
63 void printLeftAlignedTable(std::ostream& out, const seqs& tbl);
64 void printRightAlignedTable(std::ostream& out, const seqs& tbl);
65 std::string showLeftAlignedTable(const seqs& tbl);
66 std::string showRightAlignedTable(const seqs& tbl);
67 void printHeadlessLeftAlignedTable(std::ostream& out, const seqs& tbl);
68 void printHeadlessRightAlignedTable(std::ostream& out, const seqs& tbl);
69 
// does stream extraction of a T from 'x' succeed?
// (only the extraction itself is checked -- text left over after the value is not examined)
template <typename T>
  bool is(const std::string& x) {
    std::istringstream input(x);
    T parsed;
    return !(input >> parsed).fail();
  }
77 
// read a T out of 'x' by stream extraction
// (starts from a value-initialized T, so that is what comes back if nothing gets stored)
template <typename T>
  T to(const std::string& x) {
    T value = T();
    std::istringstream input(x);
    input >> value;
    return value;
  }
85 
// read a T out of 'x' into 'out' by stream extraction
// returns true iff the extraction succeeded
template <typename T>
  bool to(const std::string& x, T& out) {
    std::istringstream input(x);
    return !(input >> out).fail();
  }
92 
// read a T out of the character range [b, e) into '*out' by stream extraction
// returns true iff the extraction succeeded
template <typename T>
  bool to(const char* b, const char* e, T* out) {
    const std::string text(b, e);
    std::istringstream input(text);
    return !(input >> *out).fail();
  }
99 
// render a value as a string, exactly as stream insertion (operator<<) would print it
template <typename T>
  std::string from(const T& x) {
    std::ostringstream rendered;
    rendered << x;
    return rendered.str();
  }
106 
107 std::string demangle(const char* tn);
108 std::string demangle(const std::type_info& ti);
109 
110 template <typename T>
111  std::string demangle() {
112  return demangle(typeid(T));
113  }
114 
// should 'c' be stripped by trim?  (true for NUL and for whitespace as judged by std::isspace)
//
// std::isspace is only defined for values representable as unsigned char, so:
//   * narrow characters are passed as unsigned char (a negative 'char' would be undefined behavior)
//   * wide characters that don't survive narrowing are never treated as whitespace
template <typename Char>
  bool isTrimmable(Char c) {
    if (c == Char()) {
      return true;
    }
    const unsigned char narrow = static_cast<unsigned char>(c);
    return static_cast<Char>(narrow) == c && std::isspace(narrow) != 0;
  }

// remove leading and trailing whitespace (and NUL characters) from 's'
// interior whitespace is preserved; a string made up only of trimmable characters yields the empty string
template <typename Char>
  std::basic_string<Char> trim(const std::basic_string<Char>& s) {
    typedef typename std::basic_string<Char>::size_type size_type;

    // first, find the first character worth keeping
    size_type begin = 0;
    while (begin < s.size() && isTrimmable(s[begin])) {
      ++begin;
    }

    // finally, find one past the last character worth keeping
    // (never moves below 'begin', so the range is empty when nothing is kept)
    size_type end = s.size();
    while (end > begin && isTrimmable(s[end - 1])) {
      --end;
    }

    return s.substr(begin, end - begin);
  }
145 
// trim both halves of a pair of strings
template <typename Char>
  std::pair< std::basic_string<Char>, std::basic_string<Char> > trim(const std::pair< std::basic_string<Char>, std::basic_string<Char> >& p) {
    typedef std::basic_string<Char> string_type;

    const string_type lhs = trim<Char>(p.first);
    const string_type rhs = trim<Char>(p.second);
    return std::pair<string_type, string_type>(lhs, rhs);
  }
150 
// trim every string in a sequence (the result has the same length and order as the input)
template <typename Char>
  std::vector< std::basic_string<Char> > trim(const std::vector< std::basic_string<Char> >& ss) {
    typedef std::vector< std::basic_string<Char> > strings_type;

    strings_type result(ss.size());
    for (typename strings_type::size_type i = 0; i < ss.size(); ++i) {
      result[i] = trim<Char>(ss[i]);
    }
    return result;
  }
159 
// strip at most one quote character 'q' from each end of 's'
// (the two ends are checked independently, so an unbalanced quote is still removed)
template <typename Char>
  std::basic_string<Char> trimq(const std::basic_string<Char>& s, Char q = (Char)'"') {
    typedef typename std::basic_string<Char>::size_type size_type;
    const size_type n = s.size();

    if (n == 0) {
      return s;
    }
    if (n == 1) {
      // a lone quote character disappears entirely
      return (s[0] == q) ? std::basic_string<Char>() : s;
    }

    // with two or more characters, 'first' can never pass 'stop'
    const size_type first = (s[0] == q) ? 1 : 0;
    const size_type stop  = (s[n - 1] == q) ? n - 1 : n;
    return s.substr(first, stop - first);
  }
178 
// replace every occurrence of 'old_substr' in 'src' with 'new_substr'
//   e.g.: replace [a b c b] [b c] [e] -> [a e b]
//
// occurrences are found left to right and do not overlap (the search resumes just past each match),
// and replacement text is never rescanned
// an empty 'old_substr' matches nothing, so 'src' comes back unchanged
template <typename Char>
  std::basic_string<Char> replace(const std::basic_string<Char>& src, const std::basic_string<Char>& old_substr, const std::basic_string<Char>& new_substr) {
    if (old_substr.empty()) {
      return src;
    }

    typedef typename std::basic_string<Char>::size_type SZT;
    const SZT npos = std::basic_string<Char>::npos;

    std::basic_string<Char> result;
    SZT copied = 0; // everything in 'src' before this offset has already been accounted for in 'result'

    for (SZT hit = src.find(old_substr); hit != npos; hit = src.find(old_substr, copied)) {
      // append straight from 'src' (no intermediate std::string) so that this works for any character type
      result.append(src, copied, hit - copied);
      result += new_substr;
      copied = hit + old_substr.size();
    }

    // whatever follows the last match (the whole of 'src' when there was no match)
    result.append(src, copied, npos);
    return result;
  }
205 
206 bool isNyb(char);
207 char denyb(char);
208 unsigned char dehex(const std::string&);
209 std::vector<unsigned char> dehexs(const std::string&);
210 char nyb(unsigned char x);
211 std::string hex(unsigned char);
212 std::string hex(const std::vector<unsigned char>&);
213 std::string hex(const unsigned char*, size_t);
214 
215 std::string escape(const std::string&);
216 std::string unescape(const std::string&);
217 
218 bool endsWith(const std::string& s, const std::string& sfx);
219 
220 typedef std::pair<std::string, std::string> pair;
221 
222 inline pair trim(const pair& p) {
223  return pair(trim(p.first), trim(p.second));
224 }
225 
226 pair splitAt(const std::string& s, unsigned int i);
227 pair lsplit(const std::string& s, const std::string& ss);
228 pair rsplit(const std::string& s, const std::string& ss);
229 seq csplit(const std::string& s, const std::string& ss);
230 pair readWhile(bool (*P)(char), const std::string& s);
231 
232 bool isDigit(char c);
233 bool isNotDigit(char c);
234 
235 unsigned int firstFailIndex(bool (*P)(char), const std::string& s);
236 
237 // reverse csplit
238 std::string cdelim(const seq& ss, const std::string& d);
239 
240 // common containers for strings
241 typedef std::set<std::string> set;
242 std::string show(const set&);
243 
244 typedef std::map<std::string, std::string> named_strings;
245 std::string show(const named_strings&);
246 
// process a string by 'expanding' embedded variables/expressions and unescaping sequences
//
// 'str' is scanned left to right and cut into pieces, each of which is folded into the running
// value (starting from 's') by one of the two callbacks:
//   constF(acc, text) -- for a run of literal text
//   expF(acc, name)   -- for a variable/expression reference
//
// the format understood is:
//   \c      -- the character 'c' taken literally (this is how '$' and '\' get into literal text)
//   $name   -- a short variable name: the character after '$' (anything but '{') followed by
//              any number of alphanumerics or '_'
//   ${expr} -- an arbitrary expression, delimited by balanced braces
//
// details worth knowing:
//   * constF is called at every '$' and at the end of input even when the pending literal text is empty
//   * a '\' at the very end of input is dropped
//   * a '${' that is never closed is dropped along with the text following it (expF is not called)
template <typename T>
  T foldWithFormat(const std::string& str, const T& s, T (*constF)(const T&, const std::string&), T (*expF)(const T&, const std::string&)) {
    enum State {
      Text,       // reading literal text
      Escape,     // just read '\', the next character is taken literally
      VarStart,   // just read '$', deciding between a short name and a braced expression
      ShortName,  // reading the rest of a '$name'
      BracedExpr  // reading the inside of a '${...}'
    };

    T acc = s;

    std::ostringstream pending; // the literal text or variable name accumulated so far
    State state = Text;
    int depth = 0;              // brace nesting depth while in BracedExpr

    for (size_t i = 0; i < str.size(); ++i) {
      const char c = str[i];

      switch (state) {
      // process unformatted text (decide whether to start variables or unescape)
      case Text:
        if (c == '\\') {
          state = Escape;
        } else if (c == '$') {
          acc = constF(acc, pending.str());
          pending.str("");
          state = VarStart;
        } else {
          pending << c;
        }
        break;

      // unescape
      case Escape:
        pending << c;
        state = Text;
        break;

      // process variables
      case VarStart:
        if (c == '{') {
          depth = 1;
          state = BracedExpr;
        } else {
          pending << c;
          state = ShortName;
        }
        break;

      // process variable short-names (may only be alphanumeric or '_')
      case ShortName:
        // std::isalnum requires a value representable as unsigned char (a negative 'char' is undefined behavior)
        if (std::isalnum(static_cast<unsigned char>(c)) != 0 || c == '_') {
          pending << c;
        } else {
          // this character ends the name but isn't part of it, so back up and
          // let it be read again as literal text
          --i;
          acc = expF(acc, pending.str());
          pending.str("");
          state = Text;
        }
        break;

      // process variable 'expressions' (may be anything delimited by braces)
      case BracedExpr:
        if (c == '}') {
          if (depth == 1) {
            // the brace that closes the expression (not part of it)
            acc = expF(acc, pending.str());
            pending.str("");
            state = Text;
          } else {
            --depth;
            pending << '}';
          }
        } else {
          if (c == '{') {
            ++depth;
          }
          pending << c;
        }
        break;
      }
    }

    // flush whatever was still pending when the input ran out
    switch (state) {
    case Text:
    case Escape:
    case VarStart:
      acc = constF(acc, pending.str());
      break;
    case ShortName:
      acc = expF(acc, pending.str());
      break;
    case BracedExpr:
      // unterminated '${...' -- nothing is emitted for it
      break;
    }

    return acc;
  }
338 
// read everything that remains in an input stream and hand it back as one string
inline std::string slurp(std::istream& in) {
  std::ostringstream buffer;
  buffer << in.rdbuf(); // copies straight from the stream's underlying buffer
  const std::string contents = buffer.str();
  return contents;
}
345 
346 // generate?
347 inline seq strings() { return seq(); }
348 inline seq strings(const std::string& a0) { seq r; r.push_back(a0); return r; }
349 inline seq strings(const std::string& a0, const std::string& a1) { seq r; r.push_back(a0); r.push_back(a1); return r; }
350 inline seq strings(const std::string& a0, const std::string& a1, const std::string& a2) { seq r; r.push_back(a0); r.push_back(a1); r.push_back(a2); return r; }
351 inline seq strings(const std::string& a0, const std::string& a1, const std::string& a2, const std::string& a3) { seq r; r.push_back(a0); r.push_back(a1); r.push_back(a2); r.push_back(a3); return r; }
352 inline seq strings(const std::string& a0, const std::string& a1, const std::string& a2, const std::string& a3, const std::string& a4) { seq r; r.push_back(a0); r.push_back(a1); r.push_back(a2); r.push_back(a3); r.push_back(a4); return r; }
353 inline seq strings(const std::string& a0, const std::string& a1, const std::string& a2, const std::string& a3, const std::string& a4, const std::string& a5) { seq r; r.push_back(a0); r.push_back(a1); r.push_back(a2); r.push_back(a3); r.push_back(a4); r.push_back(a5); return r; }
354 
355 // read a char definition like 'c' or 'd' or '\0'
356 char readCharDef(const std::string&);
357 
358 // convenience functions for expanding environment variable references in strings and paths
359 std::string expandVars(const std::string&);
360 std::string expandPath(const std::string&); // same as 'expandVars' but expand '~' to home directory
361 
362 // display a byte count in typical units
363 std::string showDataSize(size_t bytes);
364 
365 // char set utilities
// the characters from 'low' through 'high' (both included), in the order given:
// ascending when low < high, otherwise descending (a single character when low == high)
inline std::string charRange(char low, char high) {
  // count with an int rather than a char: a char counter can never step past the largest
  // (or below the smallest) char value, so a range touching either limit would never terminate
  const int first = low;
  const int last  = high;

  std::ostringstream ss;
  if (first < last) {
    for (int c = first; c <= last; ++c) {
      ss << static_cast<char>(c);
    }
  } else {
    for (int c = first; c >= last; --c) {
      ss << static_cast<char>(c);
    }
  }
  return ss.str();
}
379 
380 inline std::string printableChars() {
381  return charRange(0x20, 0x7e);
382 }
383 
384 inline std::string difference(const std::string& x, const std::string& y) {
385  return fromSet<std::string>(setDifference(toSet(x), toSet(y)));
386 }
387 
388 // a set of strings represented as a prefix tree
389 class ptnode;
390 
391 class prefix_tree {
392 public:
393  prefix_tree(const seq&);
394  prefix_tree(const set&);
395  ~prefix_tree();
396 
397  std::map<size_t, seq> rankedMatches(const std::string&, size_t maxDist) const;
398  seq closestMatches(const std::string&, size_t maxDist) const;
399 private:
401 };
402 
403 // how far is one string from another string?
404 size_t editDistance(const std::string&, const std::string&);
405 
406 // find the strings out of a set that are at most the given edit distance away from an input string
407 seq closestMatches(const std::string&, const set&, size_t maxDist);
408 seq closestMatches(const std::string&, const seq&, size_t maxDist);
409 
410 // ensure that a string has a given suffix (add it if it's not there)
411 std::string mustEndWith(const std::string&, const std::string&);
412 
413 // get a set of filesystem objects matching a pattern
414 str::seq paths(const std::string& p);
415 
416 }}
417 
418 #endif
bool is(const std::string &x)
Definition: str.H:71
std::basic_string< Char > trimq(const std::basic_string< Char > &s, Char q=(Char)'"')
Definition: str.H:161
Definition: str.H:391
Definition: expr.H:148
std::string mustEndWith(const std::string &, const std::string &)
Definition: str.C:699
std::string rightAlign(size_t w, const std::string &x)
Definition: str.C:62
std::string showLeftAlignedTable(const seqs &tbl)
Definition: str.C:174
std::vector< unsigned char > dehexs(const std::string &)
Definition: str.C:284
prefix_tree(const seq &)
Definition: str.C:653
std::string unescape(const std::string &)
Definition: str.C:358
Definition: str.H:14
pair lsplit(const std::string &s, const std::string &ss)
Definition: str.C:215
unsigned int maxSize(unsigned int d, const std::vector< C > &vs)
Definition: str.H:27
std::string expandPath(const std::string &)
Definition: str.C:513
std::basic_string< Char > trim(const std::basic_string< Char > &s)
Definition: str.H:116
std::string charRange(char low, char high)
Definition: str.H:366
unsigned int tableRows(const seqs &tbl)
Definition: str.C:34
unsigned int firstFailIndex(bool(*P)(char), const std::string &s)
Definition: str.C:475
pair readWhile(bool(*P)(char), const std::string &s)
Definition: str.C:254
pair rsplit(const std::string &s, const std::string &ss)
Definition: str.C:224
std::string leftAlign(size_t w, const std::string &x)
Definition: str.C:54
std::string slurp(std::istream &in)
Definition: str.H:340
bool isNyb(char)
Definition: str.C:258
char nyb(unsigned char x)
Definition: str.C:294
std::pair< std::string, std::string > pair
Definition: str.H:220
seq strings()
Definition: str.H:347
std::vector< uint8_t > bytes
Definition: array.H:21
str::seq paths(const std::string &p)
Definition: str.C:708
std::string from(const T &x)
Definition: str.H:101
std::vector< seq > seqs
Definition: str.H:20
Definition: boot.H:7
bool endsWith(const std::string &s, const std::string &sfx)
Definition: str.C:396
seq closestMatches(const std::string &, size_t maxDist) const
Definition: str.C:673
std::map< size_t, seq > rankedMatches(const std::string &, size_t maxDist) const
Definition: str.C:669
char denyb(char)
Definition: str.C:262
void printLeftAlignedTable(std::ostream &out, const seqs &tbl)
Definition: str.C:147
std::set< T > setDifference(const std::set< T > &lhs, const std::set< T > &rhs)
Definition: array.H:185
std::string showDataSize(size_t bytes)
Definition: str.C:525
std::basic_string< Char > replace(const std::basic_string< Char > &src, const std::basic_string< Char > &old_substr, const std::basic_string< Char > &new_substr)
Definition: str.H:181
std::string difference(const std::string &x, const std::string &y)
Definition: str.H:384
std::string cdelim(const seq &ss, const std::string &d)
Definition: str.C:486
void printRightAlignedTable(std::ostream &out, const seqs &tbl)
Definition: str.C:151
std::string hex(unsigned char)
Definition: str.C:299
T foldWithFormat(const std::string &str, const T &s, T(*constF)(const T &, const std::string &), T(*expF)(const T &, const std::string &))
Definition: str.H:249
unsigned char dehex(const std::string &)
Definition: str.C:274
const T * end(const array< T > *d)
Definition: tylift.H:88
std::map< std::string, std::string > named_strings
Definition: str.H:244
T to(const std::string &x)
Definition: str.H:79
const T * begin(const array< T > *d)
Definition: tylift.H:87
size_t r(const reader::MetaData &md, size_t o, T *t)
Definition: storage.H:1730
std::string expandVars(const std::string &)
Definition: str.C:503
void w(const T &x, bytes *out)
Definition: net.H:282
std::vector< std::string > seq
Definition: str.H:19
std::string show(const set &)
Definition: str.C:400
unsigned int maxStrLen(const seq &col)
Definition: str.C:38
#define out
Definition: netio.H:19
void printHeadlessLeftAlignedTable(std::ostream &out, const seqs &tbl)
Definition: str.C:166
std::string env(const std::string &varname)
Definition: str.C:9
ptnode * root
Definition: str.H:400
uint32_t result
Definition: regex.C:376
std::string demangle(const char *tn)
Definition: str.C:186
pair splitAt(const std::string &s, unsigned int i)
Definition: str.C:207
~prefix_tree()
Definition: str.C:665
unsigned int minSize(unsigned int d, const std::vector< C > &vs)
Definition: str.H:40
std::string printableChars()
Definition: str.H:380
char readCharDef(const std::string &)
Definition: str.C:438
unsigned int tableCols(const seqs &tbl)
Definition: str.C:30
std::vector< unsigned int > lengths
Definition: str.H:21
std::set< typename CT::value_type > toSet(const CT &xs)
Definition: array.H:151
seq csplit(const std::string &s, const std::string &ss)
Definition: str.C:233
std::string escape(const std::string &)
Definition: str.C:325
size_t editDistance(const std::string &, const std::string &)
Definition: str.C:678
std::string showRightAlignedTable(const seqs &tbl)
Definition: str.C:180
Definition: str.C:547
std::string pad(size_t n)
Definition: str.C:50
bool in(T x, const std::set< T > &xs)
Definition: array.H:47
bool isDigit(char c)
Definition: str.C:467
void repeat(unsigned int n, const std::string &s, seq *out)
Definition: str.C:18
bool isNotDigit(char c)
Definition: str.C:471
void printHeadlessRightAlignedTable(std::ostream &out, const seqs &tbl)
Definition: str.C:170