Cockatrice 2025-11-30-Development-2.11.0-beta.38
A cross-platform virtual tabletop for multiplayer card games
Loading...
Searching...
No Matches
peglib.h
Go to the documentation of this file.
1//
2// peglib.h
3//
4// Copyright (c) 2022 Yuji Hirose. All rights reserved.
5// MIT License
6//
7
8#pragma once
9
10/*
11 * Configuration
12 */
13
14#ifndef CPPPEGLIB_HEURISTIC_ERROR_TOKEN_MAX_CHAR_COUNT
15#define CPPPEGLIB_HEURISTIC_ERROR_TOKEN_MAX_CHAR_COUNT 32
16#endif
17
18#include <algorithm>
19#include <any>
20#include <cassert>
21#include <cctype>
22#if __has_include(<charconv>)
23#include <charconv>
24#endif
25#include <cstring>
26#include <functional>
27#include <initializer_list>
28#include <iostream>
29#include <limits>
30#include <map>
31#include <memory>
32#include <mutex>
33#include <set>
34#include <sstream>
35#include <string>
36#include <unordered_map>
37#include <unordered_set>
38#include <vector>
39
40#if !defined(__cplusplus) || __cplusplus < 201703L
41#error "Requires complete C++17 support"
42#endif
43
44namespace peg {
45
46/*-----------------------------------------------------------------------------
47 * scope_exit
48 *---------------------------------------------------------------------------*/
49
50// This is based on
51// "http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4189".
52
53template <typename EF> struct scope_exit {
54 explicit scope_exit(EF &&f)
55 : exit_function(std::move(f)), execute_on_destruction{true} {}
56
58 : exit_function(std::move(rhs.exit_function)),
60 rhs.release();
61 }
62
65 }
66
67 void release() { this->execute_on_destruction = false; }
68
69private:
70 scope_exit(const scope_exit &) = delete;
71 void operator=(const scope_exit &) = delete;
73
76};
77
78/*-----------------------------------------------------------------------------
79 * UTF8 functions
80 *---------------------------------------------------------------------------*/
81
// Returns the byte length (1-4) of the UTF-8 sequence starting at `s8`,
// judged from the lead byte only (continuation bytes are not validated).
//
// s8: pointer to the first byte of the sequence.
// l:  number of readable bytes starting at `s8`.
//
// Returns 0 when `l` is 0, when the first byte is not a valid lead byte, or
// when fewer bytes remain than the lead byte announces.
inline size_t codepoint_length(const char *s8, size_t l) {
  if (l == 0) { return 0; }

  const auto b = static_cast<uint8_t>(s8[0]);
  if ((b & 0x80) == 0) { return 1; }
  if ((b & 0xE0) == 0xC0 && l >= 2) { return 2; }
  if ((b & 0xF0) == 0xE0 && l >= 3) { return 3; }
  if ((b & 0xF8) == 0xF0 && l >= 4) { return 4; }
  return 0;
}

// Counts the code points in the `l` bytes starting at `s8`.
//
// A byte that does not start a decodable sequence (stray continuation byte,
// invalid lead byte, truncated sequence) is counted as one unit and skipped.
// Without this the index would never advance, because codepoint_length()
// reports 0 for such bytes, and the loop would not terminate.
inline size_t codepoint_count(const char *s8, size_t l) {
  size_t count = 0;
  size_t i = 0;
  while (i < l) {
    const auto len = codepoint_length(s8 + i, l - i);
    i += (len == 0) ? 1 : len;
    count++;
  }
  return count;
}
105
// Encodes `cp` as UTF-8 into `buff`, which must have room for 4 bytes.
// Returns the number of bytes written, or 0 (nothing written) when `cp` is a
// surrogate (U+D800..U+DFFF) or lies beyond U+10FFFF.
inline size_t encode_codepoint(char32_t cp, char *buff) {
  // Continuation byte (10xxxxxx) carrying the 6 bits of `cp` found at `shift`.
  const auto trail = [cp](unsigned shift) {
    return static_cast<char>(0x80 | ((cp >> shift) & 0x3F));
  };

  if (cp < 0x0080) {
    buff[0] = static_cast<char>(cp & 0x7F);
    return 1;
  }
  if (cp < 0x0800) {
    buff[0] = static_cast<char>(0xC0 | ((cp >> 6) & 0x1F));
    buff[1] = trail(0);
    return 2;
  }
  if (cp >= 0xD800 && cp < 0xE000) {
    // D800 - DFFF is invalid...
    return 0;
  }
  if (cp < 0x10000) {
    buff[0] = static_cast<char>(0xE0 | ((cp >> 12) & 0xF));
    buff[1] = trail(6);
    buff[2] = trail(0);
    return 3;
  }
  if (cp < 0x110000) {
    buff[0] = static_cast<char>(0xF0 | ((cp >> 18) & 0x7));
    buff[1] = trail(12);
    buff[2] = trail(6);
    buff[3] = trail(0);
    return 4;
  }
  return 0;
}

// Convenience overload: returns the UTF-8 encoding of `cp` as a string, which
// is empty when `cp` cannot be encoded.
inline std::string encode_codepoint(char32_t cp) {
  char bytes[4];
  const auto count = encode_codepoint(cp, bytes);
  return std::string(bytes, count);
}
142
// Decodes one UTF-8 sequence from the `l` bytes at `s8`.
// On success stores the sequence length in `bytes` and the code point in `cp`
// and returns true. Returns false, leaving both outputs untouched, when `l`
// is 0, the lead byte is invalid, or the sequence is truncated. Continuation
// bytes are not checked for the 10xxxxxx pattern.
inline bool decode_codepoint(const char *s8, size_t l, size_t &bytes,
                             char32_t &cp) {
  if (l == 0) { return false; }

  // Payload bits of byte `k`, selected by `mask`.
  const auto bits = [s8](size_t k, unsigned mask) {
    return static_cast<char32_t>(static_cast<unsigned char>(s8[k]) & mask);
  };

  const auto lead = static_cast<uint8_t>(s8[0]);

  if ((lead & 0x80) == 0) {
    bytes = 1;
    cp = lead;
    return true;
  }
  if ((lead & 0xE0) == 0xC0) {
    if (l < 2) { return false; }
    bytes = 2;
    cp = (bits(0, 0x1F) << 6) | bits(1, 0x3F);
    return true;
  }
  if ((lead & 0xF0) == 0xE0) {
    if (l < 3) { return false; }
    bytes = 3;
    cp = (bits(0, 0x0F) << 12) | (bits(1, 0x3F) << 6) | bits(2, 0x3F);
    return true;
  }
  if ((lead & 0xF8) == 0xF0) {
    if (l < 4) { return false; }
    bytes = 4;
    cp = (bits(0, 0x07) << 18) | (bits(1, 0x3F) << 12) |
         (bits(2, 0x3F) << 6) | bits(3, 0x3F);
    return true;
  }
  return false;
}

// Decodes one code point into `cp`; returns the bytes consumed, 0 on failure.
inline size_t decode_codepoint(const char *s8, size_t l, char32_t &cp) {
  size_t consumed;
  return decode_codepoint(s8, l, consumed, cp) ? consumed : 0;
}

// Decodes and returns one code point; returns 0 when decoding fails.
inline char32_t decode_codepoint(const char *s8, size_t l) {
  char32_t result = 0;
  decode_codepoint(s8, l, result);
  return result;
}

// Converts `l` bytes of UTF-8 at `s8` to UTF-32. The input is split at every
// byte that is not a continuation byte (10xxxxxx); each chunk is decoded on
// its own, and a chunk that fails to decode contributes U+0000.
inline std::u32string decode(const char *s8, size_t l) {
  std::u32string result;
  for (size_t pos = 0; pos < l;) {
    const auto start = pos;
    ++pos;
    while (pos < l && (s8[pos] & 0xc0) == 0x80) {
      ++pos;
    }
    result.push_back(decode_codepoint(s8 + start, pos - start));
  }
  return result;
}
204
// Reinterprets a pointer to any element type `T` as `const char *`. No bytes
// are copied or converted; only the pointer type changes.
205template <typename T> const char *u8(const T *s) {
206 return reinterpret_cast<const char *>(s);
207}
208
209/*-----------------------------------------------------------------------------
210 * escape_characters
211 *---------------------------------------------------------------------------*/
212
// Returns a copy of the `n` bytes at `s` in which form feed, newline,
// carriage return, tab and vertical tab are replaced by their two-character
// backslash spellings (\f \n \r \t \v). All other bytes are copied as-is.
inline std::string escape_characters(const char *s, size_t n) {
  std::string out;
  for (const char ch : std::string_view(s, n)) {
    if (ch == '\f') {
      out += "\\f";
    } else if (ch == '\n') {
      out += "\\n";
    } else if (ch == '\r') {
      out += "\\r";
    } else if (ch == '\t') {
      out += "\\t";
    } else if (ch == '\v') {
      out += "\\v";
    } else {
      out += ch;
    }
  }
  return out;
}

// string_view convenience overload of the function above.
inline std::string escape_characters(std::string_view sv) {
  return escape_characters(sv.data(), sv.size());
}
232
233/*-----------------------------------------------------------------------------
234 * resolve_escape_sequence
235 *---------------------------------------------------------------------------*/
236
// If `c` is a hexadecimal digit (0-9, a-f, A-F), stores its value in `v` and
// returns true; otherwise returns false and leaves `v` untouched.
inline bool is_hex(char c, int &v) {
  if ('0' <= c && c <= '9') {
    v = c - '0';
  } else if ('a' <= c && c <= 'f') {
    v = c - 'a' + 10;
  } else if ('A' <= c && c <= 'F') {
    v = c - 'A' + 10;
  } else {
    return false;
  }
  return true;
}

// If `c` is a decimal digit, stores its value in `v` and returns true;
// otherwise returns false and leaves `v` untouched.
inline bool is_digit(char c, int &v) {
  const bool ok = ('0' <= c && c <= '9');
  if (ok) { v = c - '0'; }
  return ok;
}

// Reads consecutive hexadecimal digits of `s` (length `n`) starting at index
// `i`. Returns {accumulated value, index of the first byte not consumed};
// the value is 0 and the index is unchanged when no digit is present.
inline std::pair<int, size_t> parse_hex_number(const char *s, size_t n,
                                               size_t i) {
  int value = 0;
  for (int digit = 0; i < n && is_hex(s[i], digit); ++i) {
    value = static_cast<int>(value * 16 + digit);
  }
  return std::pair(value, i);
}

// Same as parse_hex_number() but accumulates in base 8. Digits are recognized
// with is_digit(), so '8' and '9' are consumed as well.
inline std::pair<int, size_t> parse_octal_number(const char *s, size_t n,
                                                 size_t i) {
  int value = 0;
  for (int digit = 0; i < n && is_digit(s[i], digit); ++i) {
    value = static_cast<int>(value * 8 + digit);
  }
  return std::pair(value, i);
}
280
281inline std::string resolve_escape_sequence(const char *s, size_t n) {
282 std::string r;
283 r.reserve(n);
284
285 size_t i = 0;
286 while (i < n) {
287 auto ch = s[i];
288 if (ch == '\\') {
289 i++;
290 if (i == n) { throw std::runtime_error("Invalid escape sequence..."); }
291 switch (s[i]) {
292 case 'f':
293 r += '\f';
294 i++;
295 break;
296 case 'n':
297 r += '\n';
298 i++;
299 break;
300 case 'r':
301 r += '\r';
302 i++;
303 break;
304 case 't':
305 r += '\t';
306 i++;
307 break;
308 case 'v':
309 r += '\v';
310 i++;
311 break;
312 case '\'':
313 r += '\'';
314 i++;
315 break;
316 case '"':
317 r += '"';
318 i++;
319 break;
320 case '[':
321 r += '[';
322 i++;
323 break;
324 case ']':
325 r += ']';
326 i++;
327 break;
328 case '\\':
329 r += '\\';
330 i++;
331 break;
332 case 'x':
333 case 'u': {
334 char32_t cp;
335 std::tie(cp, i) = parse_hex_number(s, n, i + 1);
336 r += encode_codepoint(cp);
337 break;
338 }
339 default: {
340 char32_t cp;
341 std::tie(cp, i) = parse_octal_number(s, n, i);
342 r += encode_codepoint(cp);
343 break;
344 }
345 }
346 } else {
347 r += ch;
348 i++;
349 }
350 }
351 return r;
352}
353
354/*-----------------------------------------------------------------------------
355 * token_to_number_ - This function should be removed eventually
356 *---------------------------------------------------------------------------*/
357
// Converts the text `sv` to a number of type `T`.
//
// When <charconv> is available and `T` is not a floating-point type the text
// is parsed with std::from_chars; otherwise it is streamed through a
// std::istringstream. The conversion status is not inspected: with
// std::from_chars a failed parse leaves the initial value 0 in place.
template <typename T> T token_to_number_(std::string_view sv) {
  T n = 0;
#if __has_include(<charconv>)
  if constexpr (!std::is_floating_point<T>::value) {
    std::from_chars(sv.data(), sv.data() + sv.size(), n);
    return n;
  } else
#endif
  {
    auto text = std::string(sv);
    std::istringstream stream(text);
    stream >> n;
    return n;
  }
}
373
374/*-----------------------------------------------------------------------------
375 * Trie
376 *---------------------------------------------------------------------------*/
377
// Prefix dictionary used to find the longest listed word at the start of a
// text. Every prefix of every item is stored as a key of `dic_`; each entry
// records whether the prefix is itself a listed item (`match`), whether no
// longer key extends it (`done`), and the index of the item it stands for.
class Trie {
public:
  // items:       the words to recognize; an item's index in this vector is
  //              the id reported by match().
  // ignore_case: when true, items and matched text are compared after
  //              lower-casing with std::tolower.
  Trie(const std::vector<std::string> &items, bool ignore_case)
      : ignore_case_(ignore_case) {
    size_t id = 0;
    for (const auto &item : items) {
      const std::string key = ignore_case ? to_lower(item) : item;
      max_len_ = (std::max)(max_len_, key.size());
      for (size_t len = 1; len <= key.size(); len++) {
        const auto last = len == key.size();
        const std::string_view sv(key.data(), len);
        auto it = dic_.find(sv);
        if (it == dic_.end()) {
          dic_.emplace(sv, Info{last, last, id});
        } else if (last) {
          // The entry was created as a mere prefix of an earlier, longer
          // item and still carries that item's id. Record this item's id so
          // match() reports the word that actually matched. For duplicated
          // items the first occurrence keeps its id.
          if (!it->second.match) {
            it->second.match = true;
            it->second.id = id;
          }
        } else {
          it->second.done = false;
        }
      }
      id++;
    }
  }

  // Finds the longest item that is a prefix of the `text_len` bytes at
  // `text`. Returns the length of that item and stores its index in `id`;
  // returns 0 and leaves `id` untouched when no item matches. `text` does not
  // need to be NUL-terminated.
  size_t match(const char *text, size_t text_len, size_t &id) const {
    // No key is longer than max_len_, so bytes beyond that cannot matter.
    const auto limit = (std::min)(text_len, max_len_);

    std::string lower_text;
    if (ignore_case_ && limit > 0) {
      // Lower-case only the bytes that can take part in a match; this
      // honours text_len and avoids copying the whole remaining input.
      lower_text = to_lower(std::string(text, limit));
      text = lower_text.data();
    }

    size_t match_len = 0;
    for (size_t len = 1; len <= limit; len++) {
      const auto it = dic_.find(std::string_view(text, len));
      if (it == dic_.end()) { break; }
      if (it->second.match) {
        match_len = len;
        id = it->second.id;
      }
      if (it->second.done) { break; }
    }
    return match_len;
  }

  // Number of stored prefixes (not the number of items).
  size_t size() const { return dic_.size(); }

private:
  std::string to_lower(std::string s) const {
    for (char &c : s) {
      // std::tolower requires a value representable as unsigned char.
      c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
    }
    return s;
  }

  struct Info {
    bool done;  // no stored key extends this prefix
    bool match; // this prefix is itself one of the items
    size_t id;  // index of the item reported when `match` is true
  };

  // TODO: Use unordered_map when heterogeneous lookup is supported in C++20
  // std::unordered_map<std::string, Info> dic_;
  std::map<std::string, Info, std::less<>> dic_;

  bool ignore_case_;
  size_t max_len_ = 0; // length of the longest item
};
450
451/*-----------------------------------------------------------------------------
452 * PEG
453 *---------------------------------------------------------------------------*/
454
455/*
456 * Line information utility function
457 */
458inline std::pair<size_t, size_t> line_info(const char *start, const char *cur) {
459 auto p = start;
460 auto col_ptr = p;
461 auto no = 1;
462
463 while (p < cur) {
464 if (*p == '\n') {
465 no++;
466 col_ptr = p + 1;
467 }
468 p++;
469 }
470
471 auto col = codepoint_count(col_ptr, p - col_ptr) + 1;
472
473 return std::pair(no, col);
474}
475
476/*
477 * String tag
478 */
// Folds the `l` bytes at `s` into the running hash `h` using
// h = (h * 33) ^ byte, and returns the result.
//
// Implemented as a loop rather than by recursion so that long strings neither
// exhaust the stack at run time nor hit the compiler's constexpr recursion
// limit; the computed value is the same.
inline constexpr unsigned int str2tag_core(const char *s, size_t l,
                                           unsigned int h) {
  for (size_t i = 0; i < l; i++) {
    h = (h * 33) ^ static_cast<unsigned char>(s[i]);
  }
  return h;
}

// Hash tag of `sv`, usable in constant expressions (e.g. as a `case` label).
inline constexpr unsigned int str2tag(std::string_view sv) {
  return str2tag_core(sv.data(), sv.size(), 0);
}

namespace udl {

// User-defined literal: "name"_ yields the same value as str2tag("name").
inline constexpr unsigned int operator"" _(const char *s, size_t l) {
  return str2tag_core(s, l, 0);
}

} // namespace udl
497
498/*
499 * Semantic values
500 */
501class Context;
502
503struct SemanticValues : protected std::vector<std::any> {
504 SemanticValues() = default;
506
507 // Input text
508 const char *path = nullptr;
509 const char *ss = nullptr;
510
511 // Matched string
512 std::string_view sv() const { return sv_; }
513
514 // Definition name
515 const std::string &name() const { return name_; }
516
517 std::vector<unsigned int> tags;
518
519 // Line number and column at which the matched string is
520 std::pair<size_t, size_t> line_info() const;
521
522 // Choice count
523 size_t choice_count() const { return choice_count_; }
524
525 // Choice number (0 based index)
526 size_t choice() const { return choice_; }
527
528 // Tokens
529 std::vector<std::string_view> tokens;
530
531 std::string_view token(size_t id = 0) const {
532 if (tokens.empty()) { return sv_; }
533 assert(id < tokens.size());
534 return tokens[id];
535 }
536
537 // Token conversion
538 std::string token_to_string(size_t id = 0) const {
539 return std::string(token(id));
540 }
541
542 template <typename T> T token_to_number() const {
543 return token_to_number_<T>(token());
544 }
545
546 // Transform the semantic value vector to another vector
547 template <typename T>
548 std::vector<T> transform(size_t beg = 0,
549 size_t end = static_cast<size_t>(-1)) const {
550 std::vector<T> r;
551 end = (std::min)(end, size());
552 for (size_t i = beg; i < end; i++) {
553 r.emplace_back(std::any_cast<T>((*this)[i]));
554 }
555 return r;
556 }
557
558 void append(SemanticValues &chvs) {
559 sv_ = chvs.sv_;
560 for (auto &v : chvs) {
561 emplace_back(std::move(v));
562 }
563 for (auto &tag : chvs.tags) {
564 tags.emplace_back(std::move(tag));
565 }
566 for (auto &tok : chvs.tokens) {
567 tokens.emplace_back(std::move(tok));
568 }
569 }
570
571 using std::vector<std::any>::iterator;
572 using std::vector<std::any>::const_iterator;
573 using std::vector<std::any>::size;
574 using std::vector<std::any>::empty;
575 using std::vector<std::any>::assign;
576 using std::vector<std::any>::begin;
577 using std::vector<std::any>::end;
578 using std::vector<std::any>::rbegin;
579 using std::vector<std::any>::rend;
580 using std::vector<std::any>::operator[];
581 using std::vector<std::any>::at;
582 using std::vector<std::any>::resize;
583 using std::vector<std::any>::front;
584 using std::vector<std::any>::back;
585 using std::vector<std::any>::push_back;
586 using std::vector<std::any>::pop_back;
587 using std::vector<std::any>::insert;
588 using std::vector<std::any>::erase;
589 using std::vector<std::any>::clear;
590 using std::vector<std::any>::swap;
591 using std::vector<std::any>::emplace;
592 using std::vector<std::any>::emplace_back;
593
594private:
595 friend class Context;
596 friend class Dictionary;
597 friend class Sequence;
598 friend class PrioritizedChoice;
599 friend class Repetition;
600 friend class Holder;
601 friend class PrecedenceClimbing;
602
603 Context *c_ = nullptr;
604 std::string_view sv_;
605 size_t choice_count_ = 0;
606 size_t choice_ = 0;
607 std::string name_;
608};
609
610/*
611 * Semantic action
612 */
// Invokes `fn(args...)` and wraps the outcome in a std::any:
//   - void result      -> empty std::any
//   - std::any result  -> returned unchanged
//   - any other result -> stored inside a new std::any
template <typename F, typename... Args> std::any call(F fn, Args &&...args) {
  using Result = decltype(fn(std::forward<Args>(args)...));

  if constexpr (std::is_void_v<Result>) {
    fn(std::forward<Args>(args)...);
    return std::any();
  } else if constexpr (std::is_same_v<std::remove_cv_t<Result>, std::any>) {
    return fn(std::forward<Args>(args)...);
  } else {
    return std::any(fn(std::forward<Args>(args)...));
  }
}
625
// Compile-time arity of a callable, exposed as `argument_count<F>::value`.
//
// Primary template: for a class type with a single, non-template
// operator() (e.g. a non-generic lambda) it forwards to the specialization
// that matches the type of `&T::operator()`.
626template <typename T>
627struct argument_count : argument_count<decltype(&T::operator())> {};
// Plain function pointer.
628template <typename R, typename... Args>
629struct argument_count<R (*)(Args...)>
630 : std::integral_constant<unsigned, sizeof...(Args)> {};
// Pointer to non-const member function.
631template <typename R, typename C, typename... Args>
632struct argument_count<R (C::*)(Args...)>
633 : std::integral_constant<unsigned, sizeof...(Args)> {};
// Pointer to const member function.
634template <typename R, typename C, typename... Args>
635struct argument_count<R (C::*)(Args...) const>
636 : std::integral_constant<unsigned, sizeof...(Args)> {};
637
638class Action {
639public:
640 Action() = default;
641 Action(Action &&rhs) = default;
642 template <typename F> Action(F fn) : fn_(make_adaptor(fn)) {}
643 template <typename F> void operator=(F fn) { fn_ = make_adaptor(fn); }
644 Action &operator=(const Action &rhs) = default;
645
646 operator bool() const { return bool(fn_); }
647
648 std::any operator()(SemanticValues &vs, std::any &dt) const {
649 return fn_(vs, dt);
650 }
651
652private:
653 using Fty = std::function<std::any(SemanticValues &vs, std::any &dt)>;
654
655 template <typename F> Fty make_adaptor(F fn) {
656 if constexpr (argument_count<F>::value == 1) {
657 return [fn](auto &vs, auto & /*dt*/) { return call(fn, vs); };
658 } else {
659 return [fn](auto &vs, auto &dt) { return call(fn, vs, dt); };
660 }
661 }
662
664};
665
666/*
667 * Parse result helper
668 */
// A parse length equal to the largest size_t value is the failure sentinel;
// every other length means the parse succeeded.
inline bool success(size_t len) {
  return !(len == (std::numeric_limits<size_t>::max)());
}

// True exactly when `len` is the failure sentinel.
inline bool fail(size_t len) {
  return len == (std::numeric_limits<size_t>::max)();
}
672
673/*
674 * Log
675 */
676using Log = std::function<void(size_t line, size_t col, const std::string &msg,
677 const std::string &rule)>;
678
679/*
680 * ErrorInfo
681 */
682class Definition;
683
684struct ErrorInfo {
685 const char *error_pos = nullptr;
686 std::vector<std::pair<const char *, const Definition *>> expected_tokens;
687 const char *message_pos = nullptr;
688 std::string message;
689 std::string label;
690 const char *last_output_pos = nullptr;
692
693 void clear() {
694 error_pos = nullptr;
695 expected_tokens.clear();
696 message_pos = nullptr;
697 message.clear();
698 }
699
700 void add(const char *error_literal, const Definition *error_rule) {
701 for (const auto &[t, r] : expected_tokens) {
702 if (t == error_literal && r == error_rule) { return; }
703 }
704 expected_tokens.emplace_back(error_literal, error_rule);
705 }
706
707 void output_log(const Log &log, const char *s, size_t n);
708
709private:
710 int cast_char(char c) const { return static_cast<unsigned char>(c); }
711
712 std::string heuristic_error_token(const char *s, size_t n,
713 const char *pos) const {
714 auto len = n - std::distance(s, pos);
715 if (len) {
716 size_t i = 0;
717 auto c = cast_char(pos[i++]);
718 if (!std::ispunct(c) && !std::isspace(c)) {
719 while (i < len && !std::ispunct(cast_char(pos[i])) &&
720 !std::isspace(cast_char(pos[i]))) {
721 i++;
722 }
723 }
724
726 size_t j = 0;
727 while (count > 0 && j < i) {
728 j += codepoint_length(&pos[j], i - j);
729 count--;
730 }
731
732 return escape_characters(pos, j);
733 }
734 return std::string();
735 }
736
737 std::string replace_all(std::string str, const std::string &from,
738 const std::string &to) const {
739 size_t pos = 0;
740 while ((pos = str.find(from, pos)) != std::string::npos) {
741 str.replace(pos, from.length(), to);
742 pos += to.length();
743 }
744 return str;
745 }
746};
747
748/*
749 * Context
750 */
751class Ope;
752
753using TracerEnter = std::function<void(
754 const Ope &name, const char *s, size_t n, const SemanticValues &vs,
755 const Context &c, const std::any &dt, std::any &trace_data)>;
756
757using TracerLeave = std::function<void(
758 const Ope &ope, const char *s, size_t n, const SemanticValues &vs,
759 const Context &c, const std::any &dt, size_t, std::any &trace_data)>;
760
761using TracerStartOrEnd = std::function<void(std::any &trace_data)>;
762
763class Context {
764public:
765 const char *path;
766 const char *s;
767 const size_t l;
768
770 bool recovered = false;
771
772 std::vector<std::shared_ptr<SemanticValues>> value_stack;
774
775 std::vector<Definition *> rule_stack;
776 std::vector<std::vector<std::shared_ptr<Ope>>> args_stack;
777
779
780 std::shared_ptr<Ope> whitespaceOpe;
781 bool in_whitespace = false;
782
783 std::shared_ptr<Ope> wordOpe;
784
785 std::vector<std::map<std::string_view, std::string>> capture_scope_stack;
787
788 std::vector<bool> cut_stack;
789
790 const size_t def_count;
792 std::vector<bool> cache_registered;
793 std::vector<bool> cache_success;
794
795 std::map<std::pair<size_t, size_t>, std::tuple<size_t, std::any>>
797
800 std::any trace_data;
801 const bool verbose_trace;
802
804
820
823
824 assert(!value_stack_size);
826 assert(cut_stack.empty());
827 }
828
829 Context(const Context &) = delete;
830 Context(Context &&) = delete;
831 Context operator=(const Context &) = delete;
832
833 template <typename T>
834 void packrat(const char *a_s, size_t def_id, size_t &len, std::any &val,
835 T fn) {
837 fn(val);
838 return;
839 }
840
841 auto col = a_s - s;
842 auto idx = def_count * static_cast<size_t>(col) + def_id;
843
844 if (cache_registered[idx]) {
845 if (cache_success[idx]) {
846 auto key = std::pair(col, def_id);
847 std::tie(len, val) = cache_values[key];
848 return;
849 } else {
850 len = static_cast<size_t>(-1);
851 return;
852 }
853 } else {
854 fn(val);
855 cache_registered[idx] = true;
856 cache_success[idx] = success(len);
857 if (success(len)) {
858 auto key = std::pair(col, def_id);
859 cache_values[key] = std::pair(len, val);
860 }
861 return;
862 }
863 }
864
869
870 void pop() {
873 }
874
875 // Semantic values
877 assert(value_stack_size <= value_stack.size());
878 if (value_stack_size == value_stack.size()) {
879 value_stack.emplace_back(std::make_shared<SemanticValues>(this));
880 } else {
881 auto &vs = *value_stack[value_stack_size];
882 if (!vs.empty()) {
883 vs.clear();
884 if (!vs.tags.empty()) { vs.tags.clear(); }
885 }
886 vs.sv_ = std::string_view();
887 vs.choice_count_ = 0;
888 vs.choice_ = 0;
889 if (!vs.tokens.empty()) { vs.tokens.clear(); }
890 }
891
892 auto &vs = *value_stack[value_stack_size++];
893 vs.path = path;
894 vs.ss = s;
895 return vs;
896 }
897
899
900 // Arguments
901 void push_args(std::vector<std::shared_ptr<Ope>> &&args) {
902 args_stack.emplace_back(args);
903 }
904
905 void pop_args() { args_stack.pop_back(); }
906
907 const std::vector<std::shared_ptr<Ope>> &top_args() const {
908 return args_stack[args_stack.size() - 1];
909 }
910
911 // Capture scope
915 capture_scope_stack.emplace_back(
916 std::map<std::string_view, std::string>());
917 } else {
919 if (!cs.empty()) { cs.clear(); }
920 }
922 }
923
925
927 assert(capture_scope_stack_size >= 2);
929 auto prev = curr - 1;
930 for (const auto &[k, v] : *curr) {
931 (*prev)[k] = v;
932 }
933 }
934
935 // Error
936 void set_error_pos(const char *a_s, const char *literal = nullptr);
937
938 // Trace
939 void trace_enter(const Ope &ope, const char *a_s, size_t n,
940 const SemanticValues &vs, std::any &dt);
941 void trace_leave(const Ope &ope, const char *a_s, size_t n,
942 const SemanticValues &vs, std::any &dt, size_t len);
943 bool is_traceable(const Ope &ope) const;
944
945 // Line info
946 std::pair<size_t, size_t> line_info(const char *cur) const {
947 std::call_once(source_line_index_init_, [this]() {
948 for (size_t pos = 0; pos < l; pos++) {
949 if (s[pos] == '\n') { source_line_index.push_back(pos); }
950 }
951 source_line_index.push_back(l);
952 });
953
954 auto pos = static_cast<size_t>(std::distance(s, cur));
955
956 auto it = std::lower_bound(
957 source_line_index.begin(), source_line_index.end(), pos,
958 [](size_t element, size_t value) { return element < value; });
959
960 auto id = static_cast<size_t>(std::distance(source_line_index.begin(), it));
961 auto off = pos - (id == 0 ? 0 : source_line_index[id - 1] + 1);
962 return std::pair(id + 1, off + 1);
963 }
964
965 size_t next_trace_id = 0;
966 std::vector<size_t> trace_ids;
967 bool ignore_trace_state = false;
968 mutable std::once_flag source_line_index_init_;
969 mutable std::vector<size_t> source_line_index;
970};
971
972/*
973 * Parser operators
974 */
// Abstract base class of all parser operators.
975class Ope {
976public:
// Visitor interface accepted by accept(); only declared here.
977 struct Visitor;
978
979 virtual ~Ope() = default;
// Non-virtual entry point; declared here, defined elsewhere in the file.
// Arguments: input pointer `s`, remaining length `n`, semantic values `vs`,
// parse context `c` and user data `dt`.
980 size_t parse(const char *s, size_t n, SemanticValues &vs, Context &c,
981 std::any &dt) const;
// Operator-specific matching, implemented by every derived class. The
// implementations visible in this file return the matched length, or
// static_cast<size_t>(-1) on failure.
982 virtual size_t parse_core(const char *s, size_t n, SemanticValues &vs,
983 Context &c, std::any &dt) const = 0;
// Dispatches this operator to the visitor `v`.
984 virtual void accept(Visitor &v) = 0;
985};
986
987class Sequence : public Ope {
988public:
989 template <typename... Args>
990 Sequence(const Args &...args)
991 : opes_{static_cast<std::shared_ptr<Ope>>(args)...} {}
992 Sequence(const std::vector<std::shared_ptr<Ope>> &opes) : opes_(opes) {}
993 Sequence(std::vector<std::shared_ptr<Ope>> &&opes) : opes_(opes) {}
994
995 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
996 std::any &dt) const override {
997 auto &chvs = c.push_semantic_values_scope();
998 auto se = scope_exit([&]() { c.pop_semantic_values_scope(); });
999 size_t i = 0;
1000 for (const auto &ope : opes_) {
1001 auto len = ope->parse(s + i, n - i, chvs, c, dt);
1002 if (fail(len)) { return len; }
1003 i += len;
1004 }
1005 vs.append(chvs);
1006 return i;
1007 }
1008
1009 void accept(Visitor &v) override;
1010
1011 std::vector<std::shared_ptr<Ope>> opes_;
1012};
1013
1014class PrioritizedChoice : public Ope {
1015public:
1016 template <typename... Args>
1017 PrioritizedChoice(bool for_label, const Args &...args)
1018 : opes_{static_cast<std::shared_ptr<Ope>>(args)...},
1019 for_label_(for_label) {}
1020 PrioritizedChoice(const std::vector<std::shared_ptr<Ope>> &opes)
1021 : opes_(opes) {}
1022 PrioritizedChoice(std::vector<std::shared_ptr<Ope>> &&opes) : opes_(opes) {}
1023
1024 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1025 std::any &dt) const override {
1026 size_t len = static_cast<size_t>(-1);
1027
1028 if (!for_label_) { c.cut_stack.push_back(false); }
1029 auto se1 = scope_exit([&]() {
1030 if (!for_label_) { c.cut_stack.pop_back(); }
1031 });
1032
1033 size_t id = 0;
1034 for (const auto &ope : opes_) {
1035 if (!c.cut_stack.empty()) { c.cut_stack.back() = false; }
1036
1037 auto &chvs = c.push();
1039 auto se2 = scope_exit([&]() {
1040 c.pop();
1042 });
1043
1044 len = ope->parse(s, n, chvs, c, dt);
1045
1046 if (success(len)) {
1047 vs.append(chvs);
1048 vs.choice_count_ = opes_.size();
1049 vs.choice_ = id;
1051 break;
1052 } else if (!c.cut_stack.empty() && c.cut_stack.back()) {
1053 break;
1054 }
1055
1056 id++;
1057 }
1058
1059 return len;
1060 }
1061
1062 void accept(Visitor &v) override;
1063
1064 size_t size() const { return opes_.size(); }
1065
1066 std::vector<std::shared_ptr<Ope>> opes_;
1067 bool for_label_ = false;
1068};
1069
1070class Repetition : public Ope {
1071public:
1072 Repetition(const std::shared_ptr<Ope> &ope, size_t min, size_t max)
1073 : ope_(ope), min_(min), max_(max) {}
1074
1075 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1076 std::any &dt) const override {
1077 size_t count = 0;
1078 size_t i = 0;
1079 while (count < min_) {
1080 auto &chvs = c.push();
1081 auto se = scope_exit([&]() { c.pop(); });
1082
1083 auto len = ope_->parse(s + i, n - i, chvs, c, dt);
1084
1085 if (success(len)) {
1086 vs.append(chvs);
1088 } else {
1089 return len;
1090 }
1091 i += len;
1092 count++;
1093 }
1094
1095 while (count < max_) {
1096 auto &chvs = c.push();
1097 auto se = scope_exit([&]() { c.pop(); });
1098
1099 auto len = ope_->parse(s + i, n - i, chvs, c, dt);
1100
1101 if (success(len)) {
1102 vs.append(chvs);
1104 } else {
1105 break;
1106 }
1107 i += len;
1108 count++;
1109 }
1110 return i;
1111 }
1112
1113 void accept(Visitor &v) override;
1114
1115 bool is_zom() const {
1116 return min_ == 0 && max_ == std::numeric_limits<size_t>::max();
1117 }
1118
1119 static std::shared_ptr<Repetition> zom(const std::shared_ptr<Ope> &ope) {
1120 return std::make_shared<Repetition>(ope, 0,
1121 std::numeric_limits<size_t>::max());
1122 }
1123
1124 static std::shared_ptr<Repetition> oom(const std::shared_ptr<Ope> &ope) {
1125 return std::make_shared<Repetition>(ope, 1,
1126 std::numeric_limits<size_t>::max());
1127 }
1128
1129 static std::shared_ptr<Repetition> opt(const std::shared_ptr<Ope> &ope) {
1130 return std::make_shared<Repetition>(ope, 0, 1);
1131 }
1132
1133 std::shared_ptr<Ope> ope_;
1134 size_t min_;
1135 size_t max_;
1136};
1137
1138class AndPredicate : public Ope {
1139public:
1140 AndPredicate(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1141
1142 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1143 Context &c, std::any &dt) const override {
1144 auto &chvs = c.push();
1145 auto se = scope_exit([&]() { c.pop(); });
1146
1147 auto len = ope_->parse(s, n, chvs, c, dt);
1148
1149 if (success(len)) {
1150 return 0;
1151 } else {
1152 return len;
1153 }
1154 }
1155
1156 void accept(Visitor &v) override;
1157
1158 std::shared_ptr<Ope> ope_;
1159};
1160
1161class NotPredicate : public Ope {
1162public:
1163 NotPredicate(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1164
1165 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1166 Context &c, std::any &dt) const override {
1167 auto &chvs = c.push();
1168 auto se = scope_exit([&]() { c.pop(); });
1169 auto len = ope_->parse(s, n, chvs, c, dt);
1170 if (success(len)) {
1171 c.set_error_pos(s);
1172 return static_cast<size_t>(-1);
1173 } else {
1174 return 0;
1175 }
1176 }
1177
1178 void accept(Visitor &v) override;
1179
1180 std::shared_ptr<Ope> ope_;
1181};
1182
1183class Dictionary : public Ope, public std::enable_shared_from_this<Dictionary> {
1184public:
1185 Dictionary(const std::vector<std::string> &v, bool ignore_case)
1186 : trie_(v, ignore_case) {}
1187
1188 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1189 std::any &dt) const override;
1190
1191 void accept(Visitor &v) override;
1192
1194};
1195
1196class LiteralString : public Ope,
1197 public std::enable_shared_from_this<LiteralString> {
1198public:
1199 LiteralString(std::string &&s, bool ignore_case)
1200 : lit_(s), ignore_case_(ignore_case), is_word_(false) {}
1201
1202 LiteralString(const std::string &s, bool ignore_case)
1203 : lit_(s), ignore_case_(ignore_case), is_word_(false) {}
1204
1205 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1206 std::any &dt) const override;
1207
1208 void accept(Visitor &v) override;
1209
1210 std::string lit_;
1212 mutable std::once_flag init_is_word_;
1213 mutable bool is_word_;
1214};
1215
1216class CharacterClass : public Ope,
1217 public std::enable_shared_from_this<CharacterClass> {
1218public:
1219 CharacterClass(const std::string &s, bool negated, bool ignore_case)
1220 : negated_(negated), ignore_case_(ignore_case) {
1221 auto chars = decode(s.data(), s.length());
1222 auto i = 0u;
1223 while (i < chars.size()) {
1224 if (i + 2 < chars.size() && chars[i + 1] == '-') {
1225 auto cp1 = chars[i];
1226 auto cp2 = chars[i + 2];
1227 ranges_.emplace_back(std::pair(cp1, cp2));
1228 i += 3;
1229 } else {
1230 auto cp = chars[i];
1231 ranges_.emplace_back(std::pair(cp, cp));
1232 i += 1;
1233 }
1234 }
1235 assert(!ranges_.empty());
1236 }
1237
1238 CharacterClass(const std::vector<std::pair<char32_t, char32_t>> &ranges,
1239 bool negated, bool ignore_case)
1240 : ranges_(ranges), negated_(negated), ignore_case_(ignore_case) {
1241 assert(!ranges_.empty());
1242 }
1243
1244 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1245 Context &c, std::any & /*dt*/) const override {
1246 if (n < 1) {
1247 c.set_error_pos(s);
1248 return static_cast<size_t>(-1);
1249 }
1250
1251 char32_t cp = 0;
1252 auto len = decode_codepoint(s, n, cp);
1253
1254 for (const auto &range : ranges_) {
1255 if (in_range(range, cp)) {
1256 if (negated_) {
1257 c.set_error_pos(s);
1258 return static_cast<size_t>(-1);
1259 } else {
1260 return len;
1261 }
1262 }
1263 }
1264
1265 if (negated_) {
1266 return len;
1267 } else {
1268 c.set_error_pos(s);
1269 return static_cast<size_t>(-1);
1270 }
1271 }
1272
1273 void accept(Visitor &v) override;
1274
1275private:
1276 bool in_range(const std::pair<char32_t, char32_t> &range, char32_t cp) const {
1277 if (ignore_case_) {
1278 auto cpl = std::tolower(cp);
1279 return std::tolower(range.first) <= cpl &&
1280 cpl <= std::tolower(range.second);
1281 } else {
1282 return range.first <= cp && cp <= range.second;
1283 }
1284 }
1285
1286 std::vector<std::pair<char32_t, char32_t>> ranges_;
1289};
1290
1291class Character : public Ope, public std::enable_shared_from_this<Character> {
1292public:
1293 Character(char ch) : ch_(ch) {}
1294
1295 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1296 Context &c, std::any & /*dt*/) const override {
1297 if (n < 1 || s[0] != ch_) {
1298 c.set_error_pos(s);
1299 return static_cast<size_t>(-1);
1300 }
1301 return 1;
1302 }
1303
1304 void accept(Visitor &v) override;
1305
1306 char ch_;
1307};
1308
1309class AnyCharacter : public Ope,
1310 public std::enable_shared_from_this<AnyCharacter> {
1311public:
1312 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1313 Context &c, std::any & /*dt*/) const override {
1314 auto len = codepoint_length(s, n);
1315 if (len < 1) {
1316 c.set_error_pos(s);
1317 return static_cast<size_t>(-1);
1318 }
1319 return len;
1320 }
1321
1322 void accept(Visitor &v) override;
1323};
1324
1325class CaptureScope : public Ope {
1326public:
1327 CaptureScope(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1328
1329 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1330 std::any &dt) const override {
1332 auto se = scope_exit([&]() { c.pop_capture_scope(); });
1333 return ope_->parse(s, n, vs, c, dt);
1334 }
1335
1336 void accept(Visitor &v) override;
1337
1338 std::shared_ptr<Ope> ope_;
1339};
1340
1341class Capture : public Ope {
1342public:
1343 using MatchAction = std::function<void(const char *s, size_t n, Context &c)>;
1344
1345 Capture(const std::shared_ptr<Ope> &ope, MatchAction ma)
1346 : ope_(ope), match_action_(ma) {}
1347
1348 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1349 std::any &dt) const override {
1350 auto len = ope_->parse(s, n, vs, c, dt);
1351 if (success(len) && match_action_) { match_action_(s, len, c); }
1352 return len;
1353 }
1354
1355 void accept(Visitor &v) override;
1356
1357 std::shared_ptr<Ope> ope_;
1359};
1360
1361class TokenBoundary : public Ope {
1362public:
1363 TokenBoundary(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1364
1365 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1366 std::any &dt) const override;
1367
1368 void accept(Visitor &v) override;
1369
1370 std::shared_ptr<Ope> ope_;
1371};
1372
1373class Ignore : public Ope {
1374public:
1375 Ignore(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1376
1377 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1378 Context &c, std::any &dt) const override {
1379 auto &chvs = c.push_semantic_values_scope();
1380 auto se = scope_exit([&]() { c.pop_semantic_values_scope(); });
1381 return ope_->parse(s, n, chvs, c, dt);
1382 }
1383
1384 void accept(Visitor &v) override;
1385
1386 std::shared_ptr<Ope> ope_;
1387};
1388
1389using Parser = std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1390 std::any &dt)>;
1391
1392class User : public Ope {
1393public:
1394 User(Parser fn) : fn_(fn) {}
1395 size_t parse_core(const char *s, size_t n, SemanticValues &vs,
1396 Context & /*c*/, std::any &dt) const override {
1397 assert(fn_);
1398 return fn_(s, n, vs, dt);
1399 }
1400 void accept(Visitor &v) override;
1401 std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1402 std::any &dt)>
1404};
1405
1406class WeakHolder : public Ope {
1407public:
1408 WeakHolder(const std::shared_ptr<Ope> &ope) : weak_(ope) {}
1409
1410 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1411 std::any &dt) const override {
1412 auto ope = weak_.lock();
1413 assert(ope);
1414 return ope->parse(s, n, vs, c, dt);
1415 }
1416
1417 void accept(Visitor &v) override;
1418
1419 std::weak_ptr<Ope> weak_;
1420};
1421
1422class Holder : public Ope {
1423public:
1424 Holder(Definition *outer) : outer_(outer) {}
1425
1426 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1427 std::any &dt) const override;
1428
1429 void accept(Visitor &v) override;
1430
1431 std::any reduce(SemanticValues &vs, std::any &dt) const;
1432
1433 const std::string &name() const;
1434 const std::string &trace_name() const;
1435
1436 std::shared_ptr<Ope> ope_;
1438 mutable std::once_flag trace_name_init_;
1439 mutable std::string trace_name_;
1440
1441 friend class Definition;
1442};
1443
1444using Grammar = std::unordered_map<std::string, Definition>;
1445
1446class Reference : public Ope, public std::enable_shared_from_this<Reference> {
1447public:
1448 Reference(const Grammar &grammar, const std::string &name, const char *s,
1449 bool is_macro, const std::vector<std::shared_ptr<Ope>> &args)
1450 : grammar_(grammar), name_(name), s_(s), is_macro_(is_macro), args_(args),
1451 rule_(nullptr), iarg_(0) {}
1452
1453 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1454 std::any &dt) const override;
1455
1456 void accept(Visitor &v) override;
1457
1458 std::shared_ptr<Ope> get_core_operator() const;
1459
1461 const std::string name_;
1462 const char *s_;
1463
1464 const bool is_macro_;
1465 const std::vector<std::shared_ptr<Ope>> args_;
1466
1468 size_t iarg_;
1469};
1470
1471class Whitespace : public Ope {
1472public:
1473 Whitespace(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1474
1475 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1476 std::any &dt) const override {
1477 if (c.in_whitespace) { return 0; }
1478 c.in_whitespace = true;
1479 auto se = scope_exit([&]() { c.in_whitespace = false; });
1480 return ope_->parse(s, n, vs, c, dt);
1481 }
1482
1483 void accept(Visitor &v) override;
1484
1485 std::shared_ptr<Ope> ope_;
1486};
1487
1488class BackReference : public Ope {
1489public:
1490 BackReference(std::string &&name) : name_(name) {}
1491
1492 BackReference(const std::string &name) : name_(name) {}
1493
1494 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1495 std::any &dt) const override;
1496
1497 void accept(Visitor &v) override;
1498
1499 std::string name_;
1500};
1501
1502class PrecedenceClimbing : public Ope {
1503public:
1504 using BinOpeInfo = std::map<std::string_view, std::pair<size_t, char>>;
1505
1506 PrecedenceClimbing(const std::shared_ptr<Ope> &atom,
1507 const std::shared_ptr<Ope> &binop, const BinOpeInfo &info,
1508 const Definition &rule)
1509 : atom_(atom), binop_(binop), info_(info), rule_(rule) {}
1510
1511 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1512 std::any &dt) const override {
1513 return parse_expression(s, n, vs, c, dt, 0);
1514 }
1515
1516 void accept(Visitor &v) override;
1517
1518 std::shared_ptr<Ope> atom_;
1519 std::shared_ptr<Ope> binop_;
1522
1523private:
1524 size_t parse_expression(const char *s, size_t n, SemanticValues &vs,
1525 Context &c, std::any &dt, size_t min_prec) const;
1526
1528};
1529
1530class Recovery : public Ope {
1531public:
1532 Recovery(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1533
1534 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1535 std::any &dt) const override;
1536
1537 void accept(Visitor &v) override;
1538
1539 std::shared_ptr<Ope> ope_;
1540};
1541
1542class Cut : public Ope, public std::enable_shared_from_this<Cut> {
1543public:
1544 size_t parse_core(const char * /*s*/, size_t /*n*/, SemanticValues & /*vs*/,
1545 Context &c, std::any & /*dt*/) const override {
1546 if (!c.cut_stack.empty()) { c.cut_stack.back() = true; }
1547 return 0;
1548 }
1549
1550 void accept(Visitor &v) override;
1551};
1552
1553/*
1554 * Factories
1555 */
1556template <typename... Args> std::shared_ptr<Ope> seq(Args &&...args) {
1557 return std::make_shared<Sequence>(static_cast<std::shared_ptr<Ope>>(args)...);
1558}
1559
1560template <typename... Args> std::shared_ptr<Ope> cho(Args &&...args) {
1561 return std::make_shared<PrioritizedChoice>(
1562 false, static_cast<std::shared_ptr<Ope>>(args)...);
1563}
1564
1565template <typename... Args> std::shared_ptr<Ope> cho4label_(Args &&...args) {
1566 return std::make_shared<PrioritizedChoice>(
1567 true, static_cast<std::shared_ptr<Ope>>(args)...);
1568}
1569
1570inline std::shared_ptr<Ope> zom(const std::shared_ptr<Ope> &ope) {
1571 return Repetition::zom(ope);
1572}
1573
1574inline std::shared_ptr<Ope> oom(const std::shared_ptr<Ope> &ope) {
1575 return Repetition::oom(ope);
1576}
1577
1578inline std::shared_ptr<Ope> opt(const std::shared_ptr<Ope> &ope) {
1579 return Repetition::opt(ope);
1580}
1581
1582inline std::shared_ptr<Ope> rep(const std::shared_ptr<Ope> &ope, size_t min,
1583 size_t max) {
1584 return std::make_shared<Repetition>(ope, min, max);
1585}
1586
1587inline std::shared_ptr<Ope> apd(const std::shared_ptr<Ope> &ope) {
1588 return std::make_shared<AndPredicate>(ope);
1589}
1590
1591inline std::shared_ptr<Ope> npd(const std::shared_ptr<Ope> &ope) {
1592 return std::make_shared<NotPredicate>(ope);
1593}
1594
1595inline std::shared_ptr<Ope> dic(const std::vector<std::string> &v,
1596 bool ignore_case) {
1597 return std::make_shared<Dictionary>(v, ignore_case);
1598}
1599
1600inline std::shared_ptr<Ope> lit(std::string &&s) {
1601 return std::make_shared<LiteralString>(s, false);
1602}
1603
1604inline std::shared_ptr<Ope> liti(std::string &&s) {
1605 return std::make_shared<LiteralString>(s, true);
1606}
1607
1608inline std::shared_ptr<Ope> cls(const std::string &s) {
1609 return std::make_shared<CharacterClass>(s, false, false);
1610}
1611
1612inline std::shared_ptr<Ope>
1613cls(const std::vector<std::pair<char32_t, char32_t>> &ranges,
1614 bool ignore_case = false) {
1615 return std::make_shared<CharacterClass>(ranges, false, ignore_case);
1616}
1617
1618inline std::shared_ptr<Ope> ncls(const std::string &s) {
1619 return std::make_shared<CharacterClass>(s, true, false);
1620}
1621
1622inline std::shared_ptr<Ope>
1623ncls(const std::vector<std::pair<char32_t, char32_t>> &ranges,
1624 bool ignore_case = false) {
1625 return std::make_shared<CharacterClass>(ranges, true, ignore_case);
1626}
1627
1628inline std::shared_ptr<Ope> chr(char dt) {
1629 return std::make_shared<Character>(dt);
1630}
1631
1632inline std::shared_ptr<Ope> dot() { return std::make_shared<AnyCharacter>(); }
1633
1634inline std::shared_ptr<Ope> csc(const std::shared_ptr<Ope> &ope) {
1635 return std::make_shared<CaptureScope>(ope);
1636}
1637
1638inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope> &ope,
1640 return std::make_shared<Capture>(ope, ma);
1641}
1642
1643inline std::shared_ptr<Ope> tok(const std::shared_ptr<Ope> &ope) {
1644 return std::make_shared<TokenBoundary>(ope);
1645}
1646
1647inline std::shared_ptr<Ope> ign(const std::shared_ptr<Ope> &ope) {
1648 return std::make_shared<Ignore>(ope);
1649}
1650
1651inline std::shared_ptr<Ope>
1652usr(std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1653 std::any &dt)>
1654 fn) {
1655 return std::make_shared<User>(fn);
1656}
1657
1658inline std::shared_ptr<Ope> ref(const Grammar &grammar, const std::string &name,
1659 const char *s, bool is_macro,
1660 const std::vector<std::shared_ptr<Ope>> &args) {
1661 return std::make_shared<Reference>(grammar, name, s, is_macro, args);
1662}
1663
1664inline std::shared_ptr<Ope> wsp(const std::shared_ptr<Ope> &ope) {
1665 return std::make_shared<Whitespace>(std::make_shared<Ignore>(ope));
1666}
1667
1668inline std::shared_ptr<Ope> bkr(std::string &&name) {
1669 return std::make_shared<BackReference>(name);
1670}
1671
1672inline std::shared_ptr<Ope> pre(const std::shared_ptr<Ope> &atom,
1673 const std::shared_ptr<Ope> &binop,
1675 const Definition &rule) {
1676 return std::make_shared<PrecedenceClimbing>(atom, binop, info, rule);
1677}
1678
1679inline std::shared_ptr<Ope> rec(const std::shared_ptr<Ope> &ope) {
1680 return std::make_shared<Recovery>(ope);
1681}
1682
1683inline std::shared_ptr<Ope> cut() { return std::make_shared<Cut>(); }
1684
1685/*
1686 * Visitor
1687 */
1689 virtual ~Visitor() {}
1690 virtual void visit(Sequence &) {}
1691 virtual void visit(PrioritizedChoice &) {}
1692 virtual void visit(Repetition &) {}
1693 virtual void visit(AndPredicate &) {}
1694 virtual void visit(NotPredicate &) {}
1695 virtual void visit(Dictionary &) {}
1696 virtual void visit(LiteralString &) {}
1697 virtual void visit(CharacterClass &) {}
1698 virtual void visit(Character &) {}
1699 virtual void visit(AnyCharacter &) {}
1700 virtual void visit(CaptureScope &) {}
1701 virtual void visit(Capture &) {}
1702 virtual void visit(TokenBoundary &) {}
1703 virtual void visit(Ignore &) {}
1704 virtual void visit(User &) {}
1705 virtual void visit(WeakHolder &) {}
1706 virtual void visit(Holder &) {}
1707 virtual void visit(Reference &) {}
1708 virtual void visit(Whitespace &) {}
1709 virtual void visit(BackReference &) {}
1710 virtual void visit(PrecedenceClimbing &) {}
1711 virtual void visit(Recovery &) {}
1712 virtual void visit(Cut &) {}
1713};
1714
1716 using Ope::Visitor::visit;
1717
1718 void visit(Sequence &) override { name_ = "Sequence"; }
1719 void visit(PrioritizedChoice &) override { name_ = "PrioritizedChoice"; }
1720 void visit(Repetition &) override { name_ = "Repetition"; }
1721 void visit(AndPredicate &) override { name_ = "AndPredicate"; }
1722 void visit(NotPredicate &) override { name_ = "NotPredicate"; }
1723 void visit(Dictionary &) override { name_ = "Dictionary"; }
1724 void visit(LiteralString &) override { name_ = "LiteralString"; }
1725 void visit(CharacterClass &) override { name_ = "CharacterClass"; }
1726 void visit(Character &) override { name_ = "Character"; }
1727 void visit(AnyCharacter &) override { name_ = "AnyCharacter"; }
1728 void visit(CaptureScope &) override { name_ = "CaptureScope"; }
1729 void visit(Capture &) override { name_ = "Capture"; }
1730 void visit(TokenBoundary &) override { name_ = "TokenBoundary"; }
1731 void visit(Ignore &) override { name_ = "Ignore"; }
1732 void visit(User &) override { name_ = "User"; }
1733 void visit(WeakHolder &) override { name_ = "WeakHolder"; }
1734 void visit(Holder &ope) override { name_ = ope.trace_name().data(); }
1735 void visit(Reference &) override { name_ = "Reference"; }
1736 void visit(Whitespace &) override { name_ = "Whitespace"; }
1737 void visit(BackReference &) override { name_ = "BackReference"; }
1738 void visit(PrecedenceClimbing &) override { name_ = "PrecedenceClimbing"; }
1739 void visit(Recovery &) override { name_ = "Recovery"; }
1740 void visit(Cut &) override { name_ = "Cut"; }
1741
1742 static std::string get(Ope &ope) {
1743 TraceOpeName vis;
1744 ope.accept(vis);
1745 return vis.name_;
1746 }
1747
1748private:
1749 const char *name_ = nullptr;
1750};
1751
1753 using Ope::Visitor::visit;
1754
1755 void visit(Sequence &ope) override {
1756 for (auto op : ope.opes_) {
1757 op->accept(*this);
1758 }
1759 }
1760 void visit(PrioritizedChoice &ope) override {
1761 for (auto op : ope.opes_) {
1762 op->accept(*this);
1763 }
1764 }
1765 void visit(Repetition &ope) override { ope.ope_->accept(*this); }
1766 void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
1767 void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
1768 void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1769 void visit(Capture &ope) override { ope.ope_->accept(*this); }
1770 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1771 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1772 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
1773 void visit(Holder &ope) override;
1774 void visit(Reference &ope) override;
1775 void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
1776 void visit(PrecedenceClimbing &ope) override;
1777 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1778
1779 std::unordered_map<void *, size_t> ids;
1780};
1781
1783 using Ope::Visitor::visit;
1784
1785 void visit(PrioritizedChoice &ope) override {
1786 for (auto op : ope.opes_) {
1787 if (!IsLiteralToken::check(*op)) { return; }
1788 }
1789 result_ = true;
1790 }
1791
1792 void visit(Dictionary &) override { result_ = true; }
1793 void visit(LiteralString &) override { result_ = true; }
1794
1795 static bool check(Ope &ope) {
1796 IsLiteralToken vis;
1797 ope.accept(vis);
1798 return vis.result_;
1799 }
1800
1801private:
1802 bool result_ = false;
1803};
1804
1806 using Ope::Visitor::visit;
1807
1808 void visit(Sequence &ope) override {
1809 for (auto op : ope.opes_) {
1810 op->accept(*this);
1811 }
1812 }
1813 void visit(PrioritizedChoice &ope) override {
1814 for (auto op : ope.opes_) {
1815 op->accept(*this);
1816 }
1817 }
1818 void visit(Repetition &ope) override { ope.ope_->accept(*this); }
1819 void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1820 void visit(Capture &ope) override { ope.ope_->accept(*this); }
1821 void visit(TokenBoundary &) override { has_token_boundary_ = true; }
1822 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1823 void visit(WeakHolder &) override { has_rule_ = true; }
1824 void visit(Holder &ope) override { ope.ope_->accept(*this); }
1825 void visit(Reference &ope) override;
1826 void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
1827 void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
1828 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1829
1830 static bool is_token(Ope &ope) {
1831 if (IsLiteralToken::check(ope)) { return true; }
1832
1833 TokenChecker vis;
1834 ope.accept(vis);
1835 return vis.has_token_boundary_ || !vis.has_rule_;
1836 }
1837
1838private:
1840 bool has_rule_ = false;
1841};
1842
1844 using Ope::Visitor::visit;
1845
1846 void visit(LiteralString &ope) override { token_ = ope.lit_.data(); }
1847 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1848 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1849 void visit(Reference &ope) override;
1850 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1851
1852 static const char *token(Ope &ope) {
1853 FindLiteralToken vis;
1854 ope.accept(vis);
1855 return vis.token_;
1856 }
1857
1858private:
1859 const char *token_ = nullptr;
1860};
1861
1863 using Ope::Visitor::visit;
1864
1865 DetectLeftRecursion(const std::string &name) : name_(name) {}
1866
1867 void visit(Sequence &ope) override {
1868 for (auto op : ope.opes_) {
1869 op->accept(*this);
1870 if (done_) {
1871 break;
1872 } else if (error_s) {
1873 done_ = true;
1874 break;
1875 }
1876 }
1877 }
1878 void visit(PrioritizedChoice &ope) override {
1879 for (auto op : ope.opes_) {
1880 op->accept(*this);
1881 if (error_s) {
1882 done_ = true;
1883 break;
1884 }
1885 }
1886 }
1887 void visit(Repetition &ope) override {
1888 ope.ope_->accept(*this);
1889 done_ = ope.min_ > 0;
1890 }
1891 void visit(AndPredicate &ope) override {
1892 ope.ope_->accept(*this);
1893 done_ = false;
1894 }
1895 void visit(NotPredicate &ope) override {
1896 ope.ope_->accept(*this);
1897 done_ = false;
1898 }
1899 void visit(Dictionary &) override { done_ = true; }
1900 void visit(LiteralString &ope) override { done_ = !ope.lit_.empty(); }
1901 void visit(CharacterClass &) override { done_ = true; }
1902 void visit(Character &) override { done_ = true; }
1903 void visit(AnyCharacter &) override { done_ = true; }
1904 void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1905 void visit(Capture &ope) override { ope.ope_->accept(*this); }
1906 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1907 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1908 void visit(User &) override { done_ = true; }
1909 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
1910 void visit(Holder &ope) override { ope.ope_->accept(*this); }
1911 void visit(Reference &ope) override;
1912 void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
1913 void visit(BackReference &) override { done_ = true; }
1914 void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
1915 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1916 void visit(Cut &) override { done_ = true; }
1917
1918 const char *error_s = nullptr;
1919
1920private:
1921 std::string name_;
1922 std::unordered_set<std::string> refs_;
1923 bool done_ = false;
1924};
1925
1927 using Ope::Visitor::visit;
1928
1929 HasEmptyElement(std::vector<std::pair<const char *, std::string>> &refs,
1930 std::unordered_map<std::string, bool> &has_error_cache)
1931 : refs_(refs), has_error_cache_(has_error_cache) {}
1932
1933 void visit(Sequence &ope) override;
1934 void visit(PrioritizedChoice &ope) override {
1935 for (auto op : ope.opes_) {
1936 op->accept(*this);
1937 if (is_empty) { return; }
1938 }
1939 }
1940 void visit(Repetition &ope) override {
1941 if (ope.min_ == 0) {
1942 set_error();
1943 } else {
1944 ope.ope_->accept(*this);
1945 }
1946 }
1947 void visit(AndPredicate &) override { set_error(); }
1948 void visit(NotPredicate &) override { set_error(); }
1949 void visit(LiteralString &ope) override {
1950 if (ope.lit_.empty()) { set_error(); }
1951 }
1952 void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
1953 void visit(Capture &ope) override { ope.ope_->accept(*this); }
1954 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
1955 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
1956 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
1957 void visit(Holder &ope) override { ope.ope_->accept(*this); }
1958 void visit(Reference &ope) override;
1959 void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
1960 void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
1961 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
1962
1963 bool is_empty = false;
1964 const char *error_s = nullptr;
1965 std::string error_name;
1966
1967private:
1968 void set_error() {
1969 is_empty = true;
1970 tie(error_s, error_name) = refs_.back();
1971 }
1972 std::vector<std::pair<const char *, std::string>> &refs_;
1973 std::unordered_map<std::string, bool> &has_error_cache_;
1974};
1975
1977 using Ope::Visitor::visit;
1978
1979 DetectInfiniteLoop(const char *s, const std::string &name,
1980 std::vector<std::pair<const char *, std::string>> &refs,
1981 std::unordered_map<std::string, bool> &has_error_cache)
1982 : refs_(refs), has_error_cache_(has_error_cache) {
1983 refs_.emplace_back(s, name);
1984 }
1985
1986 DetectInfiniteLoop(std::vector<std::pair<const char *, std::string>> &refs,
1987 std::unordered_map<std::string, bool> &has_error_cache)
1988 : refs_(refs), has_error_cache_(has_error_cache) {}
1989
1990 void visit(Sequence &ope) override {
1991 for (auto op : ope.opes_) {
1992 op->accept(*this);
1993 if (has_error) { return; }
1994 }
1995 }
1996 void visit(PrioritizedChoice &ope) override {
1997 for (auto op : ope.opes_) {
1998 op->accept(*this);
1999 if (has_error) { return; }
2000 }
2001 }
2002 void visit(Repetition &ope) override {
2003 if (ope.max_ == std::numeric_limits<size_t>::max()) {
2005 ope.ope_->accept(vis);
2006 if (vis.is_empty) {
2007 has_error = true;
2008 error_s = vis.error_s;
2009 error_name = vis.error_name;
2010 }
2011 } else {
2012 ope.ope_->accept(*this);
2013 }
2014 }
2015 void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
2016 void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
2017 void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
2018 void visit(Capture &ope) override { ope.ope_->accept(*this); }
2019 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
2020 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
2021 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2022 void visit(Holder &ope) override { ope.ope_->accept(*this); }
2023 void visit(Reference &ope) override;
2024 void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
2025 void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
2026 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
2027
2028 bool has_error = false;
2029 const char *error_s = nullptr;
2030 std::string error_name;
2031
2032private:
2033 std::vector<std::pair<const char *, std::string>> &refs_;
2034 std::unordered_map<std::string, bool> &has_error_cache_;
2035};
2036
2038 using Ope::Visitor::visit;
2039
2041 const std::vector<std::string> &params)
2042 : grammar_(grammar), params_(params) {}
2043
2044 void visit(Sequence &ope) override {
2045 for (auto op : ope.opes_) {
2046 op->accept(*this);
2047 }
2048 }
2049 void visit(PrioritizedChoice &ope) override {
2050 for (auto op : ope.opes_) {
2051 op->accept(*this);
2052 }
2053 }
2054 void visit(Repetition &ope) override { ope.ope_->accept(*this); }
2055 void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
2056 void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
2057 void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
2058 void visit(Capture &ope) override { ope.ope_->accept(*this); }
2059 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
2060 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
2061 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2062 void visit(Holder &ope) override { ope.ope_->accept(*this); }
2063 void visit(Reference &ope) override;
2064 void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
2065 void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
2066 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
2067
2068 std::unordered_map<std::string, const char *> error_s;
2069 std::unordered_map<std::string, std::string> error_message;
2070 std::unordered_set<std::string> referenced;
2071
2072private:
2074 const std::vector<std::string> &params_;
2075};
2076
2078 using Ope::Visitor::visit;
2079
2080 LinkReferences(Grammar &grammar, const std::vector<std::string> &params)
2081 : grammar_(grammar), params_(params) {}
2082
2083 void visit(Sequence &ope) override {
2084 for (auto op : ope.opes_) {
2085 op->accept(*this);
2086 }
2087 }
2088 void visit(PrioritizedChoice &ope) override {
2089 for (auto op : ope.opes_) {
2090 op->accept(*this);
2091 }
2092 }
2093 void visit(Repetition &ope) override { ope.ope_->accept(*this); }
2094 void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
2095 void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
2096 void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
2097 void visit(Capture &ope) override { ope.ope_->accept(*this); }
2098 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
2099 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
2100 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2101 void visit(Holder &ope) override { ope.ope_->accept(*this); }
2102 void visit(Reference &ope) override;
2103 void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
2104 void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
2105 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
2106
2107private:
2109 const std::vector<std::string> &params_;
2110};
2111
2113 using Ope::Visitor::visit;
2114
2115 FindReference(const std::vector<std::shared_ptr<Ope>> &args,
2116 const std::vector<std::string> &params)
2117 : args_(args), params_(params) {}
2118
2119 void visit(Sequence &ope) override {
2120 std::vector<std::shared_ptr<Ope>> opes;
2121 for (auto o : ope.opes_) {
2122 o->accept(*this);
2123 opes.push_back(found_ope);
2124 }
2125 found_ope = std::make_shared<Sequence>(opes);
2126 }
2127 void visit(PrioritizedChoice &ope) override {
2128 std::vector<std::shared_ptr<Ope>> opes;
2129 for (auto o : ope.opes_) {
2130 o->accept(*this);
2131 opes.push_back(found_ope);
2132 }
2133 found_ope = std::make_shared<PrioritizedChoice>(opes);
2134 }
2135 void visit(Repetition &ope) override {
2136 ope.ope_->accept(*this);
2137 found_ope = rep(found_ope, ope.min_, ope.max_);
2138 }
2139 void visit(AndPredicate &ope) override {
2140 ope.ope_->accept(*this);
2142 }
2143 void visit(NotPredicate &ope) override {
2144 ope.ope_->accept(*this);
2146 }
2147 void visit(Dictionary &ope) override { found_ope = ope.shared_from_this(); }
2148 void visit(LiteralString &ope) override {
2149 found_ope = ope.shared_from_this();
2150 }
2151 void visit(CharacterClass &ope) override {
2152 found_ope = ope.shared_from_this();
2153 }
2154 void visit(Character &ope) override { found_ope = ope.shared_from_this(); }
2155 void visit(AnyCharacter &ope) override { found_ope = ope.shared_from_this(); }
2156 void visit(CaptureScope &ope) override {
2157 ope.ope_->accept(*this);
2159 }
2160 void visit(Capture &ope) override {
2161 ope.ope_->accept(*this);
2163 }
2164 void visit(TokenBoundary &ope) override {
2165 ope.ope_->accept(*this);
2167 }
2168 void visit(Ignore &ope) override {
2169 ope.ope_->accept(*this);
2171 }
2172 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2173 void visit(Holder &ope) override { ope.ope_->accept(*this); }
2174 void visit(Reference &ope) override;
2175 void visit(Whitespace &ope) override {
2176 ope.ope_->accept(*this);
2178 }
2179 void visit(PrecedenceClimbing &ope) override {
2180 ope.atom_->accept(*this);
2182 }
2183 void visit(Recovery &ope) override {
2184 ope.ope_->accept(*this);
2186 }
2187 void visit(Cut &ope) override { found_ope = ope.shared_from_this(); }
2188
2189 std::shared_ptr<Ope> found_ope;
2190
2191private:
2192 const std::vector<std::shared_ptr<Ope>> &args_;
2193 const std::vector<std::string> &params_;
2194};
2195
2196/*
2197 * Keywords
2198 */
// Names of the special grammar entries. `inline constexpr` gives one
// immutable definition shared by all translation units, where `static`
// created a separate, reassignable pointer in each of them.
inline constexpr const char *WHITESPACE_DEFINITION_NAME = "%whitespace";
inline constexpr const char *WORD_DEFINITION_NAME = "%word";
inline constexpr const char *RECOVER_DEFINITION_NAME = "%recover";
2202
2203/*
2204 * Definition
2205 */
2207public:
2208 struct Result {
2209 bool ret;
2211 size_t len;
2213 };
2214
2215 Definition() : holder_(std::make_shared<Holder>(this)) {}
2216
2217 Definition(const Definition &rhs) : name(rhs.name), holder_(rhs.holder_) {
2218 holder_->outer_ = this;
2219 }
2220
2221 Definition(const std::shared_ptr<Ope> &ope)
2222 : holder_(std::make_shared<Holder>(this)) {
2223 *this <= ope;
2224 }
2225
2226 operator std::shared_ptr<Ope>() {
2227 return std::make_shared<WeakHolder>(holder_);
2228 }
2229
2230 Definition &operator<=(const std::shared_ptr<Ope> &ope) {
2231 holder_->ope_ = ope;
2232 return *this;
2233 }
2234
2235 Result parse(const char *s, size_t n, const char *path = nullptr,
2236 Log log = nullptr) const {
2237 SemanticValues vs;
2238 std::any dt;
2239 return parse_core(s, n, vs, dt, path, log);
2240 }
2241
2242 Result parse(const char *s, const char *path = nullptr,
2243 Log log = nullptr) const {
2244 auto n = strlen(s);
2245 return parse(s, n, path, log);
2246 }
2247
2248 Result parse(const char *s, size_t n, std::any &dt,
2249 const char *path = nullptr, Log log = nullptr) const {
2250 SemanticValues vs;
2251 return parse_core(s, n, vs, dt, path, log);
2252 }
2253
2254 Result parse(const char *s, std::any &dt, const char *path = nullptr,
2255 Log log = nullptr) const {
2256 auto n = strlen(s);
2257 return parse(s, n, dt, path, log);
2258 }
2259
2260 template <typename T>
2261 Result parse_and_get_value(const char *s, size_t n, T &val,
2262 const char *path = nullptr,
2263 Log log = nullptr) const {
2264 SemanticValues vs;
2265 std::any dt;
2266 auto r = parse_core(s, n, vs, dt, path, log);
2267 if (r.ret && !vs.empty() && vs.front().has_value()) {
2268 val = std::any_cast<T>(vs[0]);
2269 }
2270 return r;
2271 }
2272
2273 template <typename T>
2274 Result parse_and_get_value(const char *s, T &val, const char *path = nullptr,
2275 Log log = nullptr) const {
2276 auto n = strlen(s);
2277 return parse_and_get_value(s, n, val, path, log);
2278 }
2279
2280 template <typename T>
2281 Result parse_and_get_value(const char *s, size_t n, std::any &dt, T &val,
2282 const char *path = nullptr,
2283 Log log = nullptr) const {
2284 SemanticValues vs;
2285 auto r = parse_core(s, n, vs, dt, path, log);
2286 if (r.ret && !vs.empty() && vs.front().has_value()) {
2287 val = std::any_cast<T>(vs[0]);
2288 }
2289 return r;
2290 }
2291
2292 template <typename T>
2293 Result parse_and_get_value(const char *s, std::any &dt, T &val,
2294 const char *path = nullptr,
2295 Log log = nullptr) const {
2296 auto n = strlen(s);
2297 return parse_and_get_value(s, n, dt, val, path, log);
2298 }
2299
2300#if defined(__cpp_lib_char8_t)
2301 Result parse(const char8_t *s, size_t n, const char *path = nullptr,
2302 Log log = nullptr) const {
2303 return parse(reinterpret_cast<const char *>(s), n, path, log);
2304 }
2305
2306 Result parse(const char8_t *s, const char *path = nullptr,
2307 Log log = nullptr) const {
2308 return parse(reinterpret_cast<const char *>(s), path, log);
2309 }
2310
2311 Result parse(const char8_t *s, size_t n, std::any &dt,
2312 const char *path = nullptr, Log log = nullptr) const {
2313 return parse(reinterpret_cast<const char *>(s), n, dt, path, log);
2314 }
2315
2316 Result parse(const char8_t *s, std::any &dt, const char *path = nullptr,
2317 Log log = nullptr) const {
2318 return parse(reinterpret_cast<const char *>(s), dt, path, log);
2319 }
2320
2321 template <typename T>
2322 Result parse_and_get_value(const char8_t *s, size_t n, T &val,
2323 const char *path = nullptr,
2324 Log log = nullptr) const {
2325 return parse_and_get_value(reinterpret_cast<const char *>(s), n, val, *path,
2326 log);
2327 }
2328
2329 template <typename T>
2330 Result parse_and_get_value(const char8_t *s, T &val,
2331 const char *path = nullptr,
2332 Log log = nullptr) const {
2333 return parse_and_get_value(reinterpret_cast<const char *>(s), val, *path,
2334 log);
2335 }
2336
2337 template <typename T>
2338 Result parse_and_get_value(const char8_t *s, size_t n, std::any &dt, T &val,
2339 const char *path = nullptr,
2340 Log log = nullptr) const {
2341 return parse_and_get_value(reinterpret_cast<const char *>(s), n, dt, val,
2342 *path, log);
2343 }
2344
2345 template <typename T>
2346 Result parse_and_get_value(const char8_t *s, std::any &dt, T &val,
2347 const char *path = nullptr,
2348 Log log = nullptr) const {
2349 return parse_and_get_value(reinterpret_cast<const char *>(s), dt, val,
2350 *path, log);
2351 }
2352#endif
2353
2354 void operator=(Action a) { action = a; }
2355
2356 template <typename T> Definition &operator,(T fn) {
2357 operator=(fn);
2358 return *this;
2359 }
2360
2362 ignoreSemanticValue = true;
2363 return *this;
2364 }
2365
// Forwards the visitor to this definition's Holder.
2366 void accept(Ope::Visitor &v) { holder_->accept(v); }
2367
// Returns the operator stored in this definition's Holder (may be empty if
// none has been assigned).
2368 std::shared_ptr<Ope> get_core_operator() const { return holder_->ope_; }
2369
2370 bool is_token() const {
2371 std::call_once(is_token_init_, [this]() {
2373 });
2374 return is_token_;
2375 }
2376
2377 std::string name;
2378 const char *s_ = nullptr;
2379 std::pair<size_t, size_t> line_ = {1, 1};
2380
2381 std::function<bool(const SemanticValues &vs, const std::any &dt,
2382 std::string &msg)>
2384
2385 size_t id = 0;
2387 std::function<void(const Context &c, const char *s, size_t n, std::any &dt)>
2389 std::function<void(const Context &c, const char *s, size_t n, size_t matchlen,
2390 std::any &value, std::any &dt)>
2393 std::shared_ptr<Ope> whitespaceOpe;
2394 std::shared_ptr<Ope> wordOpe;
2396 bool is_macro = false;
2397 std::vector<std::string> params;
2398 bool disable_action = false;
2399
2402 bool verbose_trace = false;
2405
2406 std::string error_message;
2407 bool no_ast_opt = false;
2408
2409 bool eoi_check = true;
2410
2411private:
2412 friend class Reference;
2413 friend class ParserGenerator;
2414
2417
2419 std::call_once(definition_ids_init_, [&]() {
2421 holder_->accept(vis);
2422 if (whitespaceOpe) { whitespaceOpe->accept(vis); }
2423 if (wordOpe) { wordOpe->accept(vis); }
2424 definition_ids_.swap(vis.ids);
2425 });
2426 }
2427
2428 Result parse_core(const char *s, size_t n, SemanticValues &vs, std::any &dt,
2429 const char *path, Log log) const {
2431
2432 std::shared_ptr<Ope> ope = holder_;
2433
2434 std::any trace_data;
2435 if (tracer_start) { tracer_start(trace_data); }
2436 auto se1 = scope_exit([&]() {
2437 if (tracer_end) { tracer_end(trace_data); }
2438 });
2439
2440 Context c(path, s, n, definition_ids_.size(), whitespaceOpe, wordOpe,
2442 verbose_trace, log);
2443
2444 size_t i = 0;
2445
2446 if (whitespaceOpe) {
2447 auto save_ignore_trace_state = c.ignore_trace_state;
2448 c.ignore_trace_state = !c.verbose_trace;
2449 auto se2 =
2450 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
2451
2452 auto len = whitespaceOpe->parse(s, n, vs, c, dt);
2453 if (fail(len)) { return Result{false, c.recovered, i, c.error_info}; }
2454
2455 i = len;
2456 }
2457
2458 auto len = ope->parse(s + i, n - i, vs, c, dt);
2459 auto ret = success(len);
2460 if (ret) {
2461 i += len;
2462 if (eoi_check) {
2463 if (i < n) {
2464 if (c.error_info.error_pos - c.s < s + i - c.s) {
2465 c.error_info.message_pos = s + i;
2466 c.error_info.message = "expected end of input";
2467 }
2468 ret = false;
2469 }
2470 }
2471 }
2472 return Result{ret, c.recovered, i, c.error_info};
2473 }
2474
2475 std::shared_ptr<Holder> holder_;
2476 mutable std::once_flag is_token_init_;
2477 mutable bool is_token_ = false;
2478 mutable std::once_flag assign_id_to_definition_init_;
2479 mutable std::once_flag definition_ids_init_;
2480 mutable std::unordered_map<void *, size_t> definition_ids_;
2481};
2482
2483/*
2484 * Implementations
2485 */
2486
2487inline size_t parse_literal(const char *s, size_t n, SemanticValues &vs,
2488 Context &c, std::any &dt, const std::string &lit,
2489 std::once_flag &init_is_word, bool &is_word,
2490 bool ignore_case) {
2491 size_t i = 0;
2492 for (; i < lit.size(); i++) {
2493 if (i >= n || (ignore_case ? (std::tolower(s[i]) != std::tolower(lit[i]))
2494 : (s[i] != lit[i]))) {
2495 c.set_error_pos(s, lit.data());
2496 return static_cast<size_t>(-1);
2497 }
2498 }
2499
2500 // Word check
2501 if (c.wordOpe) {
2502 auto save_ignore_trace_state = c.ignore_trace_state;
2504 auto se =
2505 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
2506
2507 std::call_once(init_is_word, [&]() {
2508 SemanticValues dummy_vs;
2509 Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr,
2510 nullptr, nullptr, false, nullptr);
2511 std::any dummy_dt;
2512
2513 auto len =
2514 c.wordOpe->parse(lit.data(), lit.size(), dummy_vs, dummy_c, dummy_dt);
2515 is_word = success(len);
2516 });
2517
2518 if (is_word) {
2519 SemanticValues dummy_vs;
2520 Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr,
2521 nullptr, nullptr, false, nullptr);
2522 std::any dummy_dt;
2523
2524 NotPredicate ope(c.wordOpe);
2525 auto len = ope.parse(s + i, n - i, dummy_vs, dummy_c, dummy_dt);
2526 if (fail(len)) {
2527 c.set_error_pos(s, lit.data());
2528 return len;
2529 }
2530 i += len;
2531 }
2532 }
2533
2534 // Skip whitespace
2536 auto save_ignore_trace_state = c.ignore_trace_state;
2538 auto se =
2539 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
2540
2541 auto len = c.whitespaceOpe->parse(s + i, n - i, vs, c, dt);
2542 if (fail(len)) { return len; }
2543 i += len;
2544 }
2545
2546 return i;
2547}
2548
2549inline std::pair<size_t, size_t> SemanticValues::line_info() const {
2550 assert(c_);
2551 return c_->line_info(sv_.data());
2552}
2553
2554inline void ErrorInfo::output_log(const Log &log, const char *s, size_t n) {
2555 if (message_pos) {
2558 auto line = line_info(s, message_pos);
2559 std::string msg;
2560 if (auto unexpected_token = heuristic_error_token(s, n, message_pos);
2561 !unexpected_token.empty()) {
2562 msg = replace_all(message, "%t", unexpected_token);
2563
2564 auto unexpected_char = unexpected_token.substr(
2565 0,
2566 codepoint_length(unexpected_token.data(), unexpected_token.size()));
2567
2568 msg = replace_all(msg, "%c", unexpected_char);
2569 } else {
2570 msg = message;
2571 }
2572 log(line.first, line.second, msg, label);
2573 }
2574 } else if (error_pos) {
2575 if (error_pos > last_output_pos) {
2577 auto line = line_info(s, error_pos);
2578
2579 std::string msg;
2580 if (expected_tokens.empty()) {
2581 msg = "syntax error.";
2582 } else {
2583 msg = "syntax error";
2584
2585 // unexpected token
2586 if (auto unexpected_token = heuristic_error_token(s, n, error_pos);
2587 !unexpected_token.empty()) {
2588 msg += ", unexpected '";
2589 msg += unexpected_token;
2590 msg += "'";
2591 }
2592
2593 auto first_item = true;
2594 size_t i = 0;
2595 while (i < expected_tokens.size()) {
2596 auto [error_literal, error_rule] = expected_tokens[i];
2597
2598 // Skip rules start with '_'
2599 if (!(error_rule && error_rule->name[0] == '_')) {
2600 msg += (first_item ? ", expecting " : ", ");
2601 if (error_literal) {
2602 msg += "'";
2603 msg += error_literal;
2604 msg += "'";
2605 } else {
2606 msg += "<" + error_rule->name + ">";
2607 if (label.empty()) { label = error_rule->name; }
2608 }
2609 first_item = false;
2610 }
2611
2612 i++;
2613 }
2614 msg += ".";
2615 }
2616 log(line.first, line.second, msg, label);
2617 }
2618 }
2619}
2620
2621inline void Context::set_error_pos(const char *a_s, const char *literal) {
2622 if (log) {
2623 if (error_info.error_pos <= a_s) {
2624 if (error_info.error_pos < a_s || !error_info.keep_previous_token) {
2625 error_info.error_pos = a_s;
2626 error_info.expected_tokens.clear();
2627 }
2628
2629 const char *error_literal = nullptr;
2630 const Definition *error_rule = nullptr;
2631
2632 if (literal) {
2633 error_literal = literal;
2634 } else if (!rule_stack.empty()) {
2635 auto rule = rule_stack.back();
2636 auto ope = rule->get_core_operator();
2637 if (auto token = FindLiteralToken::token(*ope);
2638 token && token[0] != '\0') {
2639 error_literal = token;
2640 }
2641 }
2642
2643 for (auto r : rule_stack) {
2644 error_rule = r;
2645 if (r->is_token()) { break; }
2646 }
2647
2648 if (error_literal || error_rule) {
2649 error_info.add(error_literal, error_rule);
2650 }
2651 }
2652 }
2653}
2654
2655inline void Context::trace_enter(const Ope &ope, const char *a_s, size_t n,
2656 const SemanticValues &vs, std::any &dt) {
2657 trace_ids.push_back(next_trace_id++);
2658 tracer_enter(ope, a_s, n, vs, *this, dt, trace_data);
2659}
2660
// Called after a traced operator ran: invokes the leave callback with the
// operator's result length `len`, then pops the trace id pushed on entry.
2661inline void Context::trace_leave(const Ope &ope, const char *a_s, size_t n,
2662 const SemanticValues &vs, std::any &dt,
2663 size_t len) {
2664 tracer_leave(ope, a_s, n, vs, *this, dt, len, trace_data);
2665 trace_ids.pop_back();
2666}
2667
2668inline bool Context::is_traceable(const Ope &ope) const {
2669 if (tracer_enter && tracer_leave) {
2670 if (ignore_trace_state) { return false; }
2671 return !dynamic_cast<const peg::Reference *>(&ope);
2672 }
2673 return false;
2674}
2675
2676inline size_t Ope::parse(const char *s, size_t n, SemanticValues &vs,
2677 Context &c, std::any &dt) const {
2678 if (c.is_traceable(*this)) {
2679 c.trace_enter(*this, s, n, vs, dt);
2680 auto len = parse_core(s, n, vs, c, dt);
2681 c.trace_leave(*this, s, n, vs, dt, len);
2682 return len;
2683 }
2684 return parse_core(s, n, vs, c, dt);
2685}
2686
2687inline size_t Dictionary::parse_core(const char *s, size_t n,
2688 SemanticValues &vs, Context &c,
2689 std::any &dt) const {
2690 size_t id;
2691 auto i = trie_.match(s, n, id);
2692
2693 if (i == 0) {
2694 c.set_error_pos(s);
2695 return static_cast<size_t>(-1);
2696 }
2697
2698 vs.choice_count_ = trie_.size();
2699 vs.choice_ = id;
2700
2701 // Word check
2702 if (c.wordOpe) {
2703 auto save_ignore_trace_state = c.ignore_trace_state;
2705 auto se =
2706 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
2707
2708 {
2709 SemanticValues dummy_vs;
2710 Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr,
2711 nullptr, nullptr, false, nullptr);
2712 std::any dummy_dt;
2713
2714 NotPredicate ope(c.wordOpe);
2715 auto len = ope.parse(s + i, n - i, dummy_vs, dummy_c, dummy_dt);
2716 if (fail(len)) {
2717 c.set_error_pos(s);
2718 return len;
2719 }
2720 i += len;
2721 }
2722 }
2723
2724 // Skip whitespace
2726 auto save_ignore_trace_state = c.ignore_trace_state;
2728 auto se =
2729 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
2730
2731 auto len = c.whitespaceOpe->parse(s + i, n - i, vs, c, dt);
2732 if (fail(len)) { return len; }
2733 i += len;
2734 }
2735
2736 return i;
2737}
2738
2739inline size_t LiteralString::parse_core(const char *s, size_t n,
2740 SemanticValues &vs, Context &c,
2741 std::any &dt) const {
2742 return parse_literal(s, n, vs, c, dt, lit_, init_is_word_, is_word_,
2743 ignore_case_);
2744}
2745
2746inline size_t TokenBoundary::parse_core(const char *s, size_t n,
2747 SemanticValues &vs, Context &c,
2748 std::any &dt) const {
2749 auto save_ignore_trace_state = c.ignore_trace_state;
2751 auto se1 =
2752 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
2753
2754 size_t len;
2755 {
2757 auto se2 = scope_exit([&]() { c.in_token_boundary_count--; });
2758 len = ope_->parse(s, n, vs, c, dt);
2759 }
2760
2761 if (success(len)) {
2762 vs.tokens.emplace_back(std::string_view(s, len));
2763
2764 if (!c.in_token_boundary_count) {
2765 if (c.whitespaceOpe) {
2766 auto l = c.whitespaceOpe->parse(s + len, n - len, vs, c, dt);
2767 if (fail(l)) { return l; }
2768 len += l;
2769 }
2770 }
2771 }
2772 return len;
2773}
2774
2775inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
2776 Context &c, std::any &dt) const {
2777 if (!ope_) {
2778 throw std::logic_error("Uninitialized definition ope was used...");
2779 }
2780
2781 // Macro reference
2782 if (outer_->is_macro) {
2783 c.rule_stack.push_back(outer_);
2784 auto len = ope_->parse(s, n, vs, c, dt);
2785 c.rule_stack.pop_back();
2786 return len;
2787 }
2788
2789 size_t len;
2790 std::any val;
2791
2792 c.packrat(s, outer_->id, len, val, [&](std::any &a_val) {
2793 if (outer_->enter) { outer_->enter(c, s, n, dt); }
2794 auto &chvs = c.push_semantic_values_scope();
2795 auto se = scope_exit([&]() {
2797 if (outer_->leave) { outer_->leave(c, s, n, len, a_val, dt); }
2798 });
2799
2800 c.rule_stack.push_back(outer_);
2801 len = ope_->parse(s, n, chvs, c, dt);
2802 c.rule_stack.pop_back();
2803
2804 // Invoke action
2805 if (success(len)) {
2806 chvs.sv_ = std::string_view(s, len);
2807 chvs.name_ = outer_->name;
2808
2809 auto ope_ptr = ope_.get();
2810 {
2811 auto tok_ptr = dynamic_cast<const peg::TokenBoundary *>(ope_ptr);
2812 if (tok_ptr) { ope_ptr = tok_ptr->ope_.get(); }
2813 }
2814 if (!dynamic_cast<const peg::PrioritizedChoice *>(ope_ptr) &&
2815 !dynamic_cast<const peg::Dictionary *>(ope_ptr)) {
2816 chvs.choice_count_ = 0;
2817 chvs.choice_ = 0;
2818 }
2819
2820 std::string msg;
2821 if (outer_->predicate && !outer_->predicate(chvs, dt, msg)) {
2822 if (c.log && !msg.empty() && c.error_info.message_pos < s) {
2823 c.error_info.message_pos = s;
2824 c.error_info.message = msg;
2825 c.error_info.label = outer_->name;
2826 }
2827 len = static_cast<size_t>(-1);
2828 }
2829
2830 if (success(len)) {
2831 if (!c.recovered) { a_val = reduce(chvs, dt); }
2832 } else {
2833 if (c.log && !msg.empty() && c.error_info.message_pos < s) {
2834 c.error_info.message_pos = s;
2835 c.error_info.message = msg;
2836 c.error_info.label = outer_->name;
2837 }
2838 }
2839 } else {
2840 if (c.log && !outer_->error_message.empty() &&
2841 c.error_info.message_pos < s) {
2842 c.error_info.message_pos = s;
2843 c.error_info.message = outer_->error_message;
2844 c.error_info.label = outer_->name;
2845 }
2846 }
2847 });
2848
2849 if (success(len)) {
2850 if (!outer_->ignoreSemanticValue) {
2851 vs.emplace_back(std::move(val));
2852 vs.tags.emplace_back(str2tag(outer_->name));
2853 }
2854 }
2855
2856 return len;
2857}
2858
2859inline std::any Holder::reduce(SemanticValues &vs, std::any &dt) const {
2860 if (outer_->action && !outer_->disable_action) {
2861 return outer_->action(vs, dt);
2862 } else if (vs.empty()) {
2863 return std::any();
2864 } else {
2865 return std::move(vs.front());
2866 }
2867}
2868
// Returns the name of the definition this Holder points back to.
2869inline const std::string &Holder::name() const { return outer_->name; }
2870
2871inline const std::string &Holder::trace_name() const {
2872 std::call_once(trace_name_init_,
2873 [this]() { trace_name_ = "[" + outer_->name + "]"; });
2874 return trace_name_;
2875}
2876
2877inline size_t Reference::parse_core(const char *s, size_t n, SemanticValues &vs,
2878 Context &c, std::any &dt) const {
2879 auto save_ignore_trace_state = c.ignore_trace_state;
2880 if (rule_ && rule_->ignoreSemanticValue) {
2882 }
2883 auto se1 =
2884 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
2885
2886 if (rule_) {
2887 // Reference rule
2888 if (rule_->is_macro) {
2889 // Macro
2890 FindReference vis(c.top_args(), c.rule_stack.back()->params);
2891
2892 // Collect arguments
2893 std::vector<std::shared_ptr<Ope>> args;
2894 for (auto arg : args_) {
2895 arg->accept(vis);
2896 args.emplace_back(std::move(vis.found_ope));
2897 }
2898
2899 c.push_args(std::move(args));
2900 auto se2 = scope_exit([&]() { c.pop_args(); });
2901 auto ope = get_core_operator();
2902 return ope->parse(s, n, vs, c, dt);
2903 } else {
2904 // Definition
2905 c.push_args(std::vector<std::shared_ptr<Ope>>());
2906 auto se3 = scope_exit([&]() { c.pop_args(); });
2907 auto ope = get_core_operator();
2908 return ope->parse(s, n, vs, c, dt);
2909 }
2910 } else {
2911 // Reference parameter in macro
2912 const auto &args = c.top_args();
2913 return args[iarg_]->parse(s, n, vs, c, dt);
2914 }
2915}
2916
// Returns the Holder of the referenced rule as an operator.
// NOTE(review): `rule_` is dereferenced without a null check here; the call
// sites visible in Reference::parse_core only reach this when `rule_` is set.
2917inline std::shared_ptr<Ope> Reference::get_core_operator() const {
2918 return rule_->holder_;
2919}
2920
2921inline size_t BackReference::parse_core(const char *s, size_t n,
2922 SemanticValues &vs, Context &c,
2923 std::any &dt) const {
2924 auto size = static_cast<int>(c.capture_scope_stack_size);
2925 for (auto i = size - 1; i >= 0; i--) {
2926 auto index = static_cast<size_t>(i);
2927 const auto &cs = c.capture_scope_stack[index];
2928 if (cs.find(name_) != cs.end()) {
2929 const auto &lit = cs.at(name_);
2930 std::once_flag init_is_word;
2931 auto is_word = false;
2932 return parse_literal(s, n, vs, c, dt, lit, init_is_word, is_word, false);
2933 }
2934 }
2935
2936 c.error_info.message_pos = s;
2937 c.error_info.message = "undefined back reference '$" + name_ + "'...";
2938 return static_cast<size_t>(-1);
2939}
2940
2941inline Definition &
2943 if (rule_.is_macro) {
2944 // Reference parameter in macro
2945 const auto &args = c.top_args();
2946 auto iarg = dynamic_cast<Reference &>(*binop_).iarg_;
2947 auto arg = args[iarg];
2948 return *dynamic_cast<Reference &>(*arg).rule_;
2949 }
2950
2951 return *dynamic_cast<Reference &>(*binop_).rule_;
2952}
2953
2954inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
2955 SemanticValues &vs,
2956 Context &c, std::any &dt,
2957 size_t min_prec) const {
2958 auto len = atom_->parse(s, n, vs, c, dt);
2959 if (fail(len)) { return len; }
2960
2961 std::string tok;
2962 auto &rule = get_reference_for_binop(c);
2963 auto action = std::move(rule.action);
2964
2965 rule.action = [&](SemanticValues &vs2, std::any &dt2) {
2966 tok = vs2.token();
2967 if (action) {
2968 return action(vs2, dt2);
2969 } else if (!vs2.empty()) {
2970 return vs2[0];
2971 }
2972 return std::any();
2973 };
2974 auto action_se = scope_exit([&]() { rule.action = std::move(action); });
2975
2976 auto i = len;
2977 while (i < n) {
2978 std::vector<std::any> save_values(vs.begin(), vs.end());
2979 auto save_tokens = vs.tokens;
2980
2981 auto chvs = c.push_semantic_values_scope();
2982 auto chlen = binop_->parse(s + i, n - i, chvs, c, dt);
2984
2985 if (fail(chlen)) { break; }
2986
2987 auto it = info_.find(tok);
2988 if (it == info_.end()) { break; }
2989
2990 auto level = std::get<0>(it->second);
2991 auto assoc = std::get<1>(it->second);
2992
2993 if (level < min_prec) { break; }
2994
2995 vs.emplace_back(std::move(chvs[0]));
2996 i += chlen;
2997
2998 auto next_min_prec = level;
2999 if (assoc == 'L') { next_min_prec = level + 1; }
3000
3001 chvs = c.push_semantic_values_scope();
3002 chlen = parse_expression(s + i, n - i, chvs, c, dt, next_min_prec);
3004
3005 if (fail(chlen)) {
3006 vs.assign(save_values.begin(), save_values.end());
3007 vs.tokens = save_tokens;
3008 i = chlen;
3009 break;
3010 }
3011
3012 vs.emplace_back(std::move(chvs[0]));
3013 i += chlen;
3014
3015 std::any val;
3016 if (rule_.action) {
3017 vs.sv_ = std::string_view(s, i);
3018 val = rule_.action(vs, dt);
3019 } else if (!vs.empty()) {
3020 val = vs[0];
3021 }
3022 vs.clear();
3023 vs.emplace_back(std::move(val));
3024 }
3025
3026 return i;
3027}
3028
3029inline size_t Recovery::parse_core(const char *s, size_t n,
3030 SemanticValues & /*vs*/, Context &c,
3031 std::any & /*dt*/) const {
3032 const auto &rule = dynamic_cast<Reference &>(*ope_);
3033
3034 // Custom error message
3035 if (c.log) {
3036 auto label = dynamic_cast<Reference *>(rule.args_[0].get());
3037 if (label && !label->rule_->error_message.empty()) {
3038 c.error_info.message_pos = s;
3039 c.error_info.message = label->rule_->error_message;
3040 c.error_info.label = label->rule_->name;
3041 }
3042 }
3043
3044 // Recovery
3045 auto len = static_cast<size_t>(-1);
3046 {
3047 auto save_log = c.log;
3048 c.log = nullptr;
3049 auto se = scope_exit([&]() { c.log = save_log; });
3050
3051 SemanticValues dummy_vs;
3052 std::any dummy_dt;
3053
3054 len = rule.parse(s, n, dummy_vs, c, dummy_dt);
3055 }
3056
3057 if (success(len)) {
3058 c.recovered = true;
3059
3060 if (c.log) {
3061 c.error_info.output_log(c.log, c.s, c.l);
3062 c.error_info.clear();
3063 }
3064 }
3065
3066 // Cut
3067 if (!c.cut_stack.empty()) {
3068 c.cut_stack.back() = true;
3069
3070 if (c.cut_stack.size() == 1) {
3071 // TODO: Remove unneeded entries in packrat memoise table
3072 }
3073 }
3074
3075 return len;
3076}
3077
// Visitor dispatch: every concrete operator's accept() passes itself to the
// visitor, so the visit() overload for that operator's own type is selected.
3078inline void Sequence::accept(Visitor &v) { v.visit(*this); }
3079inline void PrioritizedChoice::accept(Visitor &v) { v.visit(*this); }
3080inline void Repetition::accept(Visitor &v) { v.visit(*this); }
3081inline void AndPredicate::accept(Visitor &v) { v.visit(*this); }
3082inline void NotPredicate::accept(Visitor &v) { v.visit(*this); }
3083inline void Dictionary::accept(Visitor &v) { v.visit(*this); }
3084inline void LiteralString::accept(Visitor &v) { v.visit(*this); }
3085inline void CharacterClass::accept(Visitor &v) { v.visit(*this); }
3086inline void Character::accept(Visitor &v) { v.visit(*this); }
3087inline void AnyCharacter::accept(Visitor &v) { v.visit(*this); }
3088inline void CaptureScope::accept(Visitor &v) { v.visit(*this); }
3089inline void Capture::accept(Visitor &v) { v.visit(*this); }
3090inline void TokenBoundary::accept(Visitor &v) { v.visit(*this); }
3091inline void Ignore::accept(Visitor &v) { v.visit(*this); }
3092inline void User::accept(Visitor &v) { v.visit(*this); }
3093inline void WeakHolder::accept(Visitor &v) { v.visit(*this); }
3094inline void Holder::accept(Visitor &v) { v.visit(*this); }
3095inline void Reference::accept(Visitor &v) { v.visit(*this); }
3096inline void Whitespace::accept(Visitor &v) { v.visit(*this); }
3097inline void BackReference::accept(Visitor &v) { v.visit(*this); }
3098inline void PrecedenceClimbing::accept(Visitor &v) { v.visit(*this); }
3099inline void Recovery::accept(Visitor &v) { v.visit(*this); }
3100inline void Cut::accept(Visitor &v) { v.visit(*this); }
3101
3103 auto p = static_cast<void *>(ope.outer_);
3104 if (ids.count(p)) { return; }
3105 auto id = ids.size();
3106 ids[p] = id;
3107 ope.outer_->id = id;
3108 ope.ope_->accept(*this);
3109}
3110
3112 if (ope.rule_) {
3113 for (auto arg : ope.args_) {
3114 arg->accept(*this);
3115 }
3116 ope.rule_->accept(*this);
3117 }
3118}
3119
3121 ope.atom_->accept(*this);
3122 ope.binop_->accept(*this);
3123}
3124
3126 if (ope.is_macro_) {
3127 for (auto arg : ope.args_) {
3128 arg->accept(*this);
3129 }
3130 } else {
3131 has_rule_ = true;
3132 }
3133}
3134
3136 if (ope.is_macro_) {
3137 ope.rule_->accept(*this);
3138 for (auto arg : ope.args_) {
3139 arg->accept(*this);
3140 }
3141 }
3142}
3143
3145 if (ope.name_ == name_) {
3146 error_s = ope.s_;
3147 } else if (!refs_.count(ope.name_)) {
3148 refs_.insert(ope.name_);
3149 if (ope.rule_) {
3150 ope.rule_->accept(*this);
3151 if (done_ == false) { return; }
3152 }
3153 }
3154 done_ = true;
3155}
3156
3158 auto save_is_empty = false;
3159 const char *save_error_s = nullptr;
3160 std::string save_error_name;
3161
3162 auto it = ope.opes_.begin();
3163 while (it != ope.opes_.end()) {
3164 (*it)->accept(*this);
3165 if (!is_empty) {
3166 ++it;
3167 while (it != ope.opes_.end()) {
3169 (*it)->accept(vis);
3170 if (vis.has_error) {
3171 is_empty = true;
3172 error_s = vis.error_s;
3173 error_name = vis.error_name;
3174 }
3175 ++it;
3176 }
3177 return;
3178 }
3179
3180 save_is_empty = is_empty;
3181 save_error_s = error_s;
3182 save_error_name = error_name;
3183
3184 is_empty = false;
3185 error_name.clear();
3186 ++it;
3187 }
3188
3189 is_empty = save_is_empty;
3190 error_s = save_error_s;
3191 error_name = save_error_name;
3192}
3193
3195 auto it = std::find_if(refs_.begin(), refs_.end(),
3196 [&](const std::pair<const char *, std::string> &ref) {
3197 return ope.name_ == ref.second;
3198 });
3199 if (it != refs_.end()) { return; }
3200
3201 if (ope.rule_) {
3202 refs_.emplace_back(ope.s_, ope.name_);
3203 ope.rule_->accept(*this);
3204 refs_.pop_back();
3205 }
3206}
3207
3209 auto it1 = std::find_if(refs_.begin(), refs_.end(),
3210 [&](const std::pair<const char *, std::string> &ref) {
3211 return ope.name_ == ref.second;
3212 });
3213 if (it1 != refs_.end()) { return; }
3214
3215 if (ope.rule_) {
3216 auto it = has_error_cache_.find(ope.name_);
3217 if (it != has_error_cache_.end()) {
3218 has_error = it->second;
3219 } else {
3220 refs_.emplace_back(ope.s_, ope.name_);
3221 ope.rule_->accept(*this);
3222 refs_.pop_back();
3224 }
3225 }
3226
3227 if (ope.is_macro_) {
3228 for (auto arg : ope.args_) {
3229 arg->accept(*this);
3230 }
3231 }
3232}
3233
3235 auto it = std::find(params_.begin(), params_.end(), ope.name_);
3236 if (it != params_.end()) { return; }
3237
3238 if (!grammar_.count(ope.name_)) {
3239 error_s[ope.name_] = ope.s_;
3240 error_message[ope.name_] = "'" + ope.name_ + "' is not defined.";
3241 } else {
3242 if (!referenced.count(ope.name_)) { referenced.insert(ope.name_); }
3243 const auto &rule = grammar_.at(ope.name_);
3244 if (rule.is_macro) {
3245 if (!ope.is_macro_ || ope.args_.size() != rule.params.size()) {
3246 error_s[ope.name_] = ope.s_;
3247 error_message[ope.name_] = "incorrect number of arguments.";
3248 }
3249 } else if (ope.is_macro_) {
3250 error_s[ope.name_] = ope.s_;
3251 error_message[ope.name_] = "'" + ope.name_ + "' is not macro.";
3252 }
3253 for (auto arg : ope.args_) {
3254 arg->accept(*this);
3255 }
3256 }
3257}
3258
3260 // Check if the reference is a macro parameter
3261 auto found_param = false;
3262 for (size_t i = 0; i < params_.size(); i++) {
3263 const auto &param = params_[i];
3264 if (param == ope.name_) {
3265 ope.iarg_ = i;
3266 found_param = true;
3267 break;
3268 }
3269 }
3270
3271 // Check if the reference is a definition rule
3272 if (!found_param && grammar_.count(ope.name_)) {
3273 auto &rule = grammar_.at(ope.name_);
3274 ope.rule_ = &rule;
3275 }
3276
3277 for (auto arg : ope.args_) {
3278 arg->accept(*this);
3279 }
3280}
3281
3283 for (size_t i = 0; i < args_.size(); i++) {
3284 const auto &name = params_[i];
3285 if (name == ope.name_) {
3286 found_ope = args_[i];
3287 return;
3288 }
3289 }
3290 found_ope = ope.shared_from_this();
3291}
3292
3293/*-----------------------------------------------------------------------------
3294 * PEG parser generator
3295 *---------------------------------------------------------------------------*/
3296
3297using Rules = std::unordered_map<std::string, std::shared_ptr<Ope>>;
3298
3300public:
3302 std::shared_ptr<Grammar> grammar;
3303 std::string start;
3305 };
3306
3307 static ParserContext parse(const char *s, size_t n, const Rules &rules,
3308 Log log, std::string_view start) {
3309 return get_instance().perform_core(s, n, rules, log, std::string(start));
3310 }
3311
3312 // For debugging purpose
3313 static bool parse_test(const char *d, const char *s) {
3314 Data data;
3315 std::any dt = &data;
3316
3317 auto n = strlen(s);
3318 auto r = get_instance().g[d].parse(s, n, dt);
3319 return r.ret && r.len == n;
3320 }
3321
3322#if defined(__cpp_lib_char8_t)
3323 static bool parse_test(const char *d, const char8_t *s) {
3324 return parse_test(d, reinterpret_cast<const char *>(s));
3325 }
3326#endif
3327
3328private:
3330 static ParserGenerator instance;
3331 return instance;
3332 }
3333
3335 make_grammar();
3336 setup_actions();
3337 }
3338
3340 std::string type;
3341 std::any data;
3342 std::string_view sv;
3343 };
3344
3345 struct Data {
3346 std::shared_ptr<Grammar> grammar;
3347 std::string start;
3348 const char *start_pos = nullptr;
3349
3350 std::vector<std::pair<std::string, const char *>> duplicates_of_definition;
3351
3352 std::vector<std::pair<std::string, const char *>> duplicates_of_instruction;
3353 std::map<std::string, std::vector<Instruction>> instructions;
3354
3355 std::vector<std::pair<std::string, const char *>> undefined_back_references;
3356 std::vector<std::set<std::string_view>> captures_stack{{}};
3357
3358 std::set<std::string_view> captures_in_current_definition;
3360
3361 Data() : grammar(std::make_shared<Grammar>()) {}
3362 };
3363
3365 // Setup PEG syntax parser
3366 g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]);
3367 g["Definition"] <=
3368 cho(seq(g["Ignore"], g["IdentCont"], g["Parameters"], g["LEFTARROW"],
3369 g["Expression"], opt(g["Instruction"])),
3370 seq(g["Ignore"], g["Identifier"], g["LEFTARROW"], g["Expression"],
3371 opt(g["Instruction"])));
3372 g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"])));
3373 g["Sequence"] <= zom(cho(g["CUT"], g["Prefix"]));
3374 g["Prefix"] <= seq(opt(cho(g["AND"], g["NOT"])), g["SuffixWithLabel"]);
3375 g["SuffixWithLabel"] <=
3376 seq(g["Suffix"], opt(seq(g["LABEL"], g["Identifier"])));
3377 g["Suffix"] <= seq(g["Primary"], opt(g["Loop"]));
3378 g["Loop"] <= cho(g["QUESTION"], g["STAR"], g["PLUS"], g["Repetition"]);
3379 g["Primary"] <= cho(seq(g["Ignore"], g["IdentCont"], g["Arguments"],
3380 npd(g["LEFTARROW"])),
3381 seq(g["Ignore"], g["Identifier"],
3382 npd(seq(opt(g["Parameters"]), g["LEFTARROW"]))),
3383 seq(g["OPEN"], g["Expression"], g["CLOSE"]),
3384 seq(g["BeginTok"], g["Expression"], g["EndTok"]),
3385 g["CapScope"],
3386 seq(g["BeginCap"], g["Expression"], g["EndCap"]),
3387 g["BackRef"], g["DictionaryI"], g["LiteralI"],
3388 g["Dictionary"], g["Literal"], g["NegatedClassI"],
3389 g["NegatedClass"], g["ClassI"], g["Class"], g["DOT"]);
3390
3391 g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
3392 g["IdentCont"] <= tok(seq(g["IdentStart"], zom(g["IdentRest"])));
3393
3394 const static std::vector<std::pair<char32_t, char32_t>> range = {
3395 {0x0080, 0xFFFF}};
3396 g["IdentStart"] <= seq(npd(lit(u8(u8"↑"))), npd(lit(u8(u8"⇑"))),
3397 cho(cls("a-zA-Z_%"), cls(range)));
3398
3399 g["IdentRest"] <= cho(g["IdentStart"], cls("0-9"));
3400
3401 g["Dictionary"] <= seq(g["LiteralD"], oom(seq(g["PIPE"], g["LiteralD"])));
3402
3403 g["DictionaryI"] <=
3404 seq(g["LiteralID"], oom(seq(g["PIPE"], g["LiteralID"])));
3405
3406 auto lit_ope = cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))),
3407 cls("'"), g["Spacing"]),
3408 seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))),
3409 cls("\""), g["Spacing"]));
3410 g["Literal"] <= lit_ope;
3411 g["LiteralD"] <= lit_ope;
3412
3413 auto lit_case_ignore_ope =
3414 cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), lit("'i"),
3415 g["Spacing"]),
3416 seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), lit("\"i"),
3417 g["Spacing"]));
3418 g["LiteralI"] <= lit_case_ignore_ope;
3419 g["LiteralID"] <= lit_case_ignore_ope;
3420
3421 // NOTE: Brian Ford's original paper uses 'zom' instead of 'oom'.
3422 g["Class"] <= seq(chr('['), npd(chr('^')),
3423 tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'),
3424 g["Spacing"]);
3425 g["ClassI"] <= seq(chr('['), npd(chr('^')),
3426 tok(oom(seq(npd(chr(']')), g["Range"]))), lit("]i"),
3427 g["Spacing"]);
3428
3429 g["NegatedClass"] <= seq(lit("[^"),
3430 tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'),
3431 g["Spacing"]);
3432 g["NegatedClassI"] <= seq(lit("[^"),
3433 tok(oom(seq(npd(chr(']')), g["Range"]))),
3434 lit("]i"), g["Spacing"]);
3435
3436 // NOTE: This differs from Brian Ford's original paper; the
3437 // modification allows us to specify `[+-]` as a valid char class.
3438 g["Range"] <=
3439 cho(seq(g["Char"], chr('-'), npd(chr(']')), g["Char"]), g["Char"]);
3440
3441 g["Char"] <=
3442 cho(seq(chr('\\'), cls("fnrtv'\"[]\\^")),
3443 seq(chr('\\'), cls("0-3"), cls("0-7"), cls("0-7")),
3444 seq(chr('\\'), cls("0-7"), opt(cls("0-7"))),
3445 seq(lit("\\x"), cls("0-9a-fA-F"), opt(cls("0-9a-fA-F"))),
3446 seq(lit("\\u"),
3447 cho(seq(cho(seq(chr('0'), cls("0-9a-fA-F")), lit("10")),
3448 rep(cls("0-9a-fA-F"), 4, 4)),
3449 rep(cls("0-9a-fA-F"), 4, 5))),
3450 seq(npd(chr('\\')), dot()));
3451
3452 g["Repetition"] <=
3453 seq(g["BeginBracket"], g["RepetitionRange"], g["EndBracket"]);
3454 g["RepetitionRange"] <= cho(seq(g["Number"], g["COMMA"], g["Number"]),
3455 seq(g["Number"], g["COMMA"]), g["Number"],
3456 seq(g["COMMA"], g["Number"]));
3457 g["Number"] <= seq(oom(cls("0-9")), g["Spacing"]);
3458
3459 g["CapScope"] <= seq(g["BeginCapScope"], g["Expression"], g["EndCapScope"]);
3460
3461 g["LEFTARROW"] <= seq(cho(lit("<-"), lit(u8(u8"←"))), g["Spacing"]);
3462 ~g["SLASH"] <= seq(chr('/'), g["Spacing"]);
3463 ~g["PIPE"] <= seq(chr('|'), g["Spacing"]);
3464 g["AND"] <= seq(chr('&'), g["Spacing"]);
3465 g["NOT"] <= seq(chr('!'), g["Spacing"]);
3466 g["QUESTION"] <= seq(chr('?'), g["Spacing"]);
3467 g["STAR"] <= seq(chr('*'), g["Spacing"]);
3468 g["PLUS"] <= seq(chr('+'), g["Spacing"]);
3469 ~g["OPEN"] <= seq(chr('('), g["Spacing"]);
3470 ~g["CLOSE"] <= seq(chr(')'), g["Spacing"]);
3471 g["DOT"] <= seq(chr('.'), g["Spacing"]);
3472
3473 g["CUT"] <= seq(lit(u8(u8"↑")), g["Spacing"]);
3474 ~g["LABEL"] <= seq(cho(chr('^'), lit(u8(u8"⇑"))), g["Spacing"]);
3475
3476 ~g["Spacing"] <= zom(cho(g["Space"], g["Comment"]));
3477 g["Comment"] <=
3478 seq(chr('#'), zom(seq(npd(g["EndOfLine"]), dot())), g["EndOfLine"]);
3479 g["Space"] <= cho(chr(' '), chr('\t'), g["EndOfLine"]);
3480 g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r'));
3481 g["EndOfFile"] <= npd(dot());
3482
3483 ~g["BeginTok"] <= seq(chr('<'), g["Spacing"]);
3484 ~g["EndTok"] <= seq(chr('>'), g["Spacing"]);
3485
3486 ~g["BeginCapScope"] <= seq(chr('$'), chr('('), g["Spacing"]);
3487 ~g["EndCapScope"] <= seq(chr(')'), g["Spacing"]);
3488
3489 g["BeginCap"] <= seq(chr('$'), tok(g["IdentCont"]), chr('<'), g["Spacing"]);
3490 ~g["EndCap"] <= seq(chr('>'), g["Spacing"]);
3491
3492 g["BackRef"] <= seq(chr('$'), tok(g["IdentCont"]), g["Spacing"]);
3493
3494 g["IGNORE"] <= chr('~');
3495
3496 g["Ignore"] <= opt(g["IGNORE"]);
3497 g["Parameters"] <= seq(g["OPEN"], g["Identifier"],
3498 zom(seq(g["COMMA"], g["Identifier"])), g["CLOSE"]);
3499 g["Arguments"] <= seq(g["OPEN"], g["Expression"],
3500 zom(seq(g["COMMA"], g["Expression"])), g["CLOSE"]);
3501 ~g["COMMA"] <= seq(chr(','), g["Spacing"]);
3502
3503 // Instruction grammars
3504 g["Instruction"] <=
3505 seq(g["BeginBracket"],
3506 opt(seq(g["InstructionItem"], zom(seq(g["InstructionItemSeparator"],
3507 g["InstructionItem"])))),
3508 g["EndBracket"]);
3509 g["InstructionItem"] <=
3510 cho(g["PrecedenceClimbing"], g["ErrorMessage"], g["NoAstOpt"]);
3511 ~g["InstructionItemSeparator"] <= seq(chr(';'), g["Spacing"]);
3512
3513 ~g["SpacesZom"] <= zom(g["Space"]);
3514 ~g["SpacesOom"] <= oom(g["Space"]);
3515 ~g["BeginBracket"] <= seq(chr('{'), g["Spacing"]);
3516 ~g["EndBracket"] <= seq(chr('}'), g["Spacing"]);
3517
3518 // PrecedenceClimbing instruction
3519 g["PrecedenceClimbing"] <=
3520 seq(lit("precedence"), g["SpacesOom"], g["PrecedenceInfo"],
3521 zom(seq(g["SpacesOom"], g["PrecedenceInfo"])), g["SpacesZom"]);
3522 g["PrecedenceInfo"] <=
3523 seq(g["PrecedenceAssoc"],
3524 oom(seq(ign(g["SpacesOom"]), g["PrecedenceOpe"])));
3525 g["PrecedenceOpe"] <=
3526 cho(seq(cls("'"),
3527 tok(zom(seq(npd(cho(g["Space"], cls("'"))), g["Char"]))),
3528 cls("'")),
3529 seq(cls("\""),
3530 tok(zom(seq(npd(cho(g["Space"], cls("\""))), g["Char"]))),
3531 cls("\"")),
3532 tok(oom(seq(npd(cho(g["PrecedenceAssoc"], g["Space"], chr('}'))),
3533 dot()))));
3534 g["PrecedenceAssoc"] <= cls("LR");
3535
3536 // Error message instruction
3537 g["ErrorMessage"] <= seq(lit("error_message"), g["SpacesOom"],
3538 g["LiteralD"], g["SpacesZom"]);
3539
3540 // No Ast node optimization instruction
3541 g["NoAstOpt"] <= seq(lit("no_ast_opt"), g["SpacesZom"]);
3542
3543 // Set definition names
3544 for (auto &x : g) {
3545 x.second.name = x.first;
3546 }
3547 }
3548
3550 g["Definition"] = [&](const SemanticValues &vs, std::any &dt) {
3551 auto &data = *std::any_cast<Data *>(dt);
3552
3553 auto is_macro = vs.choice() == 0;
3554 auto ignore = std::any_cast<bool>(vs[0]);
3555 auto name = std::any_cast<std::string>(vs[1]);
3556
3557 std::vector<std::string> params;
3558 std::shared_ptr<Ope> ope;
3559 auto has_instructions = false;
3560
3561 if (is_macro) {
3562 params = std::any_cast<std::vector<std::string>>(vs[2]);
3563 ope = std::any_cast<std::shared_ptr<Ope>>(vs[4]);
3564 if (vs.size() == 6) { has_instructions = true; }
3565 } else {
3566 ope = std::any_cast<std::shared_ptr<Ope>>(vs[3]);
3567 if (vs.size() == 5) { has_instructions = true; }
3568 }
3569
3570 if (has_instructions) {
3571 auto index = is_macro ? 5 : 4;
3572 std::unordered_set<std::string> types;
3573 for (const auto &instruction :
3574 std::any_cast<std::vector<Instruction>>(vs[index])) {
3575 const auto &type = instruction.type;
3576 if (types.find(type) == types.end()) {
3577 data.instructions[name].push_back(instruction);
3578 types.insert(instruction.type);
3579 if (type == "declare_symbol" || type == "check_symbol") {
3580 if (!TokenChecker::is_token(*ope)) { ope = tok(ope); }
3581 }
3582 } else {
3583 data.duplicates_of_instruction.emplace_back(type,
3584 instruction.sv.data());
3585 }
3586 }
3587 }
3588
3589 auto &grammar = *data.grammar;
3590 if (!grammar.count(name)) {
3591 auto &rule = grammar[name];
3592 rule <= ope;
3593 rule.name = name;
3594 rule.s_ = vs.sv().data();
3595 rule.line_ = line_info(vs.ss, rule.s_);
3596 rule.ignoreSemanticValue = ignore;
3597 rule.is_macro = is_macro;
3598 rule.params = params;
3599
3600 if (data.start.empty()) {
3601 data.start = rule.name;
3602 data.start_pos = rule.s_;
3603 }
3604 } else {
3605 data.duplicates_of_definition.emplace_back(name, vs.sv().data());
3606 }
3607 };
3608
3609 g["Definition"].enter = [](const Context & /*c*/, const char * /*s*/,
3610 size_t /*n*/, std::any &dt) {
3611 auto &data = *std::any_cast<Data *>(dt);
3612 data.captures_in_current_definition.clear();
3613 };
3614
3615 g["Expression"] = [&](const SemanticValues &vs) {
3616 if (vs.size() == 1) {
3617 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3618 } else {
3619 std::vector<std::shared_ptr<Ope>> opes;
3620 for (auto i = 0u; i < vs.size(); i++) {
3621 opes.emplace_back(std::any_cast<std::shared_ptr<Ope>>(vs[i]));
3622 }
3623 const std::shared_ptr<Ope> ope =
3624 std::make_shared<PrioritizedChoice>(opes);
3625 return ope;
3626 }
3627 };
3628
3629 g["Sequence"] = [&](const SemanticValues &vs) {
3630 if (vs.empty()) {
3631 return npd(lit(""));
3632 } else if (vs.size() == 1) {
3633 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3634 } else {
3635 std::vector<std::shared_ptr<Ope>> opes;
3636 for (const auto &x : vs) {
3637 opes.emplace_back(std::any_cast<std::shared_ptr<Ope>>(x));
3638 }
3639 const std::shared_ptr<Ope> ope = std::make_shared<Sequence>(opes);
3640 return ope;
3641 }
3642 };
3643
3644 g["Prefix"] = [&](const SemanticValues &vs) {
3645 std::shared_ptr<Ope> ope;
3646 if (vs.size() == 1) {
3647 ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3648 } else {
3649 assert(vs.size() == 2);
3650 auto tok = std::any_cast<char>(vs[0]);
3651 ope = std::any_cast<std::shared_ptr<Ope>>(vs[1]);
3652 if (tok == '&') {
3653 ope = apd(ope);
3654 } else { // '!'
3655 ope = npd(ope);
3656 }
3657 }
3658 return ope;
3659 };
3660
3661 g["SuffixWithLabel"] = [&](const SemanticValues &vs, std::any &dt) {
3662 auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3663 if (vs.size() == 1) {
3664 return ope;
3665 } else {
3666 assert(vs.size() == 2);
3667 auto &data = *std::any_cast<Data *>(dt);
3668 const auto &ident = std::any_cast<std::string>(vs[1]);
3669 auto label = ref(*data.grammar, ident, vs.sv().data(), false, {});
3670 auto recovery = rec(ref(*data.grammar, RECOVER_DEFINITION_NAME,
3671 vs.sv().data(), true, {label}));
3672 return cho4label_(ope, recovery);
3673 }
3674 };
3675
// Value produced by the `Loop` action and consumed by the `Suffix` action:
// which repetition operator was written and, for Type::rep, its bounds.
3676 struct Loop {
// opt: '?', zom: '*', oom: '+', rep: regex-like `{...}` repetition.
3677 enum class Type { opt = 0, zom, oom, rep };
3678 Type type;
// (min, max) repetition counts; only read by `Suffix` for Type::rep.
3679 std::pair<size_t, size_t> range;
3680 };
3681
3682 g["Suffix"] = [&](const SemanticValues &vs) {
3683 auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3684 if (vs.size() == 1) {
3685 return ope;
3686 } else {
3687 assert(vs.size() == 2);
3688 auto loop = std::any_cast<Loop>(vs[1]);
3689 switch (loop.type) {
3690 case Loop::Type::opt: return opt(ope);
3691 case Loop::Type::zom: return zom(ope);
3692 case Loop::Type::oom: return oom(ope);
3693 default: // Regex-like repetition
3694 return rep(ope, loop.range.first, loop.range.second);
3695 }
3696 }
3697 };
3698
3699 g["Loop"] = [&](const SemanticValues &vs) {
3700 switch (vs.choice()) {
3701 case 0: // Option
3702 return Loop{Loop::Type::opt, std::pair<size_t, size_t>()};
3703 case 1: // Zero or More
3704 return Loop{Loop::Type::zom, std::pair<size_t, size_t>()};
3705 case 2: // One or More
3706 return Loop{Loop::Type::oom, std::pair<size_t, size_t>()};
3707 default: // Regex-like repetition
3708 return Loop{Loop::Type::rep,
3709 std::any_cast<std::pair<size_t, size_t>>(vs[0])};
3710 }
3711 };
3712
// Action for `Primary`: builds the operator for one primary expression. The
// switch is keyed on the alternative of the `Primary` rule that matched.
3713 g["Primary"] = [&](const SemanticValues &vs, std::any &dt) {
3714 auto &data = *std::any_cast<Data *>(dt);
3715
3716 switch (vs.choice()) {
3717 case 0: // Macro Reference
3718 case 1: { // Reference
3719 auto is_macro = vs.choice() == 0;
3720 auto ignore = std::any_cast<bool>(vs[0]);
3721 const auto &ident = std::any_cast<std::string>(vs[1]);
3722
// Only the macro form carries an argument list (third value).
3723 std::vector<std::shared_ptr<Ope>> args;
3724 if (is_macro) {
3725 args = std::any_cast<std::vector<std::shared_ptr<Ope>>>(vs[2]);
3726 }
3727
3728 auto ope = ref(*data.grammar, ident, vs.sv().data(), is_macro, args);
// A direct reference to the recovery definition is wrapped with `rec`.
3729 if (ident == RECOVER_DEFINITION_NAME) { ope = rec(ope); }
3730
// A leading `~` (the `Ignore` value) wraps the reference with `ign`.
3731 if (ignore) {
3732 return ign(ope);
3733 } else {
3734 return ope;
3735 }
3736 }
3737 case 2: { // (Expression)
3738 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3739 }
3740 case 3: { // TokenBoundary
3741 return tok(std::any_cast<std::shared_ptr<Ope>>(vs[0]));
3742 }
3743 case 4: { // CaptureScope
3744 return csc(std::any_cast<std::shared_ptr<Ope>>(vs[0]));
3745 }
3746 case 5: { // Capture
3747 const auto &name = std::any_cast<std::string_view>(vs[0]);
3748 auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[1]);
3749
// Record the capture name both in the innermost capture scope and in the
// per-definition set; the `BackRef` action consults both.
3750 data.captures_stack.back().insert(name);
3751 data.captures_in_current_definition.insert(name);
3752
// NOTE(review): the statement that declares `cs` is not visible in this
// listing (its numbering skips a line here); presumably it selects the
// current capture scope from the Context `c` - confirm against the file.
3753 return cap(ope, [name](const char *a_s, size_t a_n, Context &c) {
3755 cs[name] = std::string(a_s, a_n);
3756 });
3757 }
// All remaining alternatives already produced a finished operator.
3758 default: {
3759 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
3760 }
3761 }
3762 };
3763
3764 g["IdentCont"] = [](const SemanticValues &vs) {
3765 return std::string(vs.sv().data(), vs.sv().length());
3766 };
3767
3768 g["Dictionary"] = [](const SemanticValues &vs) {
3769 auto items = vs.transform<std::string>();
3770 return dic(items, false);
3771 };
3772 g["DictionaryI"] = [](const SemanticValues &vs) {
3773 auto items = vs.transform<std::string>();
3774 return dic(items, true);
3775 };
3776
3777 g["Literal"] = [](const SemanticValues &vs) {
3778 const auto &tok = vs.tokens.front();
3779 return lit(resolve_escape_sequence(tok.data(), tok.size()));
3780 };
3781 g["LiteralI"] = [](const SemanticValues &vs) {
3782 const auto &tok = vs.tokens.front();
3783 return liti(resolve_escape_sequence(tok.data(), tok.size()));
3784 };
3785 g["LiteralD"] = [](const SemanticValues &vs) {
3786 auto &tok = vs.tokens.front();
3787 return resolve_escape_sequence(tok.data(), tok.size());
3788 };
3789 g["LiteralID"] = [](const SemanticValues &vs) {
3790 auto &tok = vs.tokens.front();
3791 return resolve_escape_sequence(tok.data(), tok.size());
3792 };
3793
3794 g["Class"] = [](const SemanticValues &vs) {
3795 auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
3796 return cls(ranges);
3797 };
3798 g["ClassI"] = [](const SemanticValues &vs) {
3799 auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
3800 return cls(ranges, true);
3801 };
3802 g["NegatedClass"] = [](const SemanticValues &vs) {
3803 auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
3804 return ncls(ranges);
3805 };
3806 g["NegatedClassI"] = [](const SemanticValues &vs) {
3807 auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
3808 return ncls(ranges, true);
3809 };
3810 g["Range"] = [](const SemanticValues &vs) {
3811 switch (vs.choice()) {
3812 case 0: {
3813 auto s1 = std::any_cast<std::string>(vs[0]);
3814 auto s2 = std::any_cast<std::string>(vs[1]);
3815 auto cp1 = decode_codepoint(s1.data(), s1.length());
3816 auto cp2 = decode_codepoint(s2.data(), s2.length());
3817 return std::pair(cp1, cp2);
3818 }
3819 case 1: {
3820 auto s = std::any_cast<std::string>(vs[0]);
3821 auto cp = decode_codepoint(s.data(), s.length());
3822 return std::pair(cp, cp);
3823 }
3824 }
3825 return std::pair<char32_t, char32_t>(0, 0);
3826 };
3827 g["Char"] = [](const SemanticValues &vs) {
3828 return resolve_escape_sequence(vs.sv().data(), vs.sv().length());
3829 };
3830
3831 g["RepetitionRange"] = [&](const SemanticValues &vs) {
3832 switch (vs.choice()) {
3833 case 0: { // Number COMMA Number
3834 auto min = std::any_cast<size_t>(vs[0]);
3835 auto max = std::any_cast<size_t>(vs[1]);
3836 return std::pair(min, max);
3837 }
3838 case 1: // Number COMMA
3839 return std::pair(std::any_cast<size_t>(vs[0]),
3840 std::numeric_limits<size_t>::max());
3841 case 2: { // Number
3842 auto n = std::any_cast<size_t>(vs[0]);
3843 return std::pair(n, n);
3844 }
3845 default: // COMMA Number
3846 return std::pair(std::numeric_limits<size_t>::min(),
3847 std::any_cast<size_t>(vs[0]));
3848 }
3849 };
3850 g["Number"] = [&](const SemanticValues &vs) {
3851 return vs.token_to_number<size_t>();
3852 };
3853
3854 g["CapScope"].enter = [](const Context & /*c*/, const char * /*s*/,
3855 size_t /*n*/, std::any &dt) {
3856 auto &data = *std::any_cast<Data *>(dt);
3857 data.captures_stack.emplace_back();
3858 };
3859 g["CapScope"].leave = [](const Context & /*c*/, const char * /*s*/,
3860 size_t /*n*/, size_t /*matchlen*/,
3861 std::any & /*value*/, std::any &dt) {
3862 auto &data = *std::any_cast<Data *>(dt);
3863 data.captures_stack.pop_back();
3864 };
3865
3866 g["AND"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3867 g["NOT"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3868 g["QUESTION"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3869 g["STAR"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3870 g["PLUS"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
3871
3872 g["DOT"] = [](const SemanticValues & /*vs*/) { return dot(); };
3873
3874 g["CUT"] = [](const SemanticValues & /*vs*/) { return cut(); };
3875
3876 g["BeginCap"] = [](const SemanticValues &vs) { return vs.token(); };
3877
3878 g["BackRef"] = [&](const SemanticValues &vs, std::any &dt) {
3879 auto &data = *std::any_cast<Data *>(dt);
3880
3881 // Undefined back reference check
3882 {
3883 auto found = false;
3884 auto it = data.captures_stack.rbegin();
3885 while (it != data.captures_stack.rend()) {
3886 if (it->find(vs.token()) != it->end()) {
3887 found = true;
3888 break;
3889 }
3890 ++it;
3891 }
3892 if (!found) {
3893 auto ptr = vs.token().data() - 1; // include '$' symbol
3894 data.undefined_back_references.emplace_back(vs.token(), ptr);
3895 }
3896 }
3897
3898 // NOTE: Disable packrat parsing if a back reference is not defined in
3899 // captures in the current definition rule.
3900 if (data.captures_in_current_definition.find(vs.token()) ==
3901 data.captures_in_current_definition.end()) {
3902 data.enablePackratParsing = false;
3903 }
3904
3905 return bkr(vs.token_to_string());
3906 };
3907
3908 g["Ignore"] = [](const SemanticValues &vs) { return vs.size() > 0; };
3909
3910 g["Parameters"] = [](const SemanticValues &vs) {
3911 return vs.transform<std::string>();
3912 };
3913
3914 g["Arguments"] = [](const SemanticValues &vs) {
3915 return vs.transform<std::shared_ptr<Ope>>();
3916 };
3917
// Action for the `precedence` instruction: builds a table that maps every
// operator token to its (level, associativity) and returns it wrapped in an
// Instruction of type "precedence".
3918 g["PrecedenceClimbing"] = [](const SemanticValues &vs) {
// NOTE(review): the declaration of `binOpeInfo` is missing from this listing
// (its numbering skips a line here). perform_core later casts the instruction
// data to PrecedenceClimbing::BinOpeInfo, so that is presumably its type.
// Levels start at 1 and grow by one per `PrecedenceInfo` entry, in the order
// the entries were written.
3920 size_t level = 1;
3921 for (auto v : vs) {
// Each entry is the list of tokens from `PrecedenceInfo`: element 0 is the
// associativity token (its first character is stored), the rest are operators.
3922 auto tokens = std::any_cast<std::vector<std::string_view>>(v);
3923 auto assoc = tokens[0][0];
3924 for (size_t i = 1; i < tokens.size(); i++) {
3925 binOpeInfo[tokens[i]] = std::pair(level, assoc);
3926 }
3927 level++;
3928 }
3929 Instruction instruction;
3930 instruction.type = "precedence";
3931 instruction.data = binOpeInfo;
3932 instruction.sv = vs.sv();
3933 return instruction;
3934 };
3935 g["PrecedenceInfo"] = [](const SemanticValues &vs) {
3936 return vs.transform<std::string_view>();
3937 };
3938 g["PrecedenceOpe"] = [](const SemanticValues &vs) { return vs.token(); };
3939 g["PrecedenceAssoc"] = [](const SemanticValues &vs) { return vs.token(); };
3940
3941 g["ErrorMessage"] = [](const SemanticValues &vs) {
3942 Instruction instruction;
3943 instruction.type = "error_message";
3944 instruction.data = std::any_cast<std::string>(vs[0]);
3945 instruction.sv = vs.sv();
3946 return instruction;
3947 };
3948
3949 g["NoAstOpt"] = [](const SemanticValues &vs) {
3950 Instruction instruction;
3951 instruction.type = "no_ast_opt";
3952 instruction.sv = vs.sv();
3953 return instruction;
3954 };
3955
3956 g["Instruction"] = [](const SemanticValues &vs) {
3957 return vs.transform<Instruction>();
3958 };
3959 }
3960
// NOTE(review): the first line(s) of this function's signature are missing
// from this listing. The call site in perform_core invokes it as
// apply_precedence_instruction(rule, info, s, log).
//
// Rewrites `rule` into a precedence-climbing operator. The rule's core
// operator must be a Sequence whose first element is a reference (`atom`) and
// whose second element is a zero-or-more Repetition of a Sequence made of two
// references (`binop`, then `atom` again). Returns true on success; on any
// shape mismatch it reports through `log` (when set) and returns false.
3963 const char *s, Log log) {
3964 try {
// Each dynamic_cast to a reference throws when the operator has a different
// type; the catch-all below turns that into the same diagnostic.
3965 auto &seq = dynamic_cast<Sequence &>(*rule.get_core_operator());
3966 auto atom = seq.opes_[0];
3967 auto &rep = dynamic_cast<Repetition &>(*seq.opes_[1]);
3968 auto &seq1 = dynamic_cast<Sequence &>(*rep.ope_);
3969 auto binop = seq1.opes_[0];
3970 auto atom1 = seq1.opes_[1];
3971
3972 auto atom_name = dynamic_cast<Reference &>(*atom).name_;
3973 auto binop_name = dynamic_cast<Reference &>(*binop).name_;
3974 auto atom1_name = dynamic_cast<Reference &>(*atom1).name_;
3975
// Reject repetitions other than zero-or-more, operands that refer to
// different rules, and an operator that is the same rule as the operand.
3976 if (!rep.is_zom() || atom_name != atom1_name || atom_name == binop_name) {
3977 if (log) {
3978 auto line = line_info(s, rule.s_);
3979 log(line.first, line.second,
3980 "'precedence' instruction cannot be applied to '" + rule.name +
3981 "'.",
3982 "");
3983 }
3984 return false;
3985 }
3986
// Replace the rule's operator with the precedence-climbing operator and
// turn off the rule's own action.
3987 rule.holder_->ope_ = pre(atom, binop, info, rule);
3988 rule.disable_action = true;
3989 } catch (...) {
3990 if (log) {
3991 auto line = line_info(s, rule.s_);
3992 log(line.first, line.second,
3993 "'precedence' instruction cannot be applied to '" + rule.name +
3994 "'.",
3995 "");
3996 }
3997 return false;
3998 }
3999 return true;
4000 }
4001
// Parses the PEG grammar text `s` (length `n`) and turns it into a ready-to-use
// grammar.
//
// Steps, in order: register the built-in `%recover` macro, parse the text with
// the meta-grammar `g`, merge the user supplied `rules`, report duplicated
// definitions / instructions and undefined back references, pick the start
// rule (`requested_start` if non-empty, otherwise the one recorded while
// parsing), check references, link them, check for left recursion and
// infinite loops, wire up the whitespace and word rules, and finally apply the
// per-rule instructions.
//
// Diagnostics are sent to `log` when it is set. Returns an empty ParserContext
// on any error, otherwise {grammar, start rule name, packrat flag}.
4002 ParserContext perform_core(const char *s, size_t n, const Rules &rules,
4003 Log log, std::string requested_start) {
4004 Data data;
4005 auto &grammar = *data.grammar;
4006
4007 // Built-in macros
4008 {
4009 // `%recover`
4010 {
4011 auto &rule = grammar[RECOVER_DEFINITION_NAME];
4012 rule <= ref(grammar, "x", "", false, {});
4013 rule.name = RECOVER_DEFINITION_NAME;
4014 rule.s_ = "[native]";
4015 rule.ignoreSemanticValue = true;
4016 rule.is_macro = true;
4017 rule.params = {"x"};
4018 }
4019 }
4020
// The semantic actions receive `&data` through `dt` and fill it while parsing.
4021 std::any dt = &data;
4022 auto r = g["Grammar"].parse(s, n, dt, nullptr, log);
4023
4024 if (!r.ret) {
4025 if (log) {
// Prefer the position/message attached to the error info; fall back to a
// generic "syntax error" at the error position.
4026 if (r.error_info.message_pos) {
4027 auto line = line_info(s, r.error_info.message_pos);
4028 log(line.first, line.second, r.error_info.message,
4029 r.error_info.label);
4030 } else {
4031 auto line = line_info(s, r.error_info.error_pos);
4032 log(line.first, line.second, "syntax error", r.error_info.label);
4033 }
4034 }
4035 return {};
4036 }
4037
4038 // User provided rules
// A leading '~' in the user supplied name marks the rule's semantic value as
// ignored and is stripped from the name. These rules are assigned after
// parsing, so they replace the operator of a same-named parsed rule.
// NOTE(review): the structured binding copies every map entry; a const
// reference would avoid that.
4039 for (auto [user_name, user_rule] : rules) {
4040 auto name = user_name;
4041 auto ignore = false;
4042 if (!name.empty() && name[0] == '~') {
4043 ignore = true;
4044 name.erase(0, 1);
4045 }
4046 if (!name.empty()) {
4047 auto &rule = grammar[name];
4048 rule <= user_rule;
4049 rule.name = name;
4050 rule.ignoreSemanticValue = ignore;
4051 }
4052 }
4053
4054 // Check duplicated definitions
// The three checks below all run before bailing out, so every problem of
// these kinds is reported in one pass.
4055 auto ret = true;
4056
4057 if (!data.duplicates_of_definition.empty()) {
4058 for (const auto &[name, ptr] : data.duplicates_of_definition) {
4059 if (log) {
4060 auto line = line_info(s, ptr);
4061 log(line.first, line.second,
4062 "The definition '" + name + "' is already defined.", "");
4063 }
4064 }
4065 ret = false;
4066 }
4067
4068 // Check duplicated instructions
4069 if (!data.duplicates_of_instruction.empty()) {
4070 for (const auto &[type, ptr] : data.duplicates_of_instruction) {
4071 if (log) {
4072 auto line = line_info(s, ptr);
4073 log(line.first, line.second,
4074 "The instruction '" + type + "' is already defined.", "");
4075 }
4076 }
4077 ret = false;
4078 }
4079
4080 // Check undefined back references
4081 if (!data.undefined_back_references.empty()) {
4082 for (const auto &[name, ptr] : data.undefined_back_references) {
4083 if (log) {
4084 auto line = line_info(s, ptr);
4085 log(line.first, line.second,
4086 "The back reference '" + name + "' is undefined.", "");
4087 }
4088 }
4089 ret = false;
4090 }
4091
4092 // Set root definition
4093 auto start = data.start;
4094
4095 if (!requested_start.empty()) {
4096 if (grammar.count(requested_start)) {
4097 start = requested_start;
4098 } else {
4099 if (log) {
// There is no source position for a missing start rule, so the start of the
// grammar text is reported.
4100 auto line = line_info(s, s);
4101 log(line.first, line.second,
4102 "The specified start rule '" + requested_start +
4103 "' is undefined.",
4104 "");
4105 }
4106 ret = false;
4107 }
4108 }
4109
4110 if (!ret) { return {}; }
4111
4112 auto &start_rule = grammar[start];
4113
4114 // Check if the start rule has ignore operator
4115 {
4116 if (start_rule.ignoreSemanticValue) {
4117 if (log) {
4118 auto line = line_info(s, start_rule.s_);
4119 log(line.first, line.second,
4120 "Ignore operator cannot be applied to '" + start_rule.name + "'.",
4121 "");
4122 }
4123 ret = false;
4124 }
4125 }
4126
4127 if (!ret) { return {}; }
4128
4129 // Check missing definitions
// NOTE(review): the listing's own numbering jumps from 4130 to 4134 here, so
// some initial entries of this set may be missing from view.
4130 auto referenced = std::unordered_set<std::string>{
4134 start_rule.name,
4135 };
4136
4137 for (auto &[_, rule] : grammar) {
4138 ReferenceChecker vis(grammar, rule.params);
4139 rule.accept(vis);
4140 referenced.insert(vis.referenced.begin(), vis.referenced.end());
4141 for (const auto &[name, ptr] : vis.error_s) {
4142 if (log) {
4143 auto line = line_info(s, ptr);
4144 log(line.first, line.second, vis.error_message[name], "");
4145 }
4146 ret = false;
4147 }
4148 }
4149
// Unreferenced rules are only reported; `ret` is left untouched, so they do
// not make the grammar fail to load.
4150 for (auto &[name, rule] : grammar) {
4151 if (!referenced.count(name)) {
4152 if (log) {
4153 auto line = line_info(s, rule.s_);
4154 auto msg = "'" + name + "' is not referenced.";
4155 log(line.first, line.second, msg, "");
4156 }
4157 }
4158 }
4159
4160 if (!ret) { return {}; }
4161
4162 // Link references
4163 for (auto &x : grammar) {
4164 auto &rule = x.second;
4165 LinkReferences vis(grammar, rule.params);
4166 rule.accept(vis);
4167 }
4168
4169 // Check left recursion
4170 ret = true;
4171
4172 for (auto &[name, rule] : grammar) {
4173 DetectLeftRecursion vis(name);
4174 rule.accept(vis);
4175 if (vis.error_s) {
4176 if (log) {
4177 auto line = line_info(s, vis.error_s);
4178 log(line.first, line.second, "'" + name + "' is left recursive.", "");
4179 }
4180 ret = false;
4181 }
4182 }
4183
4184 if (!ret) { return {}; }
4185
4186 // Check infinite loop
4187 if (detect_infiniteLoop(data, start_rule, log, s)) { return {}; }
4188
4189 // Automatic whitespace skipping
4190 if (grammar.count(WHITESPACE_DEFINITION_NAME)) {
// When a whitespace rule exists, rules for which IsLiteralToken::check holds
// are wrapped in a token boundary.
4191 for (auto &x : grammar) {
4192 auto &rule = x.second;
4193 auto ope = rule.get_core_operator();
4194 if (IsLiteralToken::check(*ope)) { rule <= tok(ope); }
4195 }
4196
4197 auto &rule = grammar[WHITESPACE_DEFINITION_NAME];
4198 start_rule.whitespaceOpe = wsp(rule.get_core_operator());
4199
4200 if (detect_infiniteLoop(data, rule, log, s)) { return {}; }
4201 }
4202
4203 // Word expression
4204 if (grammar.count(WORD_DEFINITION_NAME)) {
4205 auto &rule = grammar[WORD_DEFINITION_NAME];
4206 start_rule.wordOpe = rule.get_core_operator();
4207
4208 if (detect_infiniteLoop(data, rule, log, s)) { return {}; }
4209 }
4210
4211 // Apply instructions
4212 for (const auto &[name, instructions] : data.instructions) {
4213 auto &rule = grammar[name];
4214
4215 for (const auto &instruction : instructions) {
4216 if (instruction.type == "precedence") {
4217 const auto &info =
4218 std::any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data);
4219
// A rule that does not have the shape required by `precedence` aborts the
// whole grammar load.
4220 if (!apply_precedence_instruction(rule, info, s, log)) { return {}; }
4221 } else if (instruction.type == "error_message") {
4222 rule.error_message = std::any_cast<std::string>(instruction.data);
4223 } else if (instruction.type == "no_ast_opt") {
4224 rule.no_ast_opt = true;
4225 }
4226 }
4227 }
4228
4229 return {data.grammar, start, data.enablePackratParsing};
4230 }
4231
4232 bool detect_infiniteLoop(const Data &data, Definition &rule, const Log &log,
4233 const char *s) const {
4234 std::vector<std::pair<const char *, std::string>> refs;
4235 std::unordered_map<std::string, bool> has_error_cache;
4236 DetectInfiniteLoop vis(data.start_pos, rule.name, refs, has_error_cache);
4237 rule.accept(vis);
4238 if (vis.has_error) {
4239 if (log) {
4240 auto line = line_info(s, vis.error_s);
4241 log(line.first, line.second,
4242 "infinite loop is detected in '" + vis.error_name + "'.", "");
4243 }
4244 return true;
4245 }
4246 return false;
4247 }
4248
4250};
4251
4252/*-----------------------------------------------------------------------------
4253 * AST
4254 *---------------------------------------------------------------------------*/
4255
// Node of the abstract syntax tree. `Annotation` is a user supplied base
// class, so extra per-node data can be attached by inheritance.
//
// NOTE(review): in this listing parts of the three constructor initializer
// lists and at least one member declaration are missing (the listing's own
// line numbering has gaps), so the constructors below are shown incomplete.
4256template <typename Annotation> struct AstBase : public Annotation {
// Constructor for a node that has child nodes.
4257 AstBase(const char *path, size_t line, size_t column, const char *name,
4258 const std::vector<std::shared_ptr<AstBase>> &nodes,
4259 size_t position = 0, size_t length = 0, size_t choice_count = 0,
4260 size_t choice = 0)
4261 : path(path ? path : ""), line(line), column(column), name(name),
4266
// Constructor for a token node that carries the matched text.
4267 AstBase(const char *path, size_t line, size_t column, const char *name,
4268 const std::string_view &token, size_t position = 0, size_t length = 0,
4269 size_t choice_count = 0, size_t choice = 0)
4270 : path(path ? path : ""), line(line), column(column), name(name),
4275
// Constructor that copies `ast` while taking the "original" name, position,
// length and choice information from the extra arguments; AstOptimizer uses
// it when it replaces a node with its only child.
4276 AstBase(const AstBase &ast, const char *original_name, size_t position = 0,
4277 size_t length = 0, size_t original_choice_count = 0,
4278 size_t original_choice = 0)
4279 : path(ast.path), line(ast.line), column(ast.column), name(ast.name),
4285 token(ast.token), nodes(ast.nodes), parent(ast.parent) {}
4286
// Source location of the node; a null `path` is stored as an empty string.
4287 const std::string path;
4288 const size_t line = 1;
4289 const size_t column = 1;
4290
// Name of the node. add_ast_action passes the rule name, the offset of the
// match from the start of the input, and the length of the match.
4291 const std::string name;
4292 size_t position;
4293 size_t length;
4294 const size_t choice_count;
4295 const size_t choice;
// The `original_*` members are printed by ast_to_s_core next to `name` when
// they differ from it.
4296 const std::string original_name;
4298 const size_t original_choice;
4299 const unsigned int tag;
4300 const unsigned int original_tag;
4301
// For token nodes, `token` views the matched text; it does not own it.
4302 const bool is_token;
4303 const std::string_view token;
4304
// Children are owned; the parent link is weak.
4305 std::vector<std::shared_ptr<AstBase<Annotation>>> nodes;
4306 std::weak_ptr<AstBase<Annotation>> parent;
4307
// Returns a copy of the token text; asserts that this is a token node.
4308 std::string token_to_string() const {
4309 assert(is_token);
4310 return std::string(token);
4311 }
4312
// Converts the token text to a number of type T via token_to_number_.
4313 template <typename T> T token_to_number() const {
4314 return token_to_number_<T>(token);
4315 }
4316};
4317
// Appends a textual dump of the tree rooted at `ptr` to `s`.
//
// Every node produces one line: `level` indentation units, then `- name
// (token)` for token nodes or `+ name` for the others. The printed name is the
// original name, followed by `/choice` when the node has an original choice
// count and by `[name]` when the current name differs from the original one.
// When `fn` is set, its result for the node (called with `level + 1`) is
// appended right after the node's line. Children follow, one level deeper.
template <typename T>
void ast_to_s_core(const std::shared_ptr<T> &ptr, std::string &s, int level,
                   std::function<std::string(const T &ast, int level)> fn) {
  const auto &ast = *ptr;
  for (auto i = 0; i < level; i++) {
    s += " ";
  }

  auto name = ast.original_name;
  if (ast.original_choice_count > 0) {
    name += '/';
    name += std::to_string(ast.original_choice);
  }
  if (ast.name != ast.original_name) {
    name += '[';
    name += ast.name;
    name += ']';
  }

  // Append piecewise instead of building temporary strings with operator+.
  s += ast.is_token ? "- " : "+ ";
  s += name;
  if (ast.is_token) {
    s += " (";
    s += ast.token;
    s += ')';
  }
  s += '\n';

  if (fn) { s += fn(ast, level + 1); }

  // Iterate by reference: copying each child shared_ptr would only add
  // reference-count traffic.
  for (const auto &node : ast.nodes) {
    ast_to_s_core(node, s, level + 1, fn);
  }
}
4342
// Returns the textual dump of the tree rooted at `ptr`, starting at level 0.
// `fn`, when set, supplies extra text that is emitted after each node's line.
template <typename T>
std::string
ast_to_s(const std::shared_ptr<T> &ptr,
         std::function<std::string(const T &ast, int level)> fn = nullptr) {
  std::string rendered;
  ast_to_s_core(ptr, rendered, 0, fn);
  return rendered;
}
4351
4353 AstOptimizer(bool mode, const std::vector<std::string> &rules = {})
4354 : mode_(mode), rules_(rules) {}
4355
4356 template <typename T>
4357 std::shared_ptr<T> optimize(std::shared_ptr<T> original,
4358 std::shared_ptr<T> parent = nullptr) {
4359 auto found =
4360 std::find(rules_.begin(), rules_.end(), original->name) != rules_.end();
4361 auto opt = mode_ ? !found : found;
4362
4363 if (opt && original->nodes.size() == 1) {
4364 auto child = optimize(original->nodes[0], parent);
4365 auto ast = std::make_shared<T>(*child, original->name.data(),
4366 original->choice_count, original->position,
4367 original->length, original->choice);
4368 for (auto node : ast->nodes) {
4369 node->parent = ast;
4370 }
4371 return ast;
4372 }
4373
4374 auto ast = std::make_shared<T>(*original);
4375 ast->parent = parent;
4376 ast->nodes.clear();
4377 for (auto node : original->nodes) {
4378 auto child = optimize(node, ast);
4379 ast->nodes.push_back(child);
4380 }
4381 return ast;
4382 }
4383
4384private:
4385 const bool mode_;
4386 const std::vector<std::string> rules_;
4387};
4388
4389struct EmptyType {};
4390using Ast = AstBase<EmptyType>;
4391
4392template <typename T = Ast> void add_ast_action(Definition &rule) {
4393 rule.action = [&](const SemanticValues &vs) {
4394 auto line = vs.line_info();
4395
4396 if (rule.is_token()) {
4397 return std::make_shared<T>(
4398 vs.path, line.first, line.second, rule.name.data(), vs.token(),
4399 std::distance(vs.ss, vs.sv().data()), vs.sv().length(),
4400 vs.choice_count(), vs.choice());
4401 }
4402
4403 auto ast =
4404 std::make_shared<T>(vs.path, line.first, line.second, rule.name.data(),
4405 vs.transform<std::shared_ptr<T>>(),
4406 std::distance(vs.ss, vs.sv().data()),
4407 vs.sv().length(), vs.choice_count(), vs.choice());
4408
4409 for (auto node : ast->nodes) {
4410 node->parent = ast;
4411 }
4412 return ast;
4413 };
4414}
4415
// Preprocessor helpers.
// PEG_EXPAND expands to its arguments unchanged.
4416#define PEG_EXPAND(...) __VA_ARGS__
// PEG_CONCAT pastes its two arguments as written; PEG_CONCAT2 adds one level
// of indirection so that macro arguments are expanded before being pasted.
4417#define PEG_CONCAT(a, b) a##b
4418#define PEG_CONCAT2(a, b) PEG_CONCAT(a, b)
4419
// PEG_PICK expands to its 101st argument (a100) and discards all others.
4420#define PEG_PICK( \
4421 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, \
4422 a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, \
4423 a32, a33, a34, a35, a36, a37, a38, a39, a40, a41, a42, a43, a44, a45, a46, \
4424 a47, a48, a49, a50, a51, a52, a53, a54, a55, a56, a57, a58, a59, a60, a61, \
4425 a62, a63, a64, a65, a66, a67, a68, a69, a70, a71, a72, a73, a74, a75, a76, \
4426 a77, a78, a79, a80, a81, a82, a83, a84, a85, a86, a87, a88, a89, a90, a91, \
4427 a92, a93, a94, a95, a96, a97, a98, a99, a100, ...) \
4428 a100
4429
4430#define PEG_COUNT(...) \
4431 PEG_EXPAND(PEG_PICK( \
4432 __VA_ARGS__, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, \
4433 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, \
4434 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \
4435 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, \
4436 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, \
4437 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
4438
// Declares one `peg::Definition` variable named after the argument, sets its
// `name` to the stringized identifier and installs the default AST action.
// The declaration is intentionally not wrapped in a block: the variable must
// be visible in the scope where the macro is used.
#define PEG_DEF_1(rule_id) \
  peg::Definition rule_id; \
  rule_id.name = #rule_id; \
  peg::add_ast_action(rule_id);

// PEG_DEF_<N> handles its first argument with PEG_DEF_1 and forwards the
// remaining N-1 arguments to PEG_DEF_<N-1>.
#define PEG_DEF_2(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_1(__VA_ARGS__))
#define PEG_DEF_3(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_2(__VA_ARGS__))
#define PEG_DEF_4(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_3(__VA_ARGS__))
#define PEG_DEF_5(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_4(__VA_ARGS__))
#define PEG_DEF_6(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_5(__VA_ARGS__))
#define PEG_DEF_7(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_6(__VA_ARGS__))
#define PEG_DEF_8(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_7(__VA_ARGS__))
#define PEG_DEF_9(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_8(__VA_ARGS__))
#define PEG_DEF_10(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_9(__VA_ARGS__))
#define PEG_DEF_11(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_10(__VA_ARGS__))
#define PEG_DEF_12(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_11(__VA_ARGS__))
#define PEG_DEF_13(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_12(__VA_ARGS__))
#define PEG_DEF_14(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_13(__VA_ARGS__))
#define PEG_DEF_15(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_14(__VA_ARGS__))
#define PEG_DEF_16(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_15(__VA_ARGS__))
#define PEG_DEF_17(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_16(__VA_ARGS__))
#define PEG_DEF_18(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_17(__VA_ARGS__))
#define PEG_DEF_19(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_18(__VA_ARGS__))
#define PEG_DEF_20(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_19(__VA_ARGS__))
#define PEG_DEF_21(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_20(__VA_ARGS__))
#define PEG_DEF_22(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_21(__VA_ARGS__))
#define PEG_DEF_23(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_22(__VA_ARGS__))
#define PEG_DEF_24(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_23(__VA_ARGS__))
#define PEG_DEF_25(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_24(__VA_ARGS__))
#define PEG_DEF_26(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_25(__VA_ARGS__))
#define PEG_DEF_27(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_26(__VA_ARGS__))
#define PEG_DEF_28(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_27(__VA_ARGS__))
#define PEG_DEF_29(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_28(__VA_ARGS__))
#define PEG_DEF_30(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_29(__VA_ARGS__))
#define PEG_DEF_31(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_30(__VA_ARGS__))
#define PEG_DEF_32(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_31(__VA_ARGS__))
#define PEG_DEF_33(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_32(__VA_ARGS__))
#define PEG_DEF_34(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_33(__VA_ARGS__))
#define PEG_DEF_35(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_34(__VA_ARGS__))
#define PEG_DEF_36(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_35(__VA_ARGS__))
#define PEG_DEF_37(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_36(__VA_ARGS__))
#define PEG_DEF_38(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_37(__VA_ARGS__))
#define PEG_DEF_39(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_38(__VA_ARGS__))
#define PEG_DEF_40(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_39(__VA_ARGS__))
#define PEG_DEF_41(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_40(__VA_ARGS__))
#define PEG_DEF_42(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_41(__VA_ARGS__))
#define PEG_DEF_43(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_42(__VA_ARGS__))
#define PEG_DEF_44(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_43(__VA_ARGS__))
#define PEG_DEF_45(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_44(__VA_ARGS__))
#define PEG_DEF_46(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_45(__VA_ARGS__))
#define PEG_DEF_47(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_46(__VA_ARGS__))
#define PEG_DEF_48(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_47(__VA_ARGS__))
#define PEG_DEF_49(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_48(__VA_ARGS__))
#define PEG_DEF_50(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_49(__VA_ARGS__))
#define PEG_DEF_51(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_50(__VA_ARGS__))
#define PEG_DEF_52(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_51(__VA_ARGS__))
#define PEG_DEF_53(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_52(__VA_ARGS__))
#define PEG_DEF_54(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_53(__VA_ARGS__))
#define PEG_DEF_55(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_54(__VA_ARGS__))
#define PEG_DEF_56(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_55(__VA_ARGS__))
#define PEG_DEF_57(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_56(__VA_ARGS__))
#define PEG_DEF_58(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_57(__VA_ARGS__))
#define PEG_DEF_59(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_58(__VA_ARGS__))
#define PEG_DEF_60(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_59(__VA_ARGS__))
#define PEG_DEF_61(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_60(__VA_ARGS__))
#define PEG_DEF_62(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_61(__VA_ARGS__))
#define PEG_DEF_63(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_62(__VA_ARGS__))
#define PEG_DEF_64(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_63(__VA_ARGS__))
#define PEG_DEF_65(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_64(__VA_ARGS__))
#define PEG_DEF_66(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_65(__VA_ARGS__))
#define PEG_DEF_67(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_66(__VA_ARGS__))
#define PEG_DEF_68(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_67(__VA_ARGS__))
#define PEG_DEF_69(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_68(__VA_ARGS__))
#define PEG_DEF_70(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_69(__VA_ARGS__))
#define PEG_DEF_71(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_70(__VA_ARGS__))
#define PEG_DEF_72(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_71(__VA_ARGS__))
#define PEG_DEF_73(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_72(__VA_ARGS__))
#define PEG_DEF_74(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_73(__VA_ARGS__))
#define PEG_DEF_75(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_74(__VA_ARGS__))
#define PEG_DEF_76(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_75(__VA_ARGS__))
#define PEG_DEF_77(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_76(__VA_ARGS__))
#define PEG_DEF_78(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_77(__VA_ARGS__))
#define PEG_DEF_79(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_78(__VA_ARGS__))
#define PEG_DEF_80(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_79(__VA_ARGS__))
#define PEG_DEF_81(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_80(__VA_ARGS__))
#define PEG_DEF_82(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_81(__VA_ARGS__))
#define PEG_DEF_83(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_82(__VA_ARGS__))
#define PEG_DEF_84(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_83(__VA_ARGS__))
#define PEG_DEF_85(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_84(__VA_ARGS__))
#define PEG_DEF_86(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_85(__VA_ARGS__))
#define PEG_DEF_87(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_86(__VA_ARGS__))
#define PEG_DEF_88(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_87(__VA_ARGS__))
#define PEG_DEF_89(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_88(__VA_ARGS__))
#define PEG_DEF_90(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_89(__VA_ARGS__))
#define PEG_DEF_91(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_90(__VA_ARGS__))
#define PEG_DEF_92(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_91(__VA_ARGS__))
#define PEG_DEF_93(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_92(__VA_ARGS__))
#define PEG_DEF_94(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_93(__VA_ARGS__))
#define PEG_DEF_95(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_94(__VA_ARGS__))
#define PEG_DEF_96(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_95(__VA_ARGS__))
#define PEG_DEF_97(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_96(__VA_ARGS__))
#define PEG_DEF_98(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_97(__VA_ARGS__))
#define PEG_DEF_99(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_98(__VA_ARGS__))
#define PEG_DEF_100(r, ...) PEG_EXPAND(PEG_DEF_1(r) PEG_DEF_99(__VA_ARGS__))

// Declares one AST-building `peg::Definition` per identifier given (1..100
// identifiers), by dispatching to the PEG_DEF_<count> macro for the number of
// arguments reported by PEG_COUNT.
#define AST_DEFINITIONS(...) \
  PEG_EXPAND(PEG_CONCAT2(PEG_DEF_, PEG_COUNT(__VA_ARGS__))(__VA_ARGS__))
4546
4547/*-----------------------------------------------------------------------------
4548 * parser
4549 *---------------------------------------------------------------------------*/
4550
4551class parser {
4552public:
4553 parser() = default;
4554
4555 parser(const char *s, size_t n, const Rules &rules,
4556 std::string_view start = {}) {
4557 load_grammar(s, n, rules, start);
4558 }
4559
4560 parser(const char *s, size_t n, std::string_view start = {})
4561 : parser(s, n, Rules(), start) {}
4562
4563 parser(std::string_view sv, const Rules &rules, std::string_view start = {})
4564 : parser(sv.data(), sv.size(), rules, start) {}
4565
4566 parser(std::string_view sv, std::string_view start = {})
4567 : parser(sv.data(), sv.size(), Rules(), start) {}
4568
4569#if defined(__cpp_lib_char8_t)
4570 parser(std::u8string_view sv, const Rules &rules, std::string_view start = {})
4571 : parser(reinterpret_cast<const char *>(sv.data()), sv.size(), rules,
4572 start) {}
4573
4574 parser(std::u8string_view sv, std::string_view start = {})
4575 : parser(reinterpret_cast<const char *>(sv.data()), sv.size(), Rules(),
4576 start) {}
4577#endif
4578
4579 operator bool() { return grammar_ != nullptr; }
4580
4581 bool load_grammar(const char *s, size_t n, const Rules &rules,
4582 std::string_view start = {}) {
4583 auto cxt = ParserGenerator::parse(s, n, rules, log_, start);
4584 grammar_ = cxt.grammar;
4585 start_ = cxt.start;
4586 enablePackratParsing_ = cxt.enablePackratParsing;
4587 return grammar_ != nullptr;
4588 }
4589
4590 bool load_grammar(const char *s, size_t n, std::string_view start = {}) {
4591 return load_grammar(s, n, Rules(), start);
4592 }
4593
4594 bool load_grammar(std::string_view sv, const Rules &rules,
4595 std::string_view start = {}) {
4596 return load_grammar(sv.data(), sv.size(), rules, start);
4597 }
4598
4599 bool load_grammar(std::string_view sv, std::string_view start = {}) {
4600 return load_grammar(sv.data(), sv.size(), start);
4601 }
4602
4603 bool parse_n(const char *s, size_t n, const char *path = nullptr) const {
4604 if (grammar_ != nullptr) {
4605 const auto &rule = (*grammar_)[start_];
4606 auto result = rule.parse(s, n, path, log_);
4607 return post_process(s, n, result);
4608 }
4609 return false;
4610 }
4611
4612 bool parse_n(const char *s, size_t n, std::any &dt,
4613 const char *path = nullptr) const {
4614 if (grammar_ != nullptr) {
4615 const auto &rule = (*grammar_)[start_];
4616 auto result = rule.parse(s, n, dt, path, log_);
4617 return post_process(s, n, result);
4618 }
4619 return false;
4620 }
4621
4622 template <typename T>
4623 bool parse_n(const char *s, size_t n, T &val,
4624 const char *path = nullptr) const {
4625 if (grammar_ != nullptr) {
4626 const auto &rule = (*grammar_)[start_];
4627 auto result = rule.parse_and_get_value(s, n, val, path, log_);
4628 return post_process(s, n, result);
4629 }
4630 return false;
4631 }
4632
4633 template <typename T>
4634 bool parse_n(const char *s, size_t n, std::any &dt, T &val,
4635 const char *path = nullptr) const {
4636 if (grammar_ != nullptr) {
4637 const auto &rule = (*grammar_)[start_];
4638 auto result = rule.parse_and_get_value(s, n, dt, val, path, log_);
4639 return post_process(s, n, result);
4640 }
4641 return false;
4642 }
4643
4644 bool parse(std::string_view sv, const char *path = nullptr) const {
4645 return parse_n(sv.data(), sv.size(), path);
4646 }
4647
4648 bool parse(std::string_view sv, std::any &dt,
4649 const char *path = nullptr) const {
4650 return parse_n(sv.data(), sv.size(), dt, path);
4651 }
4652
4653 template <typename T>
4654 bool parse(std::string_view sv, T &val, const char *path = nullptr) const {
4655 return parse_n(sv.data(), sv.size(), val, path);
4656 }
4657
4658 template <typename T>
4659 bool parse(std::string_view sv, std::any &dt, T &val,
4660 const char *path = nullptr) const {
4661 return parse_n(sv.data(), sv.size(), dt, val, path);
4662 }
4663
4664#if defined(__cpp_lib_char8_t)
4665 bool parse(std::u8string_view sv, const char *path = nullptr) const {
4666 return parse_n(reinterpret_cast<const char *>(sv.data()), sv.size(), path);
4667 }
4668
4669 bool parse(std::u8string_view sv, std::any &dt,
4670 const char *path = nullptr) const {
4671 return parse_n(reinterpret_cast<const char *>(sv.data()), sv.size(), dt,
4672 path);
4673 }
4674
4675 template <typename T>
4676 bool parse(std::u8string_view sv, T &val, const char *path = nullptr) const {
4677 return parse_n(reinterpret_cast<const char *>(sv.data()), sv.size(), val,
4678 path);
4679 }
4680
4681 template <typename T>
4682 bool parse(std::u8string_view sv, std::any &dt, T &val,
4683 const char *path = nullptr) const {
4684 return parse_n(reinterpret_cast<const char *>(sv.data()), sv.size(), dt,
4685 val, path);
4686 }
4687#endif
4688
4689 Definition &operator[](const char *s) { return (*grammar_)[s]; }
4690
4691 const Definition &operator[](const char *s) const { return (*grammar_)[s]; }
4692
4693 const Grammar &get_grammar() const { return *grammar_; }
4694
4696 if (grammar_ != nullptr) {
4697 auto &rule = (*grammar_)[start_];
4698 rule.eoi_check = false;
4699 }
4700 }
4701
4703 if (grammar_ != nullptr) {
4704 auto &rule = (*grammar_)[start_];
4705 rule.enablePackratParsing = enablePackratParsing_;
4706 }
4707 }
4708
4709 void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave) {
4710 if (grammar_ != nullptr) {
4711 auto &rule = (*grammar_)[start_];
4712 rule.tracer_enter = tracer_enter;
4713 rule.tracer_leave = tracer_leave;
4714 }
4715 }
4716
4717 void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave,
4718 TracerStartOrEnd tracer_start,
4719 TracerStartOrEnd tracer_end) {
4720 if (grammar_ != nullptr) {
4721 auto &rule = (*grammar_)[start_];
4722 rule.tracer_enter = tracer_enter;
4723 rule.tracer_leave = tracer_leave;
4724 rule.tracer_start = tracer_start;
4725 rule.tracer_end = tracer_end;
4726 }
4727 }
4728
4729 void set_verbose_trace(bool verbose_trace) {
4730 if (grammar_ != nullptr) {
4731 auto &rule = (*grammar_)[start_];
4732 rule.verbose_trace = verbose_trace;
4733 }
4734 }
4735
4736 template <typename T = Ast> parser &enable_ast() {
4737 for (auto &[_, rule] : *grammar_) {
4738 if (!rule.action) { add_ast_action<T>(rule); }
4739 }
4740 return *this;
4741 }
4742
4743 template <typename T>
4744 std::shared_ptr<T> optimize_ast(std::shared_ptr<T> ast,
4745 bool opt_mode = true) const {
4746 return AstOptimizer(opt_mode, get_no_ast_opt_rules()).optimize(ast);
4747 }
4748
4749 void set_logger(Log log) { log_ = log; }
4750
4752 std::function<void(size_t line, size_t col, const std::string &msg)>
4753 log) {
4754 log_ = [log](size_t line, size_t col, const std::string &msg,
4755 const std::string & /*rule*/) { log(line, col, msg); };
4756 }
4757
4758private:
4759 bool post_process(const char *s, size_t n, Definition::Result &r) const {
4760 if (log_ && !r.ret) { r.error_info.output_log(log_, s, n); }
4761 return r.ret && !r.recovered;
4762 }
4763
4764 std::vector<std::string> get_no_ast_opt_rules() const {
4765 std::vector<std::string> rules;
4766 for (auto &[name, rule] : *grammar_) {
4767 if (rule.no_ast_opt) { rules.push_back(name); }
4768 }
4769 return rules;
4770 }
4771
4772 std::shared_ptr<Grammar> grammar_;
4773 std::string start_;
4776};
4777
4778/*-----------------------------------------------------------------------------
4779 * enable_tracing
4780 *---------------------------------------------------------------------------*/
4781
4782inline void enable_tracing(parser &parser, std::ostream &os) {
4784 [&](auto &ope, auto s, auto, auto &, auto &c, auto &, auto &trace_data) {
4785 auto prev_pos = std::any_cast<size_t>(trace_data);
4786 auto pos = static_cast<size_t>(s - c.s);
4787 auto backtrack = (pos < prev_pos ? "*" : "");
4788 std::string indent;
4789 auto level = c.trace_ids.size() - 1;
4790 while (level--) {
4791 indent += "│";
4792 }
4793 std::string name;
4794 {
4795 name = peg::TraceOpeName::get(const_cast<peg::Ope &>(ope));
4796
4797 auto lit = dynamic_cast<const peg::LiteralString *>(&ope);
4798 if (lit) { name += " '" + peg::escape_characters(lit->lit_) + "'"; }
4799 }
4800 os << "E " << pos + 1 << backtrack << "\t" << indent << "┌" << name
4801 << " #" << c.trace_ids.back() << std::endl;
4802 trace_data = static_cast<size_t>(pos);
4803 },
4804 [&](auto &ope, auto s, auto, auto &sv, auto &c, auto &, auto len,
4805 auto &) {
4806 auto pos = static_cast<size_t>(s - c.s);
4807 if (len != static_cast<size_t>(-1)) { pos += len; }
4808 std::string indent;
4809 auto level = c.trace_ids.size() - 1;
4810 while (level--) {
4811 indent += "│";
4812 }
4813 auto ret = len != static_cast<size_t>(-1) ? "└o " : "└x ";
4814 auto name = peg::TraceOpeName::get(const_cast<peg::Ope &>(ope));
4815 std::stringstream choice;
4816 if (sv.choice_count() > 0) {
4817 choice << " " << sv.choice() << "/" << sv.choice_count();
4818 }
4819 std::string token;
4820 if (!sv.tokens.empty()) {
4821 token += ", token '";
4822 token += sv.tokens[0];
4823 token += "'";
4824 }
4825 std::string matched;
4826 if (peg::success(len) &&
4827 peg::TokenChecker::is_token(const_cast<peg::Ope &>(ope))) {
4828 matched = ", match '" + peg::escape_characters(s, len) + "'";
4829 }
4830 os << "L " << pos + 1 << "\t" << indent << ret << name << " #"
4831 << c.trace_ids.back() << choice.str() << token << matched
4832 << std::endl;
4833 },
4834 [&](auto &trace_data) { trace_data = static_cast<size_t>(0); },
4835 [&](auto &) {});
4836}
4837
4838/*-----------------------------------------------------------------------------
4839 * enable_profiling
4840 *---------------------------------------------------------------------------*/
4841
4842inline void enable_profiling(parser &parser, std::ostream &os) {
4843 struct Stats {
4844 struct Item {
4845 std::string name;
4846 size_t success;
4847 size_t fail;
4848 };
4849 std::vector<Item> items;
4850 std::map<std::string, size_t> index;
4851 size_t total = 0;
4852 std::chrono::steady_clock::time_point start;
4853 };
4854
4856 [&](auto &ope, auto, auto, auto &, auto &, auto &, std::any &trace_data) {
4857 if (auto holder = dynamic_cast<const peg::Holder *>(&ope)) {
4858 auto &stats = *std::any_cast<Stats *>(trace_data);
4859
4860 auto &name = holder->name();
4861 if (stats.index.find(name) == stats.index.end()) {
4862 stats.index[name] = stats.index.size();
4863 stats.items.push_back({name, 0, 0});
4864 }
4865 stats.total++;
4866 }
4867 },
4868 [&](auto &ope, auto, auto, auto &, auto &, auto &, auto len,
4869 std::any &trace_data) {
4870 if (auto holder = dynamic_cast<const peg::Holder *>(&ope)) {
4871 auto &stats = *std::any_cast<Stats *>(trace_data);
4872
4873 auto &name = holder->name();
4874 auto index = stats.index[name];
4875 auto &stat = stats.items[index];
4876 if (len != static_cast<size_t>(-1)) {
4877 stat.success++;
4878 } else {
4879 stat.fail++;
4880 }
4881
4882 if (index == 0) {
4883 auto end = std::chrono::steady_clock::now();
4884 auto nano = std::chrono::duration_cast<std::chrono::microseconds>(
4885 end - stats.start)
4886 .count();
4887 auto sec = nano / 1000000.0;
4888 os << "duration: " << sec << "s (" << nano << "µs)" << std::endl
4889 << std::endl;
4890
4891 char buff[BUFSIZ];
4892 size_t total_success = 0;
4893 size_t total_fail = 0;
4894 for (auto &[name_, success, fail] : stats.items) {
4895 total_success += success;
4896 total_fail += fail;
4897 }
4898
4899 os << " id total % success fail "
4900 "definition"
4901 << std::endl;
4902
4903 auto grand_total = total_success + total_fail;
4904 snprintf(buff, BUFSIZ, "%4s %10zu %5s %10zu %10zu %s", "",
4905 grand_total, "", total_success, total_fail,
4906 "Total counters");
4907 os << buff << std::endl;
4908
4909 snprintf(buff, BUFSIZ, "%4s %10s %5s %10.2f %10.2f %s", "", "",
4910 "", total_success * 100.0 / grand_total,
4911 total_fail * 100.0 / grand_total, "% success/fail");
4912 os << buff << std::endl << std::endl;
4913 ;
4914
4915 size_t id = 0;
4916 for (auto &[name_, success, fail] : stats.items) {
4917 auto total = success + fail;
4918 auto ratio = total * 100.0 / stats.total;
4919 snprintf(buff, BUFSIZ, "%4zu %10zu %5.2f %10zu %10zu %s", id,
4920 total, ratio, success, fail, name.c_str());
4921 os << buff << std::endl;
4922 id++;
4923 }
4924 }
4925 }
4926 },
4927 [&](auto &trace_data) {
4928 auto stats = new Stats{};
4929 stats->start = std::chrono::steady_clock::now();
4930 trace_data = stats;
4931 },
4932 [&](auto &trace_data) {
4933 auto stats = std::any_cast<Stats *>(trace_data);
4934 delete stats;
4935 });
4936}
4937} // namespace peg
Definition peglib.h:638
void operator=(F fn)
Definition peglib.h:643
Action()=default
Action(Action &&rhs)=default
Fty make_adaptor(F fn)
Definition peglib.h:655
std::function< std::any(SemanticValues &vs, std::any &dt)> Fty
Definition peglib.h:653
Fty fn_
Definition peglib.h:663
Action(F fn)
Definition peglib.h:642
std::any operator()(SemanticValues &vs, std::any &dt) const
Definition peglib.h:648
Action & operator=(const Action &rhs)=default
Definition peglib.h:1138
size_t parse_core(const char *s, size_t n, SemanticValues &, Context &c, std::any &dt) const override
Definition peglib.h:1142
AndPredicate(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1140
std::shared_ptr< Ope > ope_
Definition peglib.h:1158
void accept(Visitor &v) override
Definition peglib.h:3081
Definition peglib.h:1310
size_t parse_core(const char *s, size_t n, SemanticValues &, Context &c, std::any &) const override
Definition peglib.h:1312
void accept(Visitor &v) override
Definition peglib.h:3087
Definition peglib.h:1488
std::string name_
Definition peglib.h:1499
BackReference(const std::string &name)
Definition peglib.h:1492
BackReference(std::string &&name)
Definition peglib.h:1490
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:2921
void accept(Visitor &v) override
Definition peglib.h:3097
Definition peglib.h:1325
void accept(Visitor &v) override
Definition peglib.h:3088
CaptureScope(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1327
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1329
std::shared_ptr< Ope > ope_
Definition peglib.h:1338
Definition peglib.h:1341
MatchAction match_action_
Definition peglib.h:1358
std::function< void(const char *s, size_t n, Context &c)> MatchAction
Definition peglib.h:1343
std::shared_ptr< Ope > ope_
Definition peglib.h:1357
void accept(Visitor &v) override
Definition peglib.h:3089
Capture(const std::shared_ptr< Ope > &ope, MatchAction ma)
Definition peglib.h:1345
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1348
Definition peglib.h:1217
bool negated_
Definition peglib.h:1287
bool ignore_case_
Definition peglib.h:1288
std::vector< std::pair< char32_t, char32_t > > ranges_
Definition peglib.h:1286
CharacterClass(const std::string &s, bool negated, bool ignore_case)
Definition peglib.h:1219
size_t parse_core(const char *s, size_t n, SemanticValues &, Context &c, std::any &) const override
Definition peglib.h:1244
bool in_range(const std::pair< char32_t, char32_t > &range, char32_t cp) const
Definition peglib.h:1276
CharacterClass(const std::vector< std::pair< char32_t, char32_t > > &ranges, bool negated, bool ignore_case)
Definition peglib.h:1238
void accept(Visitor &v) override
Definition peglib.h:3085
Definition peglib.h:1291
void accept(Visitor &v) override
Definition peglib.h:3086
size_t parse_core(const char *s, size_t n, SemanticValues &, Context &c, std::any &) const override
Definition peglib.h:1295
char ch_
Definition peglib.h:1306
Character(char ch)
Definition peglib.h:1293
Definition peglib.h:763
size_t in_token_boundary_count
Definition peglib.h:778
std::vector< Definition * > rule_stack
Definition peglib.h:775
void trace_leave(const Ope &ope, const char *a_s, size_t n, const SemanticValues &vs, std::any &dt, size_t len)
Definition peglib.h:2661
std::once_flag source_line_index_init_
Definition peglib.h:968
std::shared_ptr< Ope > wordOpe
Definition peglib.h:783
void trace_enter(const Ope &ope, const char *a_s, size_t n, const SemanticValues &vs, std::any &dt)
Definition peglib.h:2655
TracerEnter tracer_enter
Definition peglib.h:798
std::vector< bool > cut_stack
Definition peglib.h:788
const std::vector< std::shared_ptr< Ope > > & top_args() const
Definition peglib.h:907
std::vector< bool > cache_success
Definition peglib.h:793
Context operator=(const Context &)=delete
std::shared_ptr< Ope > whitespaceOpe
Definition peglib.h:780
std::map< std::pair< size_t, size_t >, std::tuple< size_t, std::any > > cache_values
Definition peglib.h:796
const size_t def_count
Definition peglib.h:790
Context(Context &&)=delete
Context(const char *path, const char *s, size_t l, size_t def_count, std::shared_ptr< Ope > whitespaceOpe, std::shared_ptr< Ope > wordOpe, bool enablePackratParsing, TracerEnter tracer_enter, TracerLeave tracer_leave, std::any trace_data, bool verbose_trace, Log log)
Definition peglib.h:805
const bool verbose_trace
Definition peglib.h:801
Log log
Definition peglib.h:803
void push_capture_scope()
Definition peglib.h:912
const char * s
Definition peglib.h:766
size_t next_trace_id
Definition peglib.h:965
void shift_capture_values()
Definition peglib.h:926
SemanticValues & push_semantic_values_scope()
Definition peglib.h:876
void pop_semantic_values_scope()
Definition peglib.h:898
bool is_traceable(const Ope &ope) const
Definition peglib.h:2668
const char * path
Definition peglib.h:765
std::vector< std::map< std::string_view, std::string > > capture_scope_stack
Definition peglib.h:785
std::any trace_data
Definition peglib.h:800
size_t capture_scope_stack_size
Definition peglib.h:786
TracerLeave tracer_leave
Definition peglib.h:799
std::vector< size_t > trace_ids
Definition peglib.h:966
size_t value_stack_size
Definition peglib.h:773
const size_t l
Definition peglib.h:767
void push_args(std::vector< std::shared_ptr< Ope > > &&args)
Definition peglib.h:901
void pop_args()
Definition peglib.h:905
ErrorInfo error_info
Definition peglib.h:769
std::vector< bool > cache_registered
Definition peglib.h:792
bool in_whitespace
Definition peglib.h:781
std::pair< size_t, size_t > line_info(const char *cur) const
Definition peglib.h:946
void pop_capture_scope()
Definition peglib.h:924
void pop()
Definition peglib.h:870
std::vector< size_t > source_line_index
Definition peglib.h:969
bool recovered
Definition peglib.h:770
SemanticValues & push()
Definition peglib.h:865
std::vector< std::shared_ptr< SemanticValues > > value_stack
Definition peglib.h:772
~Context()
Definition peglib.h:821
void packrat(const char *a_s, size_t def_id, size_t &len, std::any &val, T fn)
Definition peglib.h:834
const bool enablePackratParsing
Definition peglib.h:791
Context(const Context &)=delete
void set_error_pos(const char *a_s, const char *literal=nullptr)
Definition peglib.h:2621
bool ignore_trace_state
Definition peglib.h:967
std::vector< std::vector< std::shared_ptr< Ope > > > args_stack
Definition peglib.h:776
Definition peglib.h:1542
void accept(Visitor &v) override
Definition peglib.h:3100
size_t parse_core(const char *, size_t, SemanticValues &, Context &c, std::any &) const override
Definition peglib.h:1544
Definition peglib.h:2206
std::shared_ptr< Ope > wordOpe
Definition peglib.h:2394
bool is_macro
Definition peglib.h:2396
bool ignoreSemanticValue
Definition peglib.h:2392
bool eoi_check
Definition peglib.h:2409
Definition & operator<=(const std::shared_ptr< Ope > &ope)
Definition peglib.h:2230
std::function< void(const Context &c, const char *s, size_t n, size_t matchlen, std::any &value, std::any &dt)> leave
Definition peglib.h:2391
Definition()
Definition peglib.h:2215
Result parse(const char *s, std::any &dt, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2254
bool disable_action
Definition peglib.h:2398
TracerEnter tracer_enter
Definition peglib.h:2400
std::vector< std::string > params
Definition peglib.h:2397
Definition & operator~()
Definition peglib.h:2361
Result parse(const char *s, size_t n, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2235
bool enablePackratParsing
Definition peglib.h:2395
std::pair< size_t, size_t > line_
Definition peglib.h:2379
friend class ParserGenerator
Definition peglib.h:2413
std::once_flag is_token_init_
Definition peglib.h:2476
TracerStartOrEnd tracer_end
Definition peglib.h:2404
std::once_flag definition_ids_init_
Definition peglib.h:2479
bool no_ast_opt
Definition peglib.h:2407
Definition & operator,(T fn)
Definition peglib.h:2356
friend class Reference
Definition peglib.h:2412
void operator=(Action a)
Definition peglib.h:2354
Result parse_and_get_value(const char *s, size_t n, std::any &dt, T &val, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2281
Result parse(const char *s, size_t n, std::any &dt, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2248
std::unordered_map< void *, size_t > definition_ids_
Definition peglib.h:2480
Result parse_and_get_value(const char *s, size_t n, T &val, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2261
std::once_flag assign_id_to_definition_init_
Definition peglib.h:2478
TracerLeave tracer_leave
Definition peglib.h:2401
size_t id
Definition peglib.h:2385
std::shared_ptr< Ope > whitespaceOpe
Definition peglib.h:2393
bool is_token() const
Definition peglib.h:2370
Result parse_and_get_value(const char *s, T &val, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2274
Definition & operator=(Definition &&rhs)
bool is_token_
Definition peglib.h:2477
Definition(const Definition &rhs)
Definition peglib.h:2217
Result parse_core(const char *s, size_t n, SemanticValues &vs, std::any &dt, const char *path, Log log) const
Definition peglib.h:2428
std::function< bool(const SemanticValues &vs, const std::any &dt, std::string &msg)> predicate
Definition peglib.h:2383
Definition & operator=(const Definition &rhs)
void accept(Ope::Visitor &v)
Definition peglib.h:2366
bool verbose_trace
Definition peglib.h:2402
std::string name
Definition peglib.h:2377
std::shared_ptr< Holder > holder_
Definition peglib.h:2475
std::string error_message
Definition peglib.h:2406
std::function< void(const Context &c, const char *s, size_t n, std::any &dt)> enter
Definition peglib.h:2388
Result parse_and_get_value(const char *s, std::any &dt, T &val, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2293
TracerStartOrEnd tracer_start
Definition peglib.h:2403
Action action
Definition peglib.h:2386
Result parse(const char *s, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2242
void initialize_definition_ids() const
Definition peglib.h:2418
std::shared_ptr< Ope > get_core_operator() const
Definition peglib.h:2368
Definition(const std::shared_ptr< Ope > &ope)
Definition peglib.h:2221
const char * s_
Definition peglib.h:2378
Definition peglib.h:1183
Dictionary(const std::vector< std::string > &v, bool ignore_case)
Definition peglib.h:1185
void accept(Visitor &v) override
Definition peglib.h:3083
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:2687
Trie trie_
Definition peglib.h:1193
Definition peglib.h:1422
Holder(Definition *outer)
Definition peglib.h:1424
const std::string & name() const
Definition peglib.h:2869
Definition * outer_
Definition peglib.h:1437
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:2775
const std::string & trace_name() const
Definition peglib.h:2871
void accept(Visitor &v) override
Definition peglib.h:3094
std::string trace_name_
Definition peglib.h:1439
friend class Definition
Definition peglib.h:1441
std::any reduce(SemanticValues &vs, std::any &dt) const
Definition peglib.h:2859
std::once_flag trace_name_init_
Definition peglib.h:1438
std::shared_ptr< Ope > ope_
Definition peglib.h:1436
Definition peglib.h:1373
void accept(Visitor &v) override
Definition peglib.h:3091
Ignore(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1375
std::shared_ptr< Ope > ope_
Definition peglib.h:1386
size_t parse_core(const char *s, size_t n, SemanticValues &, Context &c, std::any &dt) const override
Definition peglib.h:1377
Definition peglib.h:1197
void accept(Visitor &v) override
Definition peglib.h:3084
bool ignore_case_
Definition peglib.h:1211
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:2739
std::string lit_
Definition peglib.h:1210
LiteralString(std::string &&s, bool ignore_case)
Definition peglib.h:1199
std::once_flag init_is_word_
Definition peglib.h:1212
bool is_word_
Definition peglib.h:1213
LiteralString(const std::string &s, bool ignore_case)
Definition peglib.h:1202
Definition peglib.h:1161
std::shared_ptr< Ope > ope_
Definition peglib.h:1180
NotPredicate(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1163
void accept(Visitor &v) override
Definition peglib.h:3082
size_t parse_core(const char *s, size_t n, SemanticValues &, Context &c, std::any &dt) const override
Definition peglib.h:1165
Definition peglib.h:975
virtual ~Ope()=default
size_t parse(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const
Definition peglib.h:2676
virtual size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const =0
virtual void accept(Visitor &v)=0
static ParserContext parse(const char *s, size_t n, const Rules &rules, Log log, std::string_view start)
Definition peglib.h:3307
bool apply_precedence_instruction(Definition &rule, const PrecedenceClimbing::BinOpeInfo &info, const char *s, Log log)
Definition peglib.h:3961
void make_grammar()
Definition peglib.h:3364
Grammar g
Definition peglib.h:4249
ParserGenerator()
Definition peglib.h:3334
static bool parse_test(const char *d, const char *s)
Definition peglib.h:3313
bool detect_infiniteLoop(const Data &data, Definition &rule, const Log &log, const char *s) const
Definition peglib.h:4232
ParserContext perform_core(const char *s, size_t n, const Rules &rules, Log log, std::string requested_start)
Definition peglib.h:4002
static ParserGenerator & get_instance()
Definition peglib.h:3329
void setup_actions()
Definition peglib.h:3549
Definition peglib.h:1502
std::shared_ptr< Ope > atom_
Definition peglib.h:1518
std::map< std::string_view, std::pair< size_t, char > > BinOpeInfo
Definition peglib.h:1504
PrecedenceClimbing(const std::shared_ptr< Ope > &atom, const std::shared_ptr< Ope > &binop, const BinOpeInfo &info, const Definition &rule)
Definition peglib.h:1506
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1511
const Definition & rule_
Definition peglib.h:1521
std::shared_ptr< Ope > binop_
Definition peglib.h:1519
Definition & get_reference_for_binop(Context &c) const
Definition peglib.h:2942
BinOpeInfo info_
Definition peglib.h:1520
void accept(Visitor &v) override
Definition peglib.h:3098
size_t parse_expression(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt, size_t min_prec) const
Definition peglib.h:2954
Definition peglib.h:1014
PrioritizedChoice(bool for_label, const Args &...args)
Definition peglib.h:1017
size_t size() const
Definition peglib.h:1064
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1024
void accept(Visitor &v) override
Definition peglib.h:3079
bool for_label_
Definition peglib.h:1067
std::vector< std::shared_ptr< Ope > > opes_
Definition peglib.h:1066
PrioritizedChoice(const std::vector< std::shared_ptr< Ope > > &opes)
Definition peglib.h:1020
PrioritizedChoice(std::vector< std::shared_ptr< Ope > > &&opes)
Definition peglib.h:1022
Definition peglib.h:1530
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:3029
void accept(Visitor &v) override
Definition peglib.h:3099
Recovery(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1532
std::shared_ptr< Ope > ope_
Definition peglib.h:1539
Definition peglib.h:1446
const std::string name_
Definition peglib.h:1461
Definition * rule_
Definition peglib.h:1467
std::shared_ptr< Ope > get_core_operator() const
Definition peglib.h:2917
const char * s_
Definition peglib.h:1462
void accept(Visitor &v) override
Definition peglib.h:3095
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:2877
const bool is_macro_
Definition peglib.h:1464
const std::vector< std::shared_ptr< Ope > > args_
Definition peglib.h:1465
size_t iarg_
Definition peglib.h:1468
const Grammar & grammar_
Definition peglib.h:1460
Reference(const Grammar &grammar, const std::string &name, const char *s, bool is_macro, const std::vector< std::shared_ptr< Ope > > &args)
Definition peglib.h:1448
Definition peglib.h:1070
static std::shared_ptr< Repetition > zom(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1119
static std::shared_ptr< Repetition > opt(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1129
bool is_zom() const
Definition peglib.h:1115
Repetition(const std::shared_ptr< Ope > &ope, size_t min, size_t max)
Definition peglib.h:1072
std::shared_ptr< Ope > ope_
Definition peglib.h:1133
size_t max_
Definition peglib.h:1135
void accept(Visitor &v) override
Definition peglib.h:3080
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1075
size_t min_
Definition peglib.h:1134
static std::shared_ptr< Repetition > oom(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1124
Definition peglib.h:987
Sequence(std::vector< std::shared_ptr< Ope > > &&opes)
Definition peglib.h:993
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:995
std::vector< std::shared_ptr< Ope > > opes_
Definition peglib.h:1011
void accept(Visitor &v) override
Definition peglib.h:3078
Sequence(const Args &...args)
Definition peglib.h:990
Sequence(const std::vector< std::shared_ptr< Ope > > &opes)
Definition peglib.h:992
Definition peglib.h:1361
void accept(Visitor &v) override
Definition peglib.h:3090
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:2746
std::shared_ptr< Ope > ope_
Definition peglib.h:1370
TokenBoundary(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1363
Definition peglib.h:378
std::map< std::string, Info, std::less<> > dic_
Definition peglib.h:446
size_t match(const char *text, size_t text_len, size_t &id) const
Definition peglib.h:401
Trie(const std::vector< std::string > &items, bool ignore_case)
Definition peglib.h:380
std::string to_lower(std::string s) const
Definition peglib.h:431
size_t size() const
Definition peglib.h:428
bool ignore_case_
Definition peglib.h:448
Definition peglib.h:1392
std::function< size_t(const char *s, size_t n, SemanticValues &vs, std::any &dt)> fn_
Definition peglib.h:1403
void accept(Visitor &v) override
Definition peglib.h:3092
User(Parser fn)
Definition peglib.h:1394
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &, std::any &dt) const override
Definition peglib.h:1395
Definition peglib.h:1406
WeakHolder(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1408
void accept(Visitor &v) override
Definition peglib.h:3093
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1410
std::weak_ptr< Ope > weak_
Definition peglib.h:1419
Definition peglib.h:1471
std::shared_ptr< Ope > ope_
Definition peglib.h:1485
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1475
void accept(Visitor &v) override
Definition peglib.h:3096
Whitespace(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1473
Definition peglib.h:4551
parser(const char *s, size_t n, std::string_view start={})
Definition peglib.h:4560
Log log_
Definition peglib.h:4775
bool enablePackratParsing_
Definition peglib.h:4774
parser(const char *s, size_t n, const Rules &rules, std::string_view start={})
Definition peglib.h:4555
bool parse_n(const char *s, size_t n, std::any &dt, T &val, const char *path=nullptr) const
Definition peglib.h:4634
std::string start_
Definition peglib.h:4773
const Grammar & get_grammar() const
Definition peglib.h:4693
std::shared_ptr< Grammar > grammar_
Definition peglib.h:4772
bool parse_n(const char *s, size_t n, std::any &dt, const char *path=nullptr) const
Definition peglib.h:4612
void set_logger(Log log)
Definition peglib.h:4749
bool load_grammar(std::string_view sv, std::string_view start={})
Definition peglib.h:4599
void enable_packrat_parsing()
Definition peglib.h:4702
parser & enable_ast()
Definition peglib.h:4736
parser(std::string_view sv, const Rules &rules, std::string_view start={})
Definition peglib.h:4563
void disable_eoi_check()
Definition peglib.h:4695
bool load_grammar(const char *s, size_t n, std::string_view start={})
Definition peglib.h:4590
std::shared_ptr< T > optimize_ast(std::shared_ptr< T > ast, bool opt_mode=true) const
Definition peglib.h:4744
void set_verbose_trace(bool verbose_trace)
Definition peglib.h:4729
parser()=default
const Definition & operator[](const char *s) const
Definition peglib.h:4691
bool parse_n(const char *s, size_t n, T &val, const char *path=nullptr) const
Definition peglib.h:4623
bool parse(std::string_view sv, std::any &dt, const char *path=nullptr) const
Definition peglib.h:4648
parser(std::string_view sv, std::string_view start={})
Definition peglib.h:4566
void set_logger(std::function< void(size_t line, size_t col, const std::string &msg)> log)
Definition peglib.h:4751
bool load_grammar(const char *s, size_t n, const Rules &rules, std::string_view start={})
Definition peglib.h:4581
void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave)
Definition peglib.h:4709
bool parse(std::string_view sv, std::any &dt, T &val, const char *path=nullptr) const
Definition peglib.h:4659
bool post_process(const char *s, size_t n, Definition::Result &r) const
Definition peglib.h:4759
std::vector< std::string > get_no_ast_opt_rules() const
Definition peglib.h:4764
bool load_grammar(std::string_view sv, const Rules &rules, std::string_view start={})
Definition peglib.h:4594
bool parse(std::string_view sv, T &val, const char *path=nullptr) const
Definition peglib.h:4654
Definition & operator[](const char *s)
Definition peglib.h:4689
bool parse_n(const char *s, size_t n, const char *path=nullptr) const
Definition peglib.h:4603
bool parse(std::string_view sv, const char *path=nullptr) const
Definition peglib.h:4644
void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave, TracerStartOrEnd tracer_start, TracerStartOrEnd tracer_end)
Definition peglib.h:4717
Definition peglib.h:490
Definition filter_string.h:27
std::string escape_characters(const char *s, size_t n)
Definition peglib.h:213
static const char * WORD_DEFINITION_NAME
Definition peglib.h:2200
const char * u8(const T *s)
Definition peglib.h:205
std::shared_ptr< Ope > ref(const Grammar &grammar, const std::string &name, const char *s, bool is_macro, const std::vector< std::shared_ptr< Ope > > &args)
Definition peglib.h:1658
size_t encode_codepoint(char32_t cp, char *buff)
Definition peglib.h:106
std::shared_ptr< Ope > cut()
Definition peglib.h:1683
std::shared_ptr< Ope > tok(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1643
std::shared_ptr< Ope > csc(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1634
bool decode_codepoint(const char *s8, size_t l, size_t &bytes, char32_t &cp)
Definition peglib.h:143
size_t codepoint_count(const char *s8, size_t l)
Definition peglib.h:98
std::shared_ptr< Ope > apd(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1587
std::pair< int, size_t > parse_octal_number(const char *s, size_t n, size_t i)
Definition peglib.h:270
std::u32string decode(const char *s8, size_t l)
Definition peglib.h:192
std::pair< size_t, size_t > line_info(const char *start, const char *cur)
Definition peglib.h:458
std::shared_ptr< Ope > rec(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1679
std::function< size_t(const char *s, size_t n, SemanticValues &vs, std::any &dt)> Parser
Definition peglib.h:1389
std::shared_ptr< Ope > cls(const std::string &s)
Definition peglib.h:1608
std::shared_ptr< Ope > wsp(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1664
bool fail(size_t len)
Definition peglib.h:671
T token_to_number_(std::string_view sv)
Definition peglib.h:358
std::shared_ptr< Ope > pre(const std::shared_ptr< Ope > &atom, const std::shared_ptr< Ope > &binop, const PrecedenceClimbing::BinOpeInfo &info, const Definition &rule)
Definition peglib.h:1672
bool is_digit(char c, int &v)
Definition peglib.h:251
std::shared_ptr< Ope > dic(const std::vector< std::string > &v, bool ignore_case)
Definition peglib.h:1595
std::shared_ptr< Ope > liti(std::string &&s)
Definition peglib.h:1604
std::shared_ptr< Ope > ign(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1647
std::unordered_map< std::string, Definition > Grammar
Definition peglib.h:1444
std::function< void( const Ope &ope, const char *s, size_t n, const SemanticValues &vs, const Context &c, const std::any &dt, size_t, std::any &trace_data)> TracerLeave
Definition peglib.h:757
size_t codepoint_length(const char *s8, size_t l)
Definition peglib.h:82
size_t parse_literal(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt, const std::string &lit, std::once_flag &init_is_word, bool &is_word, bool ignore_case)
Definition peglib.h:2487
static const char * WHITESPACE_DEFINITION_NAME
Definition peglib.h:2199
std::string resolve_escape_sequence(const char *s, size_t n)
Definition peglib.h:281
std::shared_ptr< Ope > lit(std::string &&s)
Definition peglib.h:1600
std::shared_ptr< Ope > chr(char dt)
Definition peglib.h:1628
std::shared_ptr< Ope > cho4label_(Args &&...args)
Definition peglib.h:1565
std::shared_ptr< Ope > dot()
Definition peglib.h:1632
std::function< void( const Ope &name, const char *s, size_t n, const SemanticValues &vs, const Context &c, const std::any &dt, std::any &trace_data)> TracerEnter
Definition peglib.h:753
std::unordered_map< std::string, std::shared_ptr< Ope > > Rules
Definition peglib.h:3297
void enable_profiling(parser &parser, std::ostream &os)
Definition peglib.h:4842
bool is_hex(char c, int &v)
Definition peglib.h:237
std::string ast_to_s(const std::shared_ptr< T > &ptr, std::function< std::string(const T &ast, int level)> fn=nullptr)
Definition peglib.h:4345
std::function< void(size_t line, size_t col, const std::string &msg, const std::string &rule)> Log
Definition peglib.h:676
std::shared_ptr< Ope > opt(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1578
std::function< void(std::any &trace_data)> TracerStartOrEnd
Definition peglib.h:761
std::shared_ptr< Ope > oom(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1574
std::shared_ptr< Ope > cho(Args &&...args)
Definition peglib.h:1560
std::shared_ptr< Ope > rep(const std::shared_ptr< Ope > &ope, size_t min, size_t max)
Definition peglib.h:1582
constexpr unsigned int str2tag_core(const char *s, size_t l, unsigned int h)
Definition peglib.h:479
std::shared_ptr< Ope > npd(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1591
void ast_to_s_core(const std::shared_ptr< T > &ptr, std::string &s, int level, std::function< std::string(const T &ast, int level)> fn)
Definition peglib.h:4319
constexpr unsigned int str2tag(std::string_view sv)
Definition peglib.h:486
std::shared_ptr< Ope > seq(Args &&...args)
Definition peglib.h:1556
bool success(size_t len)
Definition peglib.h:669
std::shared_ptr< Ope > ncls(const std::string &s)
Definition peglib.h:1618
std::pair< int, size_t > parse_hex_number(const char *s, size_t n, size_t i)
Definition peglib.h:259
std::shared_ptr< Ope > cap(const std::shared_ptr< Ope > &ope, Capture::MatchAction ma)
Definition peglib.h:1638
void add_ast_action(Definition &rule)
Definition peglib.h:4392
AstBase< EmptyType > Ast
Definition filter_string.h:30
static const char * RECOVER_DEFINITION_NAME
Definition peglib.h:2201
std::any call(F fn, Args &&...args)
Definition peglib.h:613
std::shared_ptr< Ope > zom(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1570
std::shared_ptr< Ope > bkr(std::string &&name)
Definition peglib.h:1668
std::shared_ptr< Ope > usr(std::function< size_t(const char *s, size_t n, SemanticValues &vs, std::any &dt)> fn)
Definition peglib.h:1652
void enable_tracing(parser &parser, std::ostream &os)
Definition peglib.h:4782
#define CPPPEGLIB_HEURISTIC_ERROR_TOKEN_MAX_CHAR_COUNT
Definition peglib.h:15
Definition clipboard_testing.h:11
Definition peglib.h:1752
void visit(Recovery &ope) override
Definition peglib.h:1777
void visit(Ignore &ope) override
Definition peglib.h:1771
void visit(Capture &ope) override
Definition peglib.h:1769
void visit(Repetition &ope) override
Definition peglib.h:1765
void visit(WeakHolder &ope) override
Definition peglib.h:1772
void visit(TokenBoundary &ope) override
Definition peglib.h:1770
void visit(NotPredicate &ope) override
Definition peglib.h:1767
void visit(PrioritizedChoice &ope) override
Definition peglib.h:1760
void visit(AndPredicate &ope) override
Definition peglib.h:1766
std::unordered_map< void *, size_t > ids
Definition peglib.h:1779
void visit(Sequence &ope) override
Definition peglib.h:1755
void visit(CaptureScope &ope) override
Definition peglib.h:1768
void visit(Whitespace &ope) override
Definition peglib.h:1775
Definition peglib.h:4256
const size_t column
Definition peglib.h:4289
AstBase(const AstBase &ast, const char *original_name, size_t position=0, size_t length=0, size_t original_choice_count=0, size_t original_choice=0)
Definition peglib.h:4276
std::weak_ptr< AstBase< EmptyType > > parent
Definition peglib.h:4306
AstBase(const char *path, size_t line, size_t column, const char *name, const std::string_view &token, size_t position=0, size_t length=0, size_t choice_count=0, size_t choice=0)
Definition peglib.h:4267
const std::string name
Definition peglib.h:4291
T token_to_number() const
Definition peglib.h:4313
const bool is_token
Definition peglib.h:4302
const size_t line
Definition peglib.h:4288
size_t length
Definition peglib.h:4293
const unsigned int original_tag
Definition peglib.h:4300
const size_t choice
Definition peglib.h:4295
size_t position
Definition peglib.h:4292
const size_t original_choice_count
Definition peglib.h:4297
const std::string_view token
Definition peglib.h:4303
std::vector< std::shared_ptr< AstBase< EmptyType > > > nodes
Definition peglib.h:4305
const size_t choice_count
Definition peglib.h:4294
const size_t original_choice
Definition peglib.h:4298
const std::string path
Definition peglib.h:4287
std::string token_to_string() const
Definition peglib.h:4308
AstBase(const char *path, size_t line, size_t column, const char *name, const std::vector< std::shared_ptr< AstBase > > &nodes, size_t position=0, size_t length=0, size_t choice_count=0, size_t choice=0)
Definition peglib.h:4257
const unsigned int tag
Definition peglib.h:4299
const std::string original_name
Definition peglib.h:4296
Definition peglib.h:4352
const bool mode_
Definition peglib.h:4385
const std::vector< std::string > rules_
Definition peglib.h:4386
std::shared_ptr< T > optimize(std::shared_ptr< T > original, std::shared_ptr< T > parent=nullptr)
Definition peglib.h:4357
AstOptimizer(bool mode, const std::vector< std::string > &rules={})
Definition peglib.h:4353
Definition peglib.h:2208
bool ret
Definition peglib.h:2209
size_t len
Definition peglib.h:2211
ErrorInfo error_info
Definition peglib.h:2212
bool recovered
Definition peglib.h:2210
Definition peglib.h:1976
void visit(Repetition &ope) override
Definition peglib.h:2002
void visit(PrioritizedChoice &ope) override
Definition peglib.h:1996
void visit(Capture &ope) override
Definition peglib.h:2018
void visit(Whitespace &ope) override
Definition peglib.h:2024
void visit(TokenBoundary &ope) override
Definition peglib.h:2019
DetectInfiniteLoop(std::vector< std::pair< const char *, std::string > > &refs, std::unordered_map< std::string, bool > &has_error_cache)
Definition peglib.h:1986
void visit(Holder &ope) override
Definition peglib.h:2022
void visit(Recovery &ope) override
Definition peglib.h:2026
void visit(NotPredicate &ope) override
Definition peglib.h:2016
std::unordered_map< std::string, bool > & has_error_cache_
Definition peglib.h:2034
void visit(WeakHolder &ope) override
Definition peglib.h:2021
void visit(Sequence &ope) override
Definition peglib.h:1990
bool has_error
Definition peglib.h:2028
void visit(Ignore &ope) override
Definition peglib.h:2020
void visit(AndPredicate &ope) override
Definition peglib.h:2015
std::vector< std::pair< const char *, std::string > > & refs_
Definition peglib.h:2033
void visit(CaptureScope &ope) override
Definition peglib.h:2017
const char * error_s
Definition peglib.h:2029
void visit(PrecedenceClimbing &ope) override
Definition peglib.h:2025
DetectInfiniteLoop(const char *s, const std::string &name, std::vector< std::pair< const char *, std::string > > &refs, std::unordered_map< std::string, bool > &has_error_cache)
Definition peglib.h:1979
std::string error_name
Definition peglib.h:2030
Definition peglib.h:1862
void visit(TokenBoundary &ope) override
Definition peglib.h:1906
bool done_
Definition peglib.h:1923
void visit(PrecedenceClimbing &ope) override
Definition peglib.h:1914
void visit(AnyCharacter &) override
Definition peglib.h:1903
void visit(Sequence &ope) override
Definition peglib.h:1867
void visit(AndPredicate &ope) override
Definition peglib.h:1891
void visit(PrioritizedChoice &ope) override
Definition peglib.h:1878
void visit(BackReference &) override
Definition peglib.h:1913
const char * error_s
Definition peglib.h:1918
void visit(Ignore &ope) override
Definition peglib.h:1907
void visit(Repetition &ope) override
Definition peglib.h:1887
void visit(WeakHolder &ope) override
Definition peglib.h:1909
std::unordered_set< std::string > refs_
Definition peglib.h:1922
void visit(Cut &) override
Definition peglib.h:1916
void visit(CaptureScope &ope) override
Definition peglib.h:1904
void visit(Character &) override
Definition peglib.h:1902
std::string name_
Definition peglib.h:1921
void visit(Recovery &ope) override
Definition peglib.h:1915
void visit(LiteralString &ope) override
Definition peglib.h:1900
void visit(CharacterClass &) override
Definition peglib.h:1901
DetectLeftRecursion(const std::string &name)
Definition peglib.h:1865
void visit(Whitespace &ope) override
Definition peglib.h:1912
void visit(Dictionary &) override
Definition peglib.h:1899
void visit(NotPredicate &ope) override
Definition peglib.h:1895
void visit(Capture &ope) override
Definition peglib.h:1905
void visit(Holder &ope) override
Definition peglib.h:1910
void visit(User &) override
Definition peglib.h:1908
Definition peglib.h:4389
Definition peglib.h:684
std::vector< std::pair< const char *, const Definition * > > expected_tokens
Definition peglib.h:686
std::string replace_all(std::string str, const std::string &from, const std::string &to) const
Definition peglib.h:737
void clear()
Definition peglib.h:693
const char * message_pos
Definition peglib.h:687
int cast_char(char c) const
Definition peglib.h:710
std::string heuristic_error_token(const char *s, size_t n, const char *pos) const
Definition peglib.h:712
const char * last_output_pos
Definition peglib.h:690
void output_log(const Log &log, const char *s, size_t n)
Definition peglib.h:2554
bool keep_previous_token
Definition peglib.h:691
void add(const char *error_literal, const Definition *error_rule)
Definition peglib.h:700
std::string message
Definition peglib.h:688
std::string label
Definition peglib.h:689
const char * error_pos
Definition peglib.h:685
Definition peglib.h:1843
void visit(LiteralString &ope) override
Definition peglib.h:1846
static const char * token(Ope &ope)
Definition peglib.h:1852
void visit(TokenBoundary &ope) override
Definition peglib.h:1847
const char * token_
Definition peglib.h:1859
void visit(Ignore &ope) override
Definition peglib.h:1848
void visit(Recovery &ope) override
Definition peglib.h:1850
Definition peglib.h:2112
const std::vector< std::string > & params_
Definition peglib.h:2193
void visit(Repetition &ope) override
Definition peglib.h:2135
void visit(WeakHolder &ope) override
Definition peglib.h:2172
void visit(Character &ope) override
Definition peglib.h:2154
void visit(CharacterClass &ope) override
Definition peglib.h:2151
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2127
void visit(Ignore &ope) override
Definition peglib.h:2168
void visit(Cut &ope) override
Definition peglib.h:2187
void visit(AnyCharacter &ope) override
Definition peglib.h:2155
void visit(LiteralString &ope) override
Definition peglib.h:2148
void visit(Recovery &ope) override
Definition peglib.h:2183
void visit(Holder &ope) override
Definition peglib.h:2173
const std::vector< std::shared_ptr< Ope > > & args_
Definition peglib.h:2192
void visit(Dictionary &ope) override
Definition peglib.h:2147
void visit(Whitespace &ope) override
Definition peglib.h:2175
void visit(TokenBoundary &ope) override
Definition peglib.h:2164
std::shared_ptr< Ope > found_ope
Definition peglib.h:2189
void visit(NotPredicate &ope) override
Definition peglib.h:2143
FindReference(const std::vector< std::shared_ptr< Ope > > &args, const std::vector< std::string > &params)
Definition peglib.h:2115
void visit(Sequence &ope) override
Definition peglib.h:2119
void visit(AndPredicate &ope) override
Definition peglib.h:2139
void visit(Capture &ope) override
Definition peglib.h:2160
void visit(CaptureScope &ope) override
Definition peglib.h:2156
void visit(PrecedenceClimbing &ope) override
Definition peglib.h:2179
Definition peglib.h:1926
void visit(Capture &ope) override
Definition peglib.h:1953
std::string error_name
Definition peglib.h:1965
void visit(Sequence &ope) override
Definition peglib.h:3157
void visit(WeakHolder &ope) override
Definition peglib.h:1956
bool is_empty
Definition peglib.h:1963
void visit(Repetition &ope) override
Definition peglib.h:1940
const char * error_s
Definition peglib.h:1964
void visit(NotPredicate &) override
Definition peglib.h:1948
std::vector< std::pair< const char *, std::string > > & refs_
Definition peglib.h:1972
void visit(LiteralString &ope) override
Definition peglib.h:1949
void visit(AndPredicate &) override
Definition peglib.h:1947
void visit(CaptureScope &ope) override
Definition peglib.h:1952
void visit(Whitespace &ope) override
Definition peglib.h:1959
std::unordered_map< std::string, bool > & has_error_cache_
Definition peglib.h:1973
void visit(PrecedenceClimbing &ope) override
Definition peglib.h:1960
void visit(Holder &ope) override
Definition peglib.h:1957
void set_error()
Definition peglib.h:1968
void visit(Recovery &ope) override
Definition peglib.h:1961
void visit(TokenBoundary &ope) override
Definition peglib.h:1954
void visit(PrioritizedChoice &ope) override
Definition peglib.h:1934
void visit(Ignore &ope) override
Definition peglib.h:1955
HasEmptyElement(std::vector< std::pair< const char *, std::string > > &refs, std::unordered_map< std::string, bool > &has_error_cache)
Definition peglib.h:1929
Definition peglib.h:1782
void visit(Dictionary &) override
Definition peglib.h:1792
void visit(LiteralString &) override
Definition peglib.h:1793
bool result_
Definition peglib.h:1802
void visit(PrioritizedChoice &ope) override
Definition peglib.h:1785
static bool check(Ope &ope)
Definition peglib.h:1795
Definition peglib.h:2077
void visit(Ignore &ope) override
Definition peglib.h:2099
const std::vector< std::string > & params_
Definition peglib.h:2109
void visit(Capture &ope) override
Definition peglib.h:2097
void visit(TokenBoundary &ope) override
Definition peglib.h:2098
void visit(PrecedenceClimbing &ope) override
Definition peglib.h:2104
void visit(AndPredicate &ope) override
Definition peglib.h:2094
Grammar & grammar_
Definition peglib.h:2108
void visit(Recovery &ope) override
Definition peglib.h:2105
void visit(Whitespace &ope) override
Definition peglib.h:2103
void visit(Holder &ope) override
Definition peglib.h:2101
void visit(WeakHolder &ope) override
Definition peglib.h:2100
LinkReferences(Grammar &grammar, const std::vector< std::string > &params)
Definition peglib.h:2080
void visit(NotPredicate &ope) override
Definition peglib.h:2095
void visit(Sequence &ope) override
Definition peglib.h:2083
void visit(CaptureScope &ope) override
Definition peglib.h:2096
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2088
void visit(Repetition &ope) override
Definition peglib.h:2093
Definition peglib.h:1688
virtual void visit(WeakHolder &)
Definition peglib.h:1705
virtual void visit(TokenBoundary &)
Definition peglib.h:1702
virtual void visit(Repetition &)
Definition peglib.h:1692
virtual void visit(Dictionary &)
Definition peglib.h:1695
virtual void visit(Character &)
Definition peglib.h:1698
virtual ~Visitor()
Definition peglib.h:1689
virtual void visit(AndPredicate &)
Definition peglib.h:1693
virtual void visit(LiteralString &)
Definition peglib.h:1696
virtual void visit(Reference &)
Definition peglib.h:1707
virtual void visit(CharacterClass &)
Definition peglib.h:1697
virtual void visit(PrioritizedChoice &)
Definition peglib.h:1691
virtual void visit(Ignore &)
Definition peglib.h:1703
virtual void visit(PrecedenceClimbing &)
Definition peglib.h:1710
virtual void visit(CaptureScope &)
Definition peglib.h:1700
virtual void visit(Sequence &)
Definition peglib.h:1690
virtual void visit(Holder &)
Definition peglib.h:1706
virtual void visit(Capture &)
Definition peglib.h:1701
virtual void visit(NotPredicate &)
Definition peglib.h:1694
virtual void visit(BackReference &)
Definition peglib.h:1709
virtual void visit(Cut &)
Definition peglib.h:1712
virtual void visit(AnyCharacter &)
Definition peglib.h:1699
virtual void visit(Whitespace &)
Definition peglib.h:1708
virtual void visit(Recovery &)
Definition peglib.h:1711
virtual void visit(User &)
Definition peglib.h:1704
Definition peglib.h:3345
Data()
Definition peglib.h:3361
std::vector< std::pair< std::string, const char * > > duplicates_of_definition
Definition peglib.h:3350
bool enablePackratParsing
Definition peglib.h:3359
std::map< std::string, std::vector< Instruction > > instructions
Definition peglib.h:3353
std::string start
Definition peglib.h:3347
std::vector< std::pair< std::string, const char * > > duplicates_of_instruction
Definition peglib.h:3352
const char * start_pos
Definition peglib.h:3348
std::set< std::string_view > captures_in_current_definition
Definition peglib.h:3358
std::vector< std::pair< std::string, const char * > > undefined_back_references
Definition peglib.h:3355
std::shared_ptr< Grammar > grammar
Definition peglib.h:3346
std::vector< std::set< std::string_view > > captures_stack
Definition peglib.h:3356
Definition peglib.h:3339
std::any data
Definition peglib.h:3341
std::string type
Definition peglib.h:3340
std::string_view sv
Definition peglib.h:3342
Definition peglib.h:3301
std::shared_ptr< Grammar > grammar
Definition peglib.h:3302
bool enablePackratParsing
Definition peglib.h:3304
std::string start
Definition peglib.h:3303
Definition peglib.h:2037
void visit(Sequence &ope) override
Definition peglib.h:2044
void visit(Whitespace &ope) override
Definition peglib.h:2064
std::unordered_set< std::string > referenced
Definition peglib.h:2070
std::unordered_map< std::string, const char * > error_s
Definition peglib.h:2068
void visit(Holder &ope) override
Definition peglib.h:2062
const std::vector< std::string > & params_
Definition peglib.h:2074
void visit(PrecedenceClimbing &ope) override
Definition peglib.h:2065
void visit(Repetition &ope) override
Definition peglib.h:2054
void visit(WeakHolder &ope) override
Definition peglib.h:2061
void visit(Recovery &ope) override
Definition peglib.h:2066
void visit(NotPredicate &ope) override
Definition peglib.h:2056
std::unordered_map< std::string, std::string > error_message
Definition peglib.h:2069
void visit(TokenBoundary &ope) override
Definition peglib.h:2059
ReferenceChecker(const Grammar &grammar, const std::vector< std::string > &params)
Definition peglib.h:2040
void visit(Capture &ope) override
Definition peglib.h:2058
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2049
void visit(Ignore &ope) override
Definition peglib.h:2060
const Grammar & grammar_
Definition peglib.h:2073
void visit(AndPredicate &ope) override
Definition peglib.h:2055
void visit(CaptureScope &ope) override
Definition peglib.h:2057
Definition peglib.h:503
std::pair< size_t, size_t > line_info() const
Definition peglib.h:2549
std::string token_to_string(size_t id=0) const
Definition peglib.h:538
void append(SemanticValues &chvs)
Definition peglib.h:558
std::vector< std::string_view > tokens
Definition peglib.h:529
size_t choice_
Definition peglib.h:606
Context * c_
Definition peglib.h:603
std::string name_
Definition peglib.h:607
friend class Holder
Definition peglib.h:600
friend class Sequence
Definition peglib.h:597
std::string_view token(size_t id=0) const
Definition peglib.h:531
SemanticValues()=default
std::string_view sv() const
Definition peglib.h:512
std::string_view sv_
Definition peglib.h:604
size_t choice() const
Definition peglib.h:526
T token_to_number() const
Definition peglib.h:542
const char * ss
Definition peglib.h:509
size_t choice_count_
Definition peglib.h:605
friend class Dictionary
Definition peglib.h:596
size_t choice_count() const
Definition peglib.h:523
std::vector< T > transform(size_t beg=0, size_t end=static_cast< size_t >(-1)) const
Definition peglib.h:548
const char * path
Definition peglib.h:508
std::vector< unsigned int > tags
Definition peglib.h:517
const std::string & name() const
Definition peglib.h:515
friend class Repetition
Definition peglib.h:599
friend class PrecedenceClimbing
Definition peglib.h:601
SemanticValues(Context *c)
Definition peglib.h:505
friend class Context
Definition peglib.h:595
friend class PrioritizedChoice
Definition peglib.h:598
Definition peglib.h:1805
void visit(Ignore &ope) override
Definition peglib.h:1822
void visit(Holder &ope) override
Definition peglib.h:1824
void visit(PrecedenceClimbing &ope) override
Definition peglib.h:1827
bool has_rule_
Definition peglib.h:1840
void visit(Sequence &ope) override
Definition peglib.h:1808
void visit(PrioritizedChoice &ope) override
Definition peglib.h:1813
void visit(Whitespace &ope) override
Definition peglib.h:1826
void visit(Capture &ope) override
Definition peglib.h:1820
void visit(Recovery &ope) override
Definition peglib.h:1828
bool has_token_boundary_
Definition peglib.h:1839
void visit(CaptureScope &ope) override
Definition peglib.h:1819
void visit(TokenBoundary &) override
Definition peglib.h:1821
void visit(WeakHolder &) override
Definition peglib.h:1823
void visit(Repetition &ope) override
Definition peglib.h:1818
static bool is_token(Ope &ope)
Definition peglib.h:1830
Definition peglib.h:1715
void visit(Recovery &) override
Definition peglib.h:1739
void visit(Cut &) override
Definition peglib.h:1740
void visit(Holder &ope) override
Definition peglib.h:1734
void visit(User &) override
Definition peglib.h:1732
void visit(NotPredicate &) override
Definition peglib.h:1722
void visit(TokenBoundary &) override
Definition peglib.h:1730
void visit(LiteralString &) override
Definition peglib.h:1724
void visit(AnyCharacter &) override
Definition peglib.h:1727
void visit(Whitespace &) override
Definition peglib.h:1736
void visit(WeakHolder &) override
Definition peglib.h:1733
void visit(Repetition &) override
Definition peglib.h:1720
void visit(Character &) override
Definition peglib.h:1726
void visit(CharacterClass &) override
Definition peglib.h:1725
void visit(Reference &) override
Definition peglib.h:1735
static std::string get(Ope &ope)
Definition peglib.h:1742
const char * name_
Definition peglib.h:1749
void visit(Capture &) override
Definition peglib.h:1729
void visit(CaptureScope &) override
Definition peglib.h:1728
void visit(PrecedenceClimbing &) override
Definition peglib.h:1738
void visit(Sequence &) override
Definition peglib.h:1718
void visit(PrioritizedChoice &) override
Definition peglib.h:1719
void visit(Ignore &) override
Definition peglib.h:1731
void visit(AndPredicate &) override
Definition peglib.h:1721
void visit(BackReference &) override
Definition peglib.h:1737
void visit(Dictionary &) override
Definition peglib.h:1723
Definition peglib.h:438
bool match
Definition peglib.h:440
size_t id
Definition peglib.h:441
bool done
Definition peglib.h:439
Definition peglib.h:627
Definition peglib.h:53
bool execute_on_destruction
Definition peglib.h:75
scope_exit(scope_exit &&rhs)
Definition peglib.h:57
EF exit_function
Definition peglib.h:74
~scope_exit()
Definition peglib.h:63
scope_exit(EF &&f)
Definition peglib.h:54
scope_exit(const scope_exit &)=delete
void operator=(const scope_exit &)=delete
scope_exit & operator=(scope_exit &&)=delete
void release()
Definition peglib.h:67