Cockatrice 2026-06-01-Development-3.1.0-beta.3
A virtual tabletop for multiplayer card games
Loading...
Searching...
No Matches
peglib.h
Go to the documentation of this file.
1//
2// peglib.h
3//
4// Copyright (c) 2022 Yuji Hirose. All rights reserved.
5// MIT License
6//
7
8#pragma once
9
10/*
11 * Configuration
12 */
13
14#ifndef CPPPEGLIB_HEURISTIC_ERROR_TOKEN_MAX_CHAR_COUNT
15#define CPPPEGLIB_HEURISTIC_ERROR_TOKEN_MAX_CHAR_COUNT 32
16#endif
17
18#include <algorithm>
19#include <any>
20#include <bitset>
21#include <cassert>
22#include <cctype>
23#if __has_include(<charconv>)
24#include <charconv>
25#endif
26#include <cstring>
27#include <functional>
28#include <initializer_list>
29#include <iostream>
30#include <limits>
31#include <map>
32#include <memory>
33#include <mutex>
34#include <optional>
35#include <set>
36#include <sstream>
37#include <string>
38#include <unordered_map>
39#include <unordered_set>
40#include <utility>
41#include <vector>
42
43#if !defined(__cplusplus) || __cplusplus < 201703L
44#error "Requires complete C++17 support"
45#endif
46
47namespace peg {
48
49/*-----------------------------------------------------------------------------
50 * scope_exit
51 *---------------------------------------------------------------------------*/
52
53// This is based on
54// "http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4189".
55
/// Scope guard: invokes the stored callable when the guard is destroyed,
/// unless release() was called first.
///
/// Move-only. Moving from a guard disarms the source so the callable runs at
/// most once.
template <typename EF> struct scope_exit {
  /// Takes ownership of the callable and arms the guard.
  explicit scope_exit(EF &&f)
      : exit_function(std::move(f)), execute_on_destruction{true} {}

  /// Transfers the callable and the armed state, then disarms `rhs`.
  scope_exit(scope_exit &&rhs)
      : exit_function(std::move(rhs.exit_function)),
        execute_on_destruction{rhs.execute_on_destruction} {
    rhs.release();
  }

  /// Runs the callable if the guard is still armed.
  ~scope_exit() {
    if (execute_on_destruction) { this->exit_function(); }
  }

  /// Disarms the guard; the callable will not run on destruction.
  void release() { this->execute_on_destruction = false; }

private:
  scope_exit(const scope_exit &) = delete;
  void operator=(const scope_exit &) = delete;
  scope_exit &operator=(scope_exit &&) = delete;

  EF exit_function;
  bool execute_on_destruction;
};
80
81/*-----------------------------------------------------------------------------
82 * UTF8 functions
83 *---------------------------------------------------------------------------*/
84
/// Returns the byte length (1-4) of the UTF-8 sequence announced by the lead
/// byte `s8[0]`, or 0 when `l` is 0, the lead byte is not a valid sequence
/// start, or fewer than the required bytes remain in the buffer.
/// Continuation bytes are not inspected.
inline size_t codepoint_length(const char *s8, size_t l) {
  if (l == 0) { return 0; }

  const auto lead = static_cast<uint8_t>(s8[0]);

  // Classify the lead byte by its high-bit pattern.
  size_t needed = 0;
  if (lead < 0x80) {
    needed = 1; // 0xxxxxxx
  } else if ((lead >> 5) == 0x06) {
    needed = 2; // 110xxxxx
  } else if ((lead >> 4) == 0x0E) {
    needed = 3; // 1110xxxx
  } else if ((lead >> 3) == 0x1E) {
    needed = 4; // 11110xxx
  }

  return (needed != 0 && l >= needed) ? needed : 0;
}
100
101inline size_t codepoint_count(const char *s8, size_t l) {
102 size_t count = 0;
103 for (size_t i = 0; i < l;) {
104 auto len = codepoint_length(s8 + i, l - i);
105 if (len == 0) {
106 // Invalid UTF-8 byte, treat as single byte to avoid infinite loop
107 len = 1;
108 }
109 i += len;
110 count++;
111 }
112 return count;
113}
114
/// Encodes `cp` as UTF-8 into `buff` (which must hold at least 4 bytes).
/// Returns the number of bytes written, or 0 — leaving `buff` untouched — for
/// surrogates (U+D800..U+DFFF) and values above U+10FFFF.
inline size_t encode_codepoint(char32_t cp, char *buff) {
  const bool is_surrogate = (cp >= 0xD800 && cp < 0xE000);
  if (is_surrogate || cp >= 0x110000) { return 0; }

  // Builds a 10xxxxxx continuation byte from the low six bits.
  const auto continuation = [](char32_t bits) {
    return static_cast<char>(0x80 | (bits & 0x3F));
  };

  if (cp < 0x0080) {
    buff[0] = static_cast<char>(cp & 0x7F);
    return 1;
  }
  if (cp < 0x0800) {
    buff[0] = static_cast<char>(0xC0 | ((cp >> 6) & 0x1F));
    buff[1] = continuation(cp);
    return 2;
  }
  if (cp < 0x10000) {
    buff[0] = static_cast<char>(0xE0 | ((cp >> 12) & 0xF));
    buff[1] = continuation(cp >> 6);
    buff[2] = continuation(cp);
    return 3;
  }
  buff[0] = static_cast<char>(0xF0 | ((cp >> 18) & 0x7));
  buff[1] = continuation(cp >> 12);
  buff[2] = continuation(cp >> 6);
  buff[3] = continuation(cp);
  return 4;
}

/// Convenience overload: returns the UTF-8 encoding of `cp` as a string
/// (empty when `cp` cannot be encoded).
inline std::string encode_codepoint(char32_t cp) {
  char bytes[4];
  const auto count = encode_codepoint(cp, bytes);
  return std::string(bytes, count);
}
151
/// Decodes one UTF-8 sequence from the start of `s8` (at most `l` bytes).
/// On success stores the sequence length in `bytes`, the code point in `cp`
/// and returns true. On failure (empty input, invalid lead byte, truncated
/// sequence) returns false and leaves both outputs untouched.
/// Only the lead byte pattern is checked; continuation bytes are masked, not
/// validated.
inline bool decode_codepoint(const char *s8, size_t l, size_t &bytes,
                             char32_t &cp) {
  if (l == 0) { return false; }

  // Payload bits of byte `idx` selected by `mask`.
  const auto bits = [s8](size_t idx, unsigned int mask) {
    return static_cast<char32_t>(static_cast<unsigned char>(s8[idx]) & mask);
  };

  const auto lead = static_cast<uint8_t>(s8[0]);

  if ((lead & 0x80) == 0) {
    bytes = 1;
    cp = lead;
    return true;
  }
  if ((lead & 0xE0) == 0xC0) {
    if (l < 2) { return false; }
    bytes = 2;
    cp = (bits(0, 0x1F) << 6) | bits(1, 0x3F);
    return true;
  }
  if ((lead & 0xF0) == 0xE0) {
    if (l < 3) { return false; }
    bytes = 3;
    cp = (bits(0, 0x0F) << 12) | (bits(1, 0x3F) << 6) | bits(2, 0x3F);
    return true;
  }
  if ((lead & 0xF8) == 0xF0) {
    if (l < 4) { return false; }
    bytes = 4;
    cp = (bits(0, 0x07) << 18) | (bits(1, 0x3F) << 12) |
         (bits(2, 0x3F) << 6) | bits(3, 0x3F);
    return true;
  }
  return false;
}

/// Decodes one code point into `cp`; returns the bytes consumed, 0 on failure.
inline size_t decode_codepoint(const char *s8, size_t l, char32_t &cp) {
  size_t consumed = 0;
  return decode_codepoint(s8, l, consumed, cp) ? consumed : 0;
}

/// Decodes and returns one code point; returns 0 when decoding fails.
inline char32_t decode_codepoint(const char *s8, size_t l) {
  char32_t value = 0;
  decode_codepoint(s8, l, value);
  return value;
}
200
201inline std::u32string decode(const char *s8, size_t l) {
202 std::u32string out;
203 size_t i = 0;
204 while (i < l) {
205 auto beg = i++;
206 while (i < l && (s8[i] & 0xc0) == 0x80) {
207 i++;
208 }
209 out += decode_codepoint(&s8[beg], (i - beg));
210 }
211 return out;
212}
213
/// Reinterprets a pointer to any character-like type as `const char *`.
template <typename T> const char *u8(const T *s) {
  const auto *as_chars = reinterpret_cast<const char *>(s);
  return as_chars;
}
217
218/*-----------------------------------------------------------------------------
219 * escape_characters
220 *---------------------------------------------------------------------------*/
221
/// Returns a copy of the `n` bytes at `s` in which form feed, newline,
/// carriage return, tab and vertical tab are replaced by their two-character
/// backslash escapes. All other bytes are copied unchanged.
inline std::string escape_characters(const char *s, size_t n) {
  std::string out;
  for (size_t k = 0; k < n; ++k) {
    const char ch = s[k];
    if (ch == '\f') {
      out += "\\f";
    } else if (ch == '\n') {
      out += "\\n";
    } else if (ch == '\r') {
      out += "\\r";
    } else if (ch == '\t') {
      out += "\\t";
    } else if (ch == '\v') {
      out += "\\v";
    } else {
      out += ch;
    }
  }
  return out;
}

/// string_view convenience overload of escape_characters().
inline std::string escape_characters(std::string_view sv) {
  const auto *first = sv.data();
  const auto count = sv.size();
  return escape_characters(first, count);
}
241
242/*-----------------------------------------------------------------------------
243 * resolve_escape_sequence
244 *---------------------------------------------------------------------------*/
245
/// If `c` is a hexadecimal digit, stores its value (0-15) in `v` and returns
/// true; otherwise returns false and leaves `v` untouched.
inline bool is_hex(char c, int &v) {
  const auto within = [c](char lo, char hi) { return lo <= c && c <= hi; };

  if (within('0', '9')) {
    v = c - '0';
  } else if (within('a', 'f')) {
    v = c - 'a' + 10;
  } else if (within('A', 'F')) {
    v = c - 'A' + 10;
  } else {
    return false;
  }
  return true;
}
259
/// If `c` is a decimal digit, stores its value (0-9) in `v` and returns true;
/// otherwise returns false and leaves `v` untouched.
inline bool is_digit(char c, int &v) {
  const bool decimal = !(c < '0' || c > '9');
  if (decimal) { v = c - '0'; }
  return decimal;
}
267
268inline std::pair<int, size_t> parse_hex_number(const char *s, size_t n,
269 size_t i) {
270 int ret = 0;
271 int val;
272 while (i < n && is_hex(s[i], val)) {
273 ret = static_cast<int>(ret * 16 + val);
274 i++;
275 }
276 return std::pair(ret, i);
277}
278
279inline std::pair<int, size_t> parse_octal_number(const char *s, size_t n,
280 size_t i) {
281 int ret = 0;
282 int val;
283 while (i < n && is_digit(s[i], val)) {
284 ret = static_cast<int>(ret * 8 + val);
285 i++;
286 }
287 return std::pair(ret, i);
288}
289
290inline std::string resolve_escape_sequence(const char *s, size_t n) {
291 std::string r;
292 r.reserve(n);
293
294 size_t i = 0;
295 while (i < n) {
296 auto ch = s[i];
297 if (ch == '\\') {
298 i++;
299 assert(i < n);
300
301 switch (s[i]) {
302 case 'f':
303 r += '\f';
304 i++;
305 break;
306 case 'n':
307 r += '\n';
308 i++;
309 break;
310 case 'r':
311 r += '\r';
312 i++;
313 break;
314 case 't':
315 r += '\t';
316 i++;
317 break;
318 case 'v':
319 r += '\v';
320 i++;
321 break;
322 case '\'':
323 r += '\'';
324 i++;
325 break;
326 case '"':
327 r += '"';
328 i++;
329 break;
330 case '[':
331 r += '[';
332 i++;
333 break;
334 case ']':
335 r += ']';
336 i++;
337 break;
338 case '\\':
339 r += '\\';
340 i++;
341 break;
342 case 'x':
343 case 'u': {
344 char32_t cp;
345 std::tie(cp, i) = parse_hex_number(s, n, i + 1);
346 r += encode_codepoint(cp);
347 break;
348 }
349 default: {
350 char32_t cp;
351 std::tie(cp, i) = parse_octal_number(s, n, i);
352 r += encode_codepoint(cp);
353 break;
354 }
355 }
356 } else {
357 r += ch;
358 i++;
359 }
360 }
361 return r;
362}
363
364/*-----------------------------------------------------------------------------
365 * token_to_number_ - This function should be removed eventually
366 *---------------------------------------------------------------------------*/
367
/// Converts the text in `sv` to a number of type `T`.
///
/// When <charconv> is available, non-floating-point types are parsed with
/// std::from_chars; every other case goes through std::istringstream.
/// The result starts at 0 and the status of the conversion is not reported.
template <typename T> T token_to_number_(std::string_view sv) {
  T result = 0;

#if __has_include(<charconv>)
  constexpr bool use_from_chars = !std::is_floating_point<T>::value;
#else
  constexpr bool use_from_chars = false;
#endif

  if constexpr (use_from_chars) {
#if __has_include(<charconv>)
    const char *first = sv.data();
    std::from_chars(first, first + sv.size(), result);
#endif
  } else {
    const std::string text(sv);
    std::istringstream stream(text);
    stream >> result;
  }
  return result;
}
383
/// Returns a copy of `s` with every byte passed through std::tolower.
inline std::string to_lower(std::string s) {
  for (auto &c : s) {
    c = static_cast<char>(std::tolower(static_cast<unsigned char>(c)));
  }
  return s;
}

/*-----------------------------------------------------------------------------
 *  Trie
 *---------------------------------------------------------------------------*/

/// Prefix dictionary over a fixed list of strings.
///
/// Every non-empty prefix of every item is stored as a key of `dic_`:
///   - `match` is true when the key is itself one of the items;
///   - `done`  is true when no stored key extends this one, so a lookup can
///             stop here;
///   - `id`    is the index (in the constructor's `items`) of the item the key
///             stands for when `match` is true.
class Trie {
public:
  /// Builds the dictionary. With `ignore_case`, keys are stored lower-cased
  /// and lookups lower-case the probed text.
  Trie(const std::vector<std::string> &items, bool ignore_case)
      : ignore_case_(ignore_case), items_count_(items.size()) {
    size_t id = 0;
    for (const auto &item : items) {
      const auto &s = ignore_case ? to_lower(item) : item;
      if (item.size() > max_len_) { max_len_ = item.size(); }
      for (size_t len = 1; len <= item.size(); len++) {
        auto last = len == item.size();
        std::string_view sv(s.data(), len);
        auto it = dic_.find(sv);
        if (it == dic_.end()) {
          dic_.emplace(sv, Info{last, last, id});
        } else if (last) {
          // The key already exists as a prefix of an earlier item. Record this
          // item's id the first time the key becomes a match; otherwise the
          // entry would keep reporting the id of the longer item that created
          // it. Exact duplicates keep the id of their first occurrence.
          if (!it->second.match) {
            it->second.match = true;
            it->second.id = id;
          }
        } else {
          // A longer item passes through this key: lookups must continue.
          it->second.done = false;
        }
      }
      id++;
    }
  }

  /// Finds the longest item that is a prefix of `text[0, text_len)`.
  /// Returns its length and stores its index in `id`; returns 0 and leaves
  /// `id` untouched when no item matches.
  size_t match(const char *text, size_t text_len, size_t &id) const {
    auto limit = std::min(text_len, max_len_);
    std::string lower_text;
    if (ignore_case_) {
      // Only the first `limit` bytes can take part in a match.
      lower_text = to_lower(std::string(text, limit));
      text = lower_text.data();
    }

    size_t match_len = 0;
    auto done = false;
    size_t len = 1;
    while (!done && len <= limit) {
      std::string_view sv(text, len);
      auto it = dic_.find(sv);
      if (it == dic_.end()) {
        done = true;
      } else {
        if (it->second.match) {
          match_len = len;
          id = it->second.id;
        }
        if (it->second.done) { done = true; }
      }
      len += 1;
    }
    return match_len;
  }

  /// Number of stored keys (all distinct prefixes), not the number of items.
  size_t size() const { return dic_.size(); }

  /// Number of items the dictionary was built from.
  size_t items_count() const { return items_count_; }

  friend struct ComputeFirstSet;

private:
  struct Info {
    bool done;
    bool match;
    size_t id;
  };

  // TODO: Use unordered_map when heterogeneous lookup is supported in C++20
  // std::unordered_map<std::string, Info> dic_;
  std::map<std::string, Info, std::less<>> dic_;

  bool ignore_case_;
  size_t items_count_;
  size_t max_len_ = 0;
};
467
468/*-----------------------------------------------------------------------------
469 * PEG
470 *---------------------------------------------------------------------------*/
471
472/*
473 * Line information utility function
474 */
475inline std::pair<size_t, size_t> line_info(const char *start, const char *cur) {
476 auto p = start;
477 auto col_ptr = p;
478 auto no = 1;
479
480 while (p < cur) {
481 if (*p == '\n') {
482 no++;
483 col_ptr = p + 1;
484 }
485 p++;
486 }
487
488 auto col = codepoint_count(col_ptr, p - col_ptr) + 1;
489
490 return std::pair(no, col);
491}
492
493/*
494 * String tag
495 */
/// Folds the `l` bytes at `s` into the running hash `h` using
/// h = (h * 33) ^ byte, and returns the result. Usable in constant
/// expressions.
inline constexpr unsigned int str2tag_core(const char *s, size_t l,
                                           unsigned int h) {
  for (size_t k = 0; k < l; ++k) {
    h = (h * 33) ^ static_cast<unsigned char>(s[k]);
  }
  return h;
}
502
503inline constexpr unsigned int str2tag(std::string_view sv) {
504 return str2tag_core(sv.data(), sv.size(), 0);
505}
506
507namespace udl {
508
509inline constexpr unsigned int operator"" _(const char *s, size_t l) {
510 return str2tag_core(s, l, 0);
511}
512
513} // namespace udl
514
515/*
516 * Semantic values
517 */
518class Context;
519
520struct SemanticValues : protected std::vector<std::any> {
521 SemanticValues() = default;
523
524 // Input text
525 const char *path = nullptr;
526 const char *ss = nullptr;
527
528 // Matched string
529 std::string_view sv() const { return sv_; }
530
531 // Definition name
532 const std::string &name() const { return name_; }
533
534 std::vector<unsigned int> tags;
535
536 // Line number and column at which the matched string is
537 std::pair<size_t, size_t> line_info() const;
538
539 // Choice count
540 size_t choice_count() const { return choice_count_; }
541
542 // Choice number (0 based index)
543 size_t choice() const { return choice_; }
544
545 // Tokens
546 std::vector<std::string_view> tokens;
547
548 std::string_view token(size_t id = 0) const {
549 if (tokens.empty()) { return sv_; }
550 assert(id < tokens.size());
551 return tokens[id];
552 }
553
554 // Token conversion
555 std::string token_to_string(size_t id = 0) const {
556 return std::string(token(id));
557 }
558
559 template <typename T> T token_to_number() const {
560 return token_to_number_<T>(token());
561 }
562
563 // Transform the semantic value vector to another vector
564 template <typename T>
565 std::vector<T> transform(size_t beg = 0,
566 size_t end = static_cast<size_t>(-1)) const {
567 std::vector<T> r;
568 end = (std::min)(end, size());
569 for (size_t i = beg; i < end; i++) {
570 r.emplace_back(std::any_cast<T>((*this)[i]));
571 }
572 return r;
573 }
574
575 using std::vector<std::any>::iterator;
576 using std::vector<std::any>::const_iterator;
577 using std::vector<std::any>::size;
578 using std::vector<std::any>::empty;
579 using std::vector<std::any>::assign;
580 using std::vector<std::any>::begin;
581 using std::vector<std::any>::end;
582 using std::vector<std::any>::rbegin;
583 using std::vector<std::any>::rend;
584 using std::vector<std::any>::operator[];
585 using std::vector<std::any>::at;
586 using std::vector<std::any>::resize;
587 using std::vector<std::any>::front;
588 using std::vector<std::any>::back;
589 using std::vector<std::any>::push_back;
590 using std::vector<std::any>::pop_back;
591 using std::vector<std::any>::insert;
592 using std::vector<std::any>::erase;
593 using std::vector<std::any>::clear;
594 using std::vector<std::any>::swap;
595 using std::vector<std::any>::emplace;
596 using std::vector<std::any>::emplace_back;
597
598private:
599 friend class Context;
600 friend class Dictionary;
601 friend class Sequence;
602 friend class PrioritizedChoice;
603 friend class Repetition;
604 friend class Holder;
605 friend class PrecedenceClimbing;
606
607 Context *c_ = nullptr;
608 std::string_view sv_;
609 size_t choice_count_ = 0;
610 size_t choice_ = 0;
611 std::string name_;
612};
613
614/*
615 * Semantic action
616 */
/// Invokes `fn(args...)` and wraps the outcome in a std::any:
///   - void result     -> empty std::any
///   - std::any result -> returned as is
///   - anything else   -> stored inside a new std::any
template <typename F, typename... Args> std::any call(F fn, Args &&...args) {
  using Result = decltype(fn(std::forward<Args>(args)...));
  using Unqualified = typename std::remove_cv<Result>::type;

  if constexpr (std::is_void<Result>::value) {
    fn(std::forward<Args>(args)...);
    return std::any();
  } else if constexpr (std::is_same<Unqualified, std::any>::value) {
    return fn(std::forward<Args>(args)...);
  } else {
    return std::any(fn(std::forward<Args>(args)...));
  }
}
629
/// Compile-time arity of a callable.
/// Primary template: a functor or lambda, resolved through its operator().
template <typename Callable>
struct argument_count : argument_count<decltype(&Callable::operator())> {};

/// Plain function pointer.
template <typename Ret, typename... Params>
struct argument_count<Ret (*)(Params...)>
    : std::integral_constant<unsigned, sizeof...(Params)> {};

/// Non-const member function (e.g. a mutable lambda's operator()).
template <typename Ret, typename Cls, typename... Params>
struct argument_count<Ret (Cls::*)(Params...)>
    : std::integral_constant<unsigned, sizeof...(Params)> {};

/// Const member function (e.g. an ordinary lambda's operator()).
template <typename Ret, typename Cls, typename... Params>
struct argument_count<Ret (Cls::*)(Params...) const>
    : std::integral_constant<unsigned, sizeof...(Params)> {};
641
642class Action {
643public:
644 Action() = default;
645 Action(Action &&rhs) = default;
646 template <typename F> Action(F fn) : fn_(make_adaptor(fn)) {}
647 template <typename F> void operator=(F fn) { fn_ = make_adaptor(fn); }
648 Action &operator=(const Action &rhs) = default;
649
650 operator bool() const { return bool(fn_); }
651
652 std::any operator()(SemanticValues &vs, std::any &dt,
653 const std::any &predicate_data) const {
654 return fn_(vs, dt, predicate_data);
655 }
656
657private:
658 using Fty = std::function<std::any(SemanticValues &vs, std::any &dt,
659 const std::any &predicate_data)>;
660
661 template <typename F> Fty make_adaptor(F fn) {
662 if constexpr (argument_count<F>::value == 1) {
663 return [fn](auto &vs, auto & /*dt*/, const auto & /*predicate_data*/) {
664 return call(fn, vs);
665 };
666 } else if constexpr (argument_count<F>::value == 2) {
667 return [fn](auto &vs, auto &dt, const auto & /*predicate_data*/) {
668 return call(fn, vs, dt);
669 };
670 } else {
671 return [fn](auto &vs, auto &dt, const auto &predicate_data) {
672 return call(fn, vs, dt, predicate_data);
673 };
674 }
675 }
676
678};
679
681public:
682 Predicate() = default;
683 Predicate(Predicate &&rhs) = default;
684 template <typename F> Predicate(F fn) : fn_(make_adaptor(fn)) {}
685 template <typename F> void operator=(F fn) { fn_ = make_adaptor(fn); }
686 Predicate &operator=(const Predicate &rhs) = default;
687
688 operator bool() const { return bool(fn_); }
689
690 bool operator()(const SemanticValues &vs, const std::any &dt,
691 std::string &msg, std::any &predicate_data) const {
692 return fn_(vs, dt, msg, predicate_data);
693 }
694
695private:
696 using Fty = std::function<bool(const SemanticValues &vs, const std::any &dt,
697 std::string &msg, std::any &predicate_data)>;
698
699 template <typename F> Fty make_adaptor(F fn) {
700 if constexpr (argument_count<F>::value == 3) {
701 return [fn](const auto &vs, const auto &dt, auto &msg,
702 auto & /*predicate_data*/) { return fn(vs, dt, msg); };
703 } else {
704 return [fn](const auto &vs, const auto &dt, auto &msg,
705 auto &predicate_data) {
706 return fn(vs, dt, msg, predicate_data);
707 };
708 }
709 }
710
712};
713
714/*
715 * Parse result helper
716 */
/// True unless `len` is the failure marker, static_cast<size_t>(-1).
inline bool success(size_t len) {
  constexpr auto failure_marker = std::numeric_limits<size_t>::max();
  return len != failure_marker;
}
718
/// True when `len` is the failure marker, static_cast<size_t>(-1).
inline bool fail(size_t len) {
  constexpr auto failure_marker = std::numeric_limits<size_t>::max();
  return len == failure_marker;
}
720
721/*
722 * Log
723 */
724using Log = std::function<void(size_t line, size_t col, const std::string &msg,
725 const std::string &rule)>;
726
727/*
728 * ErrorInfo
729 */
730class Definition;
731
// Error state collected while parsing.
// clear() resets error_pos, expected_tokens, message_pos and message; it
// leaves label and last_output_pos as they are.
// NOTE(review): the listing line that follows last_output_pos is missing from
// this extract, so the member list below may be incomplete.
732struct ErrorInfo {
733 const char *error_pos = nullptr;
// (literal, rule) pairs; add() skips a pair that is already present.
734 std::vector<std::pair<const char *, const Definition *>> expected_tokens;
735 const char *message_pos = nullptr;
736 std::string message;
737 std::string label;
738 const char *last_output_pos = nullptr;
740
741 void clear() {
742 error_pos = nullptr;
743 expected_tokens.clear();
744 message_pos = nullptr;
745 message.clear();
746 }
747
748 void add(const char *error_literal, const Definition *error_rule) {
749 for (const auto &[t, r] : expected_tokens) {
750 if (t == error_literal && r == error_rule) { return; }
751 }
752 expected_tokens.emplace_back(error_literal, error_rule);
753 }
754
755 void output_log(const Log &log, const char *s, size_t n);
756
757private:
758 int cast_char(char c) const { return static_cast<unsigned char>(c); }
759
760 std::string heuristic_error_token(const char *s, size_t n,
761 const char *pos) const {
762 auto len = n - std::distance(s, pos);
763 if (len) {
764 size_t i = 0;
765 auto c = cast_char(pos[i++]);
766 if (!std::ispunct(c) && !std::isspace(c)) {
767 while (i < len && !std::ispunct(cast_char(pos[i])) &&
768 !std::isspace(cast_char(pos[i]))) {
769 i++;
770 }
771 }
772
774 size_t j = 0;
775 while (count > 0 && j < i) {
776 j += codepoint_length(&pos[j], i - j);
777 count--;
778 }
779
780 return escape_characters(pos, j);
781 }
782 return std::string();
783 }
784
785 std::string replace_all(std::string str, const std::string &from,
786 const std::string &to) const {
787 size_t pos = 0;
788 while ((pos = str.find(from, pos)) != std::string::npos) {
789 str.replace(pos, from.length(), to);
790 pos += to.length();
791 }
792 return str;
793 }
794};
795
796/*
797 * Context
798 */
799class Ope;
800
801using TracerEnter = std::function<void(
802 const Ope &name, const char *s, size_t n, const SemanticValues &vs,
803 const Context &c, const std::any &dt, std::any &trace_data)>;
804
805using TracerLeave = std::function<void(
806 const Ope &ope, const char *s, size_t n, const SemanticValues &vs,
807 const Context &c, const std::any &dt, size_t, std::any &trace_data)>;
808
809using TracerStartOrEnd = std::function<void(std::any &trace_data)>;
810
// Parsing context: holds the input buffer (s, l), the semantic-value and
// argument stacks, the packrat cache and the left-recursion bookkeeping.
// NOTE(review): several listing lines in this range are missing from this
// extract (gaps in the embedded line numbers), so some members are not shown.
811class Context {
812public:
813 const char *path;
814 const char *s;
815 const size_t l;
816
818 bool recovered = false;
819
820 std::vector<std::shared_ptr<SemanticValues>> value_stack;
822
823 std::vector<Definition *> rule_stack;
824 std::vector<std::vector<std::shared_ptr<Ope>>> args_stack;
825
827
828 std::shared_ptr<Ope> whitespaceOpe;
829 bool in_whitespace = false;
830
831 std::shared_ptr<Ope> wordOpe;
832
833 std::vector<std::pair<std::string_view, std::string>> capture_entries;
834
835 std::vector<bool> cut_stack;
836
// Packrat cache. The two bit vectors are indexed by
// def_count * column + def_id (see clear_packrat_cache / write_packrat_cache);
// the map that follows is keyed by (column, def_id) and stores (len, value).
837 const size_t def_count;
839 std::vector<bool> cache_registered;
840 std::vector<bool> cache_success;
841
842 std::map<std::pair<size_t, size_t>, std::tuple<size_t, std::any>>
844
845 // Left recursion support
// len defaults to static_cast<size_t>(-1), the value fail() tests for.
846 struct LRMemo {
847 size_t len = static_cast<size_t>(-1);
848 std::any val;
849 };
850 std::map<std::pair<const Definition *, const char *>, LRMemo> lr_memo;
851
852 // Rules whose lr_memo was hit during the current parse scope.
853 // Used to track LR cycle membership.
854 std::set<const Definition *> lr_refs_hit;
855
856 // Rules currently in their seeding/growing phase at a given position.
857 // Protected from having their lr_memo erased by inner growers.
858 std::set<std::pair<const Definition *, const char *>> lr_active_seeds;
859
860 void clear_packrat_cache(const char *pos, size_t def_id) {
861 if (!enablePackratParsing) { return; }
862 auto col = static_cast<size_t>(pos - s);
863 auto idx = def_count * col + def_id;
864 if (idx < cache_registered.size()) {
865 cache_registered[idx] = false;
866 cache_success[idx] = false;
867 }
868 cache_values.erase(std::make_pair(col, def_id));
869 }
870
871 void write_packrat_cache(const char *pos, size_t def_id, size_t len,
872 const std::any &val) {
873 if (!enablePackratParsing) { return; }
874 auto col = pos - s;
875 auto idx = def_count * static_cast<size_t>(col) + def_id;
876 if (idx >= cache_registered.size()) { return; }
877 cache_registered[idx] = true;
878 cache_success[idx] = true;
879 auto key = std::pair(col, def_id);
880 cache_values[key] = std::pair(len, val);
881 }
882
885 std::any trace_data;
886 const bool verbose_trace;
887
889
904
// NOTE(review): listing lines 888-905 are missing from this extract; the two
// asserts below are presumably the tail of the destructor — confirm against
// the full header.
906 assert(!value_stack_size);
907 assert(cut_stack.empty());
908 }
909
// Context is neither copyable nor movable.
// NOTE(review): the deleted copy assignment is declared as returning Context
// by value; the conventional form returns Context&. Harmless while deleted.
910 Context(const Context &) = delete;
911 Context(Context &&) = delete;
912 Context operator=(const Context &) = delete;
913
914 // Per-rule packrat stats (populated when packrat_stats is non-null)
// NOTE(review): the `struct PackratStats {` line (listing line 915) is missing
// from this extract.
916 size_t hits = 0;
917 size_t misses = 0;
918 };
919 std::vector<PackratStats> *packrat_stats = nullptr;
920
921 // Per-rule packrat filter: if set, only rules with filter[def_id]=true
922 // use full memoization (cache_values map). Others use bitvector-only
923 // re-entry guard.
924 const std::vector<bool> *packrat_rule_filter = nullptr;
925
926 template <typename T>
927 void packrat(const char *a_s, size_t def_id, size_t &len, std::any &val,
928 T fn) {
930 fn(val);
931 return;
932 }
933
934 auto col = a_s - s;
935 auto idx = def_count * static_cast<size_t>(col) + def_id;
936
937 if (cache_registered[idx]) {
938 if (packrat_stats && def_id < packrat_stats->size()) {
939 (*packrat_stats)[def_id].hits++;
940 }
941 if (cache_success[idx]) {
942 auto key = std::pair(col, def_id);
943 std::tie(len, val) = cache_values[key];
944 return;
945 } else {
946 len = static_cast<size_t>(-1);
947 return;
948 }
949 } else {
950 // Pre-register as failure (re-entry guard for all rules)
951 cache_registered[idx] = true;
952 cache_success[idx] = false;
953
954 if (packrat_stats && def_id < packrat_stats->size()) {
955 (*packrat_stats)[def_id].misses++;
956 }
957
958 fn(val);
959
960 bool full_memo =
961 !packrat_rule_filter || (def_id < packrat_rule_filter->size() &&
962 (*packrat_rule_filter)[def_id]);
963 if (full_memo) {
964 if (success(len)) { write_packrat_cache(a_s, def_id, len, val); }
965 } else {
966 // Guard-only: undo registration so future calls re-parse
967 cache_registered[idx] = false;
968 }
969 return;
970 }
971 }
972
973 // Semantic values
// NOTE(review): the function header (listing line 974) is missing from this
// extract. The body hands out the SemanticValues slot at index
// value_stack_size: a new one is allocated when the pool is exhausted,
// otherwise the pooled object is reset and reused. The slot is then bound to
// the current input (path, s), value_stack_size is incremented, and a
// reference to the slot is returned.
975 assert(value_stack_size <= value_stack.size());
976 if (value_stack_size == value_stack.size()) {
977 value_stack.emplace_back(std::make_shared<SemanticValues>(this));
978 } else {
979 auto &vs = *value_stack[value_stack_size];
// NOTE(review): tags are cleared only when the value vector itself was
// non-empty — confirm this nesting is intended.
980 if (!vs.empty()) {
981 vs.clear();
982 if (!vs.tags.empty()) { vs.tags.clear(); }
983 }
984 vs.sv_ = std::string_view();
985 vs.choice_count_ = 0;
986 vs.choice_ = 0;
987 if (!vs.tokens.empty()) { vs.tokens.clear(); }
988 }
989
990 auto &vs = *value_stack[value_stack_size++];
991 vs.path = path;
992 vs.ss = s;
993 return vs;
994 }
995
997
998 // Arguments
999 void push_args(std::vector<std::shared_ptr<Ope>> &&args) {
1000 args_stack.emplace_back(std::move(args));
1001 }
1002
1003 void pop_args() { args_stack.pop_back(); }
1004
1005 const std::vector<std::shared_ptr<Ope>> &top_args() const {
1006 return args_stack[args_stack.size() - 1];
1007 }
1008
1009 // Snapshot/Rollback
// Sizes and scalar fields of a SemanticValues (plus the capture list size)
// recorded at one point in time, so rollback() can restore that state.
// Field order matters: instances are created by aggregate initialization.
struct Snapshot {
  size_t sv_size;        // number of semantic values
  size_t sv_tags_size;   // number of tags
  size_t sv_tokens_size; // number of tokens
  std::string_view sv_sv; // matched string
  size_t choice_count;
  size_t choice;
  size_t capture_size; // number of capture entries
};
1019
// NOTE(review): the function header (listing line 1020) is missing from this
// extract. The body captures the current sizes and scalar fields of `vs` and
// the size of capture_entries, in the order rollback() reads them back.
1021 return {vs.size(), vs.tags.size(), vs.tokens.size(), vs.sv_,
1022 vs.choice_count_, vs.choice_, capture_entries.size()};
1023 }
1024
1025 void rollback(SemanticValues &vs, const Snapshot &snap) {
1026 vs.resize(snap.sv_size);
1027 vs.tags.resize(snap.sv_tags_size);
1028 vs.tokens.resize(snap.sv_tokens_size);
1029 vs.sv_ = snap.sv_sv;
1030 vs.choice_count_ = snap.choice_count;
1031 vs.choice_ = snap.choice;
1032 capture_entries.resize(snap.capture_size);
1033 }
1034
1035 // Skip trailing whitespace with trace suppression.
1036 // Returns whitespace length, or -1 on failure.
1037 // No-op (returns 0) if inside a token boundary or no whitespaceOpe.
1038 size_t skip_whitespace(const char *a_s, size_t n, SemanticValues &vs,
1039 std::any &dt);
1040
1041 // Error
1042 void set_error_pos(const char *a_s, const char *literal = nullptr);
1043
1044 // Trace
1045 void trace_enter(const Ope &ope, const char *a_s, size_t n,
1046 const SemanticValues &vs, std::any &dt);
1047 void trace_leave(const Ope &ope, const char *a_s, size_t n,
1048 const SemanticValues &vs, std::any &dt, size_t len);
1049 bool is_traceable(const Ope &ope) const;
1050
1051 // Line info
1052 std::pair<size_t, size_t> line_info(const char *cur) const {
1053 std::call_once(source_line_index_init_, [this]() {
1054 for (size_t pos = 0; pos < l; pos++) {
1055 if (s[pos] == '\n') { source_line_index.push_back(pos); }
1056 }
1057 source_line_index.push_back(l);
1058 });
1059
1060 auto pos = static_cast<size_t>(std::distance(s, cur));
1061
1062 auto it = std::lower_bound(
1063 source_line_index.begin(), source_line_index.end(), pos,
1064 [](size_t element, size_t value) { return element < value; });
1065
1066 auto id = static_cast<size_t>(std::distance(source_line_index.begin(), it));
1067 auto off = pos - (id == 0 ? 0 : source_line_index[id - 1] + 1);
1068 return std::pair(id + 1, off + 1);
1069 }
1070
1071 size_t next_trace_id = 0;
1072 std::vector<size_t> trace_ids;
1074 mutable std::once_flag source_line_index_init_;
1075 mutable std::vector<size_t> source_line_index;
1076};
1077
1078/*
1079 * Parser operators
1080 */
// Abstract base class of all parser operators.
// parse() is the non-virtual entry point (defined outside this extract);
// derived classes implement parse_core() and accept().
1081class Ope {
1082public:
1083 struct Visitor;
1084
1085 virtual ~Ope() = default;
// s/n: input position and remaining length; vs: semantic values being built;
// c: parsing context; dt: user data. The result is a length, or
// static_cast<size_t>(-1) on failure (see success() / fail()).
1086 size_t parse(const char *s, size_t n, SemanticValues &vs, Context &c,
1087 std::any &dt) const;
1088 virtual size_t parse_core(const char *s, size_t n, SemanticValues &vs,
1089 Context &c, std::any &dt) const = 0;
// Visitor double-dispatch hook.
1090 virtual void accept(Visitor &v) = 0;
1091
1092 bool is_token_boundary = false;
// Set to true by the PrioritizedChoice constructors.
1093 bool is_choice_like = false;
1094};
1095
1096// Keyword-guarded identifier data, heap-allocated only for matching Sequences.
1097// Avoids bloating all Sequence objects with bitsets and keyword sets.
// NOTE(review): the struct header (listing line 1098) and two members (lines
// 1103-1104) are missing from this extract; Sequence::parse_keyword_guarded
// reads min_keyword_len and max_keyword_len, which are presumably those
// members.
1099 std::bitset<256> identifier_first; // first char of identifier
1100 std::bitset<256> identifier_rest; // subsequent chars of identifier
1101 std::vector<std::string> exact_keywords; // single-word keywords (lowercase)
1102 std::vector<std::string> prefix_keywords; // first word of compound keywords
1105
// True when `input` equals one of `keywords` (exact, case-sensitive).
static bool matches_any(const std::vector<std::string> &keywords,
                        std::string_view input) {
  for (const auto &candidate : keywords) {
    if (candidate == input) { return true; }
  }
  return false;
}
1111};
1112
1113class Sequence : public Ope {
1114public:
1115 template <typename... Args>
1116 Sequence(const Args &...args)
1117 : opes_{static_cast<std::shared_ptr<Ope>>(args)...} {}
1118 Sequence(const std::vector<std::shared_ptr<Ope>> &opes) : opes_(opes) {}
1119 Sequence(std::vector<std::shared_ptr<Ope>> &&opes) : opes_(std::move(opes)) {}
1120
1121 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1122 std::any &dt) const override {
1123 // Keyword-guarded identifier fast path:
1124 // Fuses !ReservedKeyword <identifier> into scan-then-lookup
1125 if (kw_guard_) {
1126 if (auto result = parse_keyword_guarded(s, n, vs, c, dt)) {
1127 return *result;
1128 }
1129 // nullopt means prefix keyword match — fall through to normal path
1130 }
1131 size_t i = 0;
1132 for (const auto &ope : opes_) {
1133 auto len = ope->parse(s + i, n - i, vs, c, dt);
1134 if (fail(len)) { return len; }
1135 i += len;
1136 }
1137 return i;
1138 }
1139
1140 void accept(Visitor &v) override;
1141
1142 std::vector<std::shared_ptr<Ope>> opes_;
1143
1144private:
1145 friend struct SetupFirstSets;
1146 std::unique_ptr<KeywordGuardData> kw_guard_;
1147
1148 // Returns parse result, or nullopt to fall through to normal path
1149 std::optional<size_t> parse_keyword_guarded(const char *s, size_t n,
1150 SemanticValues &vs, Context &c,
1151 std::any &dt) const {
1152 const auto &kw = *kw_guard_;
1153 if (n < 1 || !kw.identifier_first.test(static_cast<unsigned char>(*s))) {
1154 c.set_error_pos(s);
1155 return static_cast<size_t>(-1);
1156 }
1157 // Scan identifier using bitset
1158 size_t id_len = 1;
1159 while (id_len < n &&
1160 kw.identifier_rest.test(static_cast<unsigned char>(s[id_len]))) {
1161 id_len++;
1162 }
1163 // Skip keyword matching if identifier length is out of range
1164 if (id_len >= kw.min_keyword_len && id_len <= kw.max_keyword_len) {
1165 char lower_buf[64];
1166 std::unique_ptr<char[]> lower_heap;
1167 char *lower = lower_buf;
1168 if (id_len > sizeof(lower_buf)) {
1169 lower_heap.reset(new char[id_len]);
1170 lower = lower_heap.get();
1171 }
1172 std::transform(s, s + id_len, lower, [](unsigned char ch) {
1173 return static_cast<char>(std::tolower(ch));
1174 });
1175 std::string_view lower_sv(lower, id_len);
1176
1177 if (KeywordGuardData::matches_any(kw.exact_keywords, lower_sv)) {
1178 c.set_error_pos(s);
1179 return static_cast<size_t>(-1);
1180 }
1181 if (KeywordGuardData::matches_any(kw.prefix_keywords, lower_sv)) {
1182 return std::nullopt;
1183 }
1184 }
1185 // Success: emit token and consume trailing whitespace
1186 vs.tokens.emplace_back(std::string_view(s, id_len));
1187 auto wl = c.skip_whitespace(s + id_len, n - id_len, vs, dt);
1188 if (fail(wl)) { return wl; }
1189 return id_len + wl;
1190 }
1191};
1192
// Per-expression summary of which bytes can start a match, plus hints used
// when reporting what was expected.
1193struct FirstSet {
1194 // First-Set: set of possible first bytes for an expression.
1195 // Used by PrioritizedChoice to skip alternatives that cannot match.
1196 std::bitset<256> chars; // byte values that can appear as the first byte
1197 bool can_be_empty = false; // true if the expression can match empty string
1198 bool any_char = false; // true if any character can appear (cannot filter)
1199 const char *first_literal = nullptr; // first literal for error reporting
// NOTE(review): listing line 1200 is absent here. The dangling `nullptr;`
// below is presumably the initializer of a `first_rule` member (read as
// `fs.first_rule` in PrioritizedChoice) -- confirm against upstream.
1201 nullptr; // first token rule for error reporting
1202
// Unions `other` into this set: ORs the byte sets and turns on
// can_be_empty / any_char when `other` has them set.
1203 void merge(const FirstSet &other) {
1204 chars |= other.chars;
1205 if (other.can_be_empty) { can_be_empty = true; }
1206 if (other.any_char) { any_char = true; }
1207 // Note: first_literal/first_rule are NOT merged — per-alternative
1208 }
1209};
1210
// Ordered choice: tries the alternatives in `opes_` in order and returns the
// result of the first one that succeeds.
1211class PrioritizedChoice : public Ope {
1212public:
// Builds the choice from a pack of operators; `for_label` disables the
// cut-stack push/pop done in parse_core.
1213 template <typename... Args>
1214 PrioritizedChoice(bool for_label, const Args &...args)
1215 : opes_{static_cast<std::shared_ptr<Ope>>(args)...},
1216 for_label_(for_label) {
1217 is_choice_like = true;
1218 }
// Copies the given alternatives; for_label_ keeps its default (false).
1219 PrioritizedChoice(const std::vector<std::shared_ptr<Ope>> &opes)
1220 : opes_(opes) {
1221 is_choice_like = true;
1222 }
// Takes ownership of the given alternatives; for_label_ stays false.
1223 PrioritizedChoice(std::vector<std::shared_ptr<Ope>> &&opes)
1224 : opes_(std::move(opes)) {
1225 is_choice_like = true;
1226 }
1227
// Returns the length matched by the first successful alternative, or the
// last failure value when none succeeds (initially size_t(-1)).
1228 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1229 std::any &dt) const override {
1230 size_t len = static_cast<size_t>(-1);
1231
// Non-label choices get their own cut flag, popped again on scope exit.
1232 if (!for_label_) { c.cut_stack.push_back(false); }
1233 auto se = scope_exit([&]() {
1234 if (!for_label_) { c.cut_stack.pop_back(); }
1235 });
1236
// `id` is the index of the alternative currently being considered.
1237 size_t id = 0;
1238 for (const auto &ope : opes_) {
1239 // First-Set filtering: skip if next byte cannot start this alternative
1240 if (n > 0 && id < first_sets_.size()) {
1241 const auto &fs = first_sets_[id];
1242 if (!fs.any_char && !fs.can_be_empty &&
1243 !fs.chars.test(static_cast<unsigned char>(*s))) {
// When logging, still record what this skipped alternative expected so the
// error message matches what an actual parse attempt would have reported.
1244 if (c.log && (fs.first_literal || fs.first_rule)) {
1245 if (c.error_info.error_pos <= s) {
// Reset the expected-token list when the error position advances, or when
// this is the first alternative (id == 0).
1246 if (c.error_info.error_pos < s || !(id > 0)) {
1247 c.error_info.error_pos = s;
1248 c.error_info.expected_tokens.clear();
1249 }
1250 if (fs.first_literal) {
1251 c.error_info.add(fs.first_literal, nullptr);
1252 } else {
1253 c.error_info.add(nullptr, fs.first_rule);
1254 }
1255 }
1256 }
1257 id++;
1258 continue;
1259 }
1260 }
1261
// Clear the cut flag before each attempt so only a cut hit inside this
// alternative is observed after it fails.
1262 if (!c.cut_stack.empty()) { c.cut_stack.back() = false; }
1263
1264 auto snap = c.snapshot(vs);
// NOTE(review): listing line 1265 is absent here -- confirm against upstream.
1266
1267 len = ope->parse(s, n, vs, c, dt);
1268
// On success remember which alternative matched and how many there were.
1269 if (success(len)) {
1270 vs.choice_count_ = opes_.size();
1271 vs.choice_ = id;
1272 break;
1273 }
1274
1275 c.rollback(vs, snap);
1276
// A cut reached inside the failed alternative stops further alternatives.
1277 if (!c.cut_stack.empty() && c.cut_stack.back()) { break; }
1278
1279 id++;
1280 }
1281
// NOTE(review): listing line 1282 is absent here -- confirm against upstream.
1283 return len;
1284 }
1285
1286 void accept(Visitor &v) override;
1287
// Number of alternatives.
1288 size_t size() const { return opes_.size(); }
1289
1290 std::vector<std::shared_ptr<Ope>> opes_;
1291 bool for_label_ = false;
// Indexed in parallel with opes_; parse_core only filters while
// id < first_sets_.size().
1292 std::vector<FirstSet> first_sets_;
1293};
1294
1295class Repetition : public Ope {
1296public:
1297 Repetition(const std::shared_ptr<Ope> &ope, size_t min, size_t max)
1298 : ope_(ope), min_(min), max_(max) {}
1299
1300 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1301 std::any &dt) const override {
1302 // ISpan fast path: tight loop for ASCII CharacterClass repetition.
1303 // Safe because each ASCII match is exactly 1 byte, so byte count == match
1304 // count.
1305 if (span_bitset_) {
1306 const auto &bitset = *span_bitset_;
1307 size_t i = 0;
1308 if (max_ == std::numeric_limits<size_t>::max()) {
1309 // Unbounded repetition (*, +): no per-iteration max check
1310 while (i < n && bitset.test(static_cast<unsigned char>(s[i]))) {
1311 i++;
1312 }
1313 } else {
1314 auto limit = std::min(n, max_);
1315 while (i < limit && bitset.test(static_cast<unsigned char>(s[i]))) {
1316 i++;
1317 }
1318 }
1319 if (i < min_) {
1320 c.set_error_pos(s + i);
1321 return static_cast<size_t>(-1);
1322 }
1323 return i;
1324 }
1325
1326 size_t count = 0;
1327 size_t i = 0;
1328 while (count < min_) {
1329 auto len = ope_->parse(s + i, n - i, vs, c, dt);
1330 if (fail(len)) { return len; }
1331 i += len;
1332 count++;
1333 }
1334
1335 while (count < max_) {
1336 auto snap = c.snapshot(vs);
1337 auto len = ope_->parse(s + i, n - i, vs, c, dt);
1338 if (fail(len)) {
1339 c.rollback(vs, snap);
1340 break;
1341 }
1342 i += len;
1343 count++;
1344 }
1345 return i;
1346 }
1347
1348 void accept(Visitor &v) override;
1349
1350 bool is_zom() const {
1351 return min_ == 0 && max_ == std::numeric_limits<size_t>::max();
1352 }
1353
1354 static std::shared_ptr<Repetition> zom(const std::shared_ptr<Ope> &ope) {
1355 return std::make_shared<Repetition>(ope, 0,
1356 std::numeric_limits<size_t>::max());
1357 }
1358
1359 static std::shared_ptr<Repetition> oom(const std::shared_ptr<Ope> &ope) {
1360 return std::make_shared<Repetition>(ope, 1,
1361 std::numeric_limits<size_t>::max());
1362 }
1363
1364 static std::shared_ptr<Repetition> opt(const std::shared_ptr<Ope> &ope) {
1365 return std::make_shared<Repetition>(ope, 0, 1);
1366 }
1367
1368 std::shared_ptr<Ope> ope_;
1369 size_t min_;
1370 size_t max_;
1371 const std::bitset<256> *span_bitset_ =
1372 nullptr; // non-owning, set by SetupFirstSets
1373};
1374
1375class AndPredicate : public Ope {
1376public:
1377 AndPredicate(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1378
1379 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1380 std::any &dt) const override {
1381 auto snap = c.snapshot(vs);
1382 auto len = ope_->parse(s, n, vs, c, dt);
1383 c.rollback(vs, snap); // Always rollback — predicates consume nothing
1384 if (success(len)) {
1385 return 0;
1386 } else {
1387 return len;
1388 }
1389 }
1390
1391 void accept(Visitor &v) override;
1392
1393 std::shared_ptr<Ope> ope_;
1394};
1395
1396class NotPredicate : public Ope {
1397public:
1398 NotPredicate(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1399
1400 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1401 std::any &dt) const override {
1402 auto snap = c.snapshot(vs);
1403 auto len = ope_->parse(s, n, vs, c, dt);
1404 c.rollback(vs, snap); // Always rollback — predicates consume nothing
1405 if (success(len)) {
1406 c.set_error_pos(s);
1407 return static_cast<size_t>(-1);
1408 } else {
1409 return 0;
1410 }
1411 }
1412
1413 void accept(Visitor &v) override;
1414
1415 std::shared_ptr<Ope> ope_;
1416};
1417
// Operator built from a list of words; construction forwards the words and
// the ignore_case flag to `trie_`. Marked as choice-like.
1418class Dictionary : public Ope, public std::enable_shared_from_this<Dictionary> {
1419public:
1420 Dictionary(const std::vector<std::string> &v, bool ignore_case)
1421 : trie_(v, ignore_case) {
1422 is_choice_like = true;
1423 }
1424
// Defined out of line.
1425 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1426 std::any &dt) const override;
1427
1428 void accept(Visitor &v) override;
1429
// NOTE(review): listing line 1430 is absent; presumably the declaration of
// the `trie_` member initialized above -- confirm against upstream.
1431};
1432
// Literal-string operator. Stores the literal and, for case-insensitive
// matching, a pre-lowered copy.
1433class LiteralString : public Ope,
1434 public std::enable_shared_from_this<LiteralString> {
1435public:
// Takes ownership of `s`. lower_lit_ is computed from lit_ only when
// ignore_case is set; otherwise it stays empty.
1436 LiteralString(std::string &&s, bool ignore_case)
1437 : lit_(std::move(s)), ignore_case_(ignore_case),
1438 lower_lit_(ignore_case ? to_lower(lit_) : std::string()),
1439 is_word_(false) {}
1440
// Copying overload with the same initialization as above.
1441 LiteralString(const std::string &s, bool ignore_case)
1442 : lit_(s), ignore_case_(ignore_case),
1443 lower_lit_(ignore_case ? to_lower(lit_) : std::string()),
1444 is_word_(false) {}
1445
// Defined out of line.
1446 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1447 std::any &dt) const override;
1448
1449 void accept(Visitor &v) override;
1450
1451 std::string lit_;
// NOTE(review): listing line 1452 is absent; presumably the `ignore_case_`
// member set by both constructors -- confirm against upstream.
1453 std::string lower_lit_; // pre-computed for ignore_case
// Mutable so they can be written from const member functions; presumably
// is_word_ is computed lazily under init_is_word_ -- confirm in parse_core.
1454 mutable std::once_flag init_is_word_;
1455 mutable bool is_word_;
1456};
1457
// Matches one code point against a list of inclusive [first, second] ranges,
// optionally negated and/or case-insensitive.
1458class CharacterClass : public Ope,
1459 public std::enable_shared_from_this<CharacterClass> {
1460public:
// Builds the ranges from a class body string. After decoding, an "a-b"
// triple becomes one range and any other code point becomes a one-element
// range.
1461 CharacterClass(const std::string &s, bool negated, bool ignore_case)
1462 : negated_(negated), ignore_case_(ignore_case) {
1463 auto chars = decode(s.data(), s.length());
1464 auto i = 0u;
1465 while (i < chars.size()) {
// A '-' only forms a range when a code point follows it.
1466 if (i + 2 < chars.size() && chars[i + 1] == '-') {
1467 auto cp1 = chars[i];
1468 auto cp2 = chars[i + 2];
1469 ranges_.emplace_back(std::pair(cp1, cp2));
1470 i += 3;
1471 } else {
1472 auto cp = chars[i];
1473 ranges_.emplace_back(std::pair(cp, cp));
1474 i += 1;
1475 }
1476 }
1477 assert(!ranges_.empty());
// NOTE(review): listing line 1478 is absent here -- confirm against upstream.
1479 }
1480
// Builds the class from already-decoded ranges.
1481 CharacterClass(const std::vector<std::pair<char32_t, char32_t>> &ranges,
1482 bool negated, bool ignore_case)
1483 : ranges_(ranges), negated_(negated), ignore_case_(ignore_case) {
1484 assert(!ranges_.empty());
// NOTE(review): listing line 1485 is absent here -- confirm against upstream.
1486 }
1487
// Decodes one code point at `s`. Returns its encoded length when the class
// accepts it; otherwise records `s` as the error position and fails.
// Empty input always fails.
1488 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1489 Context &c, std::any & /*dt*/) const override {
1490 if (n < 1) {
1491 c.set_error_pos(s);
1492 return static_cast<size_t>(-1);
1493 }
1494
1495 char32_t cp = 0;
1496 auto len = decode_codepoint(s, n, cp);
1497
// Being inside any range decides the outcome at once: success for a
// plain class, failure for a negated one.
1498 for (const auto &range : ranges_) {
1499 if (in_range(range, cp)) {
1500 if (negated_) {
1501 c.set_error_pos(s);
1502 return static_cast<size_t>(-1);
1503 } else {
1504 return len;
1505 }
1506 }
1507 }
1508
// No range contained the code point.
1509 if (negated_) {
1510 return len;
1511 } else {
1512 c.set_error_pos(s);
1513 return static_cast<size_t>(-1);
1514 }
1515 }
1516
1517 void accept(Visitor &v) override;
1518
1519 friend struct ComputeFirstSet;
1520
1521 bool is_ascii_only() const { return is_ascii_only_; }
1522 const std::bitset<256> &ascii_bitset() const { return ascii_bitset_; }
1523
1524private:
// Inclusive range test. With ignore_case_ all three values are passed
// through std::tolower before comparing.
// NOTE(review): std::tolower requires an argument representable as
// unsigned char (or EOF); code points above 0xFF look out of contract
// here -- confirm.
1525 bool in_range(const std::pair<char32_t, char32_t> &range, char32_t cp) const {
1526 if (ignore_case_) {
1527 auto cpl = std::tolower(cp);
1528 return std::tolower(range.first) <= cpl &&
1529 cpl <= std::tolower(range.second);
1530 } else {
1531 return range.first <= cp && cp <= range.second;
1532 }
1533 }
1534
// NOTE(review): listing line 1535 (the signature of the helper whose body
// follows) is absent -- confirm against upstream. The body fills
// ascii_bitset_ / is_ascii_only_ only for non-negated classes whose ranges
// all lie within 0x00-0x7F.
1536 if (negated_) { return; } // negated classes can match non-ASCII
1537 for (const auto &[lo, hi] : ranges_) {
1538 if (lo > 0x7F || hi > 0x7F) { return; }
1539 }
1540 is_ascii_only_ = true;
1541 for (const auto &[lo, hi] : ranges_) {
1542 for (auto cp = lo; cp <= hi; cp++) {
1543 auto ch = static_cast<unsigned char>(cp);
1544 ascii_bitset_.set(ch);
// Case-insensitive classes also accept the other-case form of each byte.
1545 if (ignore_case_) {
1546 ascii_bitset_.set(static_cast<unsigned char>(std::toupper(ch)));
1547 ascii_bitset_.set(static_cast<unsigned char>(std::tolower(ch)));
1548 }
1549 }
1550 }
1551 }
1552
1553 std::vector<std::pair<char32_t, char32_t>> ranges_;
// NOTE(review): listing lines 1554-1555 are absent; presumably the
// `negated_` and `ignore_case_` members used above -- confirm.
1556 std::bitset<256> ascii_bitset_;
1557 bool is_ascii_only_ = false;
1558};
1559
1560class Character : public Ope, public std::enable_shared_from_this<Character> {
1561public:
1562 Character(char32_t ch) : ch_(ch) {}
1563
1564 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1565 Context &c, std::any & /*dt*/) const override {
1566 if (n < 1) {
1567 c.set_error_pos(s);
1568 return static_cast<size_t>(-1);
1569 }
1570
1571 char32_t cp = 0;
1572 auto len = decode_codepoint(s, n, cp);
1573
1574 if (cp != ch_) {
1575 c.set_error_pos(s);
1576 return static_cast<size_t>(-1);
1577 }
1578 return len;
1579 }
1580
1581 void accept(Visitor &v) override;
1582
1583 char32_t ch_;
1584};
1585
1586class AnyCharacter : public Ope,
1587 public std::enable_shared_from_this<AnyCharacter> {
1588public:
1589 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1590 Context &c, std::any & /*dt*/) const override {
1591 auto len = codepoint_length(s, n);
1592 if (len < 1) {
1593 c.set_error_pos(s);
1594 return static_cast<size_t>(-1);
1595 }
1596 return len;
1597 }
1598
1599 void accept(Visitor &v) override;
1600};
1601
1602class CaptureScope : public Ope {
1603public:
1604 CaptureScope(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1605
1606 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1607 std::any &dt) const override {
1608 auto cap_snap = c.capture_entries.size();
1609 auto len = ope_->parse(s, n, vs, c, dt);
1610 c.capture_entries.resize(cap_snap); // Always rollback (isolation)
1611 return len;
1612 }
1613
1614 void accept(Visitor &v) override;
1615
1616 std::shared_ptr<Ope> ope_;
1617};
1618
// Runs an inner operator and, on success, reports the matched span to a
// callback.
1619class Capture : public Ope {
1620public:
// Callback receiving the start of the match, its length, and the context.
1621 using MatchAction = std::function<void(const char *s, size_t n, Context &c)>;
1622
1623 Capture(const std::shared_ptr<Ope> &ope, MatchAction ma)
1624 : ope_(ope), match_action_(ma) {}
1625
// Returns the inner result unchanged. The callback runs only when the inner
// parse succeeded and a callback is set.
1626 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1627 std::any &dt) const override {
1628 auto len = ope_->parse(s, n, vs, c, dt);
1629 if (success(len) && match_action_) { match_action_(s, len, c); }
1630 return len;
1631 }
1632
1633 void accept(Visitor &v) override;
1634
1635 std::shared_ptr<Ope> ope_;
// NOTE(review): listing line 1636 is absent; presumably the
// `match_action_` member used above -- confirm against upstream.
1637};
1638
1639class TokenBoundary : public Ope {
1640public:
1641 TokenBoundary(const std::shared_ptr<Ope> &ope) : ope_(ope) {
1642 is_token_boundary = true;
1643 }
1644
1645 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1646 std::any &dt) const override;
1647
1648 void accept(Visitor &v) override;
1649
1650 std::shared_ptr<Ope> ope_;
1651};
1652
1653class Ignore : public Ope {
1654public:
1655 Ignore(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1656
1657 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1658 Context &c, std::any &dt) const override {
1659 auto &chvs = c.push_semantic_values_scope();
1660 auto se = scope_exit([&]() { c.pop_semantic_values_scope(); });
1661 return ope_->parse(s, n, chvs, c, dt);
1662 }
1663
1664 void accept(Visitor &v) override;
1665
1666 std::shared_ptr<Ope> ope_;
1667};
1668
1669using Parser = std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1670 std::any &dt)>;
1671
// Operator that delegates parsing to a user-supplied function.
1672class User : public Ope {
1673public:
1674 User(Parser fn) : fn_(fn) {}
// Forwards to fn_ and returns its result; the parse context is not passed on.
1675 size_t parse_core(const char *s, size_t n, SemanticValues &vs,
1676 Context & /*c*/, std::any &dt) const override {
1677 assert(fn_);
1678 return fn_(s, n, vs, dt);
1679 }
1680 void accept(Visitor &v) override;
// The type below has the same signature as the `Parser` alias.
// NOTE(review): listing line 1683 is absent; presumably it names this
// member `fn_` -- confirm against upstream.
1681 std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1682 std::any &dt)>
1684};
1685
1686class WeakHolder : public Ope {
1687public:
1688 WeakHolder(const std::shared_ptr<Ope> &ope) : weak_(ope) {}
1689
1690 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1691 std::any &dt) const override {
1692 auto ope = weak_.lock();
1693 assert(ope);
1694 return ope->parse(s, n, vs, c, dt);
1695 }
1696
1697 void accept(Visitor &v) override;
1698
1699 std::weak_ptr<Ope> weak_;
1700};
1701
// Operator owned by a Definition (`outer`); holds that definition's
// operator in ope_.
1702class Holder : public Ope {
1703public:
1704 Holder(Definition *outer) : outer_(outer) {}
1705
// Defined out of line.
1706 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1707 std::any &dt) const override;
1708
1709 void accept(Visitor &v) override;
1710
// Defined out of line.
1711 std::any reduce(SemanticValues &vs, std::any &dt,
1712 const std::any &predicate_data) const;
1713
1714 const std::string &name() const;
1715 const std::string &trace_name() const;
1716
1717 std::shared_ptr<Ope> ope_;
// NOTE(review): listing line 1718 is absent; presumably the `outer_`
// member set by the constructor -- confirm against upstream.
// Mutable so trace_name() const can fill them; presumably initialized once
// under trace_name_init_ -- confirm in the definition.
1719 mutable std::once_flag trace_name_init_;
1720 mutable std::string trace_name_;
1721
1722 friend class Definition;
1723};
1724
1725using Grammar = std::unordered_map<std::string, Definition>;
1726
// Reference to a named rule (or macro invocation) within a grammar.
1727class Reference : public Ope, public std::enable_shared_from_this<Reference> {
1728public:
// Stores the grammar reference, the name, a source pointer `s`, the macro
// flag and the macro arguments. rule_ starts as nullptr and iarg_ as 0.
1729 Reference(const Grammar &grammar, const std::string &name, const char *s,
1730 bool is_macro, const std::vector<std::shared_ptr<Ope>> &args)
1731 : grammar_(grammar), name_(name), s_(s), is_macro_(is_macro), args_(args),
1732 rule_(nullptr), iarg_(0) {}
1733
// Defined out of line.
1734 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1735 std::any &dt) const override;
1736
1737 void accept(Visitor &v) override;
1738
1739 std::shared_ptr<Ope> get_core_operator() const;
1740
// NOTE(review): listing line 1741 is absent; presumably the `grammar_`
// member initialized above -- confirm against upstream.
1742 const std::string name_;
// Presumably the position of this reference in the grammar text, kept for
// diagnostics -- confirm.
1743 const char *s_;
1744
1745 const bool is_macro_;
1746 const std::vector<std::shared_ptr<Ope>> args_;
1747
// NOTE(review): listing line 1748 is absent; presumably the `rule_`
// member initialized above -- confirm against upstream.
1749 size_t iarg_;
1750};
1751
1752class Whitespace : public Ope {
1753public:
1754 Whitespace(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1755
1756 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1757 std::any &dt) const override {
1758 if (c.in_whitespace) { return 0; }
1759 c.in_whitespace = true;
1760 auto se = scope_exit([&]() { c.in_whitespace = false; });
1761 return ope_->parse(s, n, vs, c, dt);
1762 }
1763
1764 void accept(Visitor &v) override;
1765
1766 std::shared_ptr<Ope> ope_;
1767};
1768
1769class BackReference : public Ope {
1770public:
1771 BackReference(std::string &&name) : name_(std::move(name)) {}
1772
1773 BackReference(const std::string &name) : name_(name) {}
1774
1775 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1776 std::any &dt) const override;
1777
1778 void accept(Visitor &v) override;
1779
1780 std::string name_;
1781};
1782
// Expression operator built from an atom operator, a binary-operator
// operator and a table of per-operator information.
1783class PrecedenceClimbing : public Ope {
1784public:
// Maps an operator spelling to a (size_t, char) pair; presumably
// (precedence level, associativity) -- confirm against the out-of-line
// parse_expression.
1785 using BinOpeInfo = std::map<std::string_view, std::pair<size_t, char>>;
1786
1787 PrecedenceClimbing(const std::shared_ptr<Ope> &atom,
1788 const std::shared_ptr<Ope> &binop, const BinOpeInfo &info,
1789 const Definition &rule)
1790 : atom_(atom), binop_(binop), info_(info), rule_(rule) {}
1791
// Entry point: parses an expression starting from minimum precedence 0.
1792 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1793 std::any &dt) const override {
1794 return parse_expression(s, n, vs, c, dt, 0);
1795 }
1796
1797 void accept(Visitor &v) override;
1798
1799 std::shared_ptr<Ope> atom_;
1800 std::shared_ptr<Ope> binop_;
// NOTE(review): listing lines 1801-1802 are absent; presumably the `info_`
// and `rule_` members initialized above -- confirm against upstream.
1803
1804private:
// Defined out of line.
1805 size_t parse_expression(const char *s, size_t n, SemanticValues &vs,
1806 Context &c, std::any &dt, size_t min_prec) const;
1807
// NOTE(review): listing line 1808 is absent here -- confirm against upstream.
1809};
1810
1811class Recovery : public Ope {
1812public:
1813 Recovery(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1814
1815 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1816 std::any &dt) const override;
1817
1818 void accept(Visitor &v) override;
1819
1820 std::shared_ptr<Ope> ope_;
1821};
1822
1823class Cut : public Ope, public std::enable_shared_from_this<Cut> {
1824public:
1825 size_t parse_core(const char * /*s*/, size_t /*n*/, SemanticValues & /*vs*/,
1826 Context &c, std::any & /*dt*/) const override {
1827 if (!c.cut_stack.empty()) { c.cut_stack.back() = true; }
1828 return 0;
1829 }
1830
1831 void accept(Visitor &v) override;
1832};
1833
1834/*
1835 * Factories
1836 */
1837template <typename... Args> std::shared_ptr<Ope> seq(Args &&...args) {
1838 return std::make_shared<Sequence>(static_cast<std::shared_ptr<Ope>>(args)...);
1839}
1840
1841template <typename... Args> std::shared_ptr<Ope> cho(Args &&...args) {
1842 return std::make_shared<PrioritizedChoice>(
1843 false, static_cast<std::shared_ptr<Ope>>(args)...);
1844}
1845
1846template <typename... Args> std::shared_ptr<Ope> cho4label_(Args &&...args) {
1847 return std::make_shared<PrioritizedChoice>(
1848 true, static_cast<std::shared_ptr<Ope>>(args)...);
1849}
1850
1851inline std::shared_ptr<Ope> zom(const std::shared_ptr<Ope> &ope) {
1852 return Repetition::zom(ope);
1853}
1854
1855inline std::shared_ptr<Ope> oom(const std::shared_ptr<Ope> &ope) {
1856 return Repetition::oom(ope);
1857}
1858
1859inline std::shared_ptr<Ope> opt(const std::shared_ptr<Ope> &ope) {
1860 return Repetition::opt(ope);
1861}
1862
1863inline std::shared_ptr<Ope> rep(const std::shared_ptr<Ope> &ope, size_t min,
1864 size_t max) {
1865 return std::make_shared<Repetition>(ope, min, max);
1866}
1867
1868inline std::shared_ptr<Ope> apd(const std::shared_ptr<Ope> &ope) {
1869 return std::make_shared<AndPredicate>(ope);
1870}
1871
1872inline std::shared_ptr<Ope> npd(const std::shared_ptr<Ope> &ope) {
1873 return std::make_shared<NotPredicate>(ope);
1874}
1875
1876inline std::shared_ptr<Ope> dic(const std::vector<std::string> &v,
1877 bool ignore_case) {
1878 return std::make_shared<Dictionary>(v, ignore_case);
1879}
1880
1881inline std::shared_ptr<Ope> lit(std::string &&s) {
1882 return std::make_shared<LiteralString>(s, false);
1883}
1884
1885inline std::shared_ptr<Ope> liti(std::string &&s) {
1886 return std::make_shared<LiteralString>(s, true);
1887}
1888
1889inline std::shared_ptr<Ope> cls(const std::string &s) {
1890 return std::make_shared<CharacterClass>(s, false, false);
1891}
1892
1893inline std::shared_ptr<Ope>
1894cls(const std::vector<std::pair<char32_t, char32_t>> &ranges,
1895 bool ignore_case = false) {
1896 return std::make_shared<CharacterClass>(ranges, false, ignore_case);
1897}
1898
1899inline std::shared_ptr<Ope> ncls(const std::string &s) {
1900 return std::make_shared<CharacterClass>(s, true, false);
1901}
1902
1903inline std::shared_ptr<Ope>
1904ncls(const std::vector<std::pair<char32_t, char32_t>> &ranges,
1905 bool ignore_case = false) {
1906 return std::make_shared<CharacterClass>(ranges, true, ignore_case);
1907}
1908
1909inline std::shared_ptr<Ope> chr(char32_t dt) {
1910 return std::make_shared<Character>(dt);
1911}
1912
1913inline std::shared_ptr<Ope> dot() { return std::make_shared<AnyCharacter>(); }
1914
1915inline std::shared_ptr<Ope> csc(const std::shared_ptr<Ope> &ope) {
1916 return std::make_shared<CaptureScope>(ope);
1917}
1918
// Capture operator around `ope` that invokes `ma` on a successful match.
// NOTE(review): listing line 1920 (the rest of the parameter list, which
// presumably declares `ma`) is absent -- confirm against upstream.
1919inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope> &ope,
1921 return std::make_shared<Capture>(ope, ma);
1922}
1923
1924inline std::shared_ptr<Ope> tok(const std::shared_ptr<Ope> &ope) {
1925 return std::make_shared<TokenBoundary>(ope);
1926}
1927
1928inline std::shared_ptr<Ope> ign(const std::shared_ptr<Ope> &ope) {
1929 return std::make_shared<Ignore>(ope);
1930}
1931
1932inline std::shared_ptr<Ope>
1933usr(std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1934 std::any &dt)>
1935 fn) {
1936 return std::make_shared<User>(fn);
1937}
1938
1939inline std::shared_ptr<Ope> ref(const Grammar &grammar, const std::string &name,
1940 const char *s, bool is_macro,
1941 const std::vector<std::shared_ptr<Ope>> &args) {
1942 return std::make_shared<Reference>(grammar, name, s, is_macro, args);
1943}
1944
1945inline std::shared_ptr<Ope> wsp(const std::shared_ptr<Ope> &ope) {
1946 return std::make_shared<Whitespace>(std::make_shared<Ignore>(ope));
1947}
1948
1949inline std::shared_ptr<Ope> bkr(std::string &&name) {
1950 return std::make_shared<BackReference>(name);
1951}
1952
// Precedence-climbing operator built from an atom, a binary-operator
// operator, the operator table and the owning rule.
1953inline std::shared_ptr<Ope> pre(const std::shared_ptr<Ope> &atom,
1954 const std::shared_ptr<Ope> &binop,
// NOTE(review): listing line 1955 (presumably the `info` parameter) is
// absent -- confirm against upstream.
1956 const Definition &rule) {
1957 return std::make_shared<PrecedenceClimbing>(atom, binop, info, rule);
1958}
1959
1960inline std::shared_ptr<Ope> rec(const std::shared_ptr<Ope> &ope) {
1961 return std::make_shared<Recovery>(ope);
1962}
1963
1964inline std::shared_ptr<Ope> cut() { return std::make_shared<Cut>(); }
1965
1966/*
1967 * Visitor
1968 */
// NOTE(review): the line that opens this struct (listing line 1969) is
// absent from the listing -- confirm against upstream.
// Visitor interface: one virtual visit() overload per operator type, each
// with an empty default body so subclasses override only what they need.
1970 virtual ~Visitor() {}
1971 virtual void visit(Sequence &) {}
1972 virtual void visit(PrioritizedChoice &) {}
1973 virtual void visit(Repetition &) {}
1974 virtual void visit(AndPredicate &) {}
1975 virtual void visit(NotPredicate &) {}
1976 virtual void visit(Dictionary &) {}
1977 virtual void visit(LiteralString &) {}
1978 virtual void visit(CharacterClass &) {}
1979 virtual void visit(Character &) {}
1980 virtual void visit(AnyCharacter &) {}
1981 virtual void visit(CaptureScope &) {}
1982 virtual void visit(Capture &) {}
1983 virtual void visit(TokenBoundary &) {}
1984 virtual void visit(Ignore &) {}
1985 virtual void visit(User &) {}
1986 virtual void visit(WeakHolder &) {}
1987 virtual void visit(Holder &) {}
1988 virtual void visit(Reference &) {}
1989 virtual void visit(Whitespace &) {}
1990 virtual void visit(BackReference &) {}
1991 virtual void visit(PrecedenceClimbing &) {}
1992 virtual void visit(Recovery &) {}
1993 virtual void visit(Cut &) {}
1994};
1995
// NOTE(review): the lines that open this struct (listing lines 1996-1997)
// are absent from the listing -- confirm against upstream.
// Traversing visitor: each overload below forwards the visitor to the
// child operator(s) of the node it is given.
1998 void visit(Sequence &ope) override {
1999 for (auto &op : ope.opes_) {
2000 op->accept(*this);
2001 }
2002 }
2003 void visit(PrioritizedChoice &ope) override {
2004 for (auto &op : ope.opes_) {
2005 op->accept(*this);
2006 }
2007 }
2008 void visit(Repetition &ope) override { ope.ope_->accept(*this); }
2009 void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
2010 void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
2011 void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
2012 void visit(Capture &ope) override { ope.ope_->accept(*this); }
2013 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
2014 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
// NOTE(review): the result of lock() is dereferenced without a null check;
// this assumes the target is still alive -- confirm.
2015 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2016 void visit(Holder &ope) override { ope.ope_->accept(*this); }
2017 void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
2018 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
// Only the atom is traversed; binop_ is not visited here.
2019 void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
2020};
2021
// NOTE(review): the line that opens this struct (listing line 2022) is
// absent from the listing -- confirm against upstream.
// Visitor that records a display name for the visited operator: a fixed
// string per operator type, or the holder's trace name for a Holder.
2023 using Ope::Visitor::visit;
2024
2025 void visit(Sequence &) override { name_ = "Sequence"; }
2026 void visit(PrioritizedChoice &) override { name_ = "PrioritizedChoice"; }
2027 void visit(Repetition &) override { name_ = "Repetition"; }
2028 void visit(AndPredicate &) override { name_ = "AndPredicate"; }
2029 void visit(NotPredicate &) override { name_ = "NotPredicate"; }
2030 void visit(Dictionary &) override { name_ = "Dictionary"; }
2031 void visit(LiteralString &) override { name_ = "LiteralString"; }
2032 void visit(CharacterClass &) override { name_ = "CharacterClass"; }
2033 void visit(Character &) override { name_ = "Character"; }
2034 void visit(AnyCharacter &) override { name_ = "AnyCharacter"; }
2035 void visit(CaptureScope &) override { name_ = "CaptureScope"; }
2036 void visit(Capture &) override { name_ = "Capture"; }
2037 void visit(TokenBoundary &) override { name_ = "TokenBoundary"; }
2038 void visit(Ignore &) override { name_ = "Ignore"; }
2039 void visit(User &) override { name_ = "User"; }
2040 void visit(WeakHolder &) override { name_ = "WeakHolder"; }
// Points into the string returned by trace_name(); get() copies it into a
// std::string before returning.
2041 void visit(Holder &ope) override { name_ = ope.trace_name().data(); }
2042 void visit(Reference &) override { name_ = "Reference"; }
2043 void visit(Whitespace &) override { name_ = "Whitespace"; }
2044 void visit(BackReference &) override { name_ = "BackReference"; }
2045 void visit(PrecedenceClimbing &) override { name_ = "PrecedenceClimbing"; }
2046 void visit(Recovery &) override { name_ = "Recovery"; }
2047 void visit(Cut &) override { name_ = "Cut"; }
2048
// Visits `ope` with a fresh visitor and returns the recorded name as a
// std::string.
2049 static std::string get(Ope &ope) {
2050 TraceOpeName vis;
2051 ope.accept(vis);
2052 return vis.name_;
2053 }
2054
2055private:
2056 const char *name_ = nullptr;
2057};
2058
2061
// NOTE(review): the lines that open this struct (listing lines 2059-2060)
// are absent from the listing -- confirm against upstream.
// The three overrides are defined out of line. `ids` maps a pointer to a
// size_t id; presumably the pointers are definitions -- confirm in the
// out-of-line visit() bodies.
2062 void visit(Holder &ope) override;
2063 void visit(Reference &ope) override;
2064 void visit(PrecedenceClimbing &ope) override;
2065
2066 std::unordered_map<void *, size_t> ids;
2067};
2068
// NOTE(review): the line that opens this struct (listing line 2069) is
// absent from the listing -- confirm against upstream.
// Visitor answering "is this operator a literal token?": true for a
// LiteralString, a Dictionary, or a PrioritizedChoice whose alternatives
// all pass the same check; false for everything else.
2070 using Ope::Visitor::visit;
2071
// result_ stays false as soon as one alternative fails the check.
2072 void visit(PrioritizedChoice &ope) override {
2073 for (const auto &op : ope.opes_) {
2074 if (!IsLiteralToken::check(*op)) { return; }
2075 }
2076 result_ = true;
2077 }
2078
2079 void visit(Dictionary &) override { result_ = true; }
2080 void visit(LiteralString &) override { result_ = true; }
2081
// Runs a fresh visitor over `ope` and returns its verdict.
2082 static bool check(Ope &ope) {
2083 IsLiteralToken vis;
2084 ope.accept(vis);
2085 return vis.result_;
2086 }
2087
2088private:
2089 bool result_ = false;
2090};
2091
2094
// NOTE(review): the lines that open this struct (listing lines 2092-2093)
// are absent from the listing -- confirm against upstream.
// Visitor that records whether a token boundary and/or a WeakHolder (rule)
// is encountered; predicates are not descended into.
2095 void visit(TokenBoundary &) override { has_token_boundary_ = true; }
2096 void visit(AndPredicate &) override {}
2097 void visit(NotPredicate &) override {}
2098 void visit(WeakHolder &) override { has_rule_ = true; }
2099 void visit(Reference &ope) override;
2100
// True when `ope` is a literal token, or when the visit found a token
// boundary, or found no rule at all.
2101 static bool is_token(Ope &ope) {
2102 if (IsLiteralToken::check(ope)) { return true; }
2103
2104 TokenChecker vis;
2105 ope.accept(vis);
2106 return vis.has_token_boundary_ || !vis.has_rule_;
2107 }
2108
2109private:
// NOTE(review): listing line 2110 is absent; presumably the
// `has_token_boundary_` member used above -- confirm against upstream.
2111 bool has_rule_ = false;
2112};
2113
// NOTE(review): the line that opens this struct (listing line 2114) is
// absent from the listing -- confirm against upstream.
// Visitor that looks through TokenBoundary, Ignore and Recovery wrappers
// and records a pointer to the text of a LiteralString it reaches.
2115 using Ope::Visitor::visit;
2116
// The pointer refers to the LiteralString's own storage (lit_).
2117 void visit(LiteralString &ope) override { token_ = ope.lit_.data(); }
2118 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
2119 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
2120 void visit(Reference &ope) override;
2121 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
2122
// Returns the recorded literal, or nullptr when none was reached.
2123 static const char *token(Ope &ope) {
2124 FindLiteralToken vis;
2125 ope.accept(vis);
2126 return vis.token_;
2127 }
2128
2129private:
2130 const char *token_ = nullptr;
2131};
2132
2135
// NOTE(review): the lines that open this struct (listing lines 2133-2134)
// are absent from the listing -- confirm against upstream.
// Visitor for the rule called `name`. `done_` is set by operators that are
// treated as ending the search along a path; `error_s` is set elsewhere
// (presumably in the out-of-line Reference overload -- confirm) and stops
// the traversal once non-null.
2136 DetectLeftRecursion(const std::string &name) : name_(name) {}
2137
// Visits the elements in order and stops at the first one that sets done_
// or reports an error (an error also sets done_).
2138 void visit(Sequence &ope) override {
2139 for (const auto &op : ope.opes_) {
2140 op->accept(*this);
2141 if (done_) {
2142 break;
2143 } else if (error_s) {
2144 done_ = true;
2145 break;
2146 }
2147 }
2148 }
// Visits every alternative; only an error stops the loop early.
2149 void visit(PrioritizedChoice &ope) override {
2150 for (const auto &op : ope.opes_) {
2151 op->accept(*this);
2152 if (error_s) {
2153 done_ = true;
2154 break;
2155 }
2156 }
2157 }
// After visiting the child, done_ is overwritten: true only when at least
// one repetition is required.
2158 void visit(Repetition &ope) override {
2159 ope.ope_->accept(*this);
2160 done_ = ope.min_ > 0;
2161 }
// Predicates are visited but always leave done_ false.
2162 void visit(AndPredicate &ope) override {
2163 ope.ope_->accept(*this);
2164 done_ = false;
2165 }
2166 void visit(NotPredicate &ope) override {
2167 ope.ope_->accept(*this);
2168 done_ = false;
2169 }
2170 void visit(Dictionary &) override { done_ = true; }
// An empty literal leaves done_ false.
2171 void visit(LiteralString &ope) override { done_ = !ope.lit_.empty(); }
2172 void visit(CharacterClass &) override { done_ = true; }
2173 void visit(Character &) override { done_ = true; }
2174 void visit(AnyCharacter &) override { done_ = true; }
2175 void visit(User &) override { done_ = true; }
2176 void visit(Reference &ope) override;
2177 void visit(BackReference &) override { done_ = true; }
2178 void visit(Cut &) override { done_ = true; }
2179
// Null until an error is reported.
2180 const char *error_s = nullptr;
2181
// Defined out of line.
2182 std::shared_ptr<Ope> resolve_macro_arg(size_t iarg) const;
2183
2184private:
2185 std::string name_;
2186 std::unordered_set<std::string> refs_;
2187 bool done_ = false;
2188 std::vector<const std::vector<std::shared_ptr<Ope>> *> macro_args_stack_;
2189};
2190
2193
// NOTE(review): the lines that open this struct (listing lines 2191-2192)
// are absent from the listing -- confirm against upstream.
// Visitor computing whether an operator can match the empty string; the
// answer is left in `result`.
2194 bool result = false;
2195
// A sequence can be empty only when every element can.
2196 void visit(Sequence &ope) override {
2197 result = std::all_of(ope.opes_.begin(), ope.opes_.end(), [](auto &op) {
2198 ComputeCanBeEmpty vis;
2199 op->accept(vis);
2200 return vis.result;
2201 });
2202 }
// A choice can be empty when at least one alternative can.
2203 void visit(PrioritizedChoice &ope) override {
2204 result = std::any_of(ope.opes_.begin(), ope.opes_.end(), [](auto &op) {
2205 ComputeCanBeEmpty vis;
2206 op->accept(vis);
2207 return vis.result;
2208 });
2209 }
// Only the minimum count is considered; the child is not inspected.
2210 void visit(Repetition &ope) override { result = ope.min_ == 0; }
2211 void visit(AndPredicate &) override { result = true; }
2212 void visit(NotPredicate &) override { result = true; }
2213 void visit(Dictionary &) override { result = false; }
2214 void visit(LiteralString &ope) override { result = ope.lit_.empty(); }
2215 void visit(CharacterClass &) override { result = false; }
2216 void visit(Character &) override { result = false; }
2217 void visit(AnyCharacter &) override { result = false; }
2218 void visit(User &) override { result = false; }
2219 void visit(Reference &ope) override;
2220 void visit(BackReference &) override { result = false; }
2221 void visit(Cut &) override { result = false; }
2222};
2223
2226
// NOTE(review): the lines that open this struct (listing lines 2224-2225)
// are absent from the listing -- confirm against upstream.
// Visitor that flags operators able to match without consuming input. On a
// hit it sets is_empty and copies the last entry of `refs_` into
// error_s / error_name.
2227 HasEmptyElement(std::vector<std::pair<const char *, std::string>> &refs,
2228 std::unordered_map<std::string, bool> &has_error_cache)
2229 : refs_(refs), has_error_cache_(has_error_cache) {}
2230
// Defined out of line.
2231 void visit(Sequence &ope) override;
// Stops at the first alternative that is flagged as empty.
2232 void visit(PrioritizedChoice &ope) override {
2233 for (const auto &op : ope.opes_) {
2234 op->accept(*this);
2235 if (is_empty) { return; }
2236 }
2237 }
// A repetition with min_ == 0 is flagged directly; otherwise the child
// decides.
2238 void visit(Repetition &ope) override {
2239 if (ope.min_ == 0) {
2240 set_error();
2241 } else {
2242 ope.ope_->accept(*this);
2243 }
2244 }
2245 void visit(AndPredicate &) override { set_error(); }
2246 void visit(NotPredicate &) override { set_error(); }
2247 void visit(LiteralString &ope) override {
2248 if (ope.lit_.empty()) { set_error(); }
2249 }
2250 void visit(Reference &ope) override;
2251
2252 bool is_empty = false;
2253 const char *error_s = nullptr;
2254 std::string error_name;
2255
2256private:
// NOTE(review): refs_.back() requires refs_ to be non-empty, and the
// unqualified `tie` is presumably std::tie found through argument-dependent
// lookup -- confirm both.
2257 void set_error() {
2258 is_empty = true;
2259 tie(error_s, error_name) = refs_.back();
2260 }
2261 std::vector<std::pair<const char *, std::string>> &refs_;
2262 std::unordered_map<std::string, bool> &has_error_cache_;
2263};
2264
2267
// Visitor that sets `has_error` when an unbounded repetition wraps an operand
// for which the nested check reports `is_empty`.
//
// This overload additionally pushes (s, name) onto the shared `refs` list
// before any visiting starts.
2268 DetectInfiniteLoop(const char *s, const std::string &name,
2269 std::vector<std::pair<const char *, std::string>> &refs,
2270 std::unordered_map<std::string, bool> &has_error_cache)
2271 : refs_(refs), has_error_cache_(has_error_cache) {
2272 refs_.emplace_back(s, name);
2273 }
2274
// This overload only stores the two references; `refs` is left untouched.
2275 DetectInfiniteLoop(std::vector<std::pair<const char *, std::string>> &refs,
2276 std::unordered_map<std::string, bool> &has_error_cache)
2277 : refs_(refs), has_error_cache_(has_error_cache) {}
2278
// Sequence and choice are walked the same way: visit children in order and
// stop at the first reported error.
2279 void visit(Sequence &ope) override {
2280 for (const auto &op : ope.opes_) {
2281 op->accept(*this);
2282 if (has_error) { return; }
2283 }
2284 }
2285 void visit(PrioritizedChoice &ope) override {
2286 for (const auto &op : ope.opes_) {
2287 op->accept(*this);
2288 if (has_error) { return; }
2289 }
2290 }
// Only repetitions whose max_ is the size_t maximum (no upper bound) are
// checked with the nested visitor; bounded ones just recurse with *this.
// NOTE(review): the declaration of `vis` is not visible in this listing —
// presumably a HasEmptyElement built from refs_/has_error_cache_; confirm.
2291 void visit(Repetition &ope) override {
2292 if (ope.max_ == std::numeric_limits<size_t>::max()) {
2294 ope.ope_->accept(vis);
2295 if (vis.is_empty) {
2296 has_error = true;
2297 error_s = vis.error_s;
2298 error_name = vis.error_name;
2299 }
2300 } else {
2301 ope.ope_->accept(*this);
2302 }
2303 }
2304 void visit(Reference &ope) override;
2305
// Outputs: error flag plus the (position, name) pair copied from the nested
// visitor that found the problem.
2306 bool has_error = false;
2307 const char *error_s = nullptr;
2308 std::string error_name;
2309
2310private:
2311 std::vector<std::pair<const char *, std::string>> &refs_;
2312 std::unordered_map<std::string, bool> &has_error_cache_;
2313};
2314
2317
// Constructor tail: stores the grammar and the parameter-name list.
// NOTE(review): the first line of this constructor and the declaration of
// `grammar_` are not visible in this listing — confirm against the header.
2319 const std::vector<std::string> &params)
2320 : grammar_(grammar), params_(params) {}
2321
// Only Reference nodes are handled here; the body is defined out of line.
2322 void visit(Reference &ope) override;
2323
// Results, all keyed by std::string.
// NOTE(review): presumably keyed by rule name, with error_s holding a source
// position and error_message the text to report — confirm in visit(Reference).
2324 std::unordered_map<std::string, const char *> error_s;
2325 std::unordered_map<std::string, std::string> error_message;
2326 std::unordered_set<std::string> referenced;
2327
2328private:
2330 const std::vector<std::string> &params_;
2331};
2332
2335
// Stores references to the grammar and to the parameter-name list; both must
// outlive this visitor because only references are kept.
2336 LinkReferences(Grammar &grammar, const std::vector<std::string> &params)
2337 : grammar_(grammar), params_(params) {}
2338
// Only Reference nodes are handled here; the body is defined out of line.
2339 void visit(Reference &ope) override;
2340
2341private:
// NOTE(review): the declaration of `grammar_` is not visible in this listing.
2343 const std::vector<std::string> &params_;
2344};
2345
// Visitor that leaves its result in `found_ope` after each accept().
// Sequences, choices and repetitions are rebuilt from their children's
// results; leaf operators hand back themselves via shared_from_this().
//
// NOTE(review): many wrapper visitors below (AndPredicate, NotPredicate,
// CaptureScope, Capture, TokenBoundary, Ignore, Whitespace,
// PrecedenceClimbing, Recovery) show only the recursive accept() call; any
// statement that re-wraps `found_ope` is not visible in this listing —
// confirm against the full header.
2347 using Ope::Visitor::visit;
2348
// Keeps references to the argument list and the parameter-name list; both
// must outlive the visitor.
2349 FindReference(const std::vector<std::shared_ptr<Ope>> &args,
2350 const std::vector<std::string> &params)
2351 : args_(args), params_(params) {}
2352
// Builds a new Sequence from the `found_ope` produced by each child.
2353 void visit(Sequence &ope) override {
2354 std::vector<std::shared_ptr<Ope>> opes;
2355 for (const auto &o : ope.opes_) {
2356 o->accept(*this);
2357 opes.emplace_back(std::move(found_ope));
2358 }
2359 found_ope = std::make_shared<Sequence>(opes);
2360 }
// Builds a new PrioritizedChoice from the `found_ope` produced by each child.
2361 void visit(PrioritizedChoice &ope) override {
2362 std::vector<std::shared_ptr<Ope>> opes;
2363 for (const auto &o : ope.opes_) {
2364 o->accept(*this);
2365 opes.emplace_back(std::move(found_ope));
2366 }
2367 found_ope = std::make_shared<PrioritizedChoice>(opes);
2368 }
// Re-creates the repetition around the child's result with the same bounds.
2369 void visit(Repetition &ope) override {
2370 ope.ope_->accept(*this);
2371 found_ope = rep(found_ope, ope.min_, ope.max_);
2372 }
2373 void visit(AndPredicate &ope) override {
2374 ope.ope_->accept(*this);
2376 }
2377 void visit(NotPredicate &ope) override {
2378 ope.ope_->accept(*this);
2380 }
// Leaf operators: the result is the visited node itself.
2381 void visit(Dictionary &ope) override { found_ope = ope.shared_from_this(); }
2382 void visit(LiteralString &ope) override {
2383 found_ope = ope.shared_from_this();
2384 }
2385 void visit(CharacterClass &ope) override {
2386 found_ope = ope.shared_from_this();
2387 }
2388 void visit(Character &ope) override { found_ope = ope.shared_from_this(); }
2389 void visit(AnyCharacter &ope) override { found_ope = ope.shared_from_this(); }
2390 void visit(CaptureScope &ope) override {
2391 ope.ope_->accept(*this);
2393 }
2394 void visit(Capture &ope) override {
2395 ope.ope_->accept(*this);
2397 }
2398 void visit(TokenBoundary &ope) override {
2399 ope.ope_->accept(*this);
2401 }
2402 void visit(Ignore &ope) override {
2403 ope.ope_->accept(*this);
2405 }
// Holders are transparent: the result is whatever the held operator yields.
// NOTE(review): weak_.lock() is dereferenced without a null check — this
// assumes the referenced holder is still alive; confirm.
2406 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2407 void visit(Holder &ope) override { ope.ope_->accept(*this); }
2408 void visit(Reference &ope) override;
2409 void visit(Whitespace &ope) override {
2410 ope.ope_->accept(*this);
2412 }
2413 void visit(PrecedenceClimbing &ope) override {
2414 ope.atom_->accept(*this);
2416 }
2417 void visit(Recovery &ope) override {
2418 ope.ope_->accept(*this);
2420 }
2421 void visit(Cut &ope) override { found_ope = ope.shared_from_this(); }
2422
// Result of the most recent visit.
2423 std::shared_ptr<Ope> found_ope;
2424
2425private:
2426 const std::vector<std::shared_ptr<Ope>> &args_;
2427 const std::vector<std::string> &params_;
2428};
2429
2430/*
2431 * First-Set computation
2432 */
2435
2436 void visit(Sequence &ope) override {
2437 for (const auto &op : ope.opes_) {
2438 FirstSet element_fs;
2439 auto save = result_;
2440 result_ = FirstSet{};
2441 op->accept(*this);
2442 element_fs = result_;
2443 result_ = save;
2444 result_.chars |= element_fs.chars;
2445 if (element_fs.any_char) { result_.any_char = true; }
2446 if (!result_.first_literal) {
2447 result_.first_literal = element_fs.first_literal;
2448 }
2449 if (!result_.first_rule) { result_.first_rule = element_fs.first_rule; }
2450 if (!element_fs.can_be_empty) { return; }
2451 // This element can be empty, continue to next
2452 }
2453 result_.can_be_empty = true;
2454 }
2455 void visit(PrioritizedChoice &ope) override {
2456 auto save = result_;
2457 for (const auto &op : ope.opes_) {
2458 result_ = FirstSet{};
2459 op->accept(*this);
2460 save.merge(result_);
2461 }
2462 result_ = save;
2463 }
// A repetition contributes its operand's first-set; with a minimum count of
// zero it is additionally marked as able to match the empty string.
2464 void visit(Repetition &ope) override {
2465 ope.ope_->accept(*this);
2466 if (ope.min_ == 0) { result_.can_be_empty = true; }
2467 }
// Predicates add no characters; they only mark the result as possibly empty.
2468 void visit(AndPredicate &) override { result_.can_be_empty = true; }
2469 void visit(NotPredicate &) override { result_.can_be_empty = true; }
2470 void visit(Dictionary &ope) override {
2471 for (const auto &[key, info] : ope.trie_.dic_) {
2472 if (!key.empty()) {
2473 auto ch = static_cast<unsigned char>(key[0]);
2474 result_.chars.set(ch);
2475 if (ope.trie_.ignore_case_) {
2476 result_.chars.set(static_cast<unsigned char>(std::toupper(ch)));
2477 result_.chars.set(static_cast<unsigned char>(std::tolower(ch)));
2478 }
2479 }
2480 }
2481 }
2482 void visit(LiteralString &ope) override {
2483 if (ope.lit_.empty()) {
2484 result_.can_be_empty = true;
2485 } else {
2486 auto ch = static_cast<unsigned char>(ope.lit_[0]);
2487 result_.chars.set(ch);
2488 if (ope.ignore_case_) {
2489 result_.chars.set(static_cast<unsigned char>(std::toupper(ch)));
2490 result_.chars.set(static_cast<unsigned char>(std::tolower(ch)));
2491 }
2492 if (!result_.first_literal) { result_.first_literal = ope.lit_.c_str(); }
2493 }
2494 }
2495 void visit(CharacterClass &ope) override {
2496 for (const auto &range : ope.ranges_) {
2497 auto cp1 = range.first;
2498 auto cp2 = range.second;
2499 if (cp1 > 0x7F || cp2 > 0x7F) {
2500 // Non-ASCII range: conservative fallback
2501 result_.any_char = true;
2502 return;
2503 }
2504 for (auto cp = cp1; cp <= cp2; cp++) {
2505 auto ch = static_cast<unsigned char>(cp);
2506 result_.chars.set(ch);
2507 if (ope.ignore_case_) {
2508 result_.chars.set(static_cast<unsigned char>(std::toupper(ch)));
2509 result_.chars.set(static_cast<unsigned char>(std::tolower(ch)));
2510 }
2511 }
2512 }
2513 if (ope.negated_) {
2514 result_.chars.flip();
2515 result_.any_char = true; // negated class can match non-ASCII
2516 }
2517 }
2518 void visit(Character &ope) override {
2519 if (ope.ch_ > 0x7F) {
2520 result_.any_char = true;
2521 } else {
2522 result_.chars.set(static_cast<unsigned char>(ope.ch_));
2523 }
2524 }
// Operators whose first character cannot be narrowed down set `any_char`.
2525 void visit(AnyCharacter &) override { result_.any_char = true; }
2526 void visit(User &) override { result_.any_char = true; }
// Declaration only; the body is defined out of line.
2527 void visit(Reference &ope) override;
2528 void visit(BackReference &) override { result_.any_char = true; }
// A cut adds no characters; it only marks the result as possibly empty.
2529 void visit(Cut &) override { result_.can_be_empty = true; }
2530
2531 // Per-rule cache shared across a SetupFirstSets traversal. Without it,
2532 // every alternative of every PrioritizedChoice re-walks referenced
2533 // rules — O(refs^depth) work for grammars with many cross-references.
2534 // Only cycle-free rule computations are cached; results computed under
2535 // a cycle (left recursion) would be incomplete and unsafe to reuse from
2536 // a different call context.
2537 using FirstSetCache = std::unordered_map<const Definition *, FirstSet>;
2538
// The cache is held by reference, so it must outlive this visitor.
2539 explicit ComputeFirstSet(FirstSetCache &cache) : cache_(cache) {}
2540
// NOTE(review): the declarations of `result_` and `cache_` are not visible
// in this listing — confirm against the full header.
2542
2543private:
2545 std::unordered_set<const Definition *> refs_;
2546 size_t cycle_count_ = 0;
2547};
2548
2551
// Visitor that fills in per-node data on the operator tree: the first-set
// list of every PrioritizedChoice and the span bitset of eligible
// repetitions.
2552 void visit(Sequence &ope) override;
2554
// Recomputes `first_sets_` with one entry per alternative, then recurses
// into the alternatives with this visitor.
// NOTE(review): the declaration of `cfs` is not visible in this listing —
// presumably a ComputeFirstSet created per alternative; confirm.
2555 void visit(PrioritizedChoice &ope) override {
2556 ope.first_sets_.clear();
2557 ope.first_sets_.reserve(ope.opes_.size());
2558 for (const auto &op : ope.opes_) {
2560 op->accept(cfs);
2561 ope.first_sets_.push_back(cfs.result_);
2562 }
2563 for (const auto &op : ope.opes_) {
2564 op->accept(*this);
2565 }
2566 }
// Recurses into the operand; if the operand is a CharacterClass reporting
// is_ascii_only(), the repetition stores a pointer to that class's bitset.
2567 void visit(Repetition &ope) override {
2568 ope.ope_->accept(*this);
2569 // ISpan optimization: detect Repetition + ASCII CharacterClass
2570 auto cc = dynamic_cast<CharacterClass *>(ope.ope_.get());
2571 if (cc && cc->is_ascii_only()) { ope.span_bitset_ = &cc->ascii_bitset(); }
2572 }
2573 void visit(Reference &ope) override;
2574
2575private:
2577 std::unordered_set<const Definition *> visited_rules_;
2578};
2579
2580/*
2581 * Keywords
2582 */
// Names of the special `%whitespace`, `%word` and `%recover` definitions.
// Declared `inline constexpr` so that the header provides a single,
// non-reassignable definition instead of a mutable per-translation-unit
// `static` pointer (which also triggers unused-variable warnings).
inline constexpr const char *WHITESPACE_DEFINITION_NAME = "%whitespace";
inline constexpr const char *WORD_DEFINITION_NAME = "%word";
inline constexpr const char *RECOVER_DEFINITION_NAME = "%recover";
2586
2587/*
2588 * Definition
2589 */
2591public:
// Outcome of a parse call. `ret` is the success flag and `len` the number
// of bytes consumed.
// NOTE(review): two further fields are not visible in this listing;
// parse_core initialises them from c.recovered and c.error_info.
2592 struct Result {
2593 bool ret;
2595 size_t len;
2597 };
2598
// Creates a definition with a fresh Holder that points back at this object.
2599 Definition() : holder_(std::make_shared<Holder>(this)) {}
2600
// Copies only the name and shares rhs's Holder; the shared Holder's outer_
// is re-pointed at this copy.
2601 Definition(const Definition &rhs) : name(rhs.name), holder_(rhs.holder_) {
2602 holder_->outer_ = this;
2603 }
2604
// Creates a definition whose holder is immediately bound to `ope`.
2605 Definition(const std::shared_ptr<Ope> &ope)
2606 : holder_(std::make_shared<Holder>(this)) {
2607 *this <= ope;
2608 }
2609
// Implicit conversion to an operator: a WeakHolder built from this
// definition's holder.
2610 operator std::shared_ptr<Ope>() {
2611 return std::make_shared<WeakHolder>(holder_);
2612 }
2613
// Binds `ope` as this definition's operator and returns *this.
2614 Definition &operator<=(const std::shared_ptr<Ope> &ope) {
2615 holder_->ope_ = ope;
2616 return *this;
2617 }
2618
// Parses the first `n` bytes of `s` with throw-away semantic values and
// user data. `path` and `log` are forwarded unchanged to parse_core.
2619 Result parse(const char *s, size_t n, const char *path = nullptr,
2620 Log log = nullptr) const {
2621 SemanticValues vs;
2622 std::any dt;
2623 return parse_core(s, n, vs, dt, path, log);
2624 }
2625
// Same as above for a NUL-terminated string; the length comes from strlen.
2626 Result parse(const char *s, const char *path = nullptr,
2627 Log log = nullptr) const {
2628 auto n = strlen(s);
2629 return parse(s, n, path, log);
2630 }
2631
// Parses the first `n` bytes of `s`, passing the caller's `dt` to parse_core.
2632 Result parse(const char *s, size_t n, std::any &dt,
2633 const char *path = nullptr, Log log = nullptr) const {
2634 SemanticValues vs;
2635 return parse_core(s, n, vs, dt, path, log);
2636 }
2637
// Same as above for a NUL-terminated string; the length comes from strlen.
2638 Result parse(const char *s, std::any &dt, const char *path = nullptr,
2639 Log log = nullptr) const {
2640 auto n = strlen(s);
2641 return parse(s, n, dt, path, log);
2642 }
2643
2644 template <typename T>
2645 Result parse_and_get_value(const char *s, size_t n, T &val,
2646 const char *path = nullptr,
2647 Log log = nullptr) const {
2648 SemanticValues vs;
2649 std::any dt;
2650 auto r = parse_core(s, n, vs, dt, path, log);
2651 if (r.ret && !vs.empty() && vs.front().has_value()) {
2652 val = std::any_cast<T>(vs[0]);
2653 }
2654 return r;
2655 }
2656
2657 template <typename T>
2658 Result parse_and_get_value(const char *s, T &val, const char *path = nullptr,
2659 Log log = nullptr) const {
2660 auto n = strlen(s);
2661 return parse_and_get_value(s, n, val, path, log);
2662 }
2663
2664 template <typename T>
2665 Result parse_and_get_value(const char *s, size_t n, std::any &dt, T &val,
2666 const char *path = nullptr,
2667 Log log = nullptr) const {
2668 SemanticValues vs;
2669 auto r = parse_core(s, n, vs, dt, path, log);
2670 if (r.ret && !vs.empty() && vs.front().has_value()) {
2671 val = std::any_cast<T>(vs[0]);
2672 }
2673 return r;
2674 }
2675
2676 template <typename T>
2677 Result parse_and_get_value(const char *s, std::any &dt, T &val,
2678 const char *path = nullptr,
2679 Log log = nullptr) const {
2680 auto n = strlen(s);
2681 return parse_and_get_value(s, n, dt, val, path, log);
2682 }
2683
// char8_t overloads, compiled only when the library reports char8_t support.
// Each one reinterprets the pointer as `const char *` and forwards to the
// matching `const char *` overload; no other work is done here.
2684#if defined(__cpp_lib_char8_t)
2685 Result parse(const char8_t *s, size_t n, const char *path = nullptr,
2686 Log log = nullptr) const {
2687 return parse(reinterpret_cast<const char *>(s), n, path, log);
2688 }
2689
2690 Result parse(const char8_t *s, const char *path = nullptr,
2691 Log log = nullptr) const {
2692 return parse(reinterpret_cast<const char *>(s), path, log);
2693 }
2694
2695 Result parse(const char8_t *s, size_t n, std::any &dt,
2696 const char *path = nullptr, Log log = nullptr) const {
2697 return parse(reinterpret_cast<const char *>(s), n, dt, path, log);
2698 }
2699
2700 Result parse(const char8_t *s, std::any &dt, const char *path = nullptr,
2701 Log log = nullptr) const {
2702 return parse(reinterpret_cast<const char *>(s), dt, path, log);
2703 }
2704
2705 template <typename T>
2706 Result parse_and_get_value(const char8_t *s, size_t n, T &val,
2707 const char *path = nullptr,
2708 Log log = nullptr) const {
2709 return parse_and_get_value(reinterpret_cast<const char *>(s), n, val, path,
2710 log);
2711 }
2712
2713 template <typename T>
2714 Result parse_and_get_value(const char8_t *s, T &val,
2715 const char *path = nullptr,
2716 Log log = nullptr) const {
2717 return parse_and_get_value(reinterpret_cast<const char *>(s), val, path,
2718 log);
2719 }
2720
2721 template <typename T>
2722 Result parse_and_get_value(const char8_t *s, size_t n, std::any &dt, T &val,
2723 const char *path = nullptr,
2724 Log log = nullptr) const {
2725 return parse_and_get_value(reinterpret_cast<const char *>(s), n, dt, val,
2726 path, log);
2727 }
2728
2729 template <typename T>
2730 Result parse_and_get_value(const char8_t *s, std::any &dt, T &val,
2731 const char *path = nullptr,
2732 Log log = nullptr) const {
2733 return parse_and_get_value(reinterpret_cast<const char *>(s), dt, val, path,
2734 log);
2735 }
2736#endif
2737
// Assigning an Action installs it as this definition's `action`.
2738 void operator=(Action a) { action = a; }
2739
// Comma operator: forwards `fn` to operator= and returns *this.
2740 template <typename T> Definition &operator,(T fn) {
2741 operator=(fn);
2742 return *this;
2743 }
2744
// NOTE(review): the signature line of the next member is not visible in this
// listing; its body sets ignoreSemanticValue and returns *this.
2746 ignoreSemanticValue = true;
2747 return *this;
2748 }
2749
// Dispatches the visitor to this definition's holder.
2750 void accept(Ope::Visitor &v) { holder_->accept(v); }
2751
// Returns the operator currently bound to the holder.
2752 std::shared_ptr<Ope> get_core_operator() const { return holder_->ope_; }
2753
// Returns the cached `is_token_` flag; the std::call_once body that
// computes it is not visible in this listing.
2754 bool is_token() const {
2755 std::call_once(is_token_init_, [this]() {
2757 });
2758 return is_token_;
2759 }
2760
// Public data members. Several declarations between the lines below are not
// visible in this listing (the two std::function types, for instance, are
// missing their member names).
2761 std::string name;
2762 const char *s_ = nullptr;
2763 std::pair<size_t, size_t> line_ = {1, 1};
2764
2766
2767 size_t id = 0;
2769 std::function<void(const Context &c, const char *s, size_t n, std::any &dt)>
2771 std::function<void(const Context &c, const char *s, size_t n, size_t matchlen,
2772 std::any &value, std::any &dt)>
// Optional operators; parse_core hands both to the Context it creates and
// runs whitespaceOpe once before the rule itself.
2775 std::shared_ptr<Ope> whitespaceOpe;
2776 std::shared_ptr<Ope> wordOpe;
2778 bool is_macro = false;
2779 std::vector<std::string> params;
2780 bool disable_action = false;
2781 bool is_left_recursive = false;
2782 bool can_be_empty = false;
2783
2786 bool verbose_trace = false;
2789
2790 std::string error_message;
2791 bool no_ast_opt = false;
2792
// When true, parse_core reports failure if input remains after the rule
// has matched.
2793 bool eoi_check = true;
2794
2795 // Per-rule packrat stats (optional, for profiling)
2796 mutable bool collect_packrat_stats = false;
2797 mutable std::vector<Context::PackratStats> packrat_stats_;
2798
2799private:
2800 friend class Reference;
2801 friend class ParserGenerator;
2802
2805
// NOTE(review): the signature line of this member is not visible in this
// listing. Its body runs once (std::call_once): a visitor walks the holder
// and the optional whitespace/word operators, and the ids it collected are
// swapped into definition_ids_.
2807 std::call_once(definition_ids_init_, [&]() {
2809 holder_->accept(vis);
2810 if (whitespaceOpe) { whitespaceOpe->accept(vis); }
2811 if (wordOpe) { wordOpe->accept(vis); }
2812 definition_ids_.swap(vis.ids);
2813 });
2814 }
2815
2816 void initialize_packrat_filter() const;
2817
// Shared implementation behind every public parse entry point.
//
// s, n : input buffer and its length in bytes.
// vs   : receives the semantic values produced by the rule.
// dt   : user data passed through to the operators.
// path : passed to the Context.
// log  : passed to the Context.
//
// Returns Result{success flag, c.recovered, bytes consumed, c.error_info}.
// NOTE(review): a few statements (and part of the Context argument list)
// are not visible in this listing.
2818 Result parse_core(const char *s, size_t n, SemanticValues &vs, std::any &dt,
2819 const char *path, Log log) const {
2821
2822 std::shared_ptr<Ope> ope = holder_;
2823
// tracer_start / tracer_end bracket the whole parse; the scope guard makes
// sure tracer_end runs on every exit path.
2824 std::any trace_data;
2825 if (tracer_start) { tracer_start(trace_data); }
2826 auto se = scope_exit([&]() {
2827 if (tracer_end) { tracer_end(trace_data); }
2828 });
2829
2830 Context c(path, s, n, definition_ids_.size(), whitespaceOpe, wordOpe,
2832 verbose_trace, log);
2833
2835 packrat_stats_.resize(definition_ids_.size());
2836 c.packrat_stats = &packrat_stats_;
2837 }
2838
2841 if (!packrat_filter_.empty()) {
2842 c.packrat_rule_filter = &packrat_filter_;
2843 }
2844 }
2845
// `i` counts the bytes consumed so far.
2846 size_t i = 0;
2847
// Leading whitespace is consumed before the rule. Unless verbose tracing is
// on, tracing is switched off for this step and restored afterwards.
2848 if (whitespaceOpe) {
2849 auto save_ignore_trace_state = c.ignore_trace_state;
2850 c.ignore_trace_state = !c.verbose_trace;
2851 auto se =
2852 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
2853
2854 auto len = whitespaceOpe->parse(s, n, vs, c, dt);
2855 if (fail(len)) { return Result{false, c.recovered, i, c.error_info}; }
2856
2857 i = len;
2858 }
2859
2860 auto len = ope->parse(s + i, n - i, vs, c, dt);
2861 auto ret = success(len);
2862 if (ret) {
2863 i += len;
// With eoi_check on, leftover input turns the match into a failure. The
// "expected end of input" message is recorded only when the stop position
// lies beyond the furthest error position seen so far.
2864 if (eoi_check) {
2865 if (i < n) {
2866 if (c.error_info.error_pos - c.s < s + i - c.s) {
2867 c.error_info.message_pos = s + i;
2868 c.error_info.message = "expected end of input";
2869 }
2870 ret = false;
2871 }
2872 }
2873 }
2874 return Result{ret, c.recovered, i, c.error_info};
2875 }
2876
// The holder plus lazily initialised caches; the std::once_flag members
// guard their one-time initialisation from const member functions.
2877 std::shared_ptr<Holder> holder_;
2878 mutable std::once_flag is_token_init_;
2879 mutable bool is_token_ = false;
2880 mutable std::once_flag assign_id_to_definition_init_;
2881 mutable std::once_flag definition_ids_init_;
2882 mutable std::unordered_map<void *, size_t> definition_ids_;
2883 mutable std::once_flag packrat_filter_init_;
2884 mutable std::vector<bool> packrat_filter_;
2885};
2886
2887/*
2888 * Implementations
2889 */
2890
// Matches the literal `lit` at the start of `s` (at most `n` bytes).
//
// s, n         : current input position and remaining length.
// vs, dt       : forwarded to the trailing whitespace skip.
// c            : parse context (error tracking, word operator, whitespace).
// lit          : literal text to match.
// init_is_word : once-flag guarding the one-time computation of `is_word`.
// is_word      : cached flag: does `lit` itself match the word operator?
// ignore_case  : when true, tolower(input byte) is compared to `lower_lit`.
// lower_lit    : comparison text used in the case-insensitive branch.
//
// Returns the number of bytes consumed (literal plus trailing whitespace),
// or static_cast<size_t>(-1) on a mismatch.
2891inline size_t parse_literal(const char *s, size_t n, SemanticValues &vs,
2892 Context &c, std::any &dt, const std::string &lit,
2893 std::once_flag &init_is_word, bool &is_word,
2894 bool ignore_case, const std::string &lower_lit) {
2895 size_t i = 0;
// Byte-by-byte comparison; running out of input counts as a mismatch.
2896 for (; i < lit.size(); i++) {
2897 if (i >= n ||
2898 (ignore_case ? (static_cast<char>(std::tolower(
2899 static_cast<unsigned char>(s[i]))) != lower_lit[i])
2900 : (s[i] != lit[i]))) {
2901 c.set_error_pos(s, lit.data());
2902 return static_cast<size_t>(-1);
2903 }
2904 }
2905
2906 // Word check
2907 if (c.wordOpe) {
// The trace-ignore state is saved here and restored on scope exit.
// NOTE(review): the statement that changes it is not visible in this listing.
2908 auto save_ignore_trace_state = c.ignore_trace_state;
2910 auto se =
2911 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
2912
// Computed once per `init_is_word` flag: run the word operator over the
// literal's own text, using a throw-away context.
2913 std::call_once(init_is_word, [&]() {
2914 SemanticValues dummy_vs;
2915 Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr,
2916 nullptr, nullptr, false, nullptr);
2917 std::any dummy_dt;
2918
2919 auto len =
2920 c.wordOpe->parse(lit.data(), lit.size(), dummy_vs, dummy_c, dummy_dt);
2921 is_word = success(len);
2922 });
2923
// For word-like literals the match is rejected when the word operator also
// matches immediately after the literal (checked through a NotPredicate).
2924 if (is_word) {
2925 SemanticValues dummy_vs;
2926 Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr,
2927 nullptr, nullptr, false, nullptr);
2928 std::any dummy_dt;
2929
2930 NotPredicate ope(c.wordOpe);
2931 auto len = ope.parse(s + i, n - i, dummy_vs, dummy_c, dummy_dt);
2932 if (fail(len)) {
2933 c.set_error_pos(s, lit.data());
2934 return len;
2935 }
2936 i += len;
2937 }
2938 }
2939
2940 // Skip whitespace
2941 auto wl = c.skip_whitespace(s + i, n - i, vs, dt);
2942 if (fail(wl)) { return wl; }
2943 i += wl;
2944
2945 return i;
2946}
2947
2948inline std::pair<size_t, size_t> SemanticValues::line_info() const {
2949 assert(c_);
2950 return c_->line_info(sv_.data());
2951}
2952
// Formats the recorded error and passes it to `log` as
// (line, column, message, label).
//
// log  : receives the formatted error.
// s, n : the whole input buffer, used for line lookup and for extracting the
//        unexpected token.
//
// A custom message (message_pos set) takes precedence over the generic
// "syntax error" report based on error_pos and expected_tokens.
// NOTE(review): some lines of this function are not visible in this listing
// (e.g. the opening of the block that the `}` before `} else if` closes).
2953inline void ErrorInfo::output_log(const Log &log, const char *s, size_t n) {
2954 if (message_pos) {
2957 auto line = line_info(s, message_pos);
2958 std::string msg;
// In custom messages "%t" is replaced by the unexpected token and "%c" by
// its first code point; without a token the message is used verbatim.
2959 if (auto unexpected_token = heuristic_error_token(s, n, message_pos);
2960 !unexpected_token.empty()) {
2961 msg = replace_all(message, "%t", unexpected_token);
2962
2963 auto unexpected_char = unexpected_token.substr(
2964 0,
2965 codepoint_length(unexpected_token.data(), unexpected_token.size()));
2966
2967 msg = replace_all(msg, "%c", unexpected_char);
2968 } else {
2969 msg = message;
2970 }
2971 log(line.first, line.second, msg, label);
2972 }
2973 } else if (error_pos) {
2974 if (error_pos > last_output_pos) {
2976 auto line = line_info(s, error_pos);
2977
2978 std::string msg;
2979 if (expected_tokens.empty()) {
2980 msg = "syntax error.";
2981 } else {
2982 msg = "syntax error";
2983
2984 // unexpected token
2985 if (auto unexpected_token = heuristic_error_token(s, n, error_pos);
2986 !unexpected_token.empty()) {
2987 msg += ", unexpected '";
2988 msg += unexpected_token;
2989 msg += "'";
2990 }
2991
// Append the expected items: literals are quoted, rules are shown as
// <name>. The first rule name listed becomes the label if none is set yet.
2992 auto first_item = true;
2993 size_t i = 0;
2994 while (i < expected_tokens.size()) {
2995 auto [error_literal, error_rule] = expected_tokens[i];
2996
2997 // Skip rules start with '_'
2998 if (!(error_rule && error_rule->name[0] == '_')) {
2999 msg += (first_item ? ", expecting " : ", ");
3000 if (error_literal) {
3001 msg += "'";
3002 msg += error_literal;
3003 msg += "'";
3004 } else {
3005 msg += "<" + error_rule->name + ">";
3006 if (label.empty()) { label = error_rule->name; }
3007 }
3008 first_item = false;
3009 }
3010
3011 i++;
3012 }
3013 msg += ".";
3014 }
3015 log(line.first, line.second, msg, label);
3016 }
3017 }
3018}
3019
// Runs the whitespace operator at `a_s` and returns its parse result.
// Returns 0 without parsing while inside a token boundary
// (in_token_boundary_count non-zero) or when no whitespace operator is set.
// ignore_trace_state is saved and restored on scope exit.
// NOTE(review): the statement that changes it in between is not visible in
// this listing.
3020inline size_t Context::skip_whitespace(const char *a_s, size_t n,
3021 SemanticValues &vs, std::any &dt) {
3022 if (in_token_boundary_count || !whitespaceOpe) { return 0; }
3023 auto save = ignore_trace_state;
3025 auto se = scope_exit([&]() { ignore_trace_state = save; });
3026 return whitespaceOpe->parse(a_s, n, vs, *this, dt);
3027}
3028
3029inline void Context::set_error_pos(const char *a_s, const char *literal) {
3030 if (log) {
3031 if (error_info.error_pos <= a_s) {
3032 if (error_info.error_pos < a_s || !error_info.keep_previous_token) {
3033 error_info.error_pos = a_s;
3034 error_info.expected_tokens.clear();
3035 }
3036
3037 const char *error_literal = nullptr;
3038 const Definition *error_rule = nullptr;
3039
3040 if (literal) {
3041 error_literal = literal;
3042 } else if (!rule_stack.empty()) {
3043 auto rule = rule_stack.back();
3044 auto ope = rule->get_core_operator();
3045 if (auto token = FindLiteralToken::token(*ope);
3046 token && token[0] != '\0') {
3047 error_literal = token;
3048 }
3049 }
3050
3051 for (auto r : rule_stack) {
3052 error_rule = r;
3053 if (r->is_token()) { break; }
3054 }
3055
3056 if (error_literal || error_rule) {
3057 error_info.add(error_literal, error_rule);
3058 }
3059 }
3060 }
3061}
3062
// Pushes a fresh trace id (taken from next_trace_id, which is then
// incremented) and invokes the enter tracer for `ope`.
3063inline void Context::trace_enter(const Ope &ope, const char *a_s, size_t n,
3064 const SemanticValues &vs, std::any &dt) {
3065 trace_ids.push_back(next_trace_id++);
3066 tracer_enter(ope, a_s, n, vs, *this, dt, trace_data);
3067}
3068
// Invokes the leave tracer for `ope` with the parse result `len`, then pops
// the most recently pushed trace id.
3069inline void Context::trace_leave(const Ope &ope, const char *a_s, size_t n,
3070 const SemanticValues &vs, std::any &dt,
3071 size_t len) {
3072 tracer_leave(ope, a_s, n, vs, *this, dt, len, trace_data);
3073 trace_ids.pop_back();
3074}
3075
3076inline bool Context::is_traceable(const Ope &ope) const {
3077 if (tracer_enter && tracer_leave) {
3078 if (ignore_trace_state) { return false; }
3079 return !dynamic_cast<const peg::Reference *>(&ope);
3080 }
3081 return false;
3082}
3083
3084inline size_t Ope::parse(const char *s, size_t n, SemanticValues &vs,
3085 Context &c, std::any &dt) const {
3086 if (c.is_traceable(*this)) {
3087 c.trace_enter(*this, s, n, vs, dt);
3088 auto len = parse_core(s, n, vs, c, dt);
3089 c.trace_leave(*this, s, n, vs, dt, len);
3090 return len;
3091 }
3092 return parse_core(s, n, vs, c, dt);
3093}
3094
// Matches one dictionary entry at `s` through the trie.
//
// Returns the number of bytes consumed (entry plus trailing whitespace), or
// static_cast<size_t>(-1) when the trie matches nothing. On success
// vs.choice_ is set to the id written by trie_.match and vs.choice_count_ to
// the trie's item count.
3095inline size_t Dictionary::parse_core(const char *s, size_t n,
3096 SemanticValues &vs, Context &c,
3097 std::any &dt) const {
// `id` is an output of trie_.match; it is read only after the zero-length
// (no match) case has returned.
3098 size_t id;
3099 auto i = trie_.match(s, n, id);
3100
3101 if (i == 0) {
3102 c.set_error_pos(s);
3103 return static_cast<size_t>(-1);
3104 }
3105
3106 vs.choice_count_ = trie_.items_count();
3107 vs.choice_ = id;
3108
3109 // Word check
3110 if (c.wordOpe) {
// The trace-ignore state is saved here and restored on scope exit.
// NOTE(review): the statement that changes it is not visible in this listing.
3111 auto save_ignore_trace_state = c.ignore_trace_state;
3113 auto se =
3114 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
3115
// The match is rejected when the word operator also matches immediately
// after the entry (checked through a NotPredicate on a throw-away context).
3116 {
3117 SemanticValues dummy_vs;
3118 Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr,
3119 nullptr, nullptr, false, nullptr);
3120 std::any dummy_dt;
3121
3122 NotPredicate ope(c.wordOpe);
3123 auto len = ope.parse(s + i, n - i, dummy_vs, dummy_c, dummy_dt);
3124 if (fail(len)) {
3125 c.set_error_pos(s);
3126 return len;
3127 }
3128 i += len;
3129 }
3130 }
3131
3132 // Skip whitespace
3133 auto wl = c.skip_whitespace(s + i, n - i, vs, dt);
3134 if (fail(wl)) { return wl; }
3135 i += wl;
3136
3137 return i;
3138}
3139
// Delegates to parse_literal with this literal's text and cached word-check
// state.
// NOTE(review): the last line of the argument list is not visible in this
// listing — presumably the case-insensitivity flag and lowered text; confirm.
3140inline size_t LiteralString::parse_core(const char *s, size_t n,
3141 SemanticValues &vs, Context &c,
3142 std::any &dt) const {
3143 return parse_literal(s, n, vs, c, dt, lit_, init_is_word_, is_word_,
3145}
3146
// Parses the wrapped operator as a single token. On success the matched text
// is appended to vs.tokens and trailing whitespace is skipped.
//
// Returns the wrapped operator's length plus the skipped whitespace, or the
// failing result of whichever step failed.
3147inline size_t TokenBoundary::parse_core(const char *s, size_t n,
3148 SemanticValues &vs, Context &c,
3149 std::any &dt) const {
// The trace-ignore state is saved here and restored on scope exit.
// NOTE(review): the statement that changes it is not visible in this listing.
3150 auto save_ignore_trace_state = c.ignore_trace_state;
3152 auto se =
3153 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
3154
3155 size_t len;
3156 {
// The scope guard decrements in_token_boundary_count when this inner block
// ends, i.e. before the whitespace skip below.
// NOTE(review): the matching increment is not visible in this listing.
3158 auto se = scope_exit([&]() { c.in_token_boundary_count--; });
3159 len = ope_->parse(s, n, vs, c, dt);
3160 }
3161
3162 if (success(len)) {
3163 vs.tokens.emplace_back(std::string_view(s, len));
3164
3165 auto wl = c.skip_whitespace(s + len, n - len, vs, dt);
3166 if (fail(wl)) { return wl; }
3167 len += wl;
3168 }
3169 return len;
3170}
3171
// Parses the rule owned by this holder at position `s`.
//
// s, n : current input position and remaining length.
// vs   : caller's semantic values; on success the rule's value and tag are
//        appended unless the rule has ignoreSemanticValue set.
// c    : parse context (rule stack, packrat cache, left-recursion memo).
// dt   : user data passed through to callbacks and actions.
//
// Returns the matched length, or static_cast<size_t>(-1) on failure.
// Throws std::logic_error when no operator has been bound to the holder.
//
// Three strategies are used after the macro shortcut:
//   * left-recursive rules: seed lr_memo with failure, then re-parse
//     repeatedly while the match keeps getting longer;
//   * packrat enabled: go through c.packrat;
//   * otherwise: use lr_memo purely as a re-entry guard.
3172inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
3173 Context &c, std::any &dt) const {
3174 if (!ope_) {
3175 throw std::logic_error("Uninitialized definition ope was used...");
3176 }
3177
3178 // Macro reference
// Macros parse straight into the caller's `vs`: no value scope, callbacks,
// action or caching is involved.
3179 if (outer_->is_macro) {
3180 c.rule_stack.push_back(outer_);
3181 auto len = ope_->parse(s, n, vs, c, dt);
3182 c.rule_stack.pop_back();
3183 return len;
3184 }
3185
3186 size_t len;
3187 std::any val;
3188
3189 // Shared parse body: invokes enter/leave callbacks, parses the rule's
3190 // operator, handles actions/predicates/errors, and calls reduce.
3191 // Returns {parse_len, parse_val}.
3192 auto do_parse = [&]() {
3193 size_t parse_len;
3194 std::any parse_val;
3195
3196 if (outer_->enter) { outer_->enter(c, s, n, dt); }
3197 auto &chvs = c.push_semantic_values_scope();
// NOTE(review): this guard runs after the return expression below has been
// evaluated, so `leave` appears to observe `parse_val` after it was moved
// from — confirm whether that is intended. One statement of the guard body
// is not visible in this listing.
3198 auto se = scope_exit([&]() {
3200 if (outer_->leave) { outer_->leave(c, s, n, parse_len, parse_val, dt); }
3201 });
3202
3203 c.rule_stack.push_back(outer_);
3204 parse_len = ope_->parse(s, n, chvs, c, dt);
3205 c.rule_stack.pop_back();
3206
3207 if (success(parse_len)) {
3208 chvs.sv_ = std::string_view(s, parse_len);
3209 chvs.name_ = outer_->name;
3210
// Choice information is kept only when the rule's operator (looking through
// a token boundary) is choice-like.
3211 auto ope_ptr = ope_.get();
3212 if (ope_ptr->is_token_boundary) {
3213 ope_ptr = static_cast<const peg::TokenBoundary *>(ope_ptr)->ope_.get();
3214 }
3215 if (!ope_ptr->is_choice_like) {
3216 chvs.choice_count_ = 0;
3217 chvs.choice_ = 0;
3218 }
3219
// A rejecting predicate turns the match into a failure; its message is
// recorded only when a logger is set, the message is non-empty and no
// message has been recorded at or beyond `s`.
3220 std::string msg;
3221 std::any predicate_data;
3222 if (outer_->predicate) {
3223 if (!outer_->predicate(chvs, dt, msg, predicate_data)) {
3224 if (c.log && !msg.empty() && c.error_info.message_pos < s) {
3225 c.error_info.message_pos = s;
3226 c.error_info.message = msg;
3227 c.error_info.label = outer_->name;
3228 }
3229 parse_len = static_cast<size_t>(-1);
3230 }
3231 }
3232
// The action (reduce) is skipped once the context is in the recovered state.
3233 if (success(parse_len)) {
3234 if (!c.recovered) { parse_val = reduce(chvs, dt, predicate_data); }
3235 } else {
3236 if (c.log && !msg.empty() && c.error_info.message_pos < s) {
3237 c.error_info.message_pos = s;
3238 c.error_info.message = msg;
3239 c.error_info.label = outer_->name;
3240 }
3241 }
3242 } else {
// The rule's own error_message is reported under the same conditions.
3243 if (c.log && !outer_->error_message.empty() &&
3244 c.error_info.message_pos < s) {
3245 c.error_info.message_pos = s;
3246 c.error_info.message = outer_->error_message;
3247 c.error_info.label = outer_->name;
3248 }
3249 }
3250
3251 return std::make_pair(parse_len, std::move(parse_val));
3252 };
3253
3254 if (outer_->is_left_recursive) {
// The memo key is (rule, input position).
3255 auto lr_key = std::make_pair(outer_, s);
3256
3257 // Check LR memo first
3258 auto it = c.lr_memo.find(lr_key);
3259 if (it != c.lr_memo.end()) {
3260 if (success(it->second.len)) {
3261 len = it->second.len;
3262 val = it->second.val;
3263 } else {
3264 len = static_cast<size_t>(-1);
3265 }
3266 // Record that this rule's lr_memo was accessed.
3267 // Any LR rule currently seeding will know we're in its cycle.
3268 c.lr_refs_hit.insert(outer_);
3269 } else {
3270 // Seed with FAIL
3271 c.lr_memo[lr_key] = {static_cast<size_t>(-1), {}};
3272
3273 // Mark as active seed (protects our lr_memo from inner growers)
3274 c.lr_active_seeds.insert(lr_key);
3275 auto seed_guard = scope_exit([&]() { c.lr_active_seeds.erase(lr_key); });
3276
3277 // Track which LR rules are referenced during our parse
3278 // to identify cycle members
3279 auto saved_refs = std::move(c.lr_refs_hit);
3280 c.lr_refs_hit.clear();
3281
3282 // Initial parse (self-references will hit the FAIL seed)
3283 auto [initial_len, initial_val] = do_parse();
3284
3285 // Rules whose lr_memo was hit during our parse are in our cycle.
3286 // If we detected cycle members, we ourselves are also part of
3287 // the cycle, so add self — this lets parent seeders see us as
3288 // a transitive cycle member.
3289 auto cycle_rules = c.lr_refs_hit;
3290 if (!cycle_rules.empty()) { cycle_rules.insert(outer_); }
3291
3292 // Restore parent's refs and propagate cycle info upward
3293 c.lr_refs_hit = std::move(saved_refs);
3294 c.lr_refs_hit.insert(cycle_rules.begin(), cycle_rules.end());
3295
3296 if (!success(initial_len)) {
3297 // Keep FAIL in lr_memo so we don't re-seed
3298 len = static_cast<size_t>(-1);
3299 } else {
3300 // Got initial seed, now grow
3301 len = initial_len;
3302 val = std::move(initial_val);
3303 c.lr_memo[lr_key] = {len, val};
3304
3305 while (true) {
3306 // Clear this rule's packrat cache
3307 c.clear_packrat_cache(s, outer_->id);
3308
3309 // Clear lr_memo for cycle-dependent rules at this position,
3310 // but NOT for rules currently in their own seeding phase
3311 // (lr_active_seeds) — those are outer growers we must not
3312 // interfere with.
3313 for (auto memo_it = c.lr_memo.begin(); memo_it != c.lr_memo.end();) {
3314 if (memo_it->first.second == s && memo_it->first.first != outer_ &&
3315 cycle_rules.count(memo_it->first.first) &&
3316 !c.lr_active_seeds.count(memo_it->first)) {
3317 memo_it = c.lr_memo.erase(memo_it);
3318 } else {
3319 ++memo_it;
3320 }
3321 }
3322
3323 auto [new_len, new_val] = do_parse();
3324
// Growth stops on failure or when the new match is not strictly longer; the
// previous (len, val) is kept in that case.
3325 if (!success(new_len) || new_len <= len) {
3326 break; // No improvement, done growing
3327 }
3328
3329 len = new_len;
3330 val = std::move(new_val);
3331 c.lr_memo[lr_key] = {len, val};
3332 }
3333 }
3334
3335 // Write final result to packrat cache (lr_memo entry is kept as
3336 // the primary lookup for LR rules at this position)
3337 if (success(len)) { c.write_packrat_cache(s, outer_->id, len, val); }
3338 }
3339 } else {
3340 if (c.enablePackratParsing) {
3341 // Packrat cache acts as re-entry guard (pre-registered as
3342 // failure before fn is called).
3343 c.packrat(s, outer_->id, len, val, [&](std::any &a_val) {
3344 auto [parse_len, parse_val] = do_parse();
3345 len = parse_len;
3346 if (success(len)) { a_val = std::move(parse_val); }
3347 });
3348 } else {
3349 // Without packrat, use lr_memo as re-entry guard to prevent
3350 // stack overflow from undetected left recursion.
3351 auto guard_key = std::make_pair(outer_, s);
3352 if (c.lr_memo.count(guard_key)) {
3353 len = static_cast<size_t>(-1);
3354 } else {
3355 c.lr_memo[guard_key] = {static_cast<size_t>(-1), {}};
3356 auto [parse_len, parse_val] = do_parse();
3357 len = parse_len;
3358 val = std::move(parse_val);
3359 c.lr_memo.erase(guard_key);
3360 }
3361 }
3362 }
3363
// Publish the value and its tag (derived from the rule name) to the caller.
3364 if (success(len)) {
3365 if (!outer_->ignoreSemanticValue) {
3366 vs.emplace_back(std::move(val));
3367 vs.tags.emplace_back(str2tag(outer_->name));
3368 }
3369 }
3370
3371 return len;
3372}
3373
3374inline std::any Holder::reduce(SemanticValues &vs, std::any &dt,
3375 const std::any &predicate_data) const {
3376 if (outer_->action && !outer_->disable_action) {
3377 return outer_->action(vs, dt, predicate_data);
3378 } else if (vs.empty()) {
3379 return std::any();
3380 } else {
3381 return std::move(vs.front());
3382 }
3383}
3384
3385inline const std::string &Holder::name() const { return outer_->name; }
3386
3387inline const std::string &Holder::trace_name() const {
3388 std::call_once(trace_name_init_,
3389 [this]() { trace_name_ = "[" + outer_->name + "]"; });
3390 return trace_name_;
3391}
3392
3393inline size_t Reference::parse_core(const char *s, size_t n, SemanticValues &vs,
3394 Context &c, std::any &dt) const {
3395 auto save_ignore_trace_state = c.ignore_trace_state;
3396 if (rule_ && rule_->ignoreSemanticValue) {
3398 }
3399 auto se =
3400 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
3401
3402 if (rule_) {
3403 // Reference rule
3404 if (rule_->is_macro) {
3405 // Macro
3406 FindReference vis(c.top_args(), c.rule_stack.back()->params);
3407
3408 // Collect arguments
3409 std::vector<std::shared_ptr<Ope>> args;
3410 for (const auto &arg : args_) {
3411 arg->accept(vis);
3412 args.emplace_back(std::move(vis.found_ope));
3413 }
3414
3415 c.push_args(std::move(args));
3416 auto se = scope_exit([&]() { c.pop_args(); });
3417 return rule_->holder_->parse(s, n, vs, c, dt);
3418 } else {
3419 // Definition
3420 c.push_args(std::vector<std::shared_ptr<Ope>>());
3421 auto se2 = scope_exit([&]() { c.pop_args(); });
3422 return rule_->holder_->parse(s, n, vs, c, dt);
3423 }
3424 } else {
3425 // Reference parameter in macro
3426 const auto &args = c.top_args();
3427 return args[iarg_]->parse(s, n, vs, c, dt);
3428 }
3429}
3430
3431inline std::shared_ptr<Ope> Reference::get_core_operator() const {
3432 return rule_->holder_;
3433}
3434
3435inline size_t BackReference::parse_core(const char *s, size_t n,
3436 SemanticValues &vs, Context &c,
3437 std::any &dt) const {
3438 for (auto it = c.capture_entries.rbegin(); it != c.capture_entries.rend();
3439 ++it) {
3440 if (it->first == name_) {
3441 const auto &lit = it->second;
3442 std::once_flag init_is_word;
3443 auto is_word = false;
3444 static const std::string empty;
3445 return parse_literal(s, n, vs, c, dt, lit, init_is_word, is_word, false,
3446 empty);
3447 }
3448 }
3449
3450 c.error_info.message_pos = s;
3451 c.error_info.message = "undefined back reference '$" + name_ + "'...";
3452 return static_cast<size_t>(-1);
3453}
3454
3455inline Definition &
3457 if (rule_.is_macro) {
3458 // Reference parameter in macro
3459 const auto &args = c.top_args();
3460 auto iarg = dynamic_cast<Reference &>(*binop_).iarg_;
3461 auto arg = args[iarg];
3462 return *dynamic_cast<Reference &>(*arg).rule_;
3463 }
3464
3465 return *dynamic_cast<Reference &>(*binop_).rule_;
3466}
3467
3468inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
3469 SemanticValues &vs,
3470 Context &c, std::any &dt,
3471 size_t min_prec) const {
3472 auto len = atom_->parse(s, n, vs, c, dt);
3473 if (fail(len)) { return len; }
3474
3475 std::string tok;
3476 auto &rule = get_reference_for_binop(c);
3477 auto action = std::move(rule.action);
3478
3479 rule.action = [&](SemanticValues &vs2, std::any &dt2,
3480 const std::any &predicate_data2) {
3481 tok = vs2.token();
3482 if (action) {
3483 return action(vs2, dt2, predicate_data2);
3484 } else if (!vs2.empty()) {
3485 return vs2[0];
3486 }
3487 return std::any();
3488 };
3489 auto action_se = scope_exit([&]() { rule.action = std::move(action); });
3490
3491 auto i = len;
3492 while (i < n) {
3493 std::vector<std::any> save_values(vs.begin(), vs.end());
3494 auto save_tokens = vs.tokens;
3495
3496 auto chvs = c.push_semantic_values_scope();
3497 auto chlen = binop_->parse(s + i, n - i, chvs, c, dt);
3499
3500 if (fail(chlen)) { break; }
3501
3502 auto it = info_.find(tok);
3503 if (it == info_.end()) { break; }
3504
3505 auto level = std::get<0>(it->second);
3506 auto assoc = std::get<1>(it->second);
3507
3508 if (level < min_prec) { break; }
3509
3510 vs.emplace_back(std::move(chvs[0]));
3511 i += chlen;
3512
3513 auto next_min_prec = level;
3514 if (assoc == 'L') { next_min_prec = level + 1; }
3515
3516 chvs = c.push_semantic_values_scope();
3517 chlen = parse_expression(s + i, n - i, chvs, c, dt, next_min_prec);
3519
3520 if (fail(chlen)) {
3521 vs.assign(save_values.begin(), save_values.end());
3522 vs.tokens = save_tokens;
3523 i = chlen;
3524 break;
3525 }
3526
3527 vs.emplace_back(std::move(chvs[0]));
3528 i += chlen;
3529
3530 std::any val;
3531 if (rule_.action) {
3532 vs.sv_ = std::string_view(s, i);
3533 static const std::any empty_predicate_data;
3534 val = rule_.action(vs, dt, empty_predicate_data);
3535 } else if (!vs.empty()) {
3536 val = vs[0];
3537 }
3538 vs.clear();
3539 vs.emplace_back(std::move(val));
3540 }
3541
3542 return i;
3543}
3544
3545inline size_t Recovery::parse_core(const char *s, size_t n,
3546 SemanticValues & /*vs*/, Context &c,
3547 std::any & /*dt*/) const {
3548 const auto &rule = dynamic_cast<Reference &>(*ope_);
3549
3550 // Custom error message
3551 if (c.log) {
3552 auto label = dynamic_cast<Reference *>(rule.args_[0].get());
3553 if (label && !label->rule_->error_message.empty()) {
3554 c.error_info.message_pos = s;
3555 c.error_info.message = label->rule_->error_message;
3556 c.error_info.label = label->rule_->name;
3557 }
3558 }
3559
3560 // Recovery
3561 auto len = static_cast<size_t>(-1);
3562 {
3563 auto save_log = c.log;
3564 c.log = nullptr;
3565 auto se = scope_exit([&]() { c.log = save_log; });
3566
3567 SemanticValues dummy_vs;
3568 std::any dummy_dt;
3569
3570 len = rule.parse(s, n, dummy_vs, c, dummy_dt);
3571 }
3572
3573 if (success(len)) {
3574 c.recovered = true;
3575
3576 if (c.log) {
3577 c.error_info.output_log(c.log, c.s, c.l);
3578 c.error_info.clear();
3579 }
3580 }
3581
3582 // Cut
3583 if (!c.cut_stack.empty()) {
3584 c.cut_stack.back() = true;
3585
3586 if (c.cut_stack.size() == 1) {
3587 // TODO: Remove unneeded entries in packrat memoise table
3588 }
3589 }
3590
3591 return len;
3592}
3593
3594inline void Sequence::accept(Visitor &v) { v.visit(*this); }
3595inline void PrioritizedChoice::accept(Visitor &v) { v.visit(*this); }
3596inline void Repetition::accept(Visitor &v) { v.visit(*this); }
3597inline void AndPredicate::accept(Visitor &v) { v.visit(*this); }
3598inline void NotPredicate::accept(Visitor &v) { v.visit(*this); }
3599inline void Dictionary::accept(Visitor &v) { v.visit(*this); }
3600inline void LiteralString::accept(Visitor &v) { v.visit(*this); }
3601inline void CharacterClass::accept(Visitor &v) { v.visit(*this); }
3602inline void Character::accept(Visitor &v) { v.visit(*this); }
3603inline void AnyCharacter::accept(Visitor &v) { v.visit(*this); }
3604inline void CaptureScope::accept(Visitor &v) { v.visit(*this); }
3605inline void Capture::accept(Visitor &v) { v.visit(*this); }
3606inline void TokenBoundary::accept(Visitor &v) { v.visit(*this); }
3607inline void Ignore::accept(Visitor &v) { v.visit(*this); }
3608inline void User::accept(Visitor &v) { v.visit(*this); }
3609inline void WeakHolder::accept(Visitor &v) { v.visit(*this); }
3610inline void Holder::accept(Visitor &v) { v.visit(*this); }
3611inline void Reference::accept(Visitor &v) { v.visit(*this); }
3612inline void Whitespace::accept(Visitor &v) { v.visit(*this); }
3613inline void BackReference::accept(Visitor &v) { v.visit(*this); }
3614inline void PrecedenceClimbing::accept(Visitor &v) { v.visit(*this); }
3615inline void Recovery::accept(Visitor &v) { v.visit(*this); }
3616inline void Cut::accept(Visitor &v) { v.visit(*this); }
3617
3619 auto p = static_cast<void *>(ope.outer_);
3620 if (ids.count(p)) { return; }
3621 auto id = ids.size();
3622 ids[p] = id;
3623 ope.outer_->id = id;
3624 ope.ope_->accept(*this);
3625}
3626
3628 if (ope.rule_) {
3629 for (const auto &arg : ope.args_) {
3630 arg->accept(*this);
3631 }
3632 ope.rule_->accept(*this);
3633 }
3634}
3635
3637 ope.atom_->accept(*this);
3638 ope.binop_->accept(*this);
3639}
3640
3642 if (ope.is_macro_) {
3643 for (const auto &arg : ope.args_) {
3644 arg->accept(*this);
3645 }
3646 } else {
3647 has_rule_ = true;
3648 }
3649}
3650
3652 if (ope.is_macro_) {
3653 ope.rule_->accept(*this);
3654 for (const auto &arg : ope.args_) {
3655 arg->accept(*this);
3656 }
3657 }
3658}
3659
3661 result = ope.rule_ && ope.rule_->can_be_empty;
3662}
3663
3665 if (ope.name_ == name_) {
3666 error_s = ope.s_;
3667 } else if (!ope.rule_ && !macro_args_stack_.empty()) {
3668 // Macro parameter reference: resolve through nested macro arg
3669 // stacks (e.g. B(X) <- C(X) where X is itself a param ref).
3670 auto resolved = resolve_macro_arg(ope.iarg_);
3671 if (resolved) {
3672 resolved->accept(*this);
3673 if (done_ == false) { return; }
3674 }
3675 } else if (!refs_.count(ope.name_)) {
3676 refs_.insert(ope.name_);
3677 if (ope.rule_) {
3678 if (ope.is_macro_) { macro_args_stack_.push_back(&ope.args_); }
3679 ope.rule_->accept(*this);
3680 if (ope.is_macro_) { macro_args_stack_.pop_back(); }
3681 if (done_ == false) { return; }
3682 }
3683 }
3684 // If the referenced rule can match empty, don't mark as done —
3685 // the sequence may continue past this element to find LR.
3686 if (!ope.rule_ && !macro_args_stack_.empty()) {
3687 auto resolved = resolve_macro_arg(ope.iarg_);
3688 if (resolved) {
3690 resolved->accept(cbe);
3691 done_ = !cbe.result;
3692 } else {
3693 done_ = true;
3694 }
3695 } else {
3696 done_ = !(ope.rule_ && ope.rule_->can_be_empty);
3697 }
3698}
3699
3700inline std::shared_ptr<Ope>
3702 for (int i = static_cast<int>(macro_args_stack_.size()) - 1; i >= 0; i--) {
3703 auto &args = *macro_args_stack_[i];
3704 if (iarg >= args.size()) { return nullptr; }
3705 auto ref = dynamic_cast<Reference *>(args[iarg].get());
3706 if (ref && !ref->rule_) {
3707 // Another param ref — resolve using parent level's args
3708 iarg = ref->iarg_;
3709 continue;
3710 }
3711 return args[iarg];
3712 }
3713 return nullptr;
3714}
3715
3717 auto save_is_empty = false;
3718 const char *save_error_s = nullptr;
3719 std::string save_error_name;
3720
3721 auto it = ope.opes_.begin();
3722 while (it != ope.opes_.end()) {
3723 (*it)->accept(*this);
3724 if (!is_empty) {
3725 ++it;
3726 while (it != ope.opes_.end()) {
3728 (*it)->accept(vis);
3729 if (vis.has_error) {
3730 is_empty = true;
3731 error_s = vis.error_s;
3732 error_name = vis.error_name;
3733 }
3734 ++it;
3735 }
3736 return;
3737 }
3738
3739 save_is_empty = is_empty;
3740 save_error_s = error_s;
3741 save_error_name = error_name;
3742
3743 is_empty = false;
3744 error_name.clear();
3745 ++it;
3746 }
3747
3748 is_empty = save_is_empty;
3749 error_s = save_error_s;
3750 error_name = save_error_name;
3751}
3752
3754 auto it = std::find_if(refs_.begin(), refs_.end(),
3755 [&](const std::pair<const char *, std::string> &ref) {
3756 return ope.name_ == ref.second;
3757 });
3758 if (it != refs_.end()) { return; }
3759
3760 if (ope.rule_) {
3761 refs_.emplace_back(ope.s_, ope.name_);
3762 ope.rule_->accept(*this);
3763 refs_.pop_back();
3764 }
3765}
3766
3768 auto it = std::find_if(refs_.begin(), refs_.end(),
3769 [&](const std::pair<const char *, std::string> &ref) {
3770 return ope.name_ == ref.second;
3771 });
3772 if (it != refs_.end()) { return; }
3773
3774 if (ope.rule_) {
3775 auto it = has_error_cache_.find(ope.name_);
3776 if (it != has_error_cache_.end()) {
3777 has_error = it->second;
3778 } else {
3779 refs_.emplace_back(ope.s_, ope.name_);
3780 ope.rule_->accept(*this);
3781 refs_.pop_back();
3783 }
3784 }
3785
3786 if (ope.is_macro_) {
3787 for (const auto &arg : ope.args_) {
3788 arg->accept(*this);
3789 }
3790 }
3791}
3792
3794 auto it = std::find(params_.begin(), params_.end(), ope.name_);
3795 if (it != params_.end()) { return; }
3796
3797 if (!grammar_.count(ope.name_)) {
3798 error_s[ope.name_] = ope.s_;
3799 error_message[ope.name_] = "'" + ope.name_ + "' is not defined.";
3800 } else {
3801 if (!referenced.count(ope.name_)) { referenced.insert(ope.name_); }
3802 const auto &rule = grammar_.at(ope.name_);
3803 if (rule.is_macro) {
3804 if (!ope.is_macro_ || ope.args_.size() != rule.params.size()) {
3805 error_s[ope.name_] = ope.s_;
3806 error_message[ope.name_] = "incorrect number of arguments.";
3807 }
3808 } else if (ope.is_macro_) {
3809 error_s[ope.name_] = ope.s_;
3810 error_message[ope.name_] = "'" + ope.name_ + "' is not macro.";
3811 }
3812 for (const auto &arg : ope.args_) {
3813 arg->accept(*this);
3814 }
3815 }
3816}
3817
3819 if (!ope.rule_) {
3820 // Macro parameter reference — can't predict what it will match
3821 result_.any_char = true;
3822 return;
3823 }
3824
3825 auto it = cache_.find(ope.rule_);
3826 FirstSet computed;
3827 const FirstSet *rule_fs;
3828 if (it != cache_.end()) {
3829 rule_fs = &it->second;
3830 } else {
3831 if (!refs_.insert(ope.rule_).second) {
3832 cycle_count_++; // cycle / left recursion
3833 return;
3834 }
3835 auto save = std::exchange(result_, FirstSet{});
3836 auto saved_cycle_count = cycle_count_;
3837 ope.rule_->accept(*this);
3838 computed = std::move(result_);
3839 result_ = std::move(save);
3840 refs_.erase(ope.rule_);
3841 if (cycle_count_ == saved_cycle_count) {
3842 // Cycle-free: cached value is complete and safe to reuse.
3843 it = cache_.try_emplace(ope.rule_, std::move(computed)).first;
3844 rule_fs = &it->second;
3845 } else {
3846 // Cycle was hit during this rule's computation — its result may be
3847 // missing contributions from rules that were on the call stack.
3848 // Use the value here but do not cache it for other call contexts.
3849 rule_fs = &computed;
3850 }
3851 }
3852
3853 result_.merge(*rule_fs);
3854 if (!result_.first_literal) {
3855 result_.first_literal = rule_fs->first_literal;
3856 }
3857 if (!result_.first_rule) {
3858 result_.first_rule = rule_fs->first_rule
3859 ? rule_fs->first_rule
3860 : (ope.rule_->is_token() ? ope.rule_ : nullptr);
3861 }
3862}
3863
3865 if (!ope.rule_) { return; }
3866 if (!visited_rules_.insert(ope.rule_).second) { return; }
3867 ope.rule_->accept(*this);
3868}
3869
3871 ope.kw_guard_.reset();
3873 for (const auto &op : ope.opes_) {
3874 op->accept(*this);
3875 }
3876}
3877
3879 // Detect pattern: NotPredicate(Reference→PrioritizedChoice<literals>)
3880 // TokenBoundary(Sequence[CharacterClass,
3881 // Repetition(CharacterClass)])
3882 // This is the pattern used by: PlainIdentifier <- !ReservedKeyword
3883 // <[a-z_]i[a-z0-9_]i*>
3884 if (seq.opes_.size() != 2) { return; }
3885
3886 // Child 0 must be NotPredicate
3887 auto *not_pred = dynamic_cast<NotPredicate *>(seq.opes_[0].get());
3888 if (!not_pred) { return; }
3889
3890 // NotPredicate's child must be Reference to a rule
3891 auto *ref = dynamic_cast<Reference *>(not_pred->ope_.get());
3892 if (!ref || !ref->rule_) { return; }
3893
3894 // The referenced rule's inner operator (Holder) must contain
3895 // PrioritizedChoice
3896 auto *holder = dynamic_cast<Holder *>(ref->get_core_operator().get());
3897 if (!holder) { return; }
3898 auto *choice = dynamic_cast<PrioritizedChoice *>(holder->ope_.get());
3899 if (!choice) { return; }
3900
3901 // Extract keywords from PrioritizedChoice alternatives
3902 std::vector<std::string> exact_keywords;
3903 std::vector<std::string> prefix_keywords;
3904
3905 for (const auto &alt : choice->opes_) {
3906 auto *lit = dynamic_cast<LiteralString *>(alt.get());
3907 if (lit) {
3908 if (!lit->ignore_case_) { return; }
3909 exact_keywords.push_back(to_lower(lit->lit_));
3910 continue;
3911 }
3912 // Check for compound keyword (Sequence of LiteralStrings)
3913 auto *sub_seq = dynamic_cast<Sequence *>(alt.get());
3914 if (sub_seq && !sub_seq->opes_.empty()) {
3915 auto *first_lit = dynamic_cast<LiteralString *>(sub_seq->opes_[0].get());
3916 if (first_lit) {
3917 auto all_ignore_case_lits =
3918 std::all_of(sub_seq->opes_.begin(), sub_seq->opes_.end(),
3919 [](const auto &child) {
3920 auto *l = dynamic_cast<LiteralString *>(child.get());
3921 return l && l->ignore_case_;
3922 });
3923 if (all_ignore_case_lits) {
3924 prefix_keywords.push_back(to_lower(first_lit->lit_));
3925 continue;
3926 }
3927 }
3928 }
3929 // Unrecognized alternative — bail out
3930 return;
3931 }
3932
3933 if (exact_keywords.empty()) { return; }
3934
3935 // Child 1 must be TokenBoundary
3936 auto *tb = dynamic_cast<TokenBoundary *>(seq.opes_[1].get());
3937 if (!tb) { return; }
3938
3939 // TokenBoundary content: Sequence[CharacterClass, Repetition(CharacterClass)]
3940 // or just CharacterClass (single char identifier)
3941 CharacterClass *first_cc = nullptr;
3942 CharacterClass *rest_cc = nullptr;
3943
3944 auto *inner_seq = dynamic_cast<Sequence *>(tb->ope_.get());
3945 if (inner_seq && inner_seq->opes_.size() == 2) {
3946 first_cc = dynamic_cast<CharacterClass *>(inner_seq->opes_[0].get());
3947 auto *rep = dynamic_cast<Repetition *>(inner_seq->opes_[1].get());
3948 if (rep) { rest_cc = dynamic_cast<CharacterClass *>(rep->ope_.get()); }
3949 }
3950
3951 if (!first_cc || !rest_cc) { return; }
3952 if (!first_cc->is_ascii_only() || !rest_cc->is_ascii_only()) { return; }
3953
3954 // All conditions met — set up the fast path
3955 auto kw = std::make_unique<KeywordGuardData>();
3956 kw->identifier_first = first_cc->ascii_bitset();
3957 kw->identifier_rest = rest_cc->ascii_bitset();
3958
3959 // Compute keyword length range for early-out in hot path
3960 size_t min_len = SIZE_MAX, max_len = 0;
3961 for (const auto &k : exact_keywords) {
3962 min_len = std::min(min_len, k.size());
3963 max_len = std::max(max_len, k.size());
3964 }
3965 for (const auto &k : prefix_keywords) {
3966 min_len = std::min(min_len, k.size());
3967 max_len = std::max(max_len, k.size());
3968 }
3969 kw->min_keyword_len = min_len;
3970 kw->max_keyword_len = max_len;
3971
3972 kw->exact_keywords = std::move(exact_keywords);
3973 kw->prefix_keywords = std::move(prefix_keywords);
3974 seq.kw_guard_ = std::move(kw);
3975}
3976
3977// Compute which rules benefit from packrat memoization.
3978// A rule benefits if it's reachable from 2+ alternatives of the same
3979// PrioritizedChoice (backtracking will re-visit it at the same position).
3981 std::call_once(packrat_filter_init_, [&]() {
3982 auto def_count = definition_ids_.size();
3983 if (def_count == 0) { return; }
3984
3985 // Collect rule IDs reachable from an Ope subtree (bitvector indexed by
3986 // def_id)
3987 struct CollectReachableRules : public TraversalVisitor {
3989 std::vector<bool> reachable; // indexed by def_id
3990
3991 CollectReachableRules(size_t n) : reachable(n, false) {}
3992
3993 void visit(Holder &ope) override {
3994 auto id = ope.outer_->id;
3995 if (id < reachable.size()) { reachable[id] = true; }
3996 ope.ope_->accept(*this);
3997 }
3998 void visit(Reference &ope) override {
3999 if (ope.rule_ && ope.rule_->id < reachable.size() &&
4000 !reachable[ope.rule_->id]) {
4001 reachable[ope.rule_->id] = true;
4002 ope.rule_->accept(*this);
4003 }
4004 }
4005 };
4006
4007 // Find rules that benefit: reachable from 2+ alternatives of same choice
4008 std::vector<bool> benefits(def_count, false);
4009
4010 struct FindBacktrackRules : public TraversalVisitor {
4012 std::vector<bool> &benefits;
4013 size_t def_count;
4014 std::vector<bool> visited_rules; // indexed by def_id
4015
4016 FindBacktrackRules(std::vector<bool> &b, size_t n)
4017 : benefits(b), def_count(n), visited_rules(n, false) {}
4018
4019 void visit(PrioritizedChoice &ope) override {
4020 // For each alternative, collect reachable rules as bitvectors
4021 std::vector<std::vector<bool>> alt_reachable;
4022 for (auto &op : ope.opes_) {
4023 CollectReachableRules crr(def_count);
4024 op->accept(crr);
4025 alt_reachable.push_back(std::move(crr.reachable));
4026 }
4027
4028 // Mark rules reachable from 2+ alternatives
4029 for (size_t id = 0; id < def_count; id++) {
4030 size_t count = 0;
4031 for (auto &alt : alt_reachable) {
4032 if (alt[id]) { count++; }
4033 }
4034 if (count >= 2) { benefits[id] = true; }
4035 }
4036
4037 // Recurse into alternatives
4038 for (auto &op : ope.opes_) {
4039 op->accept(*this);
4040 }
4041 }
4042 void visit(Holder &ope) override {
4043 auto id = ope.outer_->id;
4044 if (id < visited_rules.size() && !visited_rules[id]) {
4045 visited_rules[id] = true;
4046 ope.ope_->accept(*this);
4047 }
4048 }
4049 void visit(Reference &ope) override {
4050 if (ope.rule_) { ope.rule_->accept(*this); }
4051 }
4052 };
4053
4054 FindBacktrackRules finder(benefits, def_count);
4055 holder_->accept(finder);
4056 if (whitespaceOpe) { whitespaceOpe->accept(finder); }
4057 if (wordOpe) { wordOpe->accept(finder); }
4058
4059 packrat_filter_ = std::move(benefits);
4060 });
4061}
4062
4064 // Check if the reference is a macro parameter
4065 auto found_param = false;
4066 for (size_t i = 0; i < params_.size(); i++) {
4067 const auto &param = params_[i];
4068 if (param == ope.name_) {
4069 ope.iarg_ = i;
4070 found_param = true;
4071 break;
4072 }
4073 }
4074
4075 // Check if the reference is a definition rule
4076 if (!found_param && grammar_.count(ope.name_)) {
4077 auto &rule = grammar_.at(ope.name_);
4078 ope.rule_ = &rule;
4079 }
4080
4081 for (const auto &arg : ope.args_) {
4082 arg->accept(*this);
4083 }
4084}
4085
4087 for (size_t i = 0; i < args_.size(); i++) {
4088 const auto &name = params_[i];
4089 if (name == ope.name_) {
4090 found_ope = args_[i];
4091 return;
4092 }
4093 }
4094 found_ope = ope.shared_from_this();
4095}
4096
4097/*-----------------------------------------------------------------------------
4098 * PEG parser generator
4099 *---------------------------------------------------------------------------*/
4100
4101using Rules = std::unordered_map<std::string, std::shared_ptr<Ope>>;
4102
4104public:
4106 std::shared_ptr<Grammar> grammar;
4107 std::string start;
4109 };
4110
4111 static ParserContext parse(const char *s, size_t n, const Rules &rules,
4112 Log log, std::string_view start,
4113 bool enable_left_recursion = true) {
4114 return get_instance().perform_core(s, n, rules, log, std::string(start),
4115 enable_left_recursion);
4116 }
4117
4118 // For debugging purpose
4119 static bool parse_test(const char *d, const char *s) {
4120 Data data;
4121 std::any dt = &data;
4122
4123 auto n = strlen(s);
4124 auto r = get_instance().g[d].parse(s, n, dt);
4125 return r.ret && r.len == n;
4126 }
4127
4128#if defined(__cpp_lib_char8_t)
4129 static bool parse_test(const char *d, const char8_t *s) {
4130 return parse_test(d, reinterpret_cast<const char *>(s));
4131 }
4132#endif
4133
4134private:
4136 static ParserGenerator instance;
4137 return instance;
4138 }
4139
4141 make_grammar();
4142 setup_actions();
4143 }
4144
4146 std::string type;
4147 std::any data;
4148 std::string_view sv;
4149 };
4150
4151 struct Data {
4152 std::shared_ptr<Grammar> grammar;
4153 std::string start;
4154 const char *start_pos = nullptr;
4155
4156 std::vector<std::pair<std::string, const char *>> duplicates_of_definition;
4157
4158 std::vector<std::pair<std::string, const char *>> duplicates_of_instruction;
4159 std::map<std::string, std::vector<Instruction>> instructions;
4160
4161 std::vector<std::pair<std::string, const char *>> undefined_back_references;
4162 std::vector<std::set<std::string_view>> captures_stack{{}};
4163
4164 std::set<std::string_view> captures_in_current_definition;
4166
4167 Data() : grammar(std::make_shared<Grammar>()) {}
4168 };
4169
4170 class SyntaxErrorException : public std::runtime_error {
4171 public:
4172 SyntaxErrorException(const char *what_arg, std::pair<size_t, size_t> r)
4173 : std::runtime_error(what_arg), r_(r) {}
4174
4175 std::pair<size_t, size_t> line_info() const { return r_; }
4176
4177 private:
4178 std::pair<size_t, size_t> r_;
4179 };
4180
4182 // Setup PEG syntax parser
4183 g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]);
4184 g["Definition"] <=
4185 cho(seq(g["Ignore"], g["IdentCont"], g["Parameters"], g["LEFTARROW"],
4186 g["Expression"], opt(g["Instruction"])),
4187 seq(g["Ignore"], g["Identifier"], g["LEFTARROW"], g["Expression"],
4188 opt(g["Instruction"])));
4189 g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"])));
4190 g["Sequence"] <= zom(cho(g["CUT"], g["Prefix"]));
4191 g["Prefix"] <= seq(opt(cho(g["AND"], g["NOT"])), g["SuffixWithLabel"]);
4192 g["SuffixWithLabel"] <=
4193 seq(g["Suffix"], opt(seq(g["LABEL"], g["Identifier"])));
4194 g["Suffix"] <= seq(g["Primary"], opt(g["Loop"]));
4195 g["Loop"] <= cho(g["QUESTION"], g["STAR"], g["PLUS"], g["Repetition"]);
4196 g["Primary"] <= cho(seq(g["Ignore"], g["IdentCont"], g["Arguments"],
4197 npd(g["LEFTARROW"])),
4198 seq(g["Ignore"], g["Identifier"],
4199 npd(seq(opt(g["Parameters"]), g["LEFTARROW"]))),
4200 seq(g["OPEN"], g["Expression"], g["CLOSE"]),
4201 seq(g["BeginTok"], g["Expression"], g["EndTok"]),
4202 g["CapScope"],
4203 seq(g["BeginCap"], g["Expression"], g["EndCap"]),
4204 g["BackRef"], g["DictionaryI"], g["LiteralI"],
4205 g["Dictionary"], g["Literal"], g["NegatedClassI"],
4206 g["NegatedClass"], g["ClassI"], g["Class"], g["DOT"]);
4207
4208 g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
4209 g["IdentCont"] <= tok(seq(g["IdentStart"], zom(g["IdentRest"])));
4210
4211 const static std::vector<std::pair<char32_t, char32_t>> range = {
4212 {0x0080, 0xFFFF}};
4213 g["IdentStart"] <= seq(npd(lit(u8(u8"↑"))), npd(lit(u8(u8"⇑"))),
4214 cho(cls("a-zA-Z_%"), cls(range)));
4215
4216 g["IdentRest"] <= cho(g["IdentStart"], cls("0-9"));
4217
4218 g["Dictionary"] <= seq(g["LiteralD"], oom(seq(g["PIPE"], g["LiteralD"])));
4219
4220 g["DictionaryI"] <=
4221 seq(g["LiteralID"], oom(seq(g["PIPE"], g["LiteralID"])));
4222
4223 auto lit_ope = cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))),
4224 cls("'"), g["Spacing"]),
4225 seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))),
4226 cls("\""), g["Spacing"]));
4227 g["Literal"] <= lit_ope;
4228 g["LiteralD"] <= lit_ope;
4229
4230 auto lit_case_ignore_ope =
4231 cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), lit("'i"),
4232 g["Spacing"]),
4233 seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), lit("\"i"),
4234 g["Spacing"]));
4235 g["LiteralI"] <= lit_case_ignore_ope;
4236 g["LiteralID"] <= lit_case_ignore_ope;
4237
4238 // NOTE: The original Brian Ford's paper uses 'zom' instead of 'oom'.
4239 g["Class"] <= seq(chr('['), npd(chr('^')),
4240 tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'),
4241 g["Spacing"]);
4242 g["ClassI"] <= seq(chr('['), npd(chr('^')),
4243 tok(oom(seq(npd(chr(']')), g["Range"]))), lit("]i"),
4244 g["Spacing"]);
4245
4246 g["NegatedClass"] <= seq(lit("[^"),
4247 tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'),
4248 g["Spacing"]);
4249 g["NegatedClassI"] <= seq(lit("[^"),
4250 tok(oom(seq(npd(chr(']')), g["Range"]))),
4251 lit("]i"), g["Spacing"]);
4252
4253 // NOTE: This is different from The original Brian Ford's paper, and this
4254 // modification allows us to specify `[+-]` as a valid char class.
4255 g["Range"] <=
4256 cho(seq(g["Char"], chr('-'), npd(chr(']')), g["Char"]), g["Char"]);
4257
4258 g["Char"] <=
4259 cho(seq(chr('\\'), cls("fnrtv'\"[]\\^-")),
4260 seq(chr('\\'), cls("0-3"), cls("0-7"), cls("0-7")),
4261 seq(chr('\\'), cls("0-7"), opt(cls("0-7"))),
4262 seq(lit("\\x"), cls("0-9a-fA-F"), opt(cls("0-9a-fA-F"))),
4263 seq(lit("\\u"),
4264 cho(seq(cho(seq(chr('0'), cls("0-9a-fA-F")), lit("10")),
4265 rep(cls("0-9a-fA-F"), 4, 4)),
4266 rep(cls("0-9a-fA-F"), 4, 5))),
4267 seq(npd(chr('\\')), dot()));
4268
4269 g["Repetition"] <=
4270 seq(g["BeginBracket"], g["RepetitionRange"], g["EndBracket"]);
4271 g["RepetitionRange"] <= cho(seq(g["Number"], g["COMMA"], g["Number"]),
4272 seq(g["Number"], g["COMMA"]), g["Number"],
4273 seq(g["COMMA"], g["Number"]));
4274 g["Number"] <= seq(oom(cls("0-9")), g["Spacing"]);
4275
4276 g["CapScope"] <= seq(g["BeginCapScope"], g["Expression"], g["EndCapScope"]);
4277
4278 g["LEFTARROW"] <= seq(cho(lit("<-"), lit(u8(u8"←"))), g["Spacing"]);
4279 ~g["SLASH"] <= seq(chr('/'), g["Spacing"]);
4280 ~g["PIPE"] <= seq(chr('|'), g["Spacing"]);
4281 g["AND"] <= seq(chr('&'), g["Spacing"]);
4282 g["NOT"] <= seq(chr('!'), g["Spacing"]);
4283 g["QUESTION"] <= seq(chr('?'), g["Spacing"]);
4284 g["STAR"] <= seq(chr('*'), g["Spacing"]);
4285 g["PLUS"] <= seq(chr('+'), g["Spacing"]);
4286 ~g["OPEN"] <= seq(chr('('), g["Spacing"]);
4287 ~g["CLOSE"] <= seq(chr(')'), g["Spacing"]);
4288 g["DOT"] <= seq(chr('.'), g["Spacing"]);
4289
4290 g["CUT"] <= seq(lit(u8(u8"↑")), g["Spacing"]);
4291 ~g["LABEL"] <= seq(cho(chr('^'), lit(u8(u8"⇑"))), g["Spacing"]);
4292
4293 ~g["Spacing"] <= zom(cho(g["Space"], g["Comment"]));
4294 g["Comment"] <= seq(chr('#'), zom(seq(npd(g["EndOfLine"]), dot())),
4295 opt(g["EndOfLine"]));
4296 g["Space"] <= cho(chr(' '), chr('\t'), g["EndOfLine"]);
4297 g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r'));
4298 g["EndOfFile"] <= npd(dot());
4299
4300 ~g["BeginTok"] <= seq(chr('<'), g["Spacing"]);
4301 ~g["EndTok"] <= seq(chr('>'), g["Spacing"]);
4302
4303 ~g["BeginCapScope"] <= seq(chr('$'), chr('('), g["Spacing"]);
4304 ~g["EndCapScope"] <= seq(chr(')'), g["Spacing"]);
4305
4306 g["BeginCap"] <= seq(chr('$'), tok(g["IdentCont"]), chr('<'), g["Spacing"]);
4307 ~g["EndCap"] <= seq(chr('>'), g["Spacing"]);
4308
4309 g["BackRef"] <= seq(chr('$'), tok(g["IdentCont"]), g["Spacing"]);
4310
4311 g["IGNORE"] <= chr('~');
4312
4313 g["Ignore"] <= opt(g["IGNORE"]);
4314 g["Parameters"] <= seq(g["OPEN"], g["Identifier"],
4315 zom(seq(g["COMMA"], g["Identifier"])), g["CLOSE"]);
4316 g["Arguments"] <= seq(g["OPEN"], g["Expression"],
4317 zom(seq(g["COMMA"], g["Expression"])), g["CLOSE"]);
4318 ~g["COMMA"] <= seq(chr(','), g["Spacing"]);
4319
4320 // Instruction grammars
4321 g["Instruction"] <=
4322 seq(g["BeginBracket"],
4323 opt(seq(g["InstructionItem"], zom(seq(g["InstructionItemSeparator"],
4324 g["InstructionItem"])))),
4325 g["EndBracket"]);
4326 g["InstructionItem"] <=
4327 cho(g["PrecedenceClimbing"], g["ErrorMessage"], g["NoAstOpt"]);
4328 ~g["InstructionItemSeparator"] <= seq(chr(';'), g["Spacing"]);
4329
4330 ~g["SpacesZom"] <= zom(g["Space"]);
4331 ~g["SpacesOom"] <= oom(g["Space"]);
4332 ~g["BeginBracket"] <= seq(chr('{'), g["Spacing"]);
4333 ~g["EndBracket"] <= seq(chr('}'), g["Spacing"]);
4334
4335 // PrecedenceClimbing instruction
4336 g["PrecedenceClimbing"] <=
4337 seq(lit("precedence"), g["SpacesOom"], g["PrecedenceInfo"],
4338 zom(seq(g["SpacesOom"], g["PrecedenceInfo"])), g["SpacesZom"]);
4339 g["PrecedenceInfo"] <=
4340 seq(g["PrecedenceAssoc"],
4341 oom(seq(ign(g["SpacesOom"]), g["PrecedenceOpe"])));
4342 g["PrecedenceOpe"] <=
4343 cho(seq(cls("'"),
4344 tok(zom(seq(npd(cho(g["Space"], cls("'"))), g["Char"]))),
4345 cls("'")),
4346 seq(cls("\""),
4347 tok(zom(seq(npd(cho(g["Space"], cls("\""))), g["Char"]))),
4348 cls("\"")),
4349 tok(oom(seq(npd(cho(g["PrecedenceAssoc"], g["Space"], chr('}'))),
4350 dot()))));
4351 g["PrecedenceAssoc"] <= cls("LR");
4352
4353 // Error message instruction
4354 g["ErrorMessage"] <= seq(lit("error_message"), g["SpacesOom"],
4355 g["LiteralD"], g["SpacesZom"]);
4356
4357 // No Ast node optimization instruction
4358 g["NoAstOpt"] <= seq(lit("no_ast_opt"), g["SpacesZom"]);
4359
4360 // Set definition names
4361 for (auto &x : g) {
4362 x.second.name = x.first;
4363 }
4364 }
4365
4367 g["Definition"] = [&](const SemanticValues &vs, std::any &dt) {
4368 auto &data = *std::any_cast<Data *>(dt);
4369
4370 auto is_macro = vs.choice() == 0;
4371 auto ignore = std::any_cast<bool>(vs[0]);
4372 auto name = std::any_cast<std::string>(vs[1]);
4373
4374 std::vector<std::string> params;
4375 std::shared_ptr<Ope> ope;
4376 auto has_instructions = false;
4377
4378 if (is_macro) {
4379 params = std::any_cast<std::vector<std::string>>(vs[2]);
4380 ope = std::any_cast<std::shared_ptr<Ope>>(vs[4]);
4381 if (vs.size() == 6) { has_instructions = true; }
4382 } else {
4383 ope = std::any_cast<std::shared_ptr<Ope>>(vs[3]);
4384 if (vs.size() == 5) { has_instructions = true; }
4385 }
4386
4387 if (has_instructions) {
4388 auto index = is_macro ? 5 : 4;
4389 std::unordered_set<std::string> types;
4390 for (const auto &instruction :
4391 std::any_cast<std::vector<Instruction>>(vs[index])) {
4392 const auto &type = instruction.type;
4393 if (types.find(type) == types.end()) {
4394 data.instructions[name].push_back(instruction);
4395 types.insert(instruction.type);
4396 if (type == "declare_symbol" || type == "check_symbol") {
4397 if (!TokenChecker::is_token(*ope)) { ope = tok(ope); }
4398 }
4399 } else {
4400 data.duplicates_of_instruction.emplace_back(type,
4401 instruction.sv.data());
4402 }
4403 }
4404 }
4405
4406 auto &grammar = *data.grammar;
4407 if (!grammar.count(name)) {
4408 auto &rule = grammar[name];
4409 rule <= ope;
4410 rule.name = name;
4411 rule.s_ = vs.sv().data();
4412 rule.line_ = line_info(vs.ss, rule.s_);
4413 rule.ignoreSemanticValue = ignore;
4414 rule.is_macro = is_macro;
4415 rule.params = params;
4416
4417 if (data.start.empty()) {
4418 data.start = rule.name;
4419 data.start_pos = rule.s_;
4420 }
4421 } else {
4422 data.duplicates_of_definition.emplace_back(name, vs.sv().data());
4423 }
4424 };
4425
4426 g["Definition"].enter = [](const Context & /*c*/, const char * /*s*/,
4427 size_t /*n*/, std::any &dt) {
4428 auto &data = *std::any_cast<Data *>(dt);
4429 data.captures_in_current_definition.clear();
4430 };
4431
4432 g["Expression"] = [&](const SemanticValues &vs) {
4433 if (vs.size() == 1) {
4434 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
4435 } else {
4436 std::vector<std::shared_ptr<Ope>> opes;
4437 for (auto i = 0u; i < vs.size(); i++) {
4438 opes.emplace_back(std::any_cast<std::shared_ptr<Ope>>(vs[i]));
4439 }
4440 const std::shared_ptr<Ope> ope =
4441 std::make_shared<PrioritizedChoice>(opes);
4442 return ope;
4443 }
4444 };
4445
4446 g["Sequence"] = [&](const SemanticValues &vs) {
4447 if (vs.empty()) {
4448 return npd(lit(""));
4449 } else if (vs.size() == 1) {
4450 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
4451 } else {
4452 std::vector<std::shared_ptr<Ope>> opes;
4453 for (const auto &x : vs) {
4454 opes.emplace_back(std::any_cast<std::shared_ptr<Ope>>(x));
4455 }
4456 const std::shared_ptr<Ope> ope = std::make_shared<Sequence>(opes);
4457 return ope;
4458 }
4459 };
4460
4461 g["Prefix"] = [&](const SemanticValues &vs) {
4462 std::shared_ptr<Ope> ope;
4463 if (vs.size() == 1) {
4464 ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
4465 } else {
4466 assert(vs.size() == 2);
4467 auto tok = std::any_cast<char>(vs[0]);
4468 ope = std::any_cast<std::shared_ptr<Ope>>(vs[1]);
4469 if (tok == '&') {
4470 ope = apd(ope);
4471 } else { // '!'
4472 ope = npd(ope);
4473 }
4474 }
4475 return ope;
4476 };
4477
4478 g["SuffixWithLabel"] = [&](const SemanticValues &vs, std::any &dt) {
4479 auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
4480 if (vs.size() == 1) {
4481 return ope;
4482 } else {
4483 assert(vs.size() == 2);
4484 auto &data = *std::any_cast<Data *>(dt);
4485 const auto &ident = std::any_cast<std::string>(vs[1]);
4486 auto label = ref(*data.grammar, ident, vs.sv().data(), false, {});
4487 auto recovery = rec(ref(*data.grammar, RECOVER_DEFINITION_NAME,
4488 vs.sv().data(), true, {label}));
4489 return cho4label_(ope, recovery);
4490 }
4491 };
4492
// Describes the repetition suffix attached to an operand.
struct Loop {
  enum class Type {
    opt = 0, // optional
    zom,     // zero or more
    oom,     // one or more
    rep      // explicit repetition range
  };

  Type type;
  // (min, max) bounds; only read for Type::rep.
  std::pair<size_t, size_t> range;
};
4498
4499 g["Suffix"] = [&](const SemanticValues &vs) {
4500 auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
4501 if (vs.size() == 1) {
4502 return ope;
4503 } else {
4504 assert(vs.size() == 2);
4505 auto loop = std::any_cast<Loop>(vs[1]);
4506 switch (loop.type) {
4507 case Loop::Type::opt: return opt(ope);
4508 case Loop::Type::zom: return zom(ope);
4509 case Loop::Type::oom: return oom(ope);
4510 default: // Regex-like repetition
4511 return rep(ope, loop.range.first, loop.range.second);
4512 }
4513 }
4514 };
4515
4516 g["Loop"] = [&](const SemanticValues &vs) {
4517 switch (vs.choice()) {
4518 case 0: // Option
4519 return Loop{Loop::Type::opt, std::pair<size_t, size_t>()};
4520 case 1: // Zero or More
4521 return Loop{Loop::Type::zom, std::pair<size_t, size_t>()};
4522 case 2: // One or More
4523 return Loop{Loop::Type::oom, std::pair<size_t, size_t>()};
4524 default: // Regex-like repetition
4525 return Loop{Loop::Type::rep,
4526 std::any_cast<std::pair<size_t, size_t>>(vs[0])};
4527 }
4528 };
4529
4530 g["Primary"] = [&](const SemanticValues &vs, std::any &dt) {
4531 auto &data = *std::any_cast<Data *>(dt);
4532
4533 switch (vs.choice()) {
4534 case 0: // Macro Reference
4535 case 1: { // Reference
4536 auto is_macro = vs.choice() == 0;
4537 auto ignore = std::any_cast<bool>(vs[0]);
4538 const auto &ident = std::any_cast<std::string>(vs[1]);
4539
4540 std::vector<std::shared_ptr<Ope>> args;
4541 if (is_macro) {
4542 args = std::any_cast<std::vector<std::shared_ptr<Ope>>>(vs[2]);
4543 }
4544
4545 auto ope = ref(*data.grammar, ident, vs.sv().data(), is_macro, args);
4546 if (ident == RECOVER_DEFINITION_NAME) { ope = rec(ope); }
4547
4548 if (ignore) {
4549 return ign(ope);
4550 } else {
4551 return ope;
4552 }
4553 }
4554 case 2: { // (Expression)
4555 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
4556 }
4557 case 3: { // TokenBoundary
4558 return tok(std::any_cast<std::shared_ptr<Ope>>(vs[0]));
4559 }
4560 case 4: { // CaptureScope
4561 return csc(std::any_cast<std::shared_ptr<Ope>>(vs[0]));
4562 }
4563 case 5: { // Capture
4564 const auto &name = std::any_cast<std::string_view>(vs[0]);
4565 auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[1]);
4566
4567 data.captures_stack.back().insert(name);
4568 data.captures_in_current_definition.insert(name);
4569
4570 return cap(ope, [name](const char *a_s, size_t a_n, Context &c) {
4571 c.capture_entries.emplace_back(name, std::string(a_s, a_n));
4572 });
4573 }
4574 default: {
4575 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
4576 }
4577 }
4578 };
4579
4580 g["IdentCont"] = [](const SemanticValues &vs) {
4581 return std::string(vs.sv().data(), vs.sv().length());
4582 };
4583
4584 g["Dictionary"] = [](const SemanticValues &vs) {
4585 auto items = vs.transform<std::string>();
4586 return dic(items, false);
4587 };
4588 g["DictionaryI"] = [](const SemanticValues &vs) {
4589 auto items = vs.transform<std::string>();
4590 return dic(items, true);
4591 };
4592
4593 g["Literal"] = [](const SemanticValues &vs) {
4594 const auto &tok = vs.tokens.front();
4595 return lit(resolve_escape_sequence(tok.data(), tok.size()));
4596 };
4597 g["LiteralI"] = [](const SemanticValues &vs) {
4598 const auto &tok = vs.tokens.front();
4599 return liti(resolve_escape_sequence(tok.data(), tok.size()));
4600 };
4601 g["LiteralD"] = [](const SemanticValues &vs) {
4602 auto &tok = vs.tokens.front();
4603 return resolve_escape_sequence(tok.data(), tok.size());
4604 };
4605 g["LiteralID"] = [](const SemanticValues &vs) {
4606 auto &tok = vs.tokens.front();
4607 return resolve_escape_sequence(tok.data(), tok.size());
4608 };
4609
4610 g["Class"] = [](const SemanticValues &vs) {
4611 auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
4612 return cls(ranges);
4613 };
4614 g["ClassI"] = [](const SemanticValues &vs) {
4615 auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
4616 return cls(ranges, true);
4617 };
4618 g["NegatedClass"] = [](const SemanticValues &vs) {
4619 auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
4620 return ncls(ranges);
4621 };
4622 g["NegatedClassI"] = [](const SemanticValues &vs) {
4623 auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
4624 return ncls(ranges, true);
4625 };
4626 g["Range"] = [](const SemanticValues &vs) {
4627 switch (vs.choice()) {
4628 case 0: {
4629 auto s1 = std::any_cast<std::string>(vs[0]);
4630 auto s2 = std::any_cast<std::string>(vs[1]);
4631 auto cp1 = decode_codepoint(s1.data(), s1.length());
4632 auto cp2 = decode_codepoint(s2.data(), s2.length());
4633 if (cp1 > cp2) {
4634 throw SyntaxErrorException("characer range is out of order...",
4635 vs.line_info());
4636 }
4637 return std::pair(cp1, cp2);
4638 }
4639 case 1: {
4640 auto s = std::any_cast<std::string>(vs[0]);
4641 auto cp = decode_codepoint(s.data(), s.length());
4642 return std::pair(cp, cp);
4643 }
4644 }
4645 return std::pair<char32_t, char32_t>(0, 0);
4646 };
4647 g["Char"] = [](const SemanticValues &vs) {
4648 return resolve_escape_sequence(vs.sv().data(), vs.sv().length());
4649 };
4650
4651 g["RepetitionRange"] = [&](const SemanticValues &vs) {
4652 switch (vs.choice()) {
4653 case 0: { // Number COMMA Number
4654 auto min = std::any_cast<size_t>(vs[0]);
4655 auto max = std::any_cast<size_t>(vs[1]);
4656 return std::pair(min, max);
4657 }
4658 case 1: // Number COMMA
4659 return std::pair(std::any_cast<size_t>(vs[0]),
4660 std::numeric_limits<size_t>::max());
4661 case 2: { // Number
4662 auto n = std::any_cast<size_t>(vs[0]);
4663 return std::pair(n, n);
4664 }
4665 default: // COMMA Number
4666 return std::pair(std::numeric_limits<size_t>::min(),
4667 std::any_cast<size_t>(vs[0]));
4668 }
4669 };
4670 g["Number"] = [&](const SemanticValues &vs) {
4671 return vs.token_to_number<size_t>();
4672 };
4673
4674 g["CapScope"].enter = [](const Context & /*c*/, const char * /*s*/,
4675 size_t /*n*/, std::any &dt) {
4676 auto &data = *std::any_cast<Data *>(dt);
4677 data.captures_stack.emplace_back();
4678 };
4679 g["CapScope"].leave = [](const Context & /*c*/, const char * /*s*/,
4680 size_t /*n*/, size_t /*matchlen*/,
4681 std::any & /*value*/, std::any &dt) {
4682 auto &data = *std::any_cast<Data *>(dt);
4683 data.captures_stack.pop_back();
4684 };
4685
4686 g["AND"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
4687 g["NOT"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
4688 g["QUESTION"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
4689 g["STAR"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
4690 g["PLUS"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
4691
4692 g["DOT"] = [](const SemanticValues & /*vs*/) { return dot(); };
4693
4694 g["CUT"] = [](const SemanticValues & /*vs*/) { return cut(); };
4695
4696 g["BeginCap"] = [](const SemanticValues &vs) { return vs.token(); };
4697
4698 g["BackRef"] = [&](const SemanticValues &vs, std::any &dt) {
4699 auto &data = *std::any_cast<Data *>(dt);
4700
4701 // Undefined back reference check
4702 {
4703 auto found = false;
4704 auto it = data.captures_stack.rbegin();
4705 while (it != data.captures_stack.rend()) {
4706 if (it->find(vs.token()) != it->end()) {
4707 found = true;
4708 break;
4709 }
4710 ++it;
4711 }
4712 if (!found) {
4713 auto ptr = vs.token().data() - 1; // include '$' symbol
4714 data.undefined_back_references.emplace_back(vs.token(), ptr);
4715 }
4716 }
4717
4718 // NOTE: Disable packrat parsing if a back reference is not defined in
4719 // captures in the current definition rule.
4720 if (data.captures_in_current_definition.find(vs.token()) ==
4721 data.captures_in_current_definition.end()) {
4722 data.enablePackratParsing = false;
4723 }
4724
4725 return bkr(vs.token_to_string());
4726 };
4727
4728 g["Ignore"] = [](const SemanticValues &vs) { return vs.size() > 0; };
4729
4730 g["Parameters"] = [](const SemanticValues &vs) {
4731 return vs.transform<std::string>();
4732 };
4733
4734 g["Arguments"] = [](const SemanticValues &vs) {
4735 return vs.transform<std::shared_ptr<Ope>>();
4736 };
4737
// Semantic action for `PrecedenceClimbing`: builds a "precedence" Instruction.
// Every child value is one token list whose first token starts with the
// associativity character and whose remaining tokens are operators. All
// operators of the i-th list (counting from 1) are mapped to (i, assoc).
4738 g["PrecedenceClimbing"] = [](const SemanticValues &vs) {
 // NOTE(review): `binOpeInfo` is used below but its declaration is not
 // visible in this listing (embedded line 4739 is absent) - confirm against
 // the original source.
4740 size_t level = 1;
4741 for (const auto &v : vs) {
4742 auto tokens = std::any_cast<std::vector<std::string_view>>(v);
 // First character of the first token is the associativity marker.
4743 auto assoc = tokens[0][0];
4744 for (size_t i = 1; i < tokens.size(); i++) {
4745 binOpeInfo[tokens[i]] = std::pair(level, assoc);
4746 }
 // Each subsequent list gets the next level number.
4747 level++;
4748 }
4749 Instruction instruction;
4750 instruction.type = "precedence";
4751 instruction.data = binOpeInfo;
4752 instruction.sv = vs.sv();
4753 return instruction;
4754 };
4755 g["PrecedenceInfo"] = [](const SemanticValues &vs) {
4756 return vs.transform<std::string_view>();
4757 };
4758 g["PrecedenceOpe"] = [](const SemanticValues &vs) { return vs.token(); };
4759 g["PrecedenceAssoc"] = [](const SemanticValues &vs) { return vs.token(); };
4760
4761 g["ErrorMessage"] = [](const SemanticValues &vs) {
4762 Instruction instruction;
4763 instruction.type = "error_message";
4764 instruction.data = std::any_cast<std::string>(vs[0]);
4765 instruction.sv = vs.sv();
4766 return instruction;
4767 };
4768
4769 g["NoAstOpt"] = [](const SemanticValues &vs) {
4770 Instruction instruction;
4771 instruction.type = "no_ast_opt";
4772 instruction.sv = vs.sv();
4773 return instruction;
4774 };
4775
4776 g["Instruction"] = [](const SemanticValues &vs) {
4777 return vs.transform<Instruction>();
4778 };
4779 }
4780
4783 const char *s, Log log) {
 // NOTE(review): the start of this signature (including the `rule` and
 // `info` parameters used below) is not visible in this listing.
 //
 // Rewrites `rule` into a precedence-climbing operator. The rule's core
 // operator must have the shape
 //   Sequence(atom, Repetition(Sequence(binop, atom1)))
 // where all three leaves are References. Returns true on success; on any
 // shape mismatch it logs (if `log` is set) and returns false.
4784 try {
 // A failing reference dynamic_cast throws and is handled by catch (...).
 // NOTE(review): opes_[0] / opes_[1] are indexed without a visible size
 // check - confirm shorter sequences cannot reach this point.
4785 auto &seq = dynamic_cast<Sequence &>(*rule.get_core_operator());
4786 auto atom = seq.opes_[0];
4787 auto &rep = dynamic_cast<Repetition &>(*seq.opes_[1]);
4788 auto &seq1 = dynamic_cast<Sequence &>(*rep.ope_);
4789 auto binop = seq1.opes_[0];
4790 auto atom1 = seq1.opes_[1];
4791
4792 auto atom_name = dynamic_cast<Reference &>(*atom).name_;
4793 auto binop_name = dynamic_cast<Reference &>(*binop).name_;
4794 auto atom1_name = dynamic_cast<Reference &>(*atom1).name_;
4795
 // Reject unless the repetition is zero-or-more, both atoms name the same
 // rule, and the operator rule differs from the atom rule.
4796 if (!rep.is_zom() || atom_name != atom1_name || atom_name == binop_name) {
4797 if (log) {
4798 auto line = line_info(s, rule.s_);
4799 log(line.first, line.second,
4800 "'precedence' instruction cannot be applied to '" + rule.name +
4801 "'.",
4802 "");
4803 }
4804 return false;
4805 }
4806
 // Replace the rule body and turn off the rule's own action.
4807 rule.holder_->ope_ = pre(atom, binop, info, rule);
4808 rule.disable_action = true;
4809 } catch (...) {
 // Any exception from the block above is reported as "cannot be applied".
4810 if (log) {
4811 auto line = line_info(s, rule.s_);
4812 log(line.first, line.second,
4813 "'precedence' instruction cannot be applied to '" + rule.name +
4814 "'.",
4815 "");
4816 }
4817 return false;
4818 }
4819 return true;
4820 }
4821
// Builds a parser context from PEG grammar text.
//
// Parses `s` (length `n`) with the bootstrap grammar `g`, whose actions fill a
// local `Data`, then merges `rules`, validates the result and runs the
// post-processing passes below.
//
//   s, n                  - grammar text and its length.
//   rules                 - user supplied rules keyed by name; a leading '~'
//                           in the name sets ignoreSemanticValue.
//   log                   - optional callback invoked as
//                           log(line, column, message, label).
//   requested_start       - start rule name; empty means the first definition
//                           recorded while parsing.
//   enable_left_recursion - true: left recursive rules are flagged via
//                           is_left_recursive; false: they are errors.
//
// Returns {grammar, start, enablePackratParsing} on success and an empty
// ParserContext ({}) on any failure.
4822 ParserContext perform_core(const char *s, size_t n, const Rules &rules,
4823 Log log, std::string requested_start,
4824 bool enable_left_recursion = true) {
4825 Data data;
4826 auto &grammar = *data.grammar;
4827
4828 // Built-in macros
4829 {
4830 // `%recover`
 // Registered as a one-parameter macro whose body is a reference to "x".
4831 {
4832 auto &rule = grammar[RECOVER_DEFINITION_NAME];
4833 rule <= ref(grammar, "x", "", false, {});
4834 rule.name = RECOVER_DEFINITION_NAME;
4835 rule.s_ = "[native]";
4836 rule.ignoreSemanticValue = true;
4837 rule.is_macro = true;
4838 rule.params = {"x"};
4839 }
4840 }
4841
 // Parse the grammar text; the semantic actions receive `&data` through `dt`.
4842 try {
4843 std::any dt = &data;
4844 auto r = g["Grammar"].parse(s, n, dt, nullptr, log);
4845
4846 if (!r.ret) {
4847 if (log) {
 // Prefer the position/message of an explicit error message when set.
4848 if (r.error_info.message_pos) {
4849 auto line = line_info(s, r.error_info.message_pos);
4850 log(line.first, line.second, r.error_info.message,
4851 r.error_info.label);
4852 } else {
4853 auto line = line_info(s, r.error_info.error_pos);
4854 log(line.first, line.second, "syntax error", r.error_info.label);
4855 }
4856 }
4857 return {};
4858 }
4859 } catch (const SyntaxErrorException &e) {
4860 if (log) {
4861 auto line = e.line_info();
4862 log(line.first, line.second, e.what(), "");
4863 }
4864 return {};
4865 }
4866
4867 // User provided rules
 // NOTE(review): the loop variable binds by value, so every entry is copied.
4868 for (auto [user_name, user_rule] : rules) {
4869 auto name = user_name;
4870 auto ignore = false;
4871 if (!name.empty() && name[0] == '~') {
4872 ignore = true;
4873 name.erase(0, 1);
4874 }
 // A name that is empty after stripping '~' is silently skipped.
4875 if (!name.empty()) {
4876 auto &rule = grammar[name];
4877 rule <= user_rule;
4878 rule.name = name;
4879 rule.ignoreSemanticValue = ignore;
4880 }
4881 }
4882
4883 // Check duplicated definitions
 // The three checks below report every recorded problem before failing.
4884 auto ret = true;
4885
4886 if (!data.duplicates_of_definition.empty()) {
4887 for (const auto &[name, ptr] : data.duplicates_of_definition) {
4888 if (log) {
4889 auto line = line_info(s, ptr);
4890 log(line.first, line.second,
4891 "the definition '" + name + "' is already defined.", "");
4892 }
4893 }
4894 ret = false;
4895 }
4896
4897 // Check duplicated instructions
4898 if (!data.duplicates_of_instruction.empty()) {
4899 for (const auto &[type, ptr] : data.duplicates_of_instruction) {
4900 if (log) {
4901 auto line = line_info(s, ptr);
4902 log(line.first, line.second,
4903 "the instruction '" + type + "' is already defined.", "");
4904 }
4905 }
4906 ret = false;
4907 }
4908
4909 // Check undefined back references
4910 if (!data.undefined_back_references.empty()) {
4911 for (const auto &[name, ptr] : data.undefined_back_references) {
4912 if (log) {
4913 auto line = line_info(s, ptr);
4914 log(line.first, line.second,
4915 "the back reference '" + name + "' is undefined.", "");
4916 }
4917 }
4918 ret = false;
4919 }
4920
4921 // Set root definition
4922 auto start = data.start;
4923
4924 if (!requested_start.empty()) {
4925 if (grammar.count(requested_start)) {
4926 start = requested_start;
4927 } else {
4928 if (log) {
 // Reported at the very beginning of the grammar text.
4929 auto line = line_info(s, s);
4930 log(line.first, line.second,
4931 "the specified start rule '" + requested_start +
4932 "' is undefined.",
4933 "");
4934 }
4935 ret = false;
4936 }
4937 }
4938
4939 if (!ret) { return {}; }
4940
4941 auto &start_rule = grammar[start];
4942
4943 // Check if the start rule has ignore operator
4944 {
4945 if (start_rule.ignoreSemanticValue) {
4946 if (log) {
4947 auto line = line_info(s, start_rule.s_);
4948 log(line.first, line.second,
4949 "ignore operator cannot be applied to '" + start_rule.name + "'.",
4950 "");
4951 }
4952 ret = false;
4953 }
4954 }
4955
4956 if (!ret) { return {}; }
4957
4958 // Check missing definitions
 // NOTE(review): only `start_rule.name` is visible in this initializer;
 // embedded lines 4960-4962 are absent from this listing, so further seed
 // entries may exist - confirm against the original source.
4959 auto referenced = std::unordered_set<std::string>{
4963 start_rule.name,
4964 };
4965
4966 for (auto &[_, rule] : grammar) {
4967 ReferenceChecker vis(grammar, rule.params);
4968 rule.accept(vis);
4969 referenced.insert(vis.referenced.begin(), vis.referenced.end());
4970 for (const auto &[name, ptr] : vis.error_s) {
4971 if (log) {
4972 auto line = line_info(s, ptr);
4973 log(line.first, line.second, vis.error_message[name], "");
4974 }
4975 ret = false;
4976 }
4977 }
4978
 // Unreferenced rules are only logged; they do not clear `ret`.
4979 for (auto &[name, rule] : grammar) {
4980 if (!referenced.count(name)) {
4981 if (log) {
4982 auto line = line_info(s, rule.s_);
4983 auto msg = "'" + name + "' is not referenced.";
4984 log(line.first, line.second, msg, "");
4985 }
4986 }
4987 }
4988
4989 if (!ret) { return {}; }
4990
4991 // Link references
4992 for (auto &x : grammar) {
4993 auto &rule = x.second;
4994 LinkReferences vis(grammar, rule.params);
4995 rule.accept(vis);
4996 }
4997
4998 // Compute can_be_empty for each rule (fixed-point iteration)
 // Repeats full passes over the grammar until no rule's flag changes.
4999 {
5000 bool changed = true;
5001 while (changed) {
5002 changed = false;
5003 for (auto &[name, rule] : grammar) {
 // NOTE(review): `vis` is used here but its declaration is not visible in
 // this listing (embedded line 5004 is absent) - confirm against the source.
5005 rule.accept(vis);
5006 if (vis.result != rule.can_be_empty) {
5007 rule.can_be_empty = vis.result;
5008 changed = true;
5009 }
5010 }
5011 }
5012 }
5013
5014 // Check left recursion
 // Enabled: mark the affected rules. Disabled: treat them as errors.
5015 if (enable_left_recursion) {
5016 for (auto &[name, rule] : grammar) {
5017 DetectLeftRecursion vis(name);
5018 rule.accept(vis);
5019 if (vis.error_s) { rule.is_left_recursive = true; }
5020 }
5021 } else {
 // `ret` is already true here: every earlier failure returned above.
5022 ret = true;
5023
5024 for (auto &[name, rule] : grammar) {
5025 DetectLeftRecursion vis(name);
5026 rule.accept(vis);
5027 if (vis.error_s) {
5028 if (log) {
5029 auto line = line_info(s, vis.error_s);
5030 log(line.first, line.second, "'" + name + "' is left recursive.",
5031 "");
5032 }
5033 ret = false;
5034 }
5035 }
5036
5037 if (!ret) { return {}; }
5038 }
5039
5040 // Check infinite loop
5041 if (detect_infiniteLoop(data, start_rule, log, s)) { return {}; }
5042
5043 // Automatic whitespace skipping
5044 if (grammar.count(WHITESPACE_DEFINITION_NAME)) {
 // Rules accepted by IsLiteralToken::check are wrapped in a token boundary.
5045 for (auto &x : grammar) {
5046 auto &rule = x.second;
5047 auto ope = rule.get_core_operator();
5048 if (IsLiteralToken::check(*ope)) { rule <= tok(ope); }
5049 }
5050
5051 auto &rule = grammar[WHITESPACE_DEFINITION_NAME];
5052 start_rule.whitespaceOpe = wsp(rule.get_core_operator());
5053
5054 if (detect_infiniteLoop(data, rule, log, s)) { return {}; }
5055 }
5056
5057 // Word expression
5058 if (grammar.count(WORD_DEFINITION_NAME)) {
5059 auto &rule = grammar[WORD_DEFINITION_NAME];
5060 start_rule.wordOpe = rule.get_core_operator();
5061
5062 if (detect_infiniteLoop(data, rule, log, s)) { return {}; }
5063 }
5064
5065 // Apply instructions
 // Instruction types other than the three handled below are ignored here.
5066 for (const auto &[name, instructions] : data.instructions) {
5067 auto &rule = grammar[name];
5068
5069 for (const auto &instruction : instructions) {
5070 if (instruction.type == "precedence") {
5071 const auto &info =
5072 std::any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data);
5073
5074 if (!apply_precedence_instruction(rule, info, s, log)) { return {}; }
5075 } else if (instruction.type == "error_message") {
5076 rule.error_message = std::any_cast<std::string>(instruction.data);
5077 } else if (instruction.type == "no_ast_opt") {
5078 rule.no_ast_opt = true;
5079 }
5080 }
5081 }
5082
5083 // Setup First-Set and ISpan optimizations
5084 for (auto &x : grammar) {
5085 SetupFirstSets vis;
5086 x.second.accept(vis);
5087 }
5088
5089 return {data.grammar, start, data.enablePackratParsing};
5090 }
5091
5092 bool detect_infiniteLoop(const Data &data, Definition &rule, const Log &log,
5093 const char *s) const {
5094 std::vector<std::pair<const char *, std::string>> refs;
5095 std::unordered_map<std::string, bool> has_error_cache;
5096 DetectInfiniteLoop vis(data.start_pos, rule.name, refs, has_error_cache);
5097 rule.accept(vis);
5098 if (vis.has_error) {
5099 if (log) {
5100 auto line = line_info(s, vis.error_s);
5101 log(line.first, line.second,
5102 "infinite loop is detected in '" + vis.error_name + "'.", "");
5103 }
5104 return true;
5105 }
5106 return false;
5107 }
5108
5110};
5111
5112/*-----------------------------------------------------------------------------
5113 * AST
5114 *---------------------------------------------------------------------------*/
5115
// AST node type. It inherits from `Annotation`, so the template argument's
// members become part of every node.
// NOTE(review): several embedded line numbers are absent from this listing
// (5122-5125, 5131-5134, 5140-5144, 5157), so the constructor initializer
// lists and at least one member declaration are not visible here.
5116template <typename Annotation> struct AstBase : public Annotation {
 // Constructor taking a list of child nodes.
5117 AstBase(const char *path, size_t line, size_t column, const char *name,
5118 const std::vector<std::shared_ptr<AstBase>> &nodes,
5119 size_t position = 0, size_t length = 0, size_t choice_count = 0,
5120 size_t choice = 0)
 // A null `path` is stored as an empty string.
5121 : path(path ? path : ""), line(line), column(column), name(name),
5126
 // Constructor taking a token instead of child nodes.
5127 AstBase(const char *path, size_t line, size_t column, const char *name,
5128 const std::string_view &token, size_t position = 0, size_t length = 0,
5129 size_t choice_count = 0, size_t choice = 0)
5130 : path(path ? path : ""), line(line), column(column), name(name),
5135
 // Constructor deriving a node from an existing one: path, line, column,
 // name, token, nodes and parent are copied from `ast`.
5136 AstBase(const AstBase &ast, const char *original_name, size_t position = 0,
5137 size_t length = 0, size_t original_choice_count = 0,
5138 size_t original_choice = 0)
5139 : path(ast.path), line(ast.line), column(ast.column), name(ast.name),
5145 token(ast.token), nodes(ast.nodes), parent(ast.parent) {}
5146
 // Source location of the node.
5147 const std::string path;
5148 const size_t line = 1;
5149 const size_t column = 1;
5150
5151 const std::string name;
 // Unlike the other scalar members, position and length are not const.
5152 size_t position;
5153 size_t length;
5154 const size_t choice_count;
5155 const size_t choice;
5156 const std::string original_name;
5158 const size_t original_choice;
5159 const unsigned int tag;
5160 const unsigned int original_tag;
5161
 // `token` is a view; it does not own the characters it refers to.
5162 const bool is_token;
5163 const std::string_view token;
5164
 // Children are held by shared_ptr; the parent link is a weak_ptr.
5165 std::vector<std::shared_ptr<AstBase<Annotation>>> nodes;
5166 std::weak_ptr<AstBase<Annotation>> parent;
5167
 // Returns a copy of the token text; asserts that this node is a token.
5168 std::string token_to_string() const {
5169 assert(is_token);
5170 return std::string(token);
5171 }
5172
 // Converts the token text to T via token_to_number_.
5173 template <typename T> T token_to_number() const {
5174 return token_to_number_<T>(token);
5175 }
5176};
5177
// Appends a textual dump of the tree rooted at `ptr` to `s`.
//
// Each node produces one line: `level` indent strings, then "- name (token)"
// for token nodes or "+ name" otherwise. The printed name is original_name,
// followed by "/<original_choice>" when original_choice_count > 0 and by
// "[name]" when name differs from original_name. If `fn` is set, the text it
// returns for (node, level + 1) is appended right after the node's line.
// Children follow at level + 1.
//
// The recursion runs through a local worker that borrows `fn`, so the callback
// is no longer copied once per visited node.
template <typename T>
void ast_to_s_core(const std::shared_ptr<T> &ptr, std::string &s, int level,
                   std::function<std::string(const T &ast, int level)> fn) {
  const auto dump = [&s, &fn](const auto &self, const T &ast,
                              int depth) -> void {
    for (auto i = 0; i < depth; i++) {
      s += " ";
    }
    auto name = ast.original_name;
    if (ast.original_choice_count > 0) {
      name += "/" + std::to_string(ast.original_choice);
    }
    if (ast.name != ast.original_name) { name += "[" + ast.name + "]"; }
    if (ast.is_token) {
      s += "- " + name + " (";
      s += ast.token;
      s += ")\n";
    } else {
      s += "+ " + name + "\n";
    }
    if (fn) { s += fn(ast, depth + 1); }
    for (const auto &node : ast.nodes) {
      self(self, *node, depth + 1);
    }
  };
  dump(dump, *ptr, level);
}
5202
// Returns the textual dump of the tree rooted at `ptr`, starting at level 0.
// `fn`, when set, is called for every node and its result is appended after
// that node's line. The by-value callback is moved into the worker rather
// than copied a second time.
template <typename T>
std::string
ast_to_s(const std::shared_ptr<T> &ptr,
         std::function<std::string(const T &ast, int level)> fn = nullptr) {
  std::string s;
  ast_to_s_core(ptr, s, 0, std::move(fn));
  return s;
}
5211
5213 AstOptimizer(bool mode, const std::vector<std::string> &rules = {})
5214 : mode_(mode), rules_(rules) {}
5215
5216 template <typename T>
5217 std::shared_ptr<T> optimize(std::shared_ptr<T> original,
5218 std::shared_ptr<T> parent = nullptr) {
5219 auto found =
5220 std::find(rules_.begin(), rules_.end(), original->name) != rules_.end();
5221 auto opt = mode_ ? !found : found;
5222
5223 if (opt && original->nodes.size() == 1) {
5224 auto child = optimize(original->nodes[0], parent);
5225 auto ast = std::make_shared<T>(*child, original->name.data(),
5226 original->position, original->length,
5227 original->choice_count, original->choice);
5228 for (auto &node : ast->nodes) {
5229 node->parent = ast;
5230 }
5231 return ast;
5232 }
5233
5234 auto ast = std::make_shared<T>(*original);
5235 ast->parent = parent;
5236 ast->nodes.clear();
5237 for (const auto &node : original->nodes) {
5238 auto child = optimize(node, ast);
5239 ast->nodes.push_back(child);
5240 }
5241 return ast;
5242 }
5243
5244private:
 // true: collapse nodes not named in rules_; false: collapse only those named.
5245 const bool mode_;
 // Rule names consulted by optimize().
5246 const std::vector<std::string> rules_;
5247};
5248
// Empty annotation type: adds no members to the AST node.
5249struct EmptyType {};
// AST node type without any extra annotation.
5250using Ast = AstBase<EmptyType>;
5251
5252template <typename T = Ast> void add_ast_action(Definition &rule) {
5253 rule.action = [&](const SemanticValues &vs) {
5254 auto line = vs.line_info();
5255
5256 if (rule.is_token()) {
5257 return std::make_shared<T>(
5258 vs.path, line.first, line.second, rule.name.data(), vs.token(),
5259 std::distance(vs.ss, vs.sv().data()), vs.sv().length(),
5260 vs.choice_count(), vs.choice());
5261 }
5262
5263 auto ast =
5264 std::make_shared<T>(vs.path, line.first, line.second, rule.name.data(),
5265 vs.transform<std::shared_ptr<T>>(),
5266 std::distance(vs.ss, vs.sv().data()),
5267 vs.sv().length(), vs.choice_count(), vs.choice());
5268
5269 for (auto &node : ast->nodes) {
5270 node->parent = ast;
5271 }
5272 return ast;
5273 };
5274}
5275
// Preprocessor helpers used by the PEG_DEF_* macros.
// PEG_EXPAND yields its arguments unchanged, forcing one more expansion pass
// (presumably needed by preprocessors that keep __VA_ARGS__ as a single
// token - confirm).
5276#define PEG_EXPAND(...) __VA_ARGS__
// PEG_CONCAT pastes two tokens; PEG_CONCAT2 goes through PEG_CONCAT so that
// its arguments are macro-expanded before being pasted.
5277#define PEG_CONCAT(a, b) a##b
5278#define PEG_CONCAT2(a, b) PEG_CONCAT(a, b)
5279
// PEG_PICK yields its 101st argument (a100).
5280#define PEG_PICK( \
5281 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, \
5282 a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, \
5283 a32, a33, a34, a35, a36, a37, a38, a39, a40, a41, a42, a43, a44, a45, a46, \
5284 a47, a48, a49, a50, a51, a52, a53, a54, a55, a56, a57, a58, a59, a60, a61, \
5285 a62, a63, a64, a65, a66, a67, a68, a69, a70, a71, a72, a73, a74, a75, a76, \
5286 a77, a78, a79, a80, a81, a82, a83, a84, a85, a86, a87, a88, a89, a90, a91, \
5287 a92, a93, a94, a95, a96, a97, a98, a99, a100, ...) \
5288 a100
5289
// PEG_COUNT yields the number of arguments it receives (1 to 100): the
// arguments shift the descending list 100..0 so that the value landing in
// PEG_PICK's 101st slot equals the argument count.
5290#define PEG_COUNT(...) \
5291 PEG_EXPAND(PEG_PICK( \
5292 __VA_ARGS__, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, \
5293 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, \
5294 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \
5295 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, \
5296 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, \
5297 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
5298
// PEG_DEF_1(r) declares a peg::Definition named `r`, sets its name to the
// spelling of `r`, and installs the AST-building action on it.
5299#define PEG_DEF_1(r) \
5300 peg::Definition r; \
5301 r.name = #r; \
5302 peg::add_ast_action(r);
5303
// PEG_DEF_N(r1, ...) declares N definitions: it expands PEG_DEF_1 for the
// first name and PEG_DEF_(N-1) for the remaining ones.
5304#define PEG_DEF_2(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_1(__VA_ARGS__))
5305#define PEG_DEF_3(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_2(__VA_ARGS__))
5306#define PEG_DEF_4(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_3(__VA_ARGS__))
5307#define PEG_DEF_5(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_4(__VA_ARGS__))
5308#define PEG_DEF_6(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_5(__VA_ARGS__))
5309#define PEG_DEF_7(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_6(__VA_ARGS__))
5310#define PEG_DEF_8(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_7(__VA_ARGS__))
5311#define PEG_DEF_9(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_8(__VA_ARGS__))
5312#define PEG_DEF_10(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_9(__VA_ARGS__))
5313#define PEG_DEF_11(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_10(__VA_ARGS__))
5314#define PEG_DEF_12(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_11(__VA_ARGS__))
5315#define PEG_DEF_13(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_12(__VA_ARGS__))
5316#define PEG_DEF_14(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_13(__VA_ARGS__))
5317#define PEG_DEF_15(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_14(__VA_ARGS__))
5318#define PEG_DEF_16(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_15(__VA_ARGS__))
5319#define PEG_DEF_17(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_16(__VA_ARGS__))
5320#define PEG_DEF_18(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_17(__VA_ARGS__))
5321#define PEG_DEF_19(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_18(__VA_ARGS__))
5322#define PEG_DEF_20(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_19(__VA_ARGS__))
5323#define PEG_DEF_21(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_20(__VA_ARGS__))
5324#define PEG_DEF_22(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_21(__VA_ARGS__))
5325#define PEG_DEF_23(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_22(__VA_ARGS__))
5326#define PEG_DEF_24(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_23(__VA_ARGS__))
5327#define PEG_DEF_25(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_24(__VA_ARGS__))
5328#define PEG_DEF_26(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_25(__VA_ARGS__))
5329#define PEG_DEF_27(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_26(__VA_ARGS__))
5330#define PEG_DEF_28(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_27(__VA_ARGS__))
5331#define PEG_DEF_29(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_28(__VA_ARGS__))
5332#define PEG_DEF_30(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_29(__VA_ARGS__))
5333#define PEG_DEF_31(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_30(__VA_ARGS__))
5334#define PEG_DEF_32(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_31(__VA_ARGS__))
5335#define PEG_DEF_33(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_32(__VA_ARGS__))
5336#define PEG_DEF_34(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_33(__VA_ARGS__))
5337#define PEG_DEF_35(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_34(__VA_ARGS__))
5338#define PEG_DEF_36(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_35(__VA_ARGS__))
5339#define PEG_DEF_37(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_36(__VA_ARGS__))
5340#define PEG_DEF_38(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_37(__VA_ARGS__))
5341#define PEG_DEF_39(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_38(__VA_ARGS__))
5342#define PEG_DEF_40(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_39(__VA_ARGS__))
5343#define PEG_DEF_41(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_40(__VA_ARGS__))
5344#define PEG_DEF_42(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_41(__VA_ARGS__))
5345#define PEG_DEF_43(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_42(__VA_ARGS__))
5346#define PEG_DEF_44(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_43(__VA_ARGS__))
5347#define PEG_DEF_45(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_44(__VA_ARGS__))
5348#define PEG_DEF_46(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_45(__VA_ARGS__))
5349#define PEG_DEF_47(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_46(__VA_ARGS__))
5350#define PEG_DEF_48(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_47(__VA_ARGS__))
5351#define PEG_DEF_49(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_48(__VA_ARGS__))
5352#define PEG_DEF_50(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_49(__VA_ARGS__))
5353#define PEG_DEF_51(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_50(__VA_ARGS__))
5354#define PEG_DEF_52(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_51(__VA_ARGS__))
5355#define PEG_DEF_53(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_52(__VA_ARGS__))
5356#define PEG_DEF_54(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_53(__VA_ARGS__))
5357#define PEG_DEF_55(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_54(__VA_ARGS__))
5358#define PEG_DEF_56(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_55(__VA_ARGS__))
5359#define PEG_DEF_57(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_56(__VA_ARGS__))
5360#define PEG_DEF_58(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_57(__VA_ARGS__))
5361#define PEG_DEF_59(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_58(__VA_ARGS__))
5362#define PEG_DEF_60(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_59(__VA_ARGS__))
5363#define PEG_DEF_61(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_60(__VA_ARGS__))
5364#define PEG_DEF_62(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_61(__VA_ARGS__))
5365#define PEG_DEF_63(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_62(__VA_ARGS__))
5366#define PEG_DEF_64(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_63(__VA_ARGS__))
5367#define PEG_DEF_65(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_64(__VA_ARGS__))
5368#define PEG_DEF_66(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_65(__VA_ARGS__))
5369#define PEG_DEF_67(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_66(__VA_ARGS__))
5370#define PEG_DEF_68(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_67(__VA_ARGS__))
5371#define PEG_DEF_69(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_68(__VA_ARGS__))
5372#define PEG_DEF_70(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_69(__VA_ARGS__))
5373#define PEG_DEF_71(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_70(__VA_ARGS__))
5374#define PEG_DEF_72(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_71(__VA_ARGS__))
5375#define PEG_DEF_73(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_72(__VA_ARGS__))
5376#define PEG_DEF_74(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_73(__VA_ARGS__))
5377#define PEG_DEF_75(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_74(__VA_ARGS__))
5378#define PEG_DEF_76(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_75(__VA_ARGS__))
5379#define PEG_DEF_77(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_76(__VA_ARGS__))
5380#define PEG_DEF_78(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_77(__VA_ARGS__))
5381#define PEG_DEF_79(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_78(__VA_ARGS__))
5382#define PEG_DEF_80(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_79(__VA_ARGS__))
5383#define PEG_DEF_81(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_80(__VA_ARGS__))
5384#define PEG_DEF_82(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_81(__VA_ARGS__))
5385#define PEG_DEF_83(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_82(__VA_ARGS__))
5386#define PEG_DEF_84(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_83(__VA_ARGS__))
5387#define PEG_DEF_85(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_84(__VA_ARGS__))
5388#define PEG_DEF_86(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_85(__VA_ARGS__))
5389#define PEG_DEF_87(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_86(__VA_ARGS__))
5390#define PEG_DEF_88(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_87(__VA_ARGS__))
5391#define PEG_DEF_89(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_88(__VA_ARGS__))
5392#define PEG_DEF_90(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_89(__VA_ARGS__))
5393#define PEG_DEF_91(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_90(__VA_ARGS__))
5394#define PEG_DEF_92(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_91(__VA_ARGS__))
5395#define PEG_DEF_93(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_92(__VA_ARGS__))
5396#define PEG_DEF_94(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_93(__VA_ARGS__))
5397#define PEG_DEF_95(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_94(__VA_ARGS__))
5398#define PEG_DEF_96(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_95(__VA_ARGS__))
5399#define PEG_DEF_97(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_96(__VA_ARGS__))
5400#define PEG_DEF_98(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_97(__VA_ARGS__))
5401#define PEG_DEF_99(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_98(__VA_ARGS__))
5402#define PEG_DEF_100(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_99(__VA_ARGS__))
5403
// Selects the PEG_DEF_<N> macro for this argument list: the result of
// PEG_COUNT(__VA_ARGS__) (presumably the number of arguments) is pasted onto
// the PEG_DEF_ prefix by PEG_CONCAT2, and the resulting macro is then invoked
// with the complete argument list.
#define AST_DEFINITIONS(...)                                                   \
  PEG_EXPAND(PEG_CONCAT2(PEG_DEF_, PEG_COUNT(__VA_ARGS__))(__VA_ARGS__))
5406
/*-----------------------------------------------------------------------------
 * parser
 *---------------------------------------------------------------------------*/
5410
5411class parser {
5412public:
5413 parser() = default;
5414
5415 parser(const char *s, size_t n, const Rules &rules,
5416 std::string_view start = {}) {
5417 load_grammar(s, n, rules, start);
5418 }
5419
5420 parser(const char *s, size_t n, std::string_view start = {})
5421 : parser(s, n, Rules(), start) {}
5422
5423 parser(std::string_view sv, const Rules &rules, std::string_view start = {})
5424 : parser(sv.data(), sv.size(), rules, start) {}
5425
5426 parser(std::string_view sv, std::string_view start = {})
5427 : parser(sv.data(), sv.size(), Rules(), start) {}
5428
5429#if defined(__cpp_lib_char8_t)
5430 parser(std::u8string_view sv, const Rules &rules, std::string_view start = {})
5431 : parser(reinterpret_cast<const char *>(sv.data()), sv.size(), rules,
5432 start) {}
5433
5434 parser(std::u8string_view sv, std::string_view start = {})
5435 : parser(reinterpret_cast<const char *>(sv.data()), sv.size(), Rules(),
5436 start) {}
5437#endif
5438
5439 operator bool() const { return grammar_ != nullptr; }
5440
5441 bool load_grammar(const char *s, size_t n, const Rules &rules,
5442 std::string_view start = {}) {
5443 auto cxt =
5444 ParserGenerator::parse(s, n, rules, log_, start, enableLeftRecursion_);
5445 grammar_ = cxt.grammar;
5446 start_ = cxt.start;
5447 enablePackratParsing_ = cxt.enablePackratParsing;
5448 return grammar_ != nullptr;
5449 }
5450
5451 bool load_grammar(const char *s, size_t n, std::string_view start = {}) {
5452 return load_grammar(s, n, Rules(), start);
5453 }
5454
5455 bool load_grammar(std::string_view sv, const Rules &rules,
5456 std::string_view start = {}) {
5457 return load_grammar(sv.data(), sv.size(), rules, start);
5458 }
5459
5460 bool load_grammar(std::string_view sv, std::string_view start = {}) {
5461 return load_grammar(sv.data(), sv.size(), Rules(), start);
5462 }
5463
5464 bool parse_n(const char *s, size_t n, const char *path = nullptr) const {
5465 if (grammar_ != nullptr) {
5466 const auto &rule = (*grammar_)[start_];
5467 auto result = rule.parse(s, n, path, log_);
5468 return post_process(s, n, result);
5469 }
5470 return false;
5471 }
5472
5473 bool parse_n(const char *s, size_t n, std::any &dt,
5474 const char *path = nullptr) const {
5475 if (grammar_ != nullptr) {
5476 const auto &rule = (*grammar_)[start_];
5477 auto result = rule.parse(s, n, dt, path, log_);
5478 return post_process(s, n, result);
5479 }
5480 return false;
5481 }
5482
5483 template <typename T>
5484 bool parse_n(const char *s, size_t n, T &val,
5485 const char *path = nullptr) const {
5486 if (grammar_ != nullptr) {
5487 const auto &rule = (*grammar_)[start_];
5488 auto result = rule.parse_and_get_value(s, n, val, path, log_);
5489 return post_process(s, n, result);
5490 }
5491 return false;
5492 }
5493
5494 template <typename T>
5495 bool parse_n(const char *s, size_t n, std::any &dt, T &val,
5496 const char *path = nullptr) const {
5497 if (grammar_ != nullptr) {
5498 const auto &rule = (*grammar_)[start_];
5499 auto result = rule.parse_and_get_value(s, n, dt, val, path, log_);
5500 return post_process(s, n, result);
5501 }
5502 return false;
5503 }
5504
5505 bool parse(std::string_view sv, const char *path = nullptr) const {
5506 return parse_n(sv.data(), sv.size(), path);
5507 }
5508
5509 bool parse(std::string_view sv, std::any &dt,
5510 const char *path = nullptr) const {
5511 return parse_n(sv.data(), sv.size(), dt, path);
5512 }
5513
5514 template <typename T>
5515 bool parse(std::string_view sv, T &val, const char *path = nullptr) const {
5516 return parse_n(sv.data(), sv.size(), val, path);
5517 }
5518
5519 template <typename T>
5520 bool parse(std::string_view sv, std::any &dt, T &val,
5521 const char *path = nullptr) const {
5522 return parse_n(sv.data(), sv.size(), dt, val, path);
5523 }
5524
5525#if defined(__cpp_lib_char8_t)
5526 bool parse(std::u8string_view sv, const char *path = nullptr) const {
5527 return parse_n(reinterpret_cast<const char *>(sv.data()), sv.size(), path);
5528 }
5529
5530 bool parse(std::u8string_view sv, std::any &dt,
5531 const char *path = nullptr) const {
5532 return parse_n(reinterpret_cast<const char *>(sv.data()), sv.size(), dt,
5533 path);
5534 }
5535
5536 template <typename T>
5537 bool parse(std::u8string_view sv, T &val, const char *path = nullptr) const {
5538 return parse_n(reinterpret_cast<const char *>(sv.data()), sv.size(), val,
5539 path);
5540 }
5541
5542 template <typename T>
5543 bool parse(std::u8string_view sv, std::any &dt, T &val,
5544 const char *path = nullptr) const {
5545 return parse_n(reinterpret_cast<const char *>(sv.data()), sv.size(), dt,
5546 val, path);
5547 }
5548#endif
5549
5550 Definition &operator[](const char *s) { return (*grammar_)[s]; }
5551
5552 const Definition &operator[](const char *s) const { return (*grammar_)[s]; }
5553
5554 const Grammar &get_grammar() const { return *grammar_; }
5555
5557 if (grammar_ != nullptr) {
5558 auto &rule = (*grammar_)[start_];
5559 rule.eoi_check = false;
5560 }
5561 }
5562
5563 void enable_left_recursion(bool enable = true) {
5564 enableLeftRecursion_ = enable;
5565 }
5566
5568 if (grammar_ != nullptr) {
5569 auto &rule = (*grammar_)[start_];
5570 rule.enablePackratParsing = enablePackratParsing_;
5571 }
5572 }
5573
5574 void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave) {
5575 if (grammar_ != nullptr) {
5576 auto &rule = (*grammar_)[start_];
5577 rule.tracer_enter = tracer_enter;
5578 rule.tracer_leave = tracer_leave;
5579 }
5580 }
5581
5582 void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave,
5583 TracerStartOrEnd tracer_start,
5584 TracerStartOrEnd tracer_end) {
5585 if (grammar_ != nullptr) {
5586 auto &rule = (*grammar_)[start_];
5587 rule.tracer_enter = tracer_enter;
5588 rule.tracer_leave = tracer_leave;
5589 rule.tracer_start = tracer_start;
5590 rule.tracer_end = tracer_end;
5591 }
5592 }
5593
5594 void set_verbose_trace(bool verbose_trace) {
5595 if (grammar_ != nullptr) {
5596 auto &rule = (*grammar_)[start_];
5597 rule.verbose_trace = verbose_trace;
5598 }
5599 }
5600
5601 template <typename T = Ast> parser &enable_ast() {
5602 for (auto &[_, rule] : *grammar_) {
5603 if (!rule.action) { add_ast_action<T>(rule); }
5604 }
5605 return *this;
5606 }
5607
5608 template <typename T>
5609 std::shared_ptr<T> optimize_ast(std::shared_ptr<T> ast,
5610 bool opt_mode = true) const {
5611 return AstOptimizer(opt_mode, get_no_ast_opt_rules()).optimize(ast);
5612 }
5613
5614 void set_logger(Log log) { log_ = log; }
5615
5617 std::function<void(size_t line, size_t col, const std::string &msg)>
5618 log) {
5619 log_ = [log](size_t line, size_t col, const std::string &msg,
5620 const std::string & /*rule*/) { log(line, col, msg); };
5621 }
5622
5623private:
5624 bool post_process(const char *s, size_t n, Definition::Result &r) const {
5625 if (log_ && !r.ret) { r.error_info.output_log(log_, s, n); }
5626 return r.ret && !r.recovered;
5627 }
5628
5629 std::vector<std::string> get_no_ast_opt_rules() const {
5630 std::vector<std::string> rules;
5631 for (auto &[name, rule] : *grammar_) {
5632 if (rule.no_ast_opt) { rules.push_back(name); }
5633 }
5634 return rules;
5635 }
5636
5637 std::shared_ptr<Grammar> grammar_;
5638 std::string start_;
5642};
5643
/*-----------------------------------------------------------------------------
 * enable_tracing
 *---------------------------------------------------------------------------*/
5647
5648inline void enable_tracing(parser &parser, std::ostream &os) {
5650 [&](auto &ope, auto s, auto, auto &, auto &c, auto &, auto &trace_data) {
5651 auto prev_pos = std::any_cast<size_t>(trace_data);
5652 auto pos = static_cast<size_t>(s - c.s);
5653 auto backtrack = (pos < prev_pos ? "*" : "");
5654 std::string indent;
5655 auto level = c.trace_ids.size() - 1;
5656 while (level--) {
5657 indent += "│";
5658 }
5659 std::string name;
5660 {
5661 name = peg::TraceOpeName::get(const_cast<peg::Ope &>(ope));
5662
5663 auto lit = dynamic_cast<const peg::LiteralString *>(&ope);
5664 if (lit) { name += " '" + peg::escape_characters(lit->lit_) + "'"; }
5665 }
5666 os << "E " << pos + 1 << backtrack << "\t" << indent << "┌" << name
5667 << " #" << c.trace_ids.back() << std::endl;
5668 trace_data = static_cast<size_t>(pos);
5669 },
5670 [&](auto &ope, auto s, auto, auto &sv, auto &c, auto &, auto len,
5671 auto &) {
5672 auto pos = static_cast<size_t>(s - c.s);
5673 if (len != static_cast<size_t>(-1)) { pos += len; }
5674 std::string indent;
5675 auto level = c.trace_ids.size() - 1;
5676 while (level--) {
5677 indent += "│";
5678 }
5679 auto ret = len != static_cast<size_t>(-1) ? "└o " : "└x ";
5680 auto name = peg::TraceOpeName::get(const_cast<peg::Ope &>(ope));
5681 std::stringstream choice;
5682 if (sv.choice_count() > 0) {
5683 choice << " " << sv.choice() << "/" << sv.choice_count();
5684 }
5685 std::string token;
5686 if (!sv.tokens.empty()) {
5687 token += ", token '";
5688 token += sv.tokens[0];
5689 token += "'";
5690 }
5691 std::string matched;
5692 if (peg::success(len) &&
5693 peg::TokenChecker::is_token(const_cast<peg::Ope &>(ope))) {
5694 matched = ", match '" + peg::escape_characters(s, len) + "'";
5695 }
5696 os << "L " << pos + 1 << "\t" << indent << ret << name << " #"
5697 << c.trace_ids.back() << choice.str() << token << matched
5698 << std::endl;
5699 },
5700 [&](auto &trace_data) { trace_data = static_cast<size_t>(0); },
5701 [&](auto &) {});
5702}
5703
/*-----------------------------------------------------------------------------
 * enable_profiling
 *---------------------------------------------------------------------------*/
5707
5708inline void enable_profiling(parser &parser, std::ostream &os) {
5709 struct Stats {
5710 struct Item {
5711 std::string name;
5712 size_t success;
5713 size_t fail;
5714 };
5715 std::vector<Item> items;
5716 std::map<std::string, size_t> index;
5717 size_t total = 0;
5718 std::chrono::steady_clock::time_point start;
5719 };
5720
5722 [&](auto &ope, auto, auto, auto &, auto &, auto &, std::any &trace_data) {
5723 if (auto holder = dynamic_cast<const peg::Holder *>(&ope)) {
5724 auto &stats = *std::any_cast<Stats *>(trace_data);
5725
5726 auto &name = holder->name();
5727 if (stats.index.find(name) == stats.index.end()) {
5728 stats.index[name] = stats.index.size();
5729 stats.items.push_back({name, 0, 0});
5730 }
5731 stats.total++;
5732 }
5733 },
5734 [&](auto &ope, auto, auto, auto &, auto &, auto &, auto len,
5735 std::any &trace_data) {
5736 if (auto holder = dynamic_cast<const peg::Holder *>(&ope)) {
5737 auto &stats = *std::any_cast<Stats *>(trace_data);
5738
5739 auto &name = holder->name();
5740 auto index = stats.index[name];
5741 auto &stat = stats.items[index];
5742 if (len != static_cast<size_t>(-1)) {
5743 stat.success++;
5744 } else {
5745 stat.fail++;
5746 }
5747
5748 if (index == 0) {
5749 auto end = std::chrono::steady_clock::now();
5750 auto nano = std::chrono::duration_cast<std::chrono::microseconds>(
5751 end - stats.start)
5752 .count();
5753 auto sec = nano / 1000000.0;
5754 os << "duration: " << sec << "s (" << nano << "µs)" << std::endl
5755 << std::endl;
5756
5757 char buff[BUFSIZ];
5758 size_t total_success = 0;
5759 size_t total_fail = 0;
5760 for (auto &[name, success, fail] : stats.items) {
5761 total_success += success;
5762 total_fail += fail;
5763 }
5764
5765 os << " id total % success fail "
5766 "definition"
5767 << std::endl;
5768
5769 auto grand_total = total_success + total_fail;
5770 snprintf(buff, BUFSIZ, "%4s %10zu %5s %10zu %10zu %s", "",
5771 grand_total, "", total_success, total_fail,
5772 "Total counters");
5773 os << buff << std::endl;
5774
5775 snprintf(buff, BUFSIZ, "%4s %10s %5s %10.2f %10.2f %s", "", "",
5776 "", total_success * 100.0 / grand_total,
5777 total_fail * 100.0 / grand_total, "% success/fail");
5778 os << buff << std::endl << std::endl;
5779 ;
5780
5781 size_t id = 0;
5782 for (auto &[name, success, fail] : stats.items) {
5783 auto total = success + fail;
5784 auto ratio = total * 100.0 / stats.total;
5785 snprintf(buff, BUFSIZ, "%4zu %10zu %5.2f %10zu %10zu %s", id,
5786 total, ratio, success, fail, name.c_str());
5787 os << buff << std::endl;
5788 id++;
5789 }
5790 }
5791 }
5792 },
5793 [&](auto &trace_data) {
5794 auto stats = new Stats{};
5795 stats->start = std::chrono::steady_clock::now();
5796 trace_data = stats;
5797 },
5798 [&](auto &trace_data) {
5799 auto stats = std::any_cast<Stats *>(trace_data);
5800 delete stats;
5801 });
5802}
5803} // namespace peg
Definition peglib.h:642
void operator=(F fn)
Definition peglib.h:647
Action()=default
Action(Action &&rhs)=default
Fty make_adaptor(F fn)
Definition peglib.h:661
std::function< std::any(SemanticValues &vs, std::any &dt, const std::any &predicate_data)> Fty
Definition peglib.h:658
std::any operator()(SemanticValues &vs, std::any &dt, const std::any &predicate_data) const
Definition peglib.h:652
Fty fn_
Definition peglib.h:677
Action(F fn)
Definition peglib.h:646
Action & operator=(const Action &rhs)=default
Definition peglib.h:1375
AndPredicate(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1377
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1379
std::shared_ptr< Ope > ope_
Definition peglib.h:1393
void accept(Visitor &v) override
Definition peglib.h:3597
Definition peglib.h:1587
size_t parse_core(const char *s, size_t n, SemanticValues &, Context &c, std::any &) const override
Definition peglib.h:1589
void accept(Visitor &v) override
Definition peglib.h:3603
Definition peglib.h:1769
std::string name_
Definition peglib.h:1780
BackReference(const std::string &name)
Definition peglib.h:1773
BackReference(std::string &&name)
Definition peglib.h:1771
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:3435
void accept(Visitor &v) override
Definition peglib.h:3613
Definition peglib.h:1602
void accept(Visitor &v) override
Definition peglib.h:3604
CaptureScope(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1604
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1606
std::shared_ptr< Ope > ope_
Definition peglib.h:1616
Definition peglib.h:1619
MatchAction match_action_
Definition peglib.h:1636
std::function< void(const char *s, size_t n, Context &c)> MatchAction
Definition peglib.h:1621
std::shared_ptr< Ope > ope_
Definition peglib.h:1635
void accept(Visitor &v) override
Definition peglib.h:3605
Capture(const std::shared_ptr< Ope > &ope, MatchAction ma)
Definition peglib.h:1623
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1626
Definition peglib.h:1459
bool negated_
Definition peglib.h:1554
bool ignore_case_
Definition peglib.h:1555
std::vector< std::pair< char32_t, char32_t > > ranges_
Definition peglib.h:1553
CharacterClass(const std::string &s, bool negated, bool ignore_case)
Definition peglib.h:1461
void setup_ascii_bitset()
Definition peglib.h:1535
size_t parse_core(const char *s, size_t n, SemanticValues &, Context &c, std::any &) const override
Definition peglib.h:1488
bool in_range(const std::pair< char32_t, char32_t > &range, char32_t cp) const
Definition peglib.h:1525
CharacterClass(const std::vector< std::pair< char32_t, char32_t > > &ranges, bool negated, bool ignore_case)
Definition peglib.h:1481
bool is_ascii_only_
Definition peglib.h:1557
bool is_ascii_only() const
Definition peglib.h:1521
friend struct ComputeFirstSet
Definition peglib.h:1519
const std::bitset< 256 > & ascii_bitset() const
Definition peglib.h:1522
void accept(Visitor &v) override
Definition peglib.h:3601
std::bitset< 256 > ascii_bitset_
Definition peglib.h:1556
Definition peglib.h:1560
Character(char32_t ch)
Definition peglib.h:1562
char32_t ch_
Definition peglib.h:1583
void accept(Visitor &v) override
Definition peglib.h:3602
size_t parse_core(const char *s, size_t n, SemanticValues &, Context &c, std::any &) const override
Definition peglib.h:1564
Definition peglib.h:811
size_t in_token_boundary_count
Definition peglib.h:826
std::vector< Definition * > rule_stack
Definition peglib.h:823
void trace_leave(const Ope &ope, const char *a_s, size_t n, const SemanticValues &vs, std::any &dt, size_t len)
Definition peglib.h:3069
std::vector< std::pair< std::string_view, std::string > > capture_entries
Definition peglib.h:833
std::once_flag source_line_index_init_
Definition peglib.h:1074
std::shared_ptr< Ope > wordOpe
Definition peglib.h:831
std::set< std::pair< const Definition *, const char * > > lr_active_seeds
Definition peglib.h:858
void trace_enter(const Ope &ope, const char *a_s, size_t n, const SemanticValues &vs, std::any &dt)
Definition peglib.h:3063
TracerEnter tracer_enter
Definition peglib.h:883
std::vector< bool > cut_stack
Definition peglib.h:835
size_t skip_whitespace(const char *a_s, size_t n, SemanticValues &vs, std::any &dt)
Definition peglib.h:3020
const std::vector< std::shared_ptr< Ope > > & top_args() const
Definition peglib.h:1005
std::vector< bool > cache_success
Definition peglib.h:840
std::set< const Definition * > lr_refs_hit
Definition peglib.h:854
Context operator=(const Context &)=delete
std::shared_ptr< Ope > whitespaceOpe
Definition peglib.h:828
void clear_packrat_cache(const char *pos, size_t def_id)
Definition peglib.h:860
std::map< std::pair< size_t, size_t >, std::tuple< size_t, std::any > > cache_values
Definition peglib.h:843
const size_t def_count
Definition peglib.h:837
std::map< std::pair< const Definition *, const char * >, LRMemo > lr_memo
Definition peglib.h:850
Context(Context &&)=delete
Context(const char *path, const char *s, size_t l, size_t def_count, std::shared_ptr< Ope > whitespaceOpe, std::shared_ptr< Ope > wordOpe, bool enablePackratParsing, TracerEnter tracer_enter, TracerLeave tracer_leave, std::any trace_data, bool verbose_trace, Log log)
Definition peglib.h:890
const bool verbose_trace
Definition peglib.h:886
Log log
Definition peglib.h:888
std::vector< PackratStats > * packrat_stats
Definition peglib.h:919
const char * s
Definition peglib.h:814
size_t next_trace_id
Definition peglib.h:1071
SemanticValues & push_semantic_values_scope()
Definition peglib.h:974
void pop_semantic_values_scope()
Definition peglib.h:996
bool is_traceable(const Ope &ope) const
Definition peglib.h:3076
const char * path
Definition peglib.h:813
std::any trace_data
Definition peglib.h:885
TracerLeave tracer_leave
Definition peglib.h:884
Snapshot snapshot(const SemanticValues &vs) const
Definition peglib.h:1020
std::vector< size_t > trace_ids
Definition peglib.h:1072
void write_packrat_cache(const char *pos, size_t def_id, size_t len, const std::any &val)
Definition peglib.h:871
size_t value_stack_size
Definition peglib.h:821
const size_t l
Definition peglib.h:815
void push_args(std::vector< std::shared_ptr< Ope > > &&args)
Definition peglib.h:999
void pop_args()
Definition peglib.h:1003
ErrorInfo error_info
Definition peglib.h:817
std::vector< bool > cache_registered
Definition peglib.h:839
bool in_whitespace
Definition peglib.h:829
std::pair< size_t, size_t > line_info(const char *cur) const
Definition peglib.h:1052
std::vector< size_t > source_line_index
Definition peglib.h:1075
bool recovered
Definition peglib.h:818
std::vector< std::shared_ptr< SemanticValues > > value_stack
Definition peglib.h:820
~Context()
Definition peglib.h:905
void packrat(const char *a_s, size_t def_id, size_t &len, std::any &val, T fn)
Definition peglib.h:927
const bool enablePackratParsing
Definition peglib.h:838
void rollback(SemanticValues &vs, const Snapshot &snap)
Definition peglib.h:1025
Context(const Context &)=delete
void set_error_pos(const char *a_s, const char *literal=nullptr)
Definition peglib.h:3029
bool ignore_trace_state
Definition peglib.h:1073
const std::vector< bool > * packrat_rule_filter
Definition peglib.h:924
std::vector< std::vector< std::shared_ptr< Ope > > > args_stack
Definition peglib.h:824
Definition peglib.h:1823
void accept(Visitor &v) override
Definition peglib.h:3616
size_t parse_core(const char *, size_t, SemanticValues &, Context &c, std::any &) const override
Definition peglib.h:1825
Definition peglib.h:2590
std::shared_ptr< Ope > wordOpe
Definition peglib.h:2776
bool is_macro
Definition peglib.h:2778
bool ignoreSemanticValue
Definition peglib.h:2774
bool eoi_check
Definition peglib.h:2793
Predicate predicate
Definition peglib.h:2765
Definition & operator<=(const std::shared_ptr< Ope > &ope)
Definition peglib.h:2614
std::function< void(const Context &c, const char *s, size_t n, size_t matchlen, std::any &value, std::any &dt)> leave
Definition peglib.h:2773
void initialize_packrat_filter() const
Definition peglib.h:3980
Definition()
Definition peglib.h:2599
Result parse(const char *s, std::any &dt, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2638
bool is_left_recursive
Definition peglib.h:2781
bool disable_action
Definition peglib.h:2780
TracerEnter tracer_enter
Definition peglib.h:2784
std::vector< std::string > params
Definition peglib.h:2779
std::once_flag packrat_filter_init_
Definition peglib.h:2883
Definition & operator~()
Definition peglib.h:2745
Result parse(const char *s, size_t n, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2619
bool enablePackratParsing
Definition peglib.h:2777
std::pair< size_t, size_t > line_
Definition peglib.h:2763
friend class ParserGenerator
Definition peglib.h:2801
std::vector< bool > packrat_filter_
Definition peglib.h:2884
std::once_flag is_token_init_
Definition peglib.h:2878
TracerStartOrEnd tracer_end
Definition peglib.h:2788
std::vector< Context::PackratStats > packrat_stats_
Definition peglib.h:2797
std::once_flag definition_ids_init_
Definition peglib.h:2881
bool collect_packrat_stats
Definition peglib.h:2796
bool no_ast_opt
Definition peglib.h:2791
Definition & operator,(T fn)
Definition peglib.h:2740
friend class Reference
Definition peglib.h:2800
void operator=(Action a)
Definition peglib.h:2738
Result parse_and_get_value(const char *s, size_t n, std::any &dt, T &val, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2665
Result parse(const char *s, size_t n, std::any &dt, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2632
std::unordered_map< void *, size_t > definition_ids_
Definition peglib.h:2882
Result parse_and_get_value(const char *s, size_t n, T &val, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2645
std::once_flag assign_id_to_definition_init_
Definition peglib.h:2880
TracerLeave tracer_leave
Definition peglib.h:2785
size_t id
Definition peglib.h:2767
bool can_be_empty
Definition peglib.h:2782
std::shared_ptr< Ope > whitespaceOpe
Definition peglib.h:2775
bool is_token() const
Definition peglib.h:2754
Result parse_and_get_value(const char *s, T &val, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2658
Definition & operator=(Definition &&rhs)
bool is_token_
Definition peglib.h:2879
Definition(const Definition &rhs)
Definition peglib.h:2601
Result parse_core(const char *s, size_t n, SemanticValues &vs, std::any &dt, const char *path, Log log) const
Definition peglib.h:2818
Definition & operator=(const Definition &rhs)
void accept(Ope::Visitor &v)
Definition peglib.h:2750
bool verbose_trace
Definition peglib.h:2786
std::string name
Definition peglib.h:2761
std::shared_ptr< Holder > holder_
Definition peglib.h:2877
std::string error_message
Definition peglib.h:2790
std::function< void(const Context &c, const char *s, size_t n, std::any &dt)> enter
Definition peglib.h:2770
Result parse_and_get_value(const char *s, std::any &dt, T &val, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2677
TracerStartOrEnd tracer_start
Definition peglib.h:2787
Action action
Definition peglib.h:2768
Result parse(const char *s, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2626
void initialize_definition_ids() const
Definition peglib.h:2806
std::shared_ptr< Ope > get_core_operator() const
Definition peglib.h:2752
Definition(const std::shared_ptr< Ope > &ope)
Definition peglib.h:2605
const char * s_
Definition peglib.h:2762
Definition peglib.h:1418
Dictionary(const std::vector< std::string > &v, bool ignore_case)
Definition peglib.h:1420
void accept(Visitor &v) override
Definition peglib.h:3599
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:3095
Trie trie_
Definition peglib.h:1430
Definition peglib.h:1702
Holder(Definition *outer)
Definition peglib.h:1704
const std::string & name() const
Definition peglib.h:3385
Definition * outer_
Definition peglib.h:1718
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:3172
const std::string & trace_name() const
Definition peglib.h:3387
void accept(Visitor &v) override
Definition peglib.h:3610
std::string trace_name_
Definition peglib.h:1720
friend class Definition
Definition peglib.h:1722
std::once_flag trace_name_init_
Definition peglib.h:1719
std::any reduce(SemanticValues &vs, std::any &dt, const std::any &predicate_data) const
Definition peglib.h:3374
std::shared_ptr< Ope > ope_
Definition peglib.h:1717
Definition peglib.h:1653
void accept(Visitor &v) override
Definition peglib.h:3607
Ignore(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1655
std::shared_ptr< Ope > ope_
Definition peglib.h:1666
size_t parse_core(const char *s, size_t n, SemanticValues &, Context &c, std::any &dt) const override
Definition peglib.h:1657
Definition peglib.h:1434
void accept(Visitor &v) override
Definition peglib.h:3600
bool ignore_case_
Definition peglib.h:1452
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:3140
std::string lower_lit_
Definition peglib.h:1453
std::string lit_
Definition peglib.h:1451
LiteralString(std::string &&s, bool ignore_case)
Definition peglib.h:1436
std::once_flag init_is_word_
Definition peglib.h:1454
bool is_word_
Definition peglib.h:1455
LiteralString(const std::string &s, bool ignore_case)
Definition peglib.h:1441
Definition peglib.h:1396
std::shared_ptr< Ope > ope_
Definition peglib.h:1415
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1400
NotPredicate(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1398
void accept(Visitor &v) override
Definition peglib.h:3598
Definition peglib.h:1081
bool is_choice_like
Definition peglib.h:1093
bool is_token_boundary
Definition peglib.h:1092
virtual ~Ope()=default
size_t parse(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const
Definition peglib.h:3084
virtual size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const =0
virtual void accept(Visitor &v)=0
std::pair< size_t, size_t > r_
Definition peglib.h:4178
std::pair< size_t, size_t > line_info() const
Definition peglib.h:4175
SyntaxErrorException(const char *what_arg, std::pair< size_t, size_t > r)
Definition peglib.h:4172
ParserContext perform_core(const char *s, size_t n, const Rules &rules, Log log, std::string requested_start, bool enable_left_recursion=true)
Definition peglib.h:4822
bool apply_precedence_instruction(Definition &rule, const PrecedenceClimbing::BinOpeInfo &info, const char *s, Log log)
Definition peglib.h:4781
void make_grammar()
Definition peglib.h:4181
Grammar g
Definition peglib.h:5109
ParserGenerator()
Definition peglib.h:4140
static bool parse_test(const char *d, const char *s)
Definition peglib.h:4119
static ParserContext parse(const char *s, size_t n, const Rules &rules, Log log, std::string_view start, bool enable_left_recursion=true)
Definition peglib.h:4111
bool detect_infiniteLoop(const Data &data, Definition &rule, const Log &log, const char *s) const
Definition peglib.h:5092
static ParserGenerator & get_instance()
Definition peglib.h:4135
void setup_actions()
Definition peglib.h:4366
Definition peglib.h:1783
std::shared_ptr< Ope > atom_
Definition peglib.h:1799
std::map< std::string_view, std::pair< size_t, char > > BinOpeInfo
Definition peglib.h:1785
PrecedenceClimbing(const std::shared_ptr< Ope > &atom, const std::shared_ptr< Ope > &binop, const BinOpeInfo &info, const Definition &rule)
Definition peglib.h:1787
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1792
const Definition & rule_
Definition peglib.h:1802
std::shared_ptr< Ope > binop_
Definition peglib.h:1800
Definition & get_reference_for_binop(Context &c) const
Definition peglib.h:3456
BinOpeInfo info_
Definition peglib.h:1801
void accept(Visitor &v) override
Definition peglib.h:3614
size_t parse_expression(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt, size_t min_prec) const
Definition peglib.h:3468
Definition peglib.h:680
Predicate()=default
bool operator()(const SemanticValues &vs, const std::any &dt, std::string &msg, std::any &predicate_data) const
Definition peglib.h:690
Predicate & operator=(const Predicate &rhs)=default
Fty make_adaptor(F fn)
Definition peglib.h:699
Fty fn_
Definition peglib.h:711
Predicate(Predicate &&rhs)=default
Predicate(F fn)
Definition peglib.h:684
void operator=(F fn)
Definition peglib.h:685
std::function< bool(const SemanticValues &vs, const std::any &dt, std::string &msg, std::any &predicate_data)> Fty
Definition peglib.h:696
Definition peglib.h:1211
PrioritizedChoice(bool for_label, const Args &...args)
Definition peglib.h:1214
size_t size() const
Definition peglib.h:1288
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1228
void accept(Visitor &v) override
Definition peglib.h:3595
bool for_label_
Definition peglib.h:1291
std::vector< std::shared_ptr< Ope > > opes_
Definition peglib.h:1290
PrioritizedChoice(const std::vector< std::shared_ptr< Ope > > &opes)
Definition peglib.h:1219
std::vector< FirstSet > first_sets_
Definition peglib.h:1292
PrioritizedChoice(std::vector< std::shared_ptr< Ope > > &&opes)
Definition peglib.h:1223
Definition peglib.h:1811
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:3545
void accept(Visitor &v) override
Definition peglib.h:3615
Recovery(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1813
std::shared_ptr< Ope > ope_
Definition peglib.h:1820
Definition peglib.h:1727
const std::string name_
Definition peglib.h:1742
Definition * rule_
Definition peglib.h:1748
std::shared_ptr< Ope > get_core_operator() const
Definition peglib.h:3431
const char * s_
Definition peglib.h:1743
void accept(Visitor &v) override
Definition peglib.h:3611
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:3393
const bool is_macro_
Definition peglib.h:1745
const std::vector< std::shared_ptr< Ope > > args_
Definition peglib.h:1746
size_t iarg_
Definition peglib.h:1749
const Grammar & grammar_
Definition peglib.h:1741
Reference(const Grammar &grammar, const std::string &name, const char *s, bool is_macro, const std::vector< std::shared_ptr< Ope > > &args)
Definition peglib.h:1729
Definition peglib.h:1295
static std::shared_ptr< Repetition > zom(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1354
static std::shared_ptr< Repetition > opt(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1364
bool is_zom() const
Definition peglib.h:1350
const std::bitset< 256 > * span_bitset_
Definition peglib.h:1371
Repetition(const std::shared_ptr< Ope > &ope, size_t min, size_t max)
Definition peglib.h:1297
std::shared_ptr< Ope > ope_
Definition peglib.h:1368
size_t max_
Definition peglib.h:1370
void accept(Visitor &v) override
Definition peglib.h:3596
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1300
size_t min_
Definition peglib.h:1369
static std::shared_ptr< Repetition > oom(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1359
Definition peglib.h:1113
Sequence(std::vector< std::shared_ptr< Ope > > &&opes)
Definition peglib.h:1119
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1121
std::unique_ptr< KeywordGuardData > kw_guard_
Definition peglib.h:1146
std::vector< std::shared_ptr< Ope > > opes_
Definition peglib.h:1142
std::optional< size_t > parse_keyword_guarded(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const
Definition peglib.h:1149
void accept(Visitor &v) override
Definition peglib.h:3594
Sequence(const Args &...args)
Definition peglib.h:1116
Sequence(const std::vector< std::shared_ptr< Ope > > &opes)
Definition peglib.h:1118
friend struct SetupFirstSets
Definition peglib.h:1145
Definition peglib.h:1639
void accept(Visitor &v) override
Definition peglib.h:3606
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:3147
std::shared_ptr< Ope > ope_
Definition peglib.h:1650
TokenBoundary(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1641
Definition peglib.h:395
size_t max_len_
Definition peglib.h:465
std::map< std::string, Info, std::less<> > dic_
Definition peglib.h:461
size_t match(const char *text, size_t text_len, size_t &id) const
Definition peglib.h:419
friend struct ComputeFirstSet
Definition peglib.h:450
Trie(const std::vector< std::string > &items, bool ignore_case)
Definition peglib.h:397
size_t size() const
Definition peglib.h:447
bool ignore_case_
Definition peglib.h:463
size_t items_count_
Definition peglib.h:464
size_t items_count() const
Definition peglib.h:448
Definition peglib.h:1672
std::function< size_t(const char *s, size_t n, SemanticValues &vs, std::any &dt)> fn_
Definition peglib.h:1683
void accept(Visitor &v) override
Definition peglib.h:3608
User(Parser fn)
Definition peglib.h:1674
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &, std::any &dt) const override
Definition peglib.h:1675
Definition peglib.h:1686
WeakHolder(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1688
void accept(Visitor &v) override
Definition peglib.h:3609
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1690
std::weak_ptr< Ope > weak_
Definition peglib.h:1699
Definition peglib.h:1752
std::shared_ptr< Ope > ope_
Definition peglib.h:1766
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1756
void accept(Visitor &v) override
Definition peglib.h:3612
Whitespace(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1754
Definition peglib.h:5411
parser(const char *s, size_t n, std::string_view start={})
Definition peglib.h:5420
Log log_
Definition peglib.h:5641
bool enablePackratParsing_
Definition peglib.h:5640
parser(const char *s, size_t n, const Rules &rules, std::string_view start={})
Definition peglib.h:5415
bool parse_n(const char *s, size_t n, std::any &dt, T &val, const char *path=nullptr) const
Definition peglib.h:5495
std::string start_
Definition peglib.h:5638
const Grammar & get_grammar() const
Definition peglib.h:5554
std::shared_ptr< Grammar > grammar_
Definition peglib.h:5637
bool parse_n(const char *s, size_t n, std::any &dt, const char *path=nullptr) const
Definition peglib.h:5473
void set_logger(Log log)
Definition peglib.h:5614
bool enableLeftRecursion_
Definition peglib.h:5639
bool load_grammar(std::string_view sv, std::string_view start={})
Definition peglib.h:5460
void enable_packrat_parsing()
Definition peglib.h:5567
parser & enable_ast()
Definition peglib.h:5601
parser(std::string_view sv, const Rules &rules, std::string_view start={})
Definition peglib.h:5423
void disable_eoi_check()
Definition peglib.h:5556
bool load_grammar(const char *s, size_t n, std::string_view start={})
Definition peglib.h:5451
std::shared_ptr< T > optimize_ast(std::shared_ptr< T > ast, bool opt_mode=true) const
Definition peglib.h:5609
void set_verbose_trace(bool verbose_trace)
Definition peglib.h:5594
void enable_left_recursion(bool enable=true)
Definition peglib.h:5563
parser()=default
const Definition & operator[](const char *s) const
Definition peglib.h:5552
bool parse_n(const char *s, size_t n, T &val, const char *path=nullptr) const
Definition peglib.h:5484
bool parse(std::string_view sv, std::any &dt, const char *path=nullptr) const
Definition peglib.h:5509
parser(std::string_view sv, std::string_view start={})
Definition peglib.h:5426
void set_logger(std::function< void(size_t line, size_t col, const std::string &msg)> log)
Definition peglib.h:5616
bool load_grammar(const char *s, size_t n, const Rules &rules, std::string_view start={})
Definition peglib.h:5441
void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave)
Definition peglib.h:5574
bool parse(std::string_view sv, std::any &dt, T &val, const char *path=nullptr) const
Definition peglib.h:5520
bool post_process(const char *s, size_t n, Definition::Result &r) const
Definition peglib.h:5624
std::vector< std::string > get_no_ast_opt_rules() const
Definition peglib.h:5629
bool load_grammar(std::string_view sv, const Rules &rules, std::string_view start={})
Definition peglib.h:5455
bool parse(std::string_view sv, T &val, const char *path=nullptr) const
Definition peglib.h:5515
Definition & operator[](const char *s)
Definition peglib.h:5550
bool parse_n(const char *s, size_t n, const char *path=nullptr) const
Definition peglib.h:5464
bool parse(std::string_view sv, const char *path=nullptr) const
Definition peglib.h:5505
void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave, TracerStartOrEnd tracer_start, TracerStartOrEnd tracer_end)
Definition peglib.h:5582
Definition peglib.h:507
Definition filter_string.h:27
std::string escape_characters(const char *s, size_t n)
Definition peglib.h:222
size_t parse_literal(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt, const std::string &lit, std::once_flag &init_is_word, bool &is_word, bool ignore_case, const std::string &lower_lit)
Definition peglib.h:2891
static const char * WORD_DEFINITION_NAME
Definition peglib.h:2584
const char * u8(const T *s)
Definition peglib.h:214
std::shared_ptr< Ope > ref(const Grammar &grammar, const std::string &name, const char *s, bool is_macro, const std::vector< std::shared_ptr< Ope > > &args)
Definition peglib.h:1939
size_t encode_codepoint(char32_t cp, char *buff)
Definition peglib.h:115
std::shared_ptr< Ope > cut()
Definition peglib.h:1964
std::function< size_t(const char *s, size_t n, SemanticValues &vs, std::any &dt)> Parser
Definition peglib.h:1669
std::shared_ptr< Ope > tok(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1924
std::shared_ptr< Ope > csc(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1915
bool decode_codepoint(const char *s8, size_t l, size_t &bytes, char32_t &cp)
Definition peglib.h:152
std::function< void(size_t line, size_t col, const std::string &msg, const std::string &rule)> Log
Definition peglib.h:724
size_t codepoint_count(const char *s8, size_t l)
Definition peglib.h:101
std::shared_ptr< Ope > apd(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1868
std::pair< int, size_t > parse_octal_number(const char *s, size_t n, size_t i)
Definition peglib.h:279
std::u32string decode(const char *s8, size_t l)
Definition peglib.h:201
std::pair< size_t, size_t > line_info(const char *start, const char *cur)
Definition peglib.h:475
std::shared_ptr< Ope > rec(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1960
std::function< void( const Ope &ope, const char *s, size_t n, const SemanticValues &vs, const Context &c, const std::any &dt, size_t, std::any &trace_data)> TracerLeave
Definition peglib.h:805
std::shared_ptr< Ope > cls(const std::string &s)
Definition peglib.h:1889
std::shared_ptr< Ope > wsp(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1945
bool fail(size_t len)
Definition peglib.h:719
T token_to_number_(std::string_view sv)
Definition peglib.h:368
std::shared_ptr< Ope > pre(const std::shared_ptr< Ope > &atom, const std::shared_ptr< Ope > &binop, const PrecedenceClimbing::BinOpeInfo &info, const Definition &rule)
Definition peglib.h:1953
bool is_digit(char c, int &v)
Definition peglib.h:260
std::shared_ptr< Ope > dic(const std::vector< std::string > &v, bool ignore_case)
Definition peglib.h:1876
std::shared_ptr< Ope > liti(std::string &&s)
Definition peglib.h:1885
std::shared_ptr< Ope > ign(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1928
std::unordered_map< std::string, Definition > Grammar
Definition peglib.h:1725
size_t codepoint_length(const char *s8, size_t l)
Definition peglib.h:85
static const char * WHITESPACE_DEFINITION_NAME
Definition peglib.h:2583
std::string resolve_escape_sequence(const char *s, size_t n)
Definition peglib.h:290
std::shared_ptr< Ope > lit(std::string &&s)
Definition peglib.h:1881
std::shared_ptr< Ope > cho4label_(Args &&...args)
Definition peglib.h:1846
std::shared_ptr< Ope > dot()
Definition peglib.h:1913
std::unordered_map< std::string, std::shared_ptr< Ope > > Rules
Definition peglib.h:4101
void enable_profiling(parser &parser, std::ostream &os)
Definition peglib.h:5708
std::function< void( const Ope &name, const char *s, size_t n, const SemanticValues &vs, const Context &c, const std::any &dt, std::any &trace_data)> TracerEnter
Definition peglib.h:801
std::shared_ptr< Ope > chr(char32_t dt)
Definition peglib.h:1909
bool is_hex(char c, int &v)
Definition peglib.h:246
std::string ast_to_s(const std::shared_ptr< T > &ptr, std::function< std::string(const T &ast, int level)> fn=nullptr)
Definition peglib.h:5205
std::shared_ptr< Ope > opt(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1859
std::function< void(std::any &trace_data)> TracerStartOrEnd
Definition peglib.h:809
std::shared_ptr< Ope > oom(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1855
std::shared_ptr< Ope > cho(Args &&...args)
Definition peglib.h:1841
std::shared_ptr< Ope > rep(const std::shared_ptr< Ope > &ope, size_t min, size_t max)
Definition peglib.h:1863
constexpr unsigned int str2tag_core(const char *s, size_t l, unsigned int h)
Definition peglib.h:496
std::shared_ptr< Ope > npd(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1872
void ast_to_s_core(const std::shared_ptr< T > &ptr, std::string &s, int level, std::function< std::string(const T &ast, int level)> fn)
Definition peglib.h:5179
constexpr unsigned int str2tag(std::string_view sv)
Definition peglib.h:503
std::shared_ptr< Ope > seq(Args &&...args)
Definition peglib.h:1837
bool success(size_t len)
Definition peglib.h:717
std::shared_ptr< Ope > ncls(const std::string &s)
Definition peglib.h:1899
std::pair< int, size_t > parse_hex_number(const char *s, size_t n, size_t i)
Definition peglib.h:268
std::shared_ptr< Ope > cap(const std::shared_ptr< Ope > &ope, Capture::MatchAction ma)
Definition peglib.h:1919
void add_ast_action(Definition &rule)
Definition peglib.h:5252
AstBase< EmptyType > Ast
Definition filter_string.h:30
static const char * RECOVER_DEFINITION_NAME
Definition peglib.h:2585
std::any call(F fn, Args &&...args)
Definition peglib.h:617
std::shared_ptr< Ope > zom(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1851
std::shared_ptr< Ope > bkr(std::string &&name)
Definition peglib.h:1949
std::shared_ptr< Ope > usr(std::function< size_t(const char *s, size_t n, SemanticValues &vs, std::any &dt)> fn)
Definition peglib.h:1933
std::string to_lower(std::string s)
Definition peglib.h:384
void enable_tracing(parser &parser, std::ostream &os)
Definition peglib.h:5648
#define CPPPEGLIB_HEURISTIC_ERROR_TOKEN_MAX_CHAR_COUNT
Definition peglib.h:15
Definition clipboard_testing.h:11
Definition peglib.h:2059
void visit(Holder &ope) override
Definition peglib.h:3618
std::unordered_map< void *, size_t > ids
Definition peglib.h:2066
Definition peglib.h:5116
const size_t column
Definition peglib.h:5149
AstBase(const AstBase &ast, const char *original_name, size_t position=0, size_t length=0, size_t original_choice_count=0, size_t original_choice=0)
Definition peglib.h:5136
std::weak_ptr< AstBase< EmptyType > > parent
Definition peglib.h:5166
AstBase(const char *path, size_t line, size_t column, const char *name, const std::string_view &token, size_t position=0, size_t length=0, size_t choice_count=0, size_t choice=0)
Definition peglib.h:5127
const std::string name
Definition peglib.h:5151
T token_to_number() const
Definition peglib.h:5173
const bool is_token
Definition peglib.h:5162
const size_t line
Definition peglib.h:5148
size_t length
Definition peglib.h:5153
const unsigned int original_tag
Definition peglib.h:5160
const size_t choice
Definition peglib.h:5155
size_t position
Definition peglib.h:5152
const size_t original_choice_count
Definition peglib.h:5157
const std::string_view token
Definition peglib.h:5163
std::vector< std::shared_ptr< AstBase< EmptyType > > > nodes
Definition peglib.h:5165
const size_t choice_count
Definition peglib.h:5154
const size_t original_choice
Definition peglib.h:5158
const std::string path
Definition peglib.h:5147
std::string token_to_string() const
Definition peglib.h:5168
AstBase(const char *path, size_t line, size_t column, const char *name, const std::vector< std::shared_ptr< AstBase > > &nodes, size_t position=0, size_t length=0, size_t choice_count=0, size_t choice=0)
Definition peglib.h:5117
const unsigned int tag
Definition peglib.h:5159
const std::string original_name
Definition peglib.h:5156
Definition peglib.h:5212
const bool mode_
Definition peglib.h:5245
const std::vector< std::string > rules_
Definition peglib.h:5246
std::shared_ptr< T > optimize(std::shared_ptr< T > original, std::shared_ptr< T > parent=nullptr)
Definition peglib.h:5217
AstOptimizer(bool mode, const std::vector< std::string > &rules={})
Definition peglib.h:5213
Definition peglib.h:2191
void visit(Repetition &ope) override
Definition peglib.h:2210
void visit(BackReference &) override
Definition peglib.h:2220
void visit(Cut &) override
Definition peglib.h:2221
void visit(LiteralString &ope) override
Definition peglib.h:2214
void visit(NotPredicate &) override
Definition peglib.h:2212
bool result
Definition peglib.h:2194
void visit(Dictionary &) override
Definition peglib.h:2213
void visit(User &) override
Definition peglib.h:2218
void visit(Character &) override
Definition peglib.h:2216
void visit(AndPredicate &) override
Definition peglib.h:2211
void visit(Sequence &ope) override
Definition peglib.h:2196
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2203
void visit(AnyCharacter &) override
Definition peglib.h:2217
void visit(CharacterClass &) override
Definition peglib.h:2215
Definition peglib.h:2433
void visit(User &) override
Definition peglib.h:2526
ComputeFirstSet(FirstSetCache &cache)
Definition peglib.h:2539
void visit(BackReference &) override
Definition peglib.h:2528
void visit(AndPredicate &) override
Definition peglib.h:2468
void visit(NotPredicate &) override
Definition peglib.h:2469
FirstSetCache & cache_
Definition peglib.h:2544
void visit(Cut &) override
Definition peglib.h:2529
void visit(LiteralString &ope) override
Definition peglib.h:2482
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2455
void visit(Repetition &ope) override
Definition peglib.h:2464
void visit(Dictionary &ope) override
Definition peglib.h:2470
std::unordered_map< const Definition *, FirstSet > FirstSetCache
Definition peglib.h:2537
void visit(AnyCharacter &) override
Definition peglib.h:2525
void visit(CharacterClass &ope) override
Definition peglib.h:2495
std::unordered_set< const Definition * > refs_
Definition peglib.h:2545
FirstSet result_
Definition peglib.h:2541
void visit(Character &ope) override
Definition peglib.h:2518
void visit(Sequence &ope) override
Definition peglib.h:2436
size_t cycle_count_
Definition peglib.h:2546
Definition peglib.h:846
std::any val
Definition peglib.h:848
size_t len
Definition peglib.h:847
Definition peglib.h:915
size_t misses
Definition peglib.h:917
size_t hits
Definition peglib.h:916
Definition peglib.h:1010
std::string_view sv_sv
Definition peglib.h:1014
size_t sv_tags_size
Definition peglib.h:1012
size_t sv_tokens_size
Definition peglib.h:1013
size_t capture_size
Definition peglib.h:1017
size_t choice
Definition peglib.h:1016
size_t choice_count
Definition peglib.h:1015
size_t sv_size
Definition peglib.h:1011
Definition peglib.h:2592
bool ret
Definition peglib.h:2593
size_t len
Definition peglib.h:2595
ErrorInfo error_info
Definition peglib.h:2596
bool recovered
Definition peglib.h:2594
Definition peglib.h:2265
void visit(Repetition &ope) override
Definition peglib.h:2291
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2285
DetectInfiniteLoop(std::vector< std::pair< const char *, std::string > > &refs, std::unordered_map< std::string, bool > &has_error_cache)
Definition peglib.h:2275
std::unordered_map< std::string, bool > & has_error_cache_
Definition peglib.h:2312
void visit(Sequence &ope) override
Definition peglib.h:2279
bool has_error
Definition peglib.h:2306
std::vector< std::pair< const char *, std::string > > & refs_
Definition peglib.h:2311
const char * error_s
Definition peglib.h:2307
DetectInfiniteLoop(const char *s, const std::string &name, std::vector< std::pair< const char *, std::string > > &refs, std::unordered_map< std::string, bool > &has_error_cache)
Definition peglib.h:2268
std::string error_name
Definition peglib.h:2308
Definition peglib.h:2133
bool done_
Definition peglib.h:2187
void visit(AnyCharacter &) override
Definition peglib.h:2174
void visit(Sequence &ope) override
Definition peglib.h:2138
void visit(AndPredicate &ope) override
Definition peglib.h:2162
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2149
void visit(BackReference &) override
Definition peglib.h:2177
const char * error_s
Definition peglib.h:2180
void visit(Repetition &ope) override
Definition peglib.h:2158
std::unordered_set< std::string > refs_
Definition peglib.h:2186
void visit(Cut &) override
Definition peglib.h:2178
void visit(Character &) override
Definition peglib.h:2173
std::string name_
Definition peglib.h:2185
void visit(LiteralString &ope) override
Definition peglib.h:2171
void visit(CharacterClass &) override
Definition peglib.h:2172
DetectLeftRecursion(const std::string &name)
Definition peglib.h:2136
void visit(Dictionary &) override
Definition peglib.h:2170
void visit(NotPredicate &ope) override
Definition peglib.h:2166
std::vector< const std::vector< std::shared_ptr< Ope > > * > macro_args_stack_
Definition peglib.h:2188
void visit(User &) override
Definition peglib.h:2175
std::shared_ptr< Ope > resolve_macro_arg(size_t iarg) const
Definition peglib.h:3701
Definition peglib.h:5249
Definition peglib.h:732
std::vector< std::pair< const char *, const Definition * > > expected_tokens
Definition peglib.h:734
std::string replace_all(std::string str, const std::string &from, const std::string &to) const
Definition peglib.h:785
void clear()
Definition peglib.h:741
const char * message_pos
Definition peglib.h:735
int cast_char(char c) const
Definition peglib.h:758
std::string heuristic_error_token(const char *s, size_t n, const char *pos) const
Definition peglib.h:760
const char * last_output_pos
Definition peglib.h:738
void output_log(const Log &log, const char *s, size_t n)
Definition peglib.h:2953
bool keep_previous_token
Definition peglib.h:739
void add(const char *error_literal, const Definition *error_rule)
Definition peglib.h:748
std::string message
Definition peglib.h:736
std::string label
Definition peglib.h:737
const char * error_pos
Definition peglib.h:733
Definition peglib.h:2114
void visit(LiteralString &ope) override
Definition peglib.h:2117
static const char * token(Ope &ope)
Definition peglib.h:2123
void visit(TokenBoundary &ope) override
Definition peglib.h:2118
const char * token_
Definition peglib.h:2130
void visit(Ignore &ope) override
Definition peglib.h:2119
void visit(Recovery &ope) override
Definition peglib.h:2121
Definition peglib.h:2346
const std::vector< std::string > & params_
Definition peglib.h:2427
void visit(Repetition &ope) override
Definition peglib.h:2369
void visit(WeakHolder &ope) override
Definition peglib.h:2406
void visit(Character &ope) override
Definition peglib.h:2388
void visit(CharacterClass &ope) override
Definition peglib.h:2385
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2361
void visit(Ignore &ope) override
Definition peglib.h:2402
void visit(Cut &ope) override
Definition peglib.h:2421
void visit(AnyCharacter &ope) override
Definition peglib.h:2389
void visit(LiteralString &ope) override
Definition peglib.h:2382
void visit(Recovery &ope) override
Definition peglib.h:2417
void visit(Holder &ope) override
Definition peglib.h:2407
const std::vector< std::shared_ptr< Ope > > & args_
Definition peglib.h:2426
void visit(Dictionary &ope) override
Definition peglib.h:2381
void visit(Whitespace &ope) override
Definition peglib.h:2409
void visit(TokenBoundary &ope) override
Definition peglib.h:2398
std::shared_ptr< Ope > found_ope
Definition peglib.h:2423
void visit(NotPredicate &ope) override
Definition peglib.h:2377
FindReference(const std::vector< std::shared_ptr< Ope > > &args, const std::vector< std::string > &params)
Definition peglib.h:2349
void visit(Sequence &ope) override
Definition peglib.h:2353
void visit(AndPredicate &ope) override
Definition peglib.h:2373
void visit(Capture &ope) override
Definition peglib.h:2394
void visit(CaptureScope &ope) override
Definition peglib.h:2390
void visit(PrecedenceClimbing &ope) override
Definition peglib.h:2413
Definition peglib.h:1193
const char * first_literal
Definition peglib.h:1199
void merge(const FirstSet &other)
Definition peglib.h:1203
bool any_char
Definition peglib.h:1198
std::bitset< 256 > chars
Definition peglib.h:1196
bool can_be_empty
Definition peglib.h:1197
const Definition * first_rule
Definition peglib.h:1200
Definition peglib.h:2224
std::string error_name
Definition peglib.h:2254
void visit(Sequence &ope) override
Definition peglib.h:3716
bool is_empty
Definition peglib.h:2252
void visit(Repetition &ope) override
Definition peglib.h:2238
const char * error_s
Definition peglib.h:2253
void visit(NotPredicate &) override
Definition peglib.h:2246
std::vector< std::pair< const char *, std::string > > & refs_
Definition peglib.h:2261
void visit(LiteralString &ope) override
Definition peglib.h:2247
void visit(AndPredicate &) override
Definition peglib.h:2245
std::unordered_map< std::string, bool > & has_error_cache_
Definition peglib.h:2262
void set_error()
Definition peglib.h:2257
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2232
HasEmptyElement(std::vector< std::pair< const char *, std::string > > &refs, std::unordered_map< std::string, bool > &has_error_cache)
Definition peglib.h:2227
Definition peglib.h:2069
void visit(Dictionary &) override
Definition peglib.h:2079
void visit(LiteralString &) override
Definition peglib.h:2080
bool result_
Definition peglib.h:2089
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2072
static bool check(Ope &ope)
Definition peglib.h:2082
Definition peglib.h:1098
std::bitset< 256 > identifier_rest
Definition peglib.h:1100
std::bitset< 256 > identifier_first
Definition peglib.h:1099
size_t max_keyword_len
Definition peglib.h:1104
size_t min_keyword_len
Definition peglib.h:1103
std::vector< std::string > exact_keywords
Definition peglib.h:1101
std::vector< std::string > prefix_keywords
Definition peglib.h:1102
static bool matches_any(const std::vector< std::string > &keywords, std::string_view input)
Definition peglib.h:1106
Definition peglib.h:2333
const std::vector< std::string > & params_
Definition peglib.h:2343
Grammar & grammar_
Definition peglib.h:2342
void visit(Reference &ope) override
Definition peglib.h:4063
LinkReferences(Grammar &grammar, const std::vector< std::string > &params)
Definition peglib.h:2336
Definition peglib.h:1969
virtual void visit(WeakHolder &)
Definition peglib.h:1986
virtual void visit(TokenBoundary &)
Definition peglib.h:1983
virtual void visit(Repetition &)
Definition peglib.h:1973
virtual void visit(Dictionary &)
Definition peglib.h:1976
virtual void visit(Character &)
Definition peglib.h:1979
virtual ~Visitor()
Definition peglib.h:1970
virtual void visit(AndPredicate &)
Definition peglib.h:1974
virtual void visit(LiteralString &)
Definition peglib.h:1977
virtual void visit(Reference &)
Definition peglib.h:1988
virtual void visit(CharacterClass &)
Definition peglib.h:1978
virtual void visit(PrioritizedChoice &)
Definition peglib.h:1972
virtual void visit(Ignore &)
Definition peglib.h:1984
virtual void visit(PrecedenceClimbing &)
Definition peglib.h:1991
virtual void visit(CaptureScope &)
Definition peglib.h:1981
virtual void visit(Sequence &)
Definition peglib.h:1971
virtual void visit(Holder &)
Definition peglib.h:1987
virtual void visit(Capture &)
Definition peglib.h:1982
virtual void visit(NotPredicate &)
Definition peglib.h:1975
virtual void visit(BackReference &)
Definition peglib.h:1990
virtual void visit(Cut &)
Definition peglib.h:1993
virtual void visit(AnyCharacter &)
Definition peglib.h:1980
virtual void visit(Whitespace &)
Definition peglib.h:1989
virtual void visit(Recovery &)
Definition peglib.h:1992
virtual void visit(User &)
Definition peglib.h:1985
Definition peglib.h:4151
Data()
Definition peglib.h:4167
std::vector< std::pair< std::string, const char * > > duplicates_of_definition
Definition peglib.h:4156
bool enablePackratParsing
Definition peglib.h:4165
std::map< std::string, std::vector< Instruction > > instructions
Definition peglib.h:4159
std::string start
Definition peglib.h:4153
std::vector< std::pair< std::string, const char * > > duplicates_of_instruction
Definition peglib.h:4158
const char * start_pos
Definition peglib.h:4154
std::set< std::string_view > captures_in_current_definition
Definition peglib.h:4164
std::vector< std::pair< std::string, const char * > > undefined_back_references
Definition peglib.h:4161
std::shared_ptr< Grammar > grammar
Definition peglib.h:4152
std::vector< std::set< std::string_view > > captures_stack
Definition peglib.h:4162
Definition peglib.h:4145
std::any data
Definition peglib.h:4147
std::string type
Definition peglib.h:4146
std::string_view sv
Definition peglib.h:4148
Definition peglib.h:4105
std::shared_ptr< Grammar > grammar
Definition peglib.h:4106
bool enablePackratParsing
Definition peglib.h:4108
std::string start
Definition peglib.h:4107
Definition peglib.h:2315
std::unordered_set< std::string > referenced
Definition peglib.h:2326
std::unordered_map< std::string, const char * > error_s
Definition peglib.h:2324
const std::vector< std::string > & params_
Definition peglib.h:2330
std::unordered_map< std::string, std::string > error_message
Definition peglib.h:2325
ReferenceChecker(const Grammar &grammar, const std::vector< std::string > &params)
Definition peglib.h:2318
const Grammar & grammar_
Definition peglib.h:2329
void visit(Reference &ope) override
Definition peglib.h:3793
Definition peglib.h:520
std::pair< size_t, size_t > line_info() const
Definition peglib.h:2948
std::string token_to_string(size_t id=0) const
Definition peglib.h:555
std::vector< std::string_view > tokens
Definition peglib.h:546
size_t choice_
Definition peglib.h:610
Context * c_
Definition peglib.h:607
std::string name_
Definition peglib.h:611
friend class Holder
Definition peglib.h:604
friend class Sequence
Definition peglib.h:601
std::string_view token(size_t id=0) const
Definition peglib.h:548
SemanticValues()=default
std::string_view sv() const
Definition peglib.h:529
std::string_view sv_
Definition peglib.h:608
size_t choice() const
Definition peglib.h:543
T token_to_number() const
Definition peglib.h:559
const char * ss
Definition peglib.h:526
size_t choice_count_
Definition peglib.h:609
friend class Dictionary
Definition peglib.h:600
size_t choice_count() const
Definition peglib.h:540
std::vector< T > transform(size_t beg=0, size_t end=static_cast< size_t >(-1)) const
Definition peglib.h:565
const char * path
Definition peglib.h:525
std::vector< unsigned int > tags
Definition peglib.h:534
const std::string & name() const
Definition peglib.h:532
friend class Repetition
Definition peglib.h:603
friend class PrecedenceClimbing
Definition peglib.h:605
SemanticValues(Context *c)
Definition peglib.h:522
friend class Context
Definition peglib.h:599
friend class PrioritizedChoice
Definition peglib.h:602
Definition peglib.h:2549
void setup_keyword_guarded_identifier(Sequence &ope)
Definition peglib.h:3878
void visit(Repetition &ope) override
Definition peglib.h:2567
ComputeFirstSet::FirstSetCache first_set_cache_
Definition peglib.h:2576
void visit(Sequence &ope) override
Definition peglib.h:3870
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2555
std::unordered_set< const Definition * > visited_rules_
Definition peglib.h:2577
Definition peglib.h:2092
bool has_rule_
Definition peglib.h:2111
bool has_token_boundary_
Definition peglib.h:2110
void visit(TokenBoundary &) override
Definition peglib.h:2095
void visit(WeakHolder &) override
Definition peglib.h:2098
void visit(NotPredicate &) override
Definition peglib.h:2097
void visit(AndPredicate &) override
Definition peglib.h:2096
static bool is_token(Ope &ope)
Definition peglib.h:2101
Definition peglib.h:2022
void visit(Recovery &) override
Definition peglib.h:2046
void visit(Cut &) override
Definition peglib.h:2047
void visit(Holder &ope) override
Definition peglib.h:2041
void visit(User &) override
Definition peglib.h:2039
void visit(NotPredicate &) override
Definition peglib.h:2029
void visit(TokenBoundary &) override
Definition peglib.h:2037
void visit(LiteralString &) override
Definition peglib.h:2031
void visit(AnyCharacter &) override
Definition peglib.h:2034
void visit(Whitespace &) override
Definition peglib.h:2043
void visit(WeakHolder &) override
Definition peglib.h:2040
void visit(Repetition &) override
Definition peglib.h:2027
void visit(Character &) override
Definition peglib.h:2033
void visit(CharacterClass &) override
Definition peglib.h:2032
void visit(Reference &) override
Definition peglib.h:2042
static std::string get(Ope &ope)
Definition peglib.h:2049
const char * name_
Definition peglib.h:2056
void visit(Capture &) override
Definition peglib.h:2036
void visit(CaptureScope &) override
Definition peglib.h:2035
void visit(PrecedenceClimbing &) override
Definition peglib.h:2045
void visit(Sequence &) override
Definition peglib.h:2025
void visit(PrioritizedChoice &) override
Definition peglib.h:2026
void visit(Ignore &) override
Definition peglib.h:2038
void visit(AndPredicate &) override
Definition peglib.h:2028
void visit(BackReference &) override
Definition peglib.h:2044
void visit(Dictionary &) override
Definition peglib.h:2030
Definition peglib.h:1996
void visit(TokenBoundary &ope) override
Definition peglib.h:2013
void visit(Recovery &ope) override
Definition peglib.h:2018
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2003
void visit(Capture &ope) override
Definition peglib.h:2012
void visit(Whitespace &ope) override
Definition peglib.h:2017
void visit(PrecedenceClimbing &ope) override
Definition peglib.h:2019
void visit(Repetition &ope) override
Definition peglib.h:2008
void visit(CaptureScope &ope) override
Definition peglib.h:2011
void visit(AndPredicate &ope) override
Definition peglib.h:2009
void visit(Sequence &ope) override
Definition peglib.h:1998
void visit(WeakHolder &ope) override
Definition peglib.h:2015
void visit(NotPredicate &ope) override
Definition peglib.h:2010
void visit(Ignore &ope) override
Definition peglib.h:2014
void visit(Holder &ope) override
Definition peglib.h:2016
Definition peglib.h:453
bool match
Definition peglib.h:455
size_t id
Definition peglib.h:456
bool done
Definition peglib.h:454
Definition peglib.h:631
Definition peglib.h:56
bool execute_on_destruction
Definition peglib.h:78
scope_exit(scope_exit &&rhs)
Definition peglib.h:60
EF exit_function
Definition peglib.h:77
~scope_exit()
Definition peglib.h:66
scope_exit(EF &&f)
Definition peglib.h:57
scope_exit(const scope_exit &)=delete
void operator=(const scope_exit &)=delete
scope_exit & operator=(scope_exit &&)=delete
void release()
Definition peglib.h:70