Cockatrice 2026-04-21-Development-2.11.0-beta.61
A virtual tabletop for multiplayer card games
Loading...
Searching...
No Matches
peglib.h
Go to the documentation of this file.
1//
2// peglib.h
3//
4// Copyright (c) 2022 Yuji Hirose. All rights reserved.
5// MIT License
6//
7
8#pragma once
9
10/*
11 * Configuration
12 */
13
14#ifndef CPPPEGLIB_HEURISTIC_ERROR_TOKEN_MAX_CHAR_COUNT
15#define CPPPEGLIB_HEURISTIC_ERROR_TOKEN_MAX_CHAR_COUNT 32
16#endif
17
18#include <algorithm>
19#include <any>
20#include <bitset>
21#include <cassert>
22#include <cctype>
23#if __has_include(<charconv>)
24#include <charconv>
25#endif
26#include <cstring>
27#include <functional>
28#include <initializer_list>
29#include <iostream>
30#include <limits>
31#include <map>
32#include <memory>
33#include <mutex>
34#include <optional>
35#include <set>
36#include <sstream>
37#include <string>
38#include <unordered_map>
39#include <unordered_set>
40#include <vector>
41
42#if !defined(__cplusplus) || __cplusplus < 201703L
43#error "Requires complete C++17 support"
44#endif
45
46namespace peg {
47
48/*-----------------------------------------------------------------------------
49 * scope_exit
50 *---------------------------------------------------------------------------*/
51
52// This is based on
53// "http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2014/n4189".
54
55template <typename EF> struct scope_exit {
56 explicit scope_exit(EF &&f)
57 : exit_function(std::move(f)), execute_on_destruction{true} {}
58
60 : exit_function(std::move(rhs.exit_function)),
62 rhs.release();
63 }
64
67 }
68
69 void release() { this->execute_on_destruction = false; }
70
71private:
72 scope_exit(const scope_exit &) = delete;
73 void operator=(const scope_exit &) = delete;
75
78};
79
80/*-----------------------------------------------------------------------------
81 * UTF8 functions
82 *---------------------------------------------------------------------------*/
83
// Returns the byte length (1-4) of the UTF-8 sequence announced by the lead
// byte at `s8`, or 0 when `l` is zero, the lead byte is not a valid sequence
// start, or fewer than the announced number of bytes are available.
// Continuation bytes are not inspected.
inline size_t codepoint_length(const char *s8, size_t l) {
  if (l == 0) { return 0; }

  const auto lead = static_cast<uint8_t>(s8[0]);

  size_t need = 0;
  if (lead < 0x80) {
    need = 1; // 0xxxxxxx
  } else if ((lead >> 5) == 0x06) {
    need = 2; // 110xxxxx
  } else if ((lead >> 4) == 0x0E) {
    need = 3; // 1110xxxx
  } else if ((lead >> 3) == 0x1E) {
    need = 4; // 11110xxx
  }

  return (need != 0 && need <= l) ? need : 0;
}
99
100inline size_t codepoint_count(const char *s8, size_t l) {
101 size_t count = 0;
102 for (size_t i = 0; i < l;) {
103 auto len = codepoint_length(s8 + i, l - i);
104 if (len == 0) {
105 // Invalid UTF-8 byte, treat as single byte to avoid infinite loop
106 len = 1;
107 }
108 i += len;
109 count++;
110 }
111 return count;
112}
113
// Encodes `cp` as UTF-8 into `buff` (which must have room for 4 bytes) and
// returns the number of bytes written. Returns 0 without touching `buff` for
// surrogates (U+D800..U+DFFF) and for values of 0x110000 and above.
inline size_t encode_codepoint(char32_t cp, char *buff) {
  // Continuation byte carrying the low six bits of `v`.
  const auto tail = [](char32_t v) {
    return static_cast<char>(0x80 | (v & 0x3F));
  };

  if (cp < 0x0080) {
    buff[0] = static_cast<char>(cp & 0x7F);
    return 1;
  }
  if (cp < 0x0800) {
    buff[0] = static_cast<char>(0xC0 | ((cp >> 6) & 0x1F));
    buff[1] = tail(cp);
    return 2;
  }
  if (cp >= 0xD800 && cp < 0xE000) {
    // D800 - DFFF is invalid...
    return 0;
  }
  if (cp < 0x10000) {
    buff[0] = static_cast<char>(0xE0 | ((cp >> 12) & 0xF));
    buff[1] = tail(cp >> 6);
    buff[2] = tail(cp);
    return 3;
  }
  if (cp < 0x110000) {
    buff[0] = static_cast<char>(0xF0 | ((cp >> 18) & 0x7));
    buff[1] = tail(cp >> 12);
    buff[2] = tail(cp >> 6);
    buff[3] = tail(cp);
    return 4;
  }
  return 0;
}
144
145inline std::string encode_codepoint(char32_t cp) {
146 char buff[4];
147 auto l = encode_codepoint(cp, buff);
148 return std::string(buff, l);
149}
150
// Decodes the UTF-8 sequence starting at `s8` (at most `l` bytes available).
// On success stores the sequence length in `bytes` and the code point in `cp`
// and returns true. On failure (empty input, invalid lead byte, or truncated
// sequence) returns false and leaves both outputs untouched. Continuation
// bytes are masked, not validated.
inline bool decode_codepoint(const char *s8, size_t l, size_t &bytes,
                             char32_t &cp) {
  if (l == 0) { return false; }

  const auto octet = [s8](size_t k) {
    return static_cast<char32_t>(static_cast<uint8_t>(s8[k]));
  };

  const auto lead = octet(0);
  if ((lead & 0x80) == 0) {
    bytes = 1;
    cp = lead;
    return true;
  }

  size_t need = 0;
  char32_t acc = 0;
  if ((lead & 0xE0) == 0xC0) {
    need = 2;
    acc = lead & 0x1F;
  } else if ((lead & 0xF0) == 0xE0) {
    need = 3;
    acc = lead & 0x0F;
  } else if ((lead & 0xF8) == 0xF0) {
    need = 4;
    acc = lead & 0x07;
  } else {
    return false;
  }

  if (l < need) { return false; }

  // Fold in six payload bits per continuation byte.
  for (size_t k = 1; k < need; ++k) {
    acc = (acc << 6) | (octet(k) & 0x3F);
  }

  bytes = need;
  cp = acc;
  return true;
}
187
188inline size_t decode_codepoint(const char *s8, size_t l, char32_t &cp) {
189 size_t bytes;
190 if (decode_codepoint(s8, l, bytes, cp)) { return bytes; }
191 return 0;
192}
193
194inline char32_t decode_codepoint(const char *s8, size_t l) {
195 char32_t cp = 0;
196 decode_codepoint(s8, l, cp);
197 return cp;
198}
199
200inline std::u32string decode(const char *s8, size_t l) {
201 std::u32string out;
202 size_t i = 0;
203 while (i < l) {
204 auto beg = i++;
205 while (i < l && (s8[i] & 0xc0) == 0x80) {
206 i++;
207 }
208 out += decode_codepoint(&s8[beg], (i - beg));
209 }
210 return out;
211}
212
// Reinterprets a pointer to any character-like type as `const char *`
// without copying or converting the pointed-to data.
template <typename T> auto u8(const T *s) -> const char * {
  const auto *as_chars = reinterpret_cast<const char *>(s);
  return as_chars;
}
216
217/*-----------------------------------------------------------------------------
218 * escape_characters
219 *---------------------------------------------------------------------------*/
220
// Returns a copy of the `n` bytes at `s` in which form feed, newline,
// carriage return, tab and vertical tab are replaced by their two-character
// backslash spellings. All other bytes (including backslash) are copied
// verbatim.
inline std::string escape_characters(const char *s, size_t n) {
  std::string out;
  for (size_t idx = 0; idx != n; ++idx) {
    const char ch = s[idx];
    const char *spelled = nullptr;
    switch (ch) {
    case '\f': spelled = "\\f"; break;
    case '\n': spelled = "\\n"; break;
    case '\r': spelled = "\\r"; break;
    case '\t': spelled = "\\t"; break;
    case '\v': spelled = "\\v"; break;
    default: break;
    }
    if (spelled != nullptr) {
      out += spelled;
    } else {
      out += ch;
    }
  }
  return out;
}
236
237inline std::string escape_characters(std::string_view sv) {
238 return escape_characters(sv.data(), sv.size());
239}
240
241/*-----------------------------------------------------------------------------
242 * resolve_escape_sequence
243 *---------------------------------------------------------------------------*/
244
// If `c` is a hexadecimal digit, stores its value (0-15) in `v` and returns
// true; otherwise returns false and leaves `v` untouched.
inline bool is_hex(char c, int &v) {
  if ('0' <= c && c <= '9') {
    v = c - '0';
    return true;
  } else if ('a' <= c && c <= 'f') {
    v = c - 'a' + 10;
    return true;
  } else if ('A' <= c && c <= 'F') {
    v = c - 'A' + 10;
    return true;
  }
  return false;
}

// If `c` is a decimal digit, stores its value (0-9) in `v` and returns true;
// otherwise returns false and leaves `v` untouched.
inline bool is_digit(char c, int &v) {
  if ('0' <= c && c <= '9') {
    v = c - '0';
    return true;
  }
  return false;
}

// Parses the run of hexadecimal digits in `s[i, n)`.
// Returns {value, index of the first byte after the run}. When no digit is
// present the result is {0, i}.
inline std::pair<int, size_t> parse_hex_number(const char *s, size_t n,
                                               size_t i) {
  // Accumulate in unsigned arithmetic: an over-long digit run then wraps
  // instead of triggering signed-overflow undefined behaviour.
  unsigned int ret = 0;
  int val = 0;
  while (i < n && is_hex(s[i], val)) {
    ret = ret * 16u + static_cast<unsigned int>(val);
    i++;
  }
  return std::pair(static_cast<int>(ret), i);
}

// Parses the run of octal digits ('0'-'7') in `s[i, n)`.
// Returns {value, index of the first byte after the run}. When no digit is
// present the result is {0, i}.
inline std::pair<int, size_t> parse_octal_number(const char *s, size_t n,
                                                 size_t i) {
  unsigned int ret = 0;
  int val = 0;
  // '8' and '9' are decimal digits but not octal ones, so they end the run.
  while (i < n && is_digit(s[i], val) && val < 8) {
    ret = ret * 8u + static_cast<unsigned int>(val);
    i++;
  }
  return std::pair(static_cast<int>(ret), i);
}
288
289inline std::string resolve_escape_sequence(const char *s, size_t n) {
290 std::string r;
291 r.reserve(n);
292
293 size_t i = 0;
294 while (i < n) {
295 auto ch = s[i];
296 if (ch == '\\') {
297 i++;
298 assert(i < n);
299
300 switch (s[i]) {
301 case 'f':
302 r += '\f';
303 i++;
304 break;
305 case 'n':
306 r += '\n';
307 i++;
308 break;
309 case 'r':
310 r += '\r';
311 i++;
312 break;
313 case 't':
314 r += '\t';
315 i++;
316 break;
317 case 'v':
318 r += '\v';
319 i++;
320 break;
321 case '\'':
322 r += '\'';
323 i++;
324 break;
325 case '"':
326 r += '"';
327 i++;
328 break;
329 case '[':
330 r += '[';
331 i++;
332 break;
333 case ']':
334 r += ']';
335 i++;
336 break;
337 case '\\':
338 r += '\\';
339 i++;
340 break;
341 case 'x':
342 case 'u': {
343 char32_t cp;
344 std::tie(cp, i) = parse_hex_number(s, n, i + 1);
345 r += encode_codepoint(cp);
346 break;
347 }
348 default: {
349 char32_t cp;
350 std::tie(cp, i) = parse_octal_number(s, n, i);
351 r += encode_codepoint(cp);
352 break;
353 }
354 }
355 } else {
356 r += ch;
357 i++;
358 }
359 }
360 return r;
361}
362
363/*-----------------------------------------------------------------------------
364 * token_to_number_ - This function should be removed eventually
365 *---------------------------------------------------------------------------*/
366
// Converts the text in `sv` to a number of type `T`. Non-floating-point types
// go through std::from_chars when <charconv> is available; everything else is
// extracted from a std::istringstream. The result starts at 0, and
// conversion errors are not reported.
template <typename T> T token_to_number_(std::string_view sv) {
  T value = 0;

#if __has_include(<charconv>)
  constexpr bool via_from_chars = !std::is_floating_point<T>::value;
#else
  constexpr bool via_from_chars = false;
#endif

  if constexpr (via_from_chars) {
#if __has_include(<charconv>)
    const char *first = sv.data();
    std::from_chars(first, first + sv.size(), value);
#endif
  } else {
    std::istringstream stream{std::string(sv)};
    stream >> value;
  }
  return value;
}
382
// Returns `s` with every byte passed through std::tolower (as unsigned char).
inline std::string to_lower(std::string s) {
  std::transform(s.begin(), s.end(), s.begin(), [](unsigned char ch) {
    return static_cast<char>(std::tolower(ch));
  });
  return s;
}
389
390/*-----------------------------------------------------------------------------
391 * Trie
392 *---------------------------------------------------------------------------*/
393
394class Trie {
395public:
396 Trie(const std::vector<std::string> &items, bool ignore_case)
397 : ignore_case_(ignore_case), items_count_(items.size()) {
398 size_t id = 0;
399 for (const auto &item : items) {
400 const auto &s = ignore_case ? to_lower(item) : item;
401 if (item.size() > max_len_) { max_len_ = item.size(); }
402 for (size_t len = 1; len <= item.size(); len++) {
403 auto last = len == item.size();
404 std::string_view sv(s.data(), len);
405 auto it = dic_.find(sv);
406 if (it == dic_.end()) {
407 dic_.emplace(sv, Info{last, last, id});
408 } else if (last) {
409 it->second.match = true;
410 } else {
411 it->second.done = false;
412 }
413 }
414 id++;
415 }
416 }
417
418 size_t match(const char *text, size_t text_len, size_t &id) const {
419 auto limit = std::min(text_len, max_len_);
420 std::string lower_text;
421 if (ignore_case_) {
422 lower_text = to_lower(std::string(text, limit));
423 text = lower_text.data();
424 }
425
426 size_t match_len = 0;
427 auto done = false;
428 size_t len = 1;
429 while (!done && len <= limit) {
430 std::string_view sv(text, len);
431 auto it = dic_.find(sv);
432 if (it == dic_.end()) {
433 done = true;
434 } else {
435 if (it->second.match) {
436 match_len = len;
437 id = it->second.id;
438 }
439 if (it->second.done) { done = true; }
440 }
441 len += 1;
442 }
443 return match_len;
444 }
445
446 size_t size() const { return dic_.size(); }
447 size_t items_count() const { return items_count_; }
448
449 friend struct ComputeFirstSet;
450
451private:
452 struct Info {
453 bool done;
454 bool match;
455 size_t id;
456 };
457
458 // TODO: Use unordered_map when heterogeneous lookup is supported in C++20
459 // std::unordered_map<std::string, Info> dic_;
460 std::map<std::string, Info, std::less<>> dic_;
461
464 size_t max_len_ = 0;
465};
466
467/*-----------------------------------------------------------------------------
468 * PEG
469 *---------------------------------------------------------------------------*/
470
471/*
472 * Line information utility function
473 */
474inline std::pair<size_t, size_t> line_info(const char *start, const char *cur) {
475 auto p = start;
476 auto col_ptr = p;
477 auto no = 1;
478
479 while (p < cur) {
480 if (*p == '\n') {
481 no++;
482 col_ptr = p + 1;
483 }
484 p++;
485 }
486
487 auto col = codepoint_count(col_ptr, p - col_ptr) + 1;
488
489 return std::pair(no, col);
490}
491
492/*
493 * String tag
494 */
// Folds the `l` bytes at `s` into the running hash `h` using
// h = (h * 33) ^ byte, and returns the result. Usable in constant
// expressions.
inline constexpr unsigned int str2tag_core(const char *s, size_t l,
                                           unsigned int h) {
  for (size_t k = 0; k < l; ++k) {
    h = (h * 33) ^ static_cast<unsigned char>(s[k]);
  }
  return h;
}
501
502inline constexpr unsigned int str2tag(std::string_view sv) {
503 return str2tag_core(sv.data(), sv.size(), 0);
504}
505
506namespace udl {
507
508inline constexpr unsigned int operator""_(const char *s, size_t l) {
509 return str2tag_core(s, l, 0);
510}
511
512} // namespace udl
513
514/*
515 * Semantic values
516 */
517class Context;
518
519struct SemanticValues : protected std::vector<std::any> {
520 SemanticValues() = default;
522
523 // Input text
524 const char *path = nullptr;
525 const char *ss = nullptr;
526
527 // Matched string
528 std::string_view sv() const { return sv_; }
529
530 // Definition name
531 const std::string &name() const { return name_; }
532
533 std::vector<unsigned int> tags;
534
535 // Line number and column at which the matched string is
536 std::pair<size_t, size_t> line_info() const;
537
538 // Choice count
539 size_t choice_count() const { return choice_count_; }
540
541 // Choice number (0 based index)
542 size_t choice() const { return choice_; }
543
544 // Tokens
545 std::vector<std::string_view> tokens;
546
547 std::string_view token(size_t id = 0) const {
548 if (tokens.empty()) { return sv_; }
549 assert(id < tokens.size());
550 return tokens[id];
551 }
552
553 // Token conversion
554 std::string token_to_string(size_t id = 0) const {
555 return std::string(token(id));
556 }
557
558 template <typename T> T token_to_number() const {
559 return token_to_number_<T>(token());
560 }
561
562 // Transform the semantic value vector to another vector
563 template <typename T>
564 std::vector<T> transform(size_t beg = 0,
565 size_t end = static_cast<size_t>(-1)) const {
566 std::vector<T> r;
567 end = (std::min)(end, size());
568 for (size_t i = beg; i < end; i++) {
569 r.emplace_back(std::any_cast<T>((*this)[i]));
570 }
571 return r;
572 }
573
574 using std::vector<std::any>::iterator;
575 using std::vector<std::any>::const_iterator;
576 using std::vector<std::any>::size;
577 using std::vector<std::any>::empty;
578 using std::vector<std::any>::assign;
579 using std::vector<std::any>::begin;
580 using std::vector<std::any>::end;
581 using std::vector<std::any>::rbegin;
582 using std::vector<std::any>::rend;
583 using std::vector<std::any>::operator[];
584 using std::vector<std::any>::at;
585 using std::vector<std::any>::resize;
586 using std::vector<std::any>::front;
587 using std::vector<std::any>::back;
588 using std::vector<std::any>::push_back;
589 using std::vector<std::any>::pop_back;
590 using std::vector<std::any>::insert;
591 using std::vector<std::any>::erase;
592 using std::vector<std::any>::clear;
593 using std::vector<std::any>::swap;
594 using std::vector<std::any>::emplace;
595 using std::vector<std::any>::emplace_back;
596
597private:
598 friend class Context;
599 friend class Dictionary;
600 friend class Sequence;
601 friend class PrioritizedChoice;
602 friend class Repetition;
603 friend class Holder;
604 friend class PrecedenceClimbing;
605
606 Context *c_ = nullptr;
607 std::string_view sv_;
608 size_t choice_count_ = 0;
609 size_t choice_ = 0;
610 std::string name_;
611};
612
613/*
614 * Semantic action
615 */
// Invokes `fn` with `args` and wraps the outcome in std::any:
//   - a void result becomes an empty std::any,
//   - a std::any result is passed through unchanged,
//   - any other result is stored inside a new std::any.
template <typename F, typename... Args> std::any call(F fn, Args &&...args) {
  using Result = decltype(fn(std::forward<Args>(args)...));
  using Bare = typename std::remove_cv<Result>::type;

  if constexpr (std::is_same<Bare, std::any>::value) {
    return fn(std::forward<Args>(args)...);
  } else if constexpr (std::is_void<Result>::value) {
    fn(std::forward<Args>(args)...);
    return std::any();
  } else {
    return std::any(fn(std::forward<Args>(args)...));
  }
}
628
// argument_count<F>::value is the number of parameters of the callable F.
// The primary template handles functors and lambdas by inspecting their
// operator(); the specialisations cover function pointers and (const)
// member function pointers.
template <class T>
struct argument_count : argument_count<decltype(&T::operator())> {};

template <class Ret, class... Params>
struct argument_count<Ret (*)(Params...)>
    : std::integral_constant<unsigned, sizeof...(Params)> {};

template <class Ret, class Cls, class... Params>
struct argument_count<Ret (Cls::*)(Params...)>
    : std::integral_constant<unsigned, sizeof...(Params)> {};

template <class Ret, class Cls, class... Params>
struct argument_count<Ret (Cls::*)(Params...) const>
    : std::integral_constant<unsigned, sizeof...(Params)> {};
640
641class Action {
642public:
643 Action() = default;
644 Action(Action &&rhs) = default;
645 template <typename F> Action(F fn) : fn_(make_adaptor(fn)) {}
646 template <typename F> void operator=(F fn) { fn_ = make_adaptor(fn); }
647 Action &operator=(const Action &rhs) = default;
648
649 operator bool() const { return bool(fn_); }
650
651 std::any operator()(SemanticValues &vs, std::any &dt,
652 const std::any &predicate_data) const {
653 return fn_(vs, dt, predicate_data);
654 }
655
656private:
657 using Fty = std::function<std::any(SemanticValues &vs, std::any &dt,
658 const std::any &predicate_data)>;
659
660 template <typename F> Fty make_adaptor(F fn) {
661 if constexpr (argument_count<F>::value == 1) {
662 return [fn](auto &vs, auto & /*dt*/, const auto & /*predicate_data*/) {
663 return call(fn, vs);
664 };
665 } else if constexpr (argument_count<F>::value == 2) {
666 return [fn](auto &vs, auto &dt, const auto & /*predicate_data*/) {
667 return call(fn, vs, dt);
668 };
669 } else {
670 return [fn](auto &vs, auto &dt, const auto &predicate_data) {
671 return call(fn, vs, dt, predicate_data);
672 };
673 }
674 }
675
677};
678
680public:
681 Predicate() = default;
682 Predicate(Predicate &&rhs) = default;
683 template <typename F> Predicate(F fn) : fn_(make_adaptor(fn)) {}
684 template <typename F> void operator=(F fn) { fn_ = make_adaptor(fn); }
685 Predicate &operator=(const Predicate &rhs) = default;
686
687 operator bool() const { return bool(fn_); }
688
689 bool operator()(const SemanticValues &vs, const std::any &dt,
690 std::string &msg, std::any &predicate_data) const {
691 return fn_(vs, dt, msg, predicate_data);
692 }
693
694private:
695 using Fty = std::function<bool(const SemanticValues &vs, const std::any &dt,
696 std::string &msg, std::any &predicate_data)>;
697
698 template <typename F> Fty make_adaptor(F fn) {
699 if constexpr (argument_count<F>::value == 3) {
700 return [fn](const auto &vs, const auto &dt, auto &msg,
701 auto & /*predicate_data*/) { return fn(vs, dt, msg); };
702 } else {
703 return [fn](const auto &vs, const auto &dt, auto &msg,
704 auto &predicate_data) {
705 return fn(vs, dt, msg, predicate_data);
706 };
707 }
708 }
709
711};
712
713/*
714 * Parse result helper
715 */
// True unless `len` is the failure sentinel (the largest size_t value).
inline bool success(size_t len) {
  return len != (std::numeric_limits<size_t>::max)();
}
717
// True only when `len` is the failure sentinel (the largest size_t value).
inline bool fail(size_t len) {
  return len == (std::numeric_limits<size_t>::max)();
}
719
720/*
721 * Log
722 */
723using Log = std::function<void(size_t line, size_t col, const std::string &msg,
724 const std::string &rule)>;
725
726/*
727 * ErrorInfo
728 */
729class Definition;
730
731struct ErrorInfo {
732 const char *error_pos = nullptr;
733 std::vector<std::pair<const char *, const Definition *>> expected_tokens;
734 const char *message_pos = nullptr;
735 std::string message;
736 std::string label;
737 const char *last_output_pos = nullptr;
739
740 void clear() {
741 error_pos = nullptr;
742 expected_tokens.clear();
743 message_pos = nullptr;
744 message.clear();
745 }
746
747 void add(const char *error_literal, const Definition *error_rule) {
748 for (const auto &[t, r] : expected_tokens) {
749 if (t == error_literal && r == error_rule) { return; }
750 }
751 expected_tokens.emplace_back(error_literal, error_rule);
752 }
753
754 void output_log(const Log &log, const char *s, size_t n);
755
756private:
757 int cast_char(char c) const { return static_cast<unsigned char>(c); }
758
759 std::string heuristic_error_token(const char *s, size_t n,
760 const char *pos) const {
761 auto len = n - std::distance(s, pos);
762 if (len) {
763 size_t i = 0;
764 auto c = cast_char(pos[i++]);
765 if (!std::ispunct(c) && !std::isspace(c)) {
766 while (i < len && !std::ispunct(cast_char(pos[i])) &&
767 !std::isspace(cast_char(pos[i]))) {
768 i++;
769 }
770 }
771
773 size_t j = 0;
774 while (count > 0 && j < i) {
775 j += codepoint_length(&pos[j], i - j);
776 count--;
777 }
778
779 return escape_characters(pos, j);
780 }
781 return std::string();
782 }
783
784 std::string replace_all(std::string str, const std::string &from,
785 const std::string &to) const {
786 size_t pos = 0;
787 while ((pos = str.find(from, pos)) != std::string::npos) {
788 str.replace(pos, from.length(), to);
789 pos += to.length();
790 }
791 return str;
792 }
793};
794
795/*
796 * Context
797 */
798class Ope;
799
800using TracerEnter = std::function<void(
801 const Ope &name, const char *s, size_t n, const SemanticValues &vs,
802 const Context &c, const std::any &dt, std::any &trace_data)>;
803
804using TracerLeave = std::function<void(
805 const Ope &ope, const char *s, size_t n, const SemanticValues &vs,
806 const Context &c, const std::any &dt, size_t, std::any &trace_data)>;
807
808using TracerStartOrEnd = std::function<void(std::any &trace_data)>;
809
810class Context {
811public:
812 const char *path;
813 const char *s;
814 const size_t l;
815
817 bool recovered = false;
818
819 std::vector<std::shared_ptr<SemanticValues>> value_stack;
821
822 std::vector<Definition *> rule_stack;
823 std::vector<std::vector<std::shared_ptr<Ope>>> args_stack;
824
826
827 std::shared_ptr<Ope> whitespaceOpe;
828 bool in_whitespace = false;
829
830 std::shared_ptr<Ope> wordOpe;
831
832 std::vector<std::pair<std::string_view, std::string>> capture_entries;
833
834 std::vector<bool> cut_stack;
835
836 const size_t def_count;
838 std::vector<bool> cache_registered;
839 std::vector<bool> cache_success;
840
841 std::map<std::pair<size_t, size_t>, std::tuple<size_t, std::any>>
843
844 // Left recursion support
845 struct LRMemo {
846 size_t len = static_cast<size_t>(-1);
847 std::any val;
848 };
849 std::map<std::pair<const Definition *, const char *>, LRMemo> lr_memo;
850
851 // Rules whose lr_memo was hit during the current parse scope.
852 // Used to track LR cycle membership.
853 std::set<const Definition *> lr_refs_hit;
854
855 // Rules currently in their seeding/growing phase at a given position.
856 // Protected from having their lr_memo erased by inner growers.
857 std::set<std::pair<const Definition *, const char *>> lr_active_seeds;
858
859 void clear_packrat_cache(const char *pos, size_t def_id) {
860 if (!enablePackratParsing) { return; }
861 auto col = static_cast<size_t>(pos - s);
862 auto idx = def_count * col + def_id;
863 if (idx < cache_registered.size()) {
864 cache_registered[idx] = false;
865 cache_success[idx] = false;
866 }
867 cache_values.erase(std::make_pair(col, def_id));
868 }
869
870 void write_packrat_cache(const char *pos, size_t def_id, size_t len,
871 const std::any &val) {
872 if (!enablePackratParsing) { return; }
873 auto col = pos - s;
874 auto idx = def_count * static_cast<size_t>(col) + def_id;
875 if (idx >= cache_registered.size()) { return; }
876 cache_registered[idx] = true;
877 cache_success[idx] = true;
878 auto key = std::pair(col, def_id);
879 cache_values[key] = std::pair(len, val);
880 }
881
884 std::any trace_data;
885 const bool verbose_trace;
886
888
903
905 assert(!value_stack_size);
906 assert(cut_stack.empty());
907 }
908
909 Context(const Context &) = delete;
910 Context(Context &&) = delete;
911 Context operator=(const Context &) = delete;
912
913 // Per-rule packrat stats (populated when packrat_stats is non-null)
915 size_t hits = 0;
916 size_t misses = 0;
917 };
918 std::vector<PackratStats> *packrat_stats = nullptr;
919
920 // Per-rule packrat filter: if set, only rules with filter[def_id]=true
921 // use full memoization (cache_values map). Others use bitvector-only
922 // re-entry guard.
923 const std::vector<bool> *packrat_rule_filter = nullptr;
924
925 template <typename T>
926 void packrat(const char *a_s, size_t def_id, size_t &len, std::any &val,
927 T fn) {
929 fn(val);
930 return;
931 }
932
933 auto col = a_s - s;
934 auto idx = def_count * static_cast<size_t>(col) + def_id;
935
936 if (cache_registered[idx]) {
937 if (packrat_stats && def_id < packrat_stats->size()) {
938 (*packrat_stats)[def_id].hits++;
939 }
940 if (cache_success[idx]) {
941 auto key = std::pair(col, def_id);
942 std::tie(len, val) = cache_values[key];
943 return;
944 } else {
945 len = static_cast<size_t>(-1);
946 return;
947 }
948 } else {
949 // Pre-register as failure (re-entry guard for all rules)
950 cache_registered[idx] = true;
951 cache_success[idx] = false;
952
953 if (packrat_stats && def_id < packrat_stats->size()) {
954 (*packrat_stats)[def_id].misses++;
955 }
956
957 fn(val);
958
959 bool full_memo =
960 !packrat_rule_filter || (def_id < packrat_rule_filter->size() &&
961 (*packrat_rule_filter)[def_id]);
962 if (full_memo) {
963 if (success(len)) { write_packrat_cache(a_s, def_id, len, val); }
964 } else {
965 // Guard-only: undo registration so future calls re-parse
966 cache_registered[idx] = false;
967 }
968 return;
969 }
970 }
971
972 // Semantic values
974 assert(value_stack_size <= value_stack.size());
975 if (value_stack_size == value_stack.size()) {
976 value_stack.emplace_back(std::make_shared<SemanticValues>(this));
977 } else {
978 auto &vs = *value_stack[value_stack_size];
979 if (!vs.empty()) {
980 vs.clear();
981 if (!vs.tags.empty()) { vs.tags.clear(); }
982 }
983 vs.sv_ = std::string_view();
984 vs.choice_count_ = 0;
985 vs.choice_ = 0;
986 if (!vs.tokens.empty()) { vs.tokens.clear(); }
987 }
988
989 auto &vs = *value_stack[value_stack_size++];
990 vs.path = path;
991 vs.ss = s;
992 return vs;
993 }
994
996
997 // Arguments
998 void push_args(std::vector<std::shared_ptr<Ope>> &&args) {
999 args_stack.emplace_back(std::move(args));
1000 }
1001
1002 void pop_args() { args_stack.pop_back(); }
1003
1004 const std::vector<std::shared_ptr<Ope>> &top_args() const {
1005 return args_stack[args_stack.size() - 1];
1006 }
1007
1008 // Snapshot/Rollback
1009 struct Snapshot {
1010 size_t sv_size;
1013 std::string_view sv_sv;
1015 size_t choice;
1017 };
1018
1020 return {vs.size(), vs.tags.size(), vs.tokens.size(), vs.sv_,
1021 vs.choice_count_, vs.choice_, capture_entries.size()};
1022 }
1023
1024 void rollback(SemanticValues &vs, const Snapshot &snap) {
1025 vs.resize(snap.sv_size);
1026 vs.tags.resize(snap.sv_tags_size);
1027 vs.tokens.resize(snap.sv_tokens_size);
1028 vs.sv_ = snap.sv_sv;
1029 vs.choice_count_ = snap.choice_count;
1030 vs.choice_ = snap.choice;
1031 capture_entries.resize(snap.capture_size);
1032 }
1033
1034 // Skip trailing whitespace with trace suppression.
1035 // Returns whitespace length, or -1 on failure.
1036 // No-op (returns 0) if inside a token boundary or no whitespaceOpe.
1037 size_t skip_whitespace(const char *a_s, size_t n, SemanticValues &vs,
1038 std::any &dt);
1039
1040 // Error
1041 void set_error_pos(const char *a_s, const char *literal = nullptr);
1042
1043 // Trace
1044 void trace_enter(const Ope &ope, const char *a_s, size_t n,
1045 const SemanticValues &vs, std::any &dt);
1046 void trace_leave(const Ope &ope, const char *a_s, size_t n,
1047 const SemanticValues &vs, std::any &dt, size_t len);
1048 bool is_traceable(const Ope &ope) const;
1049
1050 // Line info
1051 std::pair<size_t, size_t> line_info(const char *cur) const {
1052 std::call_once(source_line_index_init_, [this]() {
1053 for (size_t pos = 0; pos < l; pos++) {
1054 if (s[pos] == '\n') { source_line_index.push_back(pos); }
1055 }
1056 source_line_index.push_back(l);
1057 });
1058
1059 auto pos = static_cast<size_t>(std::distance(s, cur));
1060
1061 auto it = std::lower_bound(
1062 source_line_index.begin(), source_line_index.end(), pos,
1063 [](size_t element, size_t value) { return element < value; });
1064
1065 auto id = static_cast<size_t>(std::distance(source_line_index.begin(), it));
1066 auto off = pos - (id == 0 ? 0 : source_line_index[id - 1] + 1);
1067 return std::pair(id + 1, off + 1);
1068 }
1069
1070 size_t next_trace_id = 0;
1071 std::vector<size_t> trace_ids;
1073 mutable std::once_flag source_line_index_init_;
1074 mutable std::vector<size_t> source_line_index;
1075};
1076
1077/*
1078 * Parser operators
1079 */
1080class Ope {
1081public:
1082 struct Visitor;
1083
1084 virtual ~Ope() = default;
1085 size_t parse(const char *s, size_t n, SemanticValues &vs, Context &c,
1086 std::any &dt) const;
1087 virtual size_t parse_core(const char *s, size_t n, SemanticValues &vs,
1088 Context &c, std::any &dt) const = 0;
1089 virtual void accept(Visitor &v) = 0;
1090
1091 bool is_token_boundary = false;
1092 bool is_choice_like = false;
1093};
1094
1095// Keyword-guarded identifier data, heap-allocated only for matching Sequences.
1096// Avoids bloating all Sequence objects with bitsets and keyword sets.
1098 std::bitset<256> identifier_first; // first char of identifier
1099 std::bitset<256> identifier_rest; // subsequent chars of identifier
1100 std::vector<std::string> exact_keywords; // single-word keywords (lowercase)
1101 std::vector<std::string> prefix_keywords; // first word of compound keywords
1104
// True when `input` is exactly equal to one of `keywords`.
static bool matches_any(const std::vector<std::string> &keywords,
                        std::string_view input) {
  const auto last = keywords.end();
  return std::find(keywords.begin(), last, input) != last;
}
1110};
1111
1112class Sequence : public Ope {
1113public:
1114 template <typename... Args>
1115 Sequence(const Args &...args)
1116 : opes_{static_cast<std::shared_ptr<Ope>>(args)...} {}
1117 Sequence(const std::vector<std::shared_ptr<Ope>> &opes) : opes_(opes) {}
1118 Sequence(std::vector<std::shared_ptr<Ope>> &&opes) : opes_(std::move(opes)) {}
1119
1120 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1121 std::any &dt) const override {
1122 // Keyword-guarded identifier fast path:
1123 // Fuses !ReservedKeyword <identifier> into scan-then-lookup
1124 if (kw_guard_) {
1125 if (auto result = parse_keyword_guarded(s, n, vs, c, dt)) {
1126 return *result;
1127 }
1128 // nullopt means prefix keyword match — fall through to normal path
1129 }
1130 size_t i = 0;
1131 for (const auto &ope : opes_) {
1132 auto len = ope->parse(s + i, n - i, vs, c, dt);
1133 if (fail(len)) { return len; }
1134 i += len;
1135 }
1136 return i;
1137 }
1138
1139 void accept(Visitor &v) override;
1140
1141 std::vector<std::shared_ptr<Ope>> opes_;
1142
1143private:
1144 friend struct SetupFirstSets;
1145 std::unique_ptr<KeywordGuardData> kw_guard_;
1146
1147 // Returns parse result, or nullopt to fall through to normal path
1148 std::optional<size_t> parse_keyword_guarded(const char *s, size_t n,
1149 SemanticValues &vs, Context &c,
1150 std::any &dt) const {
1151 const auto &kw = *kw_guard_;
1152 if (n < 1 || !kw.identifier_first.test(static_cast<unsigned char>(*s))) {
1153 c.set_error_pos(s);
1154 return static_cast<size_t>(-1);
1155 }
1156 // Scan identifier using bitset
1157 size_t id_len = 1;
1158 while (id_len < n &&
1159 kw.identifier_rest.test(static_cast<unsigned char>(s[id_len]))) {
1160 id_len++;
1161 }
1162 // Skip keyword matching if identifier length is out of range
1163 if (id_len >= kw.min_keyword_len && id_len <= kw.max_keyword_len) {
1164 char lower_buf[64];
1165 std::unique_ptr<char[]> lower_heap;
1166 char *lower = lower_buf;
1167 if (id_len > sizeof(lower_buf)) {
1168 lower_heap.reset(new char[id_len]);
1169 lower = lower_heap.get();
1170 }
1171 std::transform(s, s + id_len, lower, [](unsigned char ch) {
1172 return static_cast<char>(std::tolower(ch));
1173 });
1174 std::string_view lower_sv(lower, id_len);
1175
1176 if (KeywordGuardData::matches_any(kw.exact_keywords, lower_sv)) {
1177 c.set_error_pos(s);
1178 return static_cast<size_t>(-1);
1179 }
1180 if (KeywordGuardData::matches_any(kw.prefix_keywords, lower_sv)) {
1181 return std::nullopt;
1182 }
1183 }
1184 // Success: emit token and consume trailing whitespace
1185 vs.tokens.emplace_back(std::string_view(s, id_len));
1186 auto wl = c.skip_whitespace(s + id_len, n - id_len, vs, dt);
1187 if (fail(wl)) { return wl; }
1188 return id_len + wl;
1189 }
1190};
1191
// Per-expression summary of which bytes may begin a match.
// NOTE(review): the declaration line of `first_rule` (listing line 1199) is
// absent from this listing; only its `nullptr` initializer is visible.
1192struct FirstSet {
1193 // First-Set: set of possible first bytes for an expression.
1194 // Used by PrioritizedChoice to skip alternatives that cannot match.
1195 std::bitset<256> chars; // byte values that can appear as the first byte
1196 bool can_be_empty = false; // true if the expression can match empty string
1197 bool any_char = false; // true if any character can appear (cannot filter)
1198 const char *first_literal = nullptr; // first literal for error reporting
1200 nullptr; // first token rule for error reporting
1201
// Unions `other` into this set: the byte sets are OR-ed together and the
// can_be_empty / any_char flags become true if they are true in `other`.
1202 void merge(const FirstSet &other) {
1203 chars |= other.chars;
1204 if (other.can_be_empty) { can_be_empty = true; }
1205 if (other.any_char) { any_char = true; }
1206 // Note: first_literal/first_rule are NOT merged — per-alternative
1207 }
1208};
1209
// Ordered choice: tries the alternatives in `opes_` in order and returns the
// length produced by the first one that succeeds. If every attempted
// alternative fails, the last failure value (initially size_t(-1)) is
// returned.
// NOTE(review): listing lines 1264 and 1281 inside parse_core are absent from
// this listing, so the comments below describe only the visible statements.
1210class PrioritizedChoice : public Ope {
1211public:
// Builds the choice from a pack of operators; `for_label` is stored in
// for_label_, which disables the cut_stack push/pop in parse_core.
1212 template <typename... Args>
1213 PrioritizedChoice(bool for_label, const Args &...args)
1214 : opes_{static_cast<std::shared_ptr<Ope>>(args)...},
1215 for_label_(for_label) {
1216 is_choice_like = true;
1217 }
// Builds the choice from a copied / moved vector of alternatives
// (for_label_ keeps its default of false).
1218 PrioritizedChoice(const std::vector<std::shared_ptr<Ope>> &opes)
1219 : opes_(opes) {
1220 is_choice_like = true;
1221 }
1222 PrioritizedChoice(std::vector<std::shared_ptr<Ope>> &&opes)
1223 : opes_(std::move(opes)) {
1224 is_choice_like = true;
1225 }
1226
1227 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1228 std::any &dt) const override {
1229 size_t len = static_cast<size_t>(-1);
1230
// A non-label choice pushes a cut flag for its own duration; the scope_exit
// pops it again on every exit path.
1231 if (!for_label_) { c.cut_stack.push_back(false); }
1232 auto se = scope_exit([&]() {
1233 if (!for_label_) { c.cut_stack.pop_back(); }
1234 });
1235
// `id` is the index of the alternative currently being considered.
1236 size_t id = 0;
1237 for (const auto &ope : opes_) {
1238 // First-Set filtering: skip if next byte cannot start this alternative
1239 if (n > 0 && id < first_sets_.size()) {
1240 const auto &fs = first_sets_[id];
1241 if (!fs.any_char && !fs.can_be_empty &&
1242 !fs.chars.test(static_cast<unsigned char>(*s))) {
// When logging is enabled and the recorded error position is not past `s`,
// register this alternative's first literal or rule as an expected token.
// The expected-token list is reset if the recorded position is before `s`
// or this is the first alternative (id == 0).
1243 if (c.log && (fs.first_literal || fs.first_rule)) {
1244 if (c.error_info.error_pos <= s) {
1245 if (c.error_info.error_pos < s || !(id > 0)) {
1246 c.error_info.error_pos = s;
1247 c.error_info.expected_tokens.clear();
1248 }
1249 if (fs.first_literal) {
1250 c.error_info.add(fs.first_literal, nullptr);
1251 } else {
1252 c.error_info.add(nullptr, fs.first_rule);
1253 }
1254 }
1255 }
1256 id++;
1257 continue;
1258 }
1259 }
1260
// Clear the innermost cut flag before trying this alternative.
1261 if (!c.cut_stack.empty()) { c.cut_stack.back() = false; }
1262
1263 auto snap = c.snapshot(vs);
1265
1266 len = ope->parse(s, n, vs, c, dt);
1267
// On success record how many alternatives exist and which one matched.
1268 if (success(len)) {
1269 vs.choice_count_ = opes_.size();
1270 vs.choice_ = id;
1271 break;
1272 }
1273
// The alternative failed: restore the state captured in `snap`.
1274 c.rollback(vs, snap);
1275
// If the innermost cut flag was set during the failed attempt, stop trying
// the remaining alternatives.
1276 if (!c.cut_stack.empty() && c.cut_stack.back()) { break; }
1277
1278 id++;
1279 }
1280
1282 return len;
1283 }
1284
1285 void accept(Visitor &v) override;
1286
// Number of alternatives.
1287 size_t size() const { return opes_.size(); }
1288
1289 std::vector<std::shared_ptr<Ope>> opes_;
1290 bool for_label_ = false;
// Optional per-alternative First-Sets, indexed like opes_; filtering applies
// only to indices below first_sets_.size().
1291 std::vector<FirstSet> first_sets_;
1292};
1293
1294class Repetition : public Ope {
1295public:
1296 Repetition(const std::shared_ptr<Ope> &ope, size_t min, size_t max)
1297 : ope_(ope), min_(min), max_(max) {}
1298
1299 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1300 std::any &dt) const override {
1301 // ISpan fast path: tight loop for ASCII CharacterClass repetition.
1302 // Safe because each ASCII match is exactly 1 byte, so byte count == match
1303 // count.
1304 if (span_bitset_) {
1305 const auto &bitset = *span_bitset_;
1306 size_t i = 0;
1307 if (max_ == std::numeric_limits<size_t>::max()) {
1308 // Unbounded repetition (*, +): no per-iteration max check
1309 while (i < n && bitset.test(static_cast<unsigned char>(s[i]))) {
1310 i++;
1311 }
1312 } else {
1313 auto limit = std::min(n, max_);
1314 while (i < limit && bitset.test(static_cast<unsigned char>(s[i]))) {
1315 i++;
1316 }
1317 }
1318 if (i < min_) {
1319 c.set_error_pos(s + i);
1320 return static_cast<size_t>(-1);
1321 }
1322 return i;
1323 }
1324
1325 size_t count = 0;
1326 size_t i = 0;
1327 while (count < min_) {
1328 auto len = ope_->parse(s + i, n - i, vs, c, dt);
1329 if (fail(len)) { return len; }
1330 i += len;
1331 count++;
1332 }
1333
1334 while (count < max_) {
1335 auto snap = c.snapshot(vs);
1336 auto len = ope_->parse(s + i, n - i, vs, c, dt);
1337 if (fail(len)) {
1338 c.rollback(vs, snap);
1339 break;
1340 }
1341 i += len;
1342 count++;
1343 }
1344 return i;
1345 }
1346
1347 void accept(Visitor &v) override;
1348
1349 bool is_zom() const {
1350 return min_ == 0 && max_ == std::numeric_limits<size_t>::max();
1351 }
1352
1353 static std::shared_ptr<Repetition> zom(const std::shared_ptr<Ope> &ope) {
1354 return std::make_shared<Repetition>(ope, 0,
1355 std::numeric_limits<size_t>::max());
1356 }
1357
1358 static std::shared_ptr<Repetition> oom(const std::shared_ptr<Ope> &ope) {
1359 return std::make_shared<Repetition>(ope, 1,
1360 std::numeric_limits<size_t>::max());
1361 }
1362
1363 static std::shared_ptr<Repetition> opt(const std::shared_ptr<Ope> &ope) {
1364 return std::make_shared<Repetition>(ope, 0, 1);
1365 }
1366
1367 std::shared_ptr<Ope> ope_;
1368 size_t min_;
1369 size_t max_;
1370 const std::bitset<256> *span_bitset_ =
1371 nullptr; // non-owning, set by SetupFirstSets
1372};
1373
1374class AndPredicate : public Ope {
1375public:
1376 AndPredicate(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1377
1378 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1379 std::any &dt) const override {
1380 auto snap = c.snapshot(vs);
1381 auto len = ope_->parse(s, n, vs, c, dt);
1382 c.rollback(vs, snap); // Always rollback — predicates consume nothing
1383 if (success(len)) {
1384 return 0;
1385 } else {
1386 return len;
1387 }
1388 }
1389
1390 void accept(Visitor &v) override;
1391
1392 std::shared_ptr<Ope> ope_;
1393};
1394
1395class NotPredicate : public Ope {
1396public:
1397 NotPredicate(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1398
1399 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1400 std::any &dt) const override {
1401 auto snap = c.snapshot(vs);
1402 auto len = ope_->parse(s, n, vs, c, dt);
1403 c.rollback(vs, snap); // Always rollback — predicates consume nothing
1404 if (success(len)) {
1405 c.set_error_pos(s);
1406 return static_cast<size_t>(-1);
1407 } else {
1408 return 0;
1409 }
1410 }
1411
1412 void accept(Visitor &v) override;
1413
1414 std::shared_ptr<Ope> ope_;
1415};
1416
// Operator built from a list of strings; the constructor forwards the list
// and the ignore_case flag to `trie_` and marks the operator choice-like.
// parse_core is defined out of line.
// NOTE(review): the declaration of `trie_` (listing line 1429) is absent from
// this listing.
1417class Dictionary : public Ope, public std::enable_shared_from_this<Dictionary> {
1418public:
1419 Dictionary(const std::vector<std::string> &v, bool ignore_case)
1420 : trie_(v, ignore_case) {
1421 is_choice_like = true;
1422 }
1423
1424 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1425 std::any &dt) const override;
1426
1427 void accept(Visitor &v) override;
1428
1430};
1431
// Literal-string operator. Stores the literal in `lit_` and, when
// ignore_case is set, a pre-computed to_lower() copy in `lower_lit_`
// (otherwise `lower_lit_` is empty). parse_core is defined out of line.
// NOTE(review): the declaration of `ignore_case_` (listing line 1451) is
// absent from this listing.
1432class LiteralString : public Ope,
1433 public std::enable_shared_from_this<LiteralString> {
1434public:
// Moving overload: takes ownership of the caller's string.
1435 LiteralString(std::string &&s, bool ignore_case)
1436 : lit_(std::move(s)), ignore_case_(ignore_case),
1437 lower_lit_(ignore_case ? to_lower(lit_) : std::string()),
1438 is_word_(false) {}
1439
// Copying overload.
1440 LiteralString(const std::string &s, bool ignore_case)
1441 : lit_(s), ignore_case_(ignore_case),
1442 lower_lit_(ignore_case ? to_lower(lit_) : std::string()),
1443 is_word_(false) {}
1444
1445 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1446 std::any &dt) const override;
1447
1448 void accept(Visitor &v) override;
1449
1450 std::string lit_;
1452 std::string lower_lit_; // pre-computed for ignore_case
// `mutable` so the const parse path can modify these; presumably is_word_ is
// computed once under init_is_word_ — confirm against parse_core's definition.
1453 mutable std::once_flag init_is_word_;
1454 mutable bool is_word_;
1455};
1456
1457class CharacterClass : public Ope,
1458 public std::enable_shared_from_this<CharacterClass> {
1459public:
// Builds the class from a textual specification. `s` is decoded into code
// points; a triple "a-b" (with a code point on both sides of '-') becomes the
// inclusive range [a, b], any other code point becomes a single-element
// range. The specification must yield at least one range (asserted).
// NOTE(review): the last statement of this constructor (listing line 1477)
// is absent from this listing.
1460 CharacterClass(const std::string &s, bool negated, bool ignore_case)
1461 : negated_(negated), ignore_case_(ignore_case) {
1462 auto chars = decode(s.data(), s.length());
1463 auto i = 0u;
1464 while (i < chars.size()) {
// A '-' only forms a range when a code point follows it (i + 2 in bounds).
1465 if (i + 2 < chars.size() && chars[i + 1] == '-') {
1466 auto cp1 = chars[i];
1467 auto cp2 = chars[i + 2];
1468 ranges_.emplace_back(std::pair(cp1, cp2));
1469 i += 3;
1470 } else {
1471 auto cp = chars[i];
1472 ranges_.emplace_back(std::pair(cp, cp));
1473 i += 1;
1474 }
1475 }
1476 assert(!ranges_.empty());
1478 }
1479
// Builds the class from already-decoded inclusive code point ranges; the
// list must not be empty (asserted).
// NOTE(review): the last statement of this constructor (listing line 1484)
// is absent from this listing.
1480 CharacterClass(const std::vector<std::pair<char32_t, char32_t>> &ranges,
1481 bool negated, bool ignore_case)
1482 : ranges_(ranges), negated_(negated), ignore_case_(ignore_case) {
1483 assert(!ranges_.empty());
1485 }
1486
1487 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1488 Context &c, std::any & /*dt*/) const override {
1489 if (n < 1) {
1490 c.set_error_pos(s);
1491 return static_cast<size_t>(-1);
1492 }
1493
1494 char32_t cp = 0;
1495 auto len = decode_codepoint(s, n, cp);
1496
1497 for (const auto &range : ranges_) {
1498 if (in_range(range, cp)) {
1499 if (negated_) {
1500 c.set_error_pos(s);
1501 return static_cast<size_t>(-1);
1502 } else {
1503 return len;
1504 }
1505 }
1506 }
1507
1508 if (negated_) {
1509 return len;
1510 } else {
1511 c.set_error_pos(s);
1512 return static_cast<size_t>(-1);
1513 }
1514 }
1515
1516 void accept(Visitor &v) override;
1517
1518 friend struct ComputeFirstSet;
1519
// Read-only accessors for the ASCII-only flag and the per-byte bitset.
1520 bool is_ascii_only() const { return is_ascii_only_; }
1521 const std::bitset<256> &ascii_bitset() const { return ascii_bitset_; }
1522
1523private:
1524 bool in_range(const std::pair<char32_t, char32_t> &range, char32_t cp) const {
1525 if (ignore_case_) {
1526 auto cpl = std::tolower(cp);
1527 return std::tolower(range.first) <= cpl &&
1528 cpl <= std::tolower(range.second);
1529 } else {
1530 return range.first <= cp && cp <= range.second;
1531 }
1532 }
1533
// NOTE(review): the signature line of this member function (listing line
// 1534) is absent from this listing; only its body is visible.
// Fills ascii_bitset_ and sets is_ascii_only_ when the class is not negated
// and every range bound is <= 0x7F; otherwise returns without changes.
1535 if (negated_) { return; } // negated classes can match non-ASCII
1536 for (const auto &[lo, hi] : ranges_) {
1537 if (lo > 0x7F || hi > 0x7F) { return; }
1538 }
1539 is_ascii_only_ = true;
1540 for (const auto &[lo, hi] : ranges_) {
1541 for (auto cp = lo; cp <= hi; cp++) {
1542 auto ch = static_cast<unsigned char>(cp);
1543 ascii_bitset_.set(ch);
// With ignore_case_, both case variants of the byte are accepted as well.
1544 if (ignore_case_) {
1545 ascii_bitset_.set(static_cast<unsigned char>(std::toupper(ch)));
1546 ascii_bitset_.set(static_cast<unsigned char>(std::tolower(ch)));
1547 }
1548 }
1549 }
1550 }
1551
1552 std::vector<std::pair<char32_t, char32_t>> ranges_;
1555 std::bitset<256> ascii_bitset_;
1556 bool is_ascii_only_ = false;
1557};
1558
1559class Character : public Ope, public std::enable_shared_from_this<Character> {
1560public:
1561 Character(char32_t ch) : ch_(ch) {}
1562
1563 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1564 Context &c, std::any & /*dt*/) const override {
1565 if (n < 1) {
1566 c.set_error_pos(s);
1567 return static_cast<size_t>(-1);
1568 }
1569
1570 char32_t cp = 0;
1571 auto len = decode_codepoint(s, n, cp);
1572
1573 if (cp != ch_) {
1574 c.set_error_pos(s);
1575 return static_cast<size_t>(-1);
1576 }
1577 return len;
1578 }
1579
1580 void accept(Visitor &v) override;
1581
1582 char32_t ch_;
1583};
1584
1585class AnyCharacter : public Ope,
1586 public std::enable_shared_from_this<AnyCharacter> {
1587public:
1588 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1589 Context &c, std::any & /*dt*/) const override {
1590 auto len = codepoint_length(s, n);
1591 if (len < 1) {
1592 c.set_error_pos(s);
1593 return static_cast<size_t>(-1);
1594 }
1595 return len;
1596 }
1597
1598 void accept(Visitor &v) override;
1599};
1600
1601class CaptureScope : public Ope {
1602public:
1603 CaptureScope(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1604
1605 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1606 std::any &dt) const override {
1607 auto cap_snap = c.capture_entries.size();
1608 auto len = ope_->parse(s, n, vs, c, dt);
1609 c.capture_entries.resize(cap_snap); // Always rollback (isolation)
1610 return len;
1611 }
1612
1613 void accept(Visitor &v) override;
1614
1615 std::shared_ptr<Ope> ope_;
1616};
1617
// Runs the wrapped operator and, on success, reports the matched span to a
// callback.
// NOTE(review): the declaration of `match_action_` (listing line 1635) is
// absent from this listing.
1618class Capture : public Ope {
1619public:
// Callback signature: start of the match, matched length, parse context.
1620 using MatchAction = std::function<void(const char *s, size_t n, Context &c)>;
1621
1622 Capture(const std::shared_ptr<Ope> &ope, MatchAction ma)
1623 : ope_(ope), match_action_(ma) {}
1624
1625 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1626 std::any &dt) const override {
1627 auto len = ope_->parse(s, n, vs, c, dt);
// The callback is skipped when the operand failed or no callback is set.
1628 if (success(len) && match_action_) { match_action_(s, len, c); }
1629 return len;
1630 }
1631
1632 void accept(Visitor &v) override;
1633
1634 std::shared_ptr<Ope> ope_;
1636};
1637
1638class TokenBoundary : public Ope {
1639public:
1640 TokenBoundary(const std::shared_ptr<Ope> &ope) : ope_(ope) {
1641 is_token_boundary = true;
1642 }
1643
1644 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1645 std::any &dt) const override;
1646
1647 void accept(Visitor &v) override;
1648
1649 std::shared_ptr<Ope> ope_;
1650};
1651
1652class Ignore : public Ope {
1653public:
1654 Ignore(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1655
1656 size_t parse_core(const char *s, size_t n, SemanticValues & /*vs*/,
1657 Context &c, std::any &dt) const override {
1658 auto &chvs = c.push_semantic_values_scope();
1659 auto se = scope_exit([&]() { c.pop_semantic_values_scope(); });
1660 return ope_->parse(s, n, chvs, c, dt);
1661 }
1662
1663 void accept(Visitor &v) override;
1664
1665 std::shared_ptr<Ope> ope_;
1666};
1667
// Signature of a user-supplied parse function: input pointer, remaining
// length, semantic values and user data; returns a size_t result.
1668using Parser = std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1669 std::any &dt)>;
1670
// Operator that delegates parsing to a user-supplied function.
// NOTE(review): the member name line of the std::function declaration
// (listing line 1682, presumably `fn_;`) is absent from this listing.
1671class User : public Ope {
1672public:
1673 User(Parser fn) : fn_(fn) {}
// Forwards to fn_ without passing the Context; fn_ must be non-empty
// (asserted).
1674 size_t parse_core(const char *s, size_t n, SemanticValues &vs,
1675 Context & /*c*/, std::any &dt) const override {
1676 assert(fn_);
1677 return fn_(s, n, vs, dt);
1678 }
1679 void accept(Visitor &v) override;
1680 std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1681 std::any &dt)>
1683};
1684
1685class WeakHolder : public Ope {
1686public:
1687 WeakHolder(const std::shared_ptr<Ope> &ope) : weak_(ope) {}
1688
1689 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1690 std::any &dt) const override {
1691 auto ope = weak_.lock();
1692 assert(ope);
1693 return ope->parse(s, n, vs, c, dt);
1694 }
1695
1696 void accept(Visitor &v) override;
1697
1698 std::weak_ptr<Ope> weak_;
1699};
1700
// Operator constructed from a pointer to a Definition (`outer`); all member
// functions are defined out of line.
// NOTE(review): the declaration of `outer_` (listing line 1717) is absent
// from this listing.
1701class Holder : public Ope {
1702public:
1703 Holder(Definition *outer) : outer_(outer) {}
1704
1705 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1706 std::any &dt) const override;
1707
1708 void accept(Visitor &v) override;
1709
1710 std::any reduce(SemanticValues &vs, std::any &dt,
1711 const std::any &predicate_data) const;
1712
1713 const std::string &name() const;
1714 const std::string &trace_name() const;
1715
1716 std::shared_ptr<Ope> ope_;
// `mutable` so const members can modify them; presumably trace_name_ is built
// once under trace_name_init_ — confirm against trace_name()'s definition.
1718 mutable std::once_flag trace_name_init_;
1719 mutable std::string trace_name_;
1720
1721 friend class Definition;
1722};
1723
// A grammar maps a name (string) to its Definition.
1724using Grammar = std::unordered_map<std::string, Definition>;
1725
// By-name reference into a Grammar. Stores the grammar, the referenced name,
// a source pointer `s`, whether the reference is a macro, and its arguments;
// `rule_` starts as nullptr and `iarg_` as 0.
// NOTE(review): the declarations of `grammar_` (listing line 1740) and
// `rule_` (listing line 1747) are absent from this listing.
1726class Reference : public Ope, public std::enable_shared_from_this<Reference> {
1727public:
1728 Reference(const Grammar &grammar, const std::string &name, const char *s,
1729 bool is_macro, const std::vector<std::shared_ptr<Ope>> &args)
1730 : grammar_(grammar), name_(name), s_(s), is_macro_(is_macro), args_(args),
1731 rule_(nullptr), iarg_(0) {}
1732
1733 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1734 std::any &dt) const override;
1735
1736 void accept(Visitor &v) override;
1737
1738 std::shared_ptr<Ope> get_core_operator() const;
1739
1741 const std::string name_;
// presumably the position of the reference in the grammar text — TODO confirm
1742 const char *s_;
1743
1744 const bool is_macro_;
1745 const std::vector<std::shared_ptr<Ope>> args_;
1746
1748 size_t iarg_;
1749};
1750
1751class Whitespace : public Ope {
1752public:
1753 Whitespace(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1754
1755 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1756 std::any &dt) const override {
1757 if (c.in_whitespace) { return 0; }
1758 c.in_whitespace = true;
1759 auto se = scope_exit([&]() { c.in_whitespace = false; });
1760 return ope_->parse(s, n, vs, c, dt);
1761 }
1762
1763 void accept(Visitor &v) override;
1764
1765 std::shared_ptr<Ope> ope_;
1766};
1767
1768class BackReference : public Ope {
1769public:
1770 BackReference(std::string &&name) : name_(std::move(name)) {}
1771
1772 BackReference(const std::string &name) : name_(name) {}
1773
1774 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1775 std::any &dt) const override;
1776
1777 void accept(Visitor &v) override;
1778
1779 std::string name_;
1780};
1781
// Operator built from an atom operator, a binary-operator operator, a
// BinOpeInfo table and a rule; parsing starts parse_expression with a
// minimum precedence of 0.
// NOTE(review): the declarations of `info_` and `rule_` (listing lines
// 1800-1801) and listing line 1807 are absent from this listing.
1782class PrecedenceClimbing : public Ope {
1783public:
// Keyed by a string_view; the mapped pair is (size_t, char) — presumably
// precedence level and associativity, TODO confirm.
1784 using BinOpeInfo = std::map<std::string_view, std::pair<size_t, char>>;
1785
1786 PrecedenceClimbing(const std::shared_ptr<Ope> &atom,
1787 const std::shared_ptr<Ope> &binop, const BinOpeInfo &info,
1788 const Definition &rule)
1789 : atom_(atom), binop_(binop), info_(info), rule_(rule) {}
1790
1791 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1792 std::any &dt) const override {
1793 return parse_expression(s, n, vs, c, dt, 0);
1794 }
1795
1796 void accept(Visitor &v) override;
1797
1798 std::shared_ptr<Ope> atom_;
1799 std::shared_ptr<Ope> binop_;
1802
1803private:
// Defined out of line; `min_prec` is the last argument passed by parse_core.
1804 size_t parse_expression(const char *s, size_t n, SemanticValues &vs,
1805 Context &c, std::any &dt, size_t min_prec) const;
1806
1808};
1809
1810class Recovery : public Ope {
1811public:
1812 Recovery(const std::shared_ptr<Ope> &ope) : ope_(ope) {}
1813
1814 size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c,
1815 std::any &dt) const override;
1816
1817 void accept(Visitor &v) override;
1818
1819 std::shared_ptr<Ope> ope_;
1820};
1821
1822class Cut : public Ope, public std::enable_shared_from_this<Cut> {
1823public:
1824 size_t parse_core(const char * /*s*/, size_t /*n*/, SemanticValues & /*vs*/,
1825 Context &c, std::any & /*dt*/) const override {
1826 if (!c.cut_stack.empty()) { c.cut_stack.back() = true; }
1827 return 0;
1828 }
1829
1830 void accept(Visitor &v) override;
1831};
1832
1833/*
1834 * Factories
1835 */
1836template <typename... Args> std::shared_ptr<Ope> seq(Args &&...args) {
1837 return std::make_shared<Sequence>(static_cast<std::shared_ptr<Ope>>(args)...);
1838}
1839
1840template <typename... Args> std::shared_ptr<Ope> cho(Args &&...args) {
1841 return std::make_shared<PrioritizedChoice>(
1842 false, static_cast<std::shared_ptr<Ope>>(args)...);
1843}
1844
1845template <typename... Args> std::shared_ptr<Ope> cho4label_(Args &&...args) {
1846 return std::make_shared<PrioritizedChoice>(
1847 true, static_cast<std::shared_ptr<Ope>>(args)...);
1848}
1849
1850inline std::shared_ptr<Ope> zom(const std::shared_ptr<Ope> &ope) {
1851 return Repetition::zom(ope);
1852}
1853
1854inline std::shared_ptr<Ope> oom(const std::shared_ptr<Ope> &ope) {
1855 return Repetition::oom(ope);
1856}
1857
1858inline std::shared_ptr<Ope> opt(const std::shared_ptr<Ope> &ope) {
1859 return Repetition::opt(ope);
1860}
1861
1862inline std::shared_ptr<Ope> rep(const std::shared_ptr<Ope> &ope, size_t min,
1863 size_t max) {
1864 return std::make_shared<Repetition>(ope, min, max);
1865}
1866
1867inline std::shared_ptr<Ope> apd(const std::shared_ptr<Ope> &ope) {
1868 return std::make_shared<AndPredicate>(ope);
1869}
1870
1871inline std::shared_ptr<Ope> npd(const std::shared_ptr<Ope> &ope) {
1872 return std::make_shared<NotPredicate>(ope);
1873}
1874
1875inline std::shared_ptr<Ope> dic(const std::vector<std::string> &v,
1876 bool ignore_case) {
1877 return std::make_shared<Dictionary>(v, ignore_case);
1878}
1879
1880inline std::shared_ptr<Ope> lit(std::string &&s) {
1881 return std::make_shared<LiteralString>(s, false);
1882}
1883
1884inline std::shared_ptr<Ope> liti(std::string &&s) {
1885 return std::make_shared<LiteralString>(s, true);
1886}
1887
1888inline std::shared_ptr<Ope> cls(const std::string &s) {
1889 return std::make_shared<CharacterClass>(s, false, false);
1890}
1891
1892inline std::shared_ptr<Ope>
1893cls(const std::vector<std::pair<char32_t, char32_t>> &ranges,
1894 bool ignore_case = false) {
1895 return std::make_shared<CharacterClass>(ranges, false, ignore_case);
1896}
1897
1898inline std::shared_ptr<Ope> ncls(const std::string &s) {
1899 return std::make_shared<CharacterClass>(s, true, false);
1900}
1901
1902inline std::shared_ptr<Ope>
1903ncls(const std::vector<std::pair<char32_t, char32_t>> &ranges,
1904 bool ignore_case = false) {
1905 return std::make_shared<CharacterClass>(ranges, true, ignore_case);
1906}
1907
1908inline std::shared_ptr<Ope> chr(char32_t dt) {
1909 return std::make_shared<Character>(dt);
1910}
1911
1912inline std::shared_ptr<Ope> dot() { return std::make_shared<AnyCharacter>(); }
1913
1914inline std::shared_ptr<Ope> csc(const std::shared_ptr<Ope> &ope) {
1915 return std::make_shared<CaptureScope>(ope);
1916}
1917
// Creates a Capture around `ope` with the callback `ma`.
// NOTE(review): the second parameter line (listing line 1919, declaring
// `ma`) is absent from this listing.
1918inline std::shared_ptr<Ope> cap(const std::shared_ptr<Ope> &ope,
1920 return std::make_shared<Capture>(ope, ma);
1921}
1922
1923inline std::shared_ptr<Ope> tok(const std::shared_ptr<Ope> &ope) {
1924 return std::make_shared<TokenBoundary>(ope);
1925}
1926
1927inline std::shared_ptr<Ope> ign(const std::shared_ptr<Ope> &ope) {
1928 return std::make_shared<Ignore>(ope);
1929}
1930
1931inline std::shared_ptr<Ope>
1932usr(std::function<size_t(const char *s, size_t n, SemanticValues &vs,
1933 std::any &dt)>
1934 fn) {
1935 return std::make_shared<User>(fn);
1936}
1937
1938inline std::shared_ptr<Ope> ref(const Grammar &grammar, const std::string &name,
1939 const char *s, bool is_macro,
1940 const std::vector<std::shared_ptr<Ope>> &args) {
1941 return std::make_shared<Reference>(grammar, name, s, is_macro, args);
1942}
1943
1944inline std::shared_ptr<Ope> wsp(const std::shared_ptr<Ope> &ope) {
1945 return std::make_shared<Whitespace>(std::make_shared<Ignore>(ope));
1946}
1947
1948inline std::shared_ptr<Ope> bkr(std::string &&name) {
1949 return std::make_shared<BackReference>(name);
1950}
1951
// Creates a PrecedenceClimbing operator from an atom operator, a binary
// operator, an info table and a rule.
// NOTE(review): the parameter line declaring `info` (listing line 1954) is
// absent from this listing.
1952inline std::shared_ptr<Ope> pre(const std::shared_ptr<Ope> &atom,
1953 const std::shared_ptr<Ope> &binop,
1955 const Definition &rule) {
1956 return std::make_shared<PrecedenceClimbing>(atom, binop, info, rule);
1957}
1958
1959inline std::shared_ptr<Ope> rec(const std::shared_ptr<Ope> &ope) {
1960 return std::make_shared<Recovery>(ope);
1961}
1962
1963inline std::shared_ptr<Ope> cut() { return std::make_shared<Cut>(); }
1964
1965/*
1966 * Visitor
1967 */
// Visitor base: one virtual visit() overload per operator type, each with an
// empty default body, so derived visitors override only what they need.
// NOTE(review): the struct header line (listing line 1968) is absent from
// this listing.
1969 virtual ~Visitor() {}
1970 virtual void visit(Sequence &) {}
1971 virtual void visit(PrioritizedChoice &) {}
1972 virtual void visit(Repetition &) {}
1973 virtual void visit(AndPredicate &) {}
1974 virtual void visit(NotPredicate &) {}
1975 virtual void visit(Dictionary &) {}
1976 virtual void visit(LiteralString &) {}
1977 virtual void visit(CharacterClass &) {}
1978 virtual void visit(Character &) {}
1979 virtual void visit(AnyCharacter &) {}
1980 virtual void visit(CaptureScope &) {}
1981 virtual void visit(Capture &) {}
1982 virtual void visit(TokenBoundary &) {}
1983 virtual void visit(Ignore &) {}
1984 virtual void visit(User &) {}
1985 virtual void visit(WeakHolder &) {}
1986 virtual void visit(Holder &) {}
1987 virtual void visit(Reference &) {}
1988 virtual void visit(Whitespace &) {}
1989 virtual void visit(BackReference &) {}
1990 virtual void visit(PrecedenceClimbing &) {}
1991 virtual void visit(Recovery &) {}
1992 virtual void visit(Cut &) {}
1993};
1994
// Visitor whose overrides simply recurse into the child operator(s) of each
// composite operator type.
// NOTE(review): the struct header lines (listing lines 1995-1996) are absent
// from this listing.
1997 void visit(Sequence &ope) override {
1998 for (auto &op : ope.opes_) {
1999 op->accept(*this);
2000 }
2001 }
2002 void visit(PrioritizedChoice &ope) override {
2003 for (auto &op : ope.opes_) {
2004 op->accept(*this);
2005 }
2006 }
2007 void visit(Repetition &ope) override { ope.ope_->accept(*this); }
2008 void visit(AndPredicate &ope) override { ope.ope_->accept(*this); }
2009 void visit(NotPredicate &ope) override { ope.ope_->accept(*this); }
2010 void visit(CaptureScope &ope) override { ope.ope_->accept(*this); }
2011 void visit(Capture &ope) override { ope.ope_->accept(*this); }
2012 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
2013 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
// NOTE(review): the result of lock() is dereferenced without a null check.
2014 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2015 void visit(Holder &ope) override { ope.ope_->accept(*this); }
2016 void visit(Whitespace &ope) override { ope.ope_->accept(*this); }
2017 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
// Only atom_ is traversed here; binop_ is not visited.
2018 void visit(PrecedenceClimbing &ope) override { ope.atom_->accept(*this); }
2019};
2020
// Visitor that records a display name for the visited operator: a fixed
// string per operator type, except for Holder, which uses its trace_name().
// NOTE(review): the struct header line (listing line 2021) is absent from
// this listing.
2022 using Ope::Visitor::visit;
2023
2024 void visit(Sequence &) override { name_ = "Sequence"; }
2025 void visit(PrioritizedChoice &) override { name_ = "PrioritizedChoice"; }
2026 void visit(Repetition &) override { name_ = "Repetition"; }
2027 void visit(AndPredicate &) override { name_ = "AndPredicate"; }
2028 void visit(NotPredicate &) override { name_ = "NotPredicate"; }
2029 void visit(Dictionary &) override { name_ = "Dictionary"; }
2030 void visit(LiteralString &) override { name_ = "LiteralString"; }
2031 void visit(CharacterClass &) override { name_ = "CharacterClass"; }
2032 void visit(Character &) override { name_ = "Character"; }
2033 void visit(AnyCharacter &) override { name_ = "AnyCharacter"; }
2034 void visit(CaptureScope &) override { name_ = "CaptureScope"; }
2035 void visit(Capture &) override { name_ = "Capture"; }
2036 void visit(TokenBoundary &) override { name_ = "TokenBoundary"; }
2037 void visit(Ignore &) override { name_ = "Ignore"; }
2038 void visit(User &) override { name_ = "User"; }
2039 void visit(WeakHolder &) override { name_ = "WeakHolder"; }
// Points into the string returned by reference from Holder::trace_name().
2040 void visit(Holder &ope) override { name_ = ope.trace_name().data(); }
2041 void visit(Reference &) override { name_ = "Reference"; }
2042 void visit(Whitespace &) override { name_ = "Whitespace"; }
2043 void visit(BackReference &) override { name_ = "BackReference"; }
2044 void visit(PrecedenceClimbing &) override { name_ = "PrecedenceClimbing"; }
2045 void visit(Recovery &) override { name_ = "Recovery"; }
2046 void visit(Cut &) override { name_ = "Cut"; }
2047
// Visits `ope` with a fresh visitor and returns the recorded name as a
// std::string. name_ is non-null afterwards for every operator type that has
// an override above.
2048 static std::string get(Ope &ope) {
2049 TraceOpeName vis;
2050 ope.accept(vis);
2051 return vis.name_;
2052 }
2053
2054private:
2055 const char *name_ = nullptr;
2056};
2057
2060
// Visitor with out-of-line overrides for Holder, Reference and
// PrecedenceClimbing, plus a public pointer-to-size_t map `ids`.
// NOTE(review): the struct header lines (listing lines 2058-2059) are absent
// from this listing; presumably `ids` maps an object address to its assigned
// id — confirm against the out-of-line definitions.
2061 void visit(Holder &ope) override;
2062 void visit(Reference &ope) override;
2063 void visit(PrecedenceClimbing &ope) override;
2064
2065 std::unordered_map<void *, size_t> ids;
2066};
2067
// Visitor answering "is this operator a literal token?": true for a
// Dictionary, a LiteralString, or a PrioritizedChoice whose alternatives all
// satisfy check(); false for every other operator type.
// NOTE(review): the struct header line (listing line 2068) is absent from
// this listing.
2069 using Ope::Visitor::visit;
2070
2071 void visit(PrioritizedChoice &ope) override {
// Bail out (leaving result_ false) at the first non-literal alternative.
2072 for (const auto &op : ope.opes_) {
2073 if (!IsLiteralToken::check(*op)) { return; }
2074 }
2075 result_ = true;
2076 }
2077
2078 void visit(Dictionary &) override { result_ = true; }
2079 void visit(LiteralString &) override { result_ = true; }
2080
// Visits `ope` with a fresh visitor and returns its result.
2081 static bool check(Ope &ope) {
2082 IsLiteralToken vis;
2083 ope.accept(vis);
2084 return vis.result_;
2085 }
2086
2087private:
2088 bool result_ = false;
2089};
2090
2093
// Visitor that records whether a TokenBoundary or a WeakHolder was visited.
// The And/Not predicate overrides are empty, so nothing inside a predicate
// is visited. visit(Reference &) is defined out of line.
// NOTE(review): the struct header lines (listing lines 2091-2092) and the
// declaration of `has_token_boundary_` (listing line 2109) are absent from
// this listing.
2094 void visit(TokenBoundary &) override { has_token_boundary_ = true; }
2095 void visit(AndPredicate &) override {}
2096 void visit(NotPredicate &) override {}
2097 void visit(WeakHolder &) override { has_rule_ = true; }
2098 void visit(Reference &ope) override;
2099
// True when `ope` is a literal token (IsLiteralToken::check), or when
// visiting it found a token boundary, or found no rule (WeakHolder) at all.
2100 static bool is_token(Ope &ope) {
2101 if (IsLiteralToken::check(ope)) { return true; }
2102
2103 TokenChecker vis;
2104 ope.accept(vis);
2105 return vis.has_token_boundary_ || !vis.has_rule_;
2106 }
2107
2108private:
2110 bool has_rule_ = false;
2111};
2112
// Visitor that looks through TokenBoundary, Ignore and Recovery wrappers and
// records the character data of a LiteralString's `lit_` if one is reached.
// visit(Reference &) is defined out of line.
// NOTE(review): the struct header line (listing line 2113) is absent from
// this listing.
2114 using Ope::Visitor::visit;
2115
// The stored pointer refers to the LiteralString's own buffer.
2116 void visit(LiteralString &ope) override { token_ = ope.lit_.data(); }
2117 void visit(TokenBoundary &ope) override { ope.ope_->accept(*this); }
2118 void visit(Ignore &ope) override { ope.ope_->accept(*this); }
2119 void visit(Reference &ope) override;
2120 void visit(Recovery &ope) override { ope.ope_->accept(*this); }
2121
// Returns the literal found for `ope`, or nullptr when none was recorded.
2122 static const char *token(Ope &ope) {
2123 FindLiteralToken vis;
2124 ope.accept(vis);
2125 return vis.token_;
2126 }
2127
2128private:
2129 const char *token_ = nullptr;
2130};
2131
2134
// Visitor constructed with a rule name. It walks an operator tree while
// maintaining a `done_` flag and reports a problem through the public
// `error_s` pointer (nullptr when none was recorded). visit(Reference &) and
// resolve_macro_arg() are defined out of line.
// NOTE(review): the struct header lines (listing lines 2132-2133) are absent
// from this listing. Presumably `done_` means "input must have been consumed
// by this point, so later elements cannot be left-recursive" — confirm
// against visit(Reference &).
2135 DetectLeftRecursion(const std::string &name) : name_(name) {}
2136
// Visits elements in order and stops at the first one that sets done_ or
// records an error (in which case done_ is also set).
2137 void visit(Sequence &ope) override {
2138 for (const auto &op : ope.opes_) {
2139 op->accept(*this);
2140 if (done_) {
2141 break;
2142 } else if (error_s) {
2143 done_ = true;
2144 break;
2145 }
2146 }
2147 }
// Visits every alternative until one records an error.
2148 void visit(PrioritizedChoice &ope) override {
2149 for (const auto &op : ope.opes_) {
2150 op->accept(*this);
2151 if (error_s) {
2152 done_ = true;
2153 break;
2154 }
2155 }
2156 }
// After visiting the operand, done_ is overwritten: true only when at least
// one repetition is required.
2157 void visit(Repetition &ope) override {
2158 ope.ope_->accept(*this);
2159 done_ = ope.min_ > 0;
2160 }
// Predicates visit their operand but always leave done_ false.
2161 void visit(AndPredicate &ope) override {
2162 ope.ope_->accept(*this);
2163 done_ = false;
2164 }
2165 void visit(NotPredicate &ope) override {
2166 ope.ope_->accept(*this);
2167 done_ = false;
2168 }
// Leaf operators set done_ directly; an empty literal leaves it false.
2169 void visit(Dictionary &) override { done_ = true; }
2170 void visit(LiteralString &ope) override { done_ = !ope.lit_.empty(); }
2171 void visit(CharacterClass &) override { done_ = true; }
2172 void visit(Character &) override { done_ = true; }
2173 void visit(AnyCharacter &) override { done_ = true; }
2174 void visit(User &) override { done_ = true; }
2175 void visit(Reference &ope) override;
2176 void visit(BackReference &) override { done_ = true; }
2177 void visit(Cut &) override { done_ = true; }
2178
2179 const char *error_s = nullptr;
2180
2181 std::shared_ptr<Ope> resolve_macro_arg(size_t iarg) const;
2182
2183private:
2184 std::string name_;
2185 std::unordered_set<std::string> refs_;
2186 bool done_ = false;
2187 std::vector<const std::vector<std::shared_ptr<Ope>> *> macro_args_stack_;
2188};
2189
2192
// Visitor computing whether an operator can match the empty string; the
// answer is left in the public `result` member. visit(Reference &) is defined
// out of line.
// NOTE(review): the struct header lines (listing lines 2190-2191) are absent
// from this listing.
2193 bool result = false;
2194
// A sequence can be empty only if every element can (each element is checked
// with its own fresh visitor).
2195 void visit(Sequence &ope) override {
2196 result = std::all_of(ope.opes_.begin(), ope.opes_.end(), [](auto &op) {
2197 ComputeCanBeEmpty vis;
2198 op->accept(vis);
2199 return vis.result;
2200 });
2201 }
// A choice can be empty if any alternative can.
2202 void visit(PrioritizedChoice &ope) override {
2203 result = std::any_of(ope.opes_.begin(), ope.opes_.end(), [](auto &op) {
2204 ComputeCanBeEmpty vis;
2205 op->accept(vis);
2206 return vis.result;
2207 });
2208 }
// Only the minimum count is considered for a repetition; the operand itself
// is not inspected.
2209 void visit(Repetition &ope) override { result = ope.min_ == 0; }
2210 void visit(AndPredicate &) override { result = true; }
2211 void visit(NotPredicate &) override { result = true; }
2212 void visit(Dictionary &) override { result = false; }
2213 void visit(LiteralString &ope) override { result = ope.lit_.empty(); }
2214 void visit(CharacterClass &) override { result = false; }
2215 void visit(Character &) override { result = false; }
2216 void visit(AnyCharacter &) override { result = false; }
2217 void visit(User &) override { result = false; }
2218 void visit(Reference &ope) override;
2219 void visit(BackReference &) override { result = false; }
2220 void visit(Cut &) override { result = false; }
2221};
2222
2225
// Visitor that flags operators considered empty-matching. When one is found,
// `is_empty` is set and `error_s` / `error_name` are copied from the last
// entry of the shared `refs_` list. visit(Sequence &) and visit(Reference &)
// are defined out of line.
// NOTE(review): the struct header lines (listing lines 2223-2224) are absent
// from this listing.
2226 HasEmptyElement(std::vector<std::pair<const char *, std::string>> &refs,
2227 std::unordered_map<std::string, bool> &has_error_cache)
2228 : refs_(refs), has_error_cache_(has_error_cache) {}
2229
2230 void visit(Sequence &ope) override;
// Stops at the first alternative that was flagged as empty.
2231 void visit(PrioritizedChoice &ope) override {
2232 for (const auto &op : ope.opes_) {
2233 op->accept(*this);
2234 if (is_empty) { return; }
2235 }
2236 }
// A repetition with a zero minimum is flagged directly; otherwise the operand
// decides.
2237 void visit(Repetition &ope) override {
2238 if (ope.min_ == 0) {
2239 set_error();
2240 } else {
2241 ope.ope_->accept(*this);
2242 }
2243 }
2244 void visit(AndPredicate &) override { set_error(); }
2245 void visit(NotPredicate &) override { set_error(); }
2246 void visit(LiteralString &ope) override {
2247 if (ope.lit_.empty()) { set_error(); }
2248 }
2249 void visit(Reference &ope) override;
2250
2251 bool is_empty = false;
2252 const char *error_s = nullptr;
2253 std::string error_name;
2254
2255private:
// Reads refs_.back(), so refs_ must not be empty when this is called.
2256 void set_error() {
2257 is_empty = true;
2258 tie(error_s, error_name) = refs_.back();
2259 }
// Both containers are held by reference and owned by the caller.
2260 std::vector<std::pair<const char *, std::string>> &refs_;
2261 std::unordered_map<std::string, bool> &has_error_cache_;
2262};
2263
2266
2267 DetectInfiniteLoop(const char *s, const std::string &name,
2268 std::vector<std::pair<const char *, std::string>> &refs,
2269 std::unordered_map<std::string, bool> &has_error_cache)
2270 : refs_(refs), has_error_cache_(has_error_cache) {
2271 refs_.emplace_back(s, name);
2272 }
2273
2274 DetectInfiniteLoop(std::vector<std::pair<const char *, std::string>> &refs,
2275 std::unordered_map<std::string, bool> &has_error_cache)
2276 : refs_(refs), has_error_cache_(has_error_cache) {}
2277
2278 void visit(Sequence &ope) override {
2279 for (const auto &op : ope.opes_) {
2280 op->accept(*this);
2281 if (has_error) { return; }
2282 }
2283 }
2284 void visit(PrioritizedChoice &ope) override {
2285 for (const auto &op : ope.opes_) {
2286 op->accept(*this);
2287 if (has_error) { return; }
2288 }
2289 }
2290 void visit(Repetition &ope) override {
2291 if (ope.max_ == std::numeric_limits<size_t>::max()) {
2293 ope.ope_->accept(vis);
2294 if (vis.is_empty) {
2295 has_error = true;
2296 error_s = vis.error_s;
2297 error_name = vis.error_name;
2298 }
2299 } else {
2300 ope.ope_->accept(*this);
2301 }
2302 }
2303 void visit(Reference &ope) override;
2304
2305 bool has_error = false;
2306 const char *error_s = nullptr;
2307 std::string error_name;
2308
2309private:
2310 std::vector<std::pair<const char *, std::string>> &refs_;
2311 std::unordered_map<std::string, bool> &has_error_cache_;
2312};
2313
2316
2318 const std::vector<std::string> &params)
2319 : grammar_(grammar), params_(params) {}
2320
2321 void visit(Reference &ope) override;
2322
2323 std::unordered_map<std::string, const char *> error_s;
2324 std::unordered_map<std::string, std::string> error_message;
2325 std::unordered_set<std::string> referenced;
2326
2327private:
2329 const std::vector<std::string> &params_;
2330};
2331
2334
2335 LinkReferences(Grammar &grammar, const std::vector<std::string> &params)
2336 : grammar_(grammar), params_(params) {}
2337
2338 void visit(Reference &ope) override;
2339
2340private:
2342 const std::vector<std::string> &params_;
2343};
2344
2346 using Ope::Visitor::visit;
2347
2348 FindReference(const std::vector<std::shared_ptr<Ope>> &args,
2349 const std::vector<std::string> &params)
2350 : args_(args), params_(params) {}
2351
2352 void visit(Sequence &ope) override {
2353 std::vector<std::shared_ptr<Ope>> opes;
2354 for (const auto &o : ope.opes_) {
2355 o->accept(*this);
2356 opes.emplace_back(std::move(found_ope));
2357 }
2358 found_ope = std::make_shared<Sequence>(opes);
2359 }
2360 void visit(PrioritizedChoice &ope) override {
2361 std::vector<std::shared_ptr<Ope>> opes;
2362 for (const auto &o : ope.opes_) {
2363 o->accept(*this);
2364 opes.emplace_back(std::move(found_ope));
2365 }
2366 found_ope = std::make_shared<PrioritizedChoice>(opes);
2367 }
2368 void visit(Repetition &ope) override {
2369 ope.ope_->accept(*this);
2370 found_ope = rep(found_ope, ope.min_, ope.max_);
2371 }
2372 void visit(AndPredicate &ope) override {
2373 ope.ope_->accept(*this);
2375 }
2376 void visit(NotPredicate &ope) override {
2377 ope.ope_->accept(*this);
2379 }
2380 void visit(Dictionary &ope) override { found_ope = ope.shared_from_this(); }
2381 void visit(LiteralString &ope) override {
2382 found_ope = ope.shared_from_this();
2383 }
2384 void visit(CharacterClass &ope) override {
2385 found_ope = ope.shared_from_this();
2386 }
2387 void visit(Character &ope) override { found_ope = ope.shared_from_this(); }
2388 void visit(AnyCharacter &ope) override { found_ope = ope.shared_from_this(); }
2389 void visit(CaptureScope &ope) override {
2390 ope.ope_->accept(*this);
2392 }
2393 void visit(Capture &ope) override {
2394 ope.ope_->accept(*this);
2396 }
2397 void visit(TokenBoundary &ope) override {
2398 ope.ope_->accept(*this);
2400 }
2401 void visit(Ignore &ope) override {
2402 ope.ope_->accept(*this);
2404 }
2405 void visit(WeakHolder &ope) override { ope.weak_.lock()->accept(*this); }
2406 void visit(Holder &ope) override { ope.ope_->accept(*this); }
2407 void visit(Reference &ope) override;
2408 void visit(Whitespace &ope) override {
2409 ope.ope_->accept(*this);
2411 }
2412 void visit(PrecedenceClimbing &ope) override {
2413 ope.atom_->accept(*this);
2415 }
2416 void visit(Recovery &ope) override {
2417 ope.ope_->accept(*this);
2419 }
2420 void visit(Cut &ope) override { found_ope = ope.shared_from_this(); }
2421
2422 std::shared_ptr<Ope> found_ope;
2423
2424private:
2425 const std::vector<std::shared_ptr<Ope>> &args_;
2426 const std::vector<std::string> &params_;
2427};
2428
2429/*
2430 * First-Set computation
2431 */
2434
2435 void visit(Sequence &ope) override {
2436 for (const auto &op : ope.opes_) {
2437 auto save = result_;
2438 result_ = FirstSet{};
2439 op->accept(*this);
2440 auto element_fs = result_;
2441 result_ = save;
2442 result_.chars |= element_fs.chars;
2443 if (element_fs.any_char) { result_.any_char = true; }
2444 if (!result_.first_literal) {
2445 result_.first_literal = element_fs.first_literal;
2446 }
2447 if (!result_.first_rule) { result_.first_rule = element_fs.first_rule; }
2448 if (!element_fs.can_be_empty) { return; }
2449 // This element can be empty, continue to next
2450 }
2451 result_.can_be_empty = true;
2452 }
2453 void visit(PrioritizedChoice &ope) override {
2454 auto save = result_;
2455 for (const auto &op : ope.opes_) {
2456 result_ = FirstSet{};
2457 op->accept(*this);
2458 save.merge(result_);
2459 }
2460 result_ = save;
2461 }
2462 void visit(Repetition &ope) override {
2463 ope.ope_->accept(*this);
2464 if (ope.min_ == 0) { result_.can_be_empty = true; }
2465 }
2466 void visit(AndPredicate &) override { result_.can_be_empty = true; }
2467 void visit(NotPredicate &) override { result_.can_be_empty = true; }
2468 void visit(Dictionary &ope) override {
2469 for (const auto &[key, info] : ope.trie_.dic_) {
2470 if (!key.empty()) {
2471 auto ch = static_cast<unsigned char>(key[0]);
2472 result_.chars.set(ch);
2473 if (ope.trie_.ignore_case_) {
2474 result_.chars.set(static_cast<unsigned char>(std::toupper(ch)));
2475 result_.chars.set(static_cast<unsigned char>(std::tolower(ch)));
2476 }
2477 }
2478 }
2479 }
2480 void visit(LiteralString &ope) override {
2481 if (ope.lit_.empty()) {
2482 result_.can_be_empty = true;
2483 } else {
2484 auto ch = static_cast<unsigned char>(ope.lit_[0]);
2485 result_.chars.set(ch);
2486 if (ope.ignore_case_) {
2487 result_.chars.set(static_cast<unsigned char>(std::toupper(ch)));
2488 result_.chars.set(static_cast<unsigned char>(std::tolower(ch)));
2489 }
2490 if (!result_.first_literal) { result_.first_literal = ope.lit_.c_str(); }
2491 }
2492 }
2493 void visit(CharacterClass &ope) override {
2494 for (const auto &range : ope.ranges_) {
2495 auto cp1 = range.first;
2496 auto cp2 = range.second;
2497 if (cp1 > 0x7F || cp2 > 0x7F) {
2498 // Non-ASCII range: conservative fallback
2499 result_.any_char = true;
2500 return;
2501 }
2502 for (auto cp = cp1; cp <= cp2; cp++) {
2503 auto ch = static_cast<unsigned char>(cp);
2504 result_.chars.set(ch);
2505 if (ope.ignore_case_) {
2506 result_.chars.set(static_cast<unsigned char>(std::toupper(ch)));
2507 result_.chars.set(static_cast<unsigned char>(std::tolower(ch)));
2508 }
2509 }
2510 }
2511 if (ope.negated_) {
2512 result_.chars.flip();
2513 result_.any_char = true; // negated class can match non-ASCII
2514 }
2515 }
2516 void visit(Character &ope) override {
2517 if (ope.ch_ > 0x7F) {
2518 result_.any_char = true;
2519 } else {
2520 result_.chars.set(static_cast<unsigned char>(ope.ch_));
2521 }
2522 }
2523 void visit(AnyCharacter &) override { result_.any_char = true; }
2524 void visit(User &) override { result_.any_char = true; }
2525 void visit(Reference &ope) override;
2526 void visit(BackReference &) override { result_.any_char = true; }
2527 void visit(Cut &) override { result_.can_be_empty = true; }
2528
2530
2531private:
2532 std::unordered_set<std::string> refs_;
2533};
2534
2537
2538 void visit(Sequence &ope) override;
2540
2541 void visit(PrioritizedChoice &ope) override {
2542 ope.first_sets_.clear();
2543 ope.first_sets_.reserve(ope.opes_.size());
2544 for (const auto &op : ope.opes_) {
2545 ComputeFirstSet cfs;
2546 op->accept(cfs);
2547 ope.first_sets_.push_back(cfs.result_);
2548 }
2549 for (const auto &op : ope.opes_) {
2550 op->accept(*this);
2551 }
2552 }
2553 void visit(Repetition &ope) override {
2554 ope.ope_->accept(*this);
2555 // ISpan optimization: detect Repetition + ASCII CharacterClass
2556 auto cc = dynamic_cast<CharacterClass *>(ope.ope_.get());
2557 if (cc && cc->is_ascii_only()) { ope.span_bitset_ = &cc->ascii_bitset(); }
2558 }
2559 void visit(Reference &ope) override;
2560
2561private:
2562 std::unordered_set<std::string> refs_;
2563};
2564
2565/*
2566 * Keywords
2567 */
// Names of the reserved definitions. Declared as inline constexpr so that
// this header provides a single, immutable definition of each name rather
// than one reassignable copy per translation unit.
inline constexpr const char *WHITESPACE_DEFINITION_NAME = "%whitespace";
inline constexpr const char *WORD_DEFINITION_NAME = "%word";
inline constexpr const char *RECOVER_DEFINITION_NAME = "%recover";
2571
2572/*
2573 * Definition
2574 */
2576public:
2577 struct Result {
2578 bool ret;
2580 size_t len;
2582 };
2583
2584 Definition() : holder_(std::make_shared<Holder>(this)) {}
2585
2586 Definition(const Definition &rhs) : name(rhs.name), holder_(rhs.holder_) {
2587 holder_->outer_ = this;
2588 }
2589
2590 Definition(const std::shared_ptr<Ope> &ope)
2591 : holder_(std::make_shared<Holder>(this)) {
2592 *this <= ope;
2593 }
2594
2595 operator std::shared_ptr<Ope>() {
2596 return std::make_shared<WeakHolder>(holder_);
2597 }
2598
2599 Definition &operator<=(const std::shared_ptr<Ope> &ope) {
2600 holder_->ope_ = ope;
2601 return *this;
2602 }
2603
2604 Result parse(const char *s, size_t n, const char *path = nullptr,
2605 Log log = nullptr) const {
2606 SemanticValues vs;
2607 std::any dt;
2608 return parse_core(s, n, vs, dt, path, log);
2609 }
2610
2611 Result parse(const char *s, const char *path = nullptr,
2612 Log log = nullptr) const {
2613 auto n = strlen(s);
2614 return parse(s, n, path, log);
2615 }
2616
2617 Result parse(const char *s, size_t n, std::any &dt,
2618 const char *path = nullptr, Log log = nullptr) const {
2619 SemanticValues vs;
2620 return parse_core(s, n, vs, dt, path, log);
2621 }
2622
2623 Result parse(const char *s, std::any &dt, const char *path = nullptr,
2624 Log log = nullptr) const {
2625 auto n = strlen(s);
2626 return parse(s, n, dt, path, log);
2627 }
2628
2629 template <typename T>
2630 Result parse_and_get_value(const char *s, size_t n, T &val,
2631 const char *path = nullptr,
2632 Log log = nullptr) const {
2633 SemanticValues vs;
2634 std::any dt;
2635 auto r = parse_core(s, n, vs, dt, path, log);
2636 if (r.ret && !vs.empty() && vs.front().has_value()) {
2637 val = std::any_cast<T>(vs[0]);
2638 }
2639 return r;
2640 }
2641
2642 template <typename T>
2643 Result parse_and_get_value(const char *s, T &val, const char *path = nullptr,
2644 Log log = nullptr) const {
2645 auto n = strlen(s);
2646 return parse_and_get_value(s, n, val, path, log);
2647 }
2648
2649 template <typename T>
2650 Result parse_and_get_value(const char *s, size_t n, std::any &dt, T &val,
2651 const char *path = nullptr,
2652 Log log = nullptr) const {
2653 SemanticValues vs;
2654 auto r = parse_core(s, n, vs, dt, path, log);
2655 if (r.ret && !vs.empty() && vs.front().has_value()) {
2656 val = std::any_cast<T>(vs[0]);
2657 }
2658 return r;
2659 }
2660
2661 template <typename T>
2662 Result parse_and_get_value(const char *s, std::any &dt, T &val,
2663 const char *path = nullptr,
2664 Log log = nullptr) const {
2665 auto n = strlen(s);
2666 return parse_and_get_value(s, n, dt, val, path, log);
2667 }
2668
#if defined(__cpp_lib_char8_t)
// char8_t front ends: each overload reinterprets the input pointer as
// `const char *` and forwards to the matching `const char *` overload.
Result parse(const char8_t *s, size_t n, const char *path = nullptr,
             Log log = nullptr) const {
  const auto *bytes = reinterpret_cast<const char *>(s);
  return parse(bytes, n, path, log);
}

Result parse(const char8_t *s, const char *path = nullptr,
             Log log = nullptr) const {
  const auto *bytes = reinterpret_cast<const char *>(s);
  return parse(bytes, path, log);
}

Result parse(const char8_t *s, size_t n, std::any &dt,
             const char *path = nullptr, Log log = nullptr) const {
  const auto *bytes = reinterpret_cast<const char *>(s);
  return parse(bytes, n, dt, path, log);
}

Result parse(const char8_t *s, std::any &dt, const char *path = nullptr,
             Log log = nullptr) const {
  const auto *bytes = reinterpret_cast<const char *>(s);
  return parse(bytes, dt, path, log);
}

template <typename T>
Result parse_and_get_value(const char8_t *s, size_t n, T &val,
                           const char *path = nullptr,
                           Log log = nullptr) const {
  const auto *bytes = reinterpret_cast<const char *>(s);
  return parse_and_get_value(bytes, n, val, path, log);
}

template <typename T>
Result parse_and_get_value(const char8_t *s, T &val,
                           const char *path = nullptr,
                           Log log = nullptr) const {
  const auto *bytes = reinterpret_cast<const char *>(s);
  return parse_and_get_value(bytes, val, path, log);
}

template <typename T>
Result parse_and_get_value(const char8_t *s, size_t n, std::any &dt, T &val,
                           const char *path = nullptr,
                           Log log = nullptr) const {
  const auto *bytes = reinterpret_cast<const char *>(s);
  return parse_and_get_value(bytes, n, dt, val, path, log);
}

template <typename T>
Result parse_and_get_value(const char8_t *s, std::any &dt, T &val,
                           const char *path = nullptr,
                           Log log = nullptr) const {
  const auto *bytes = reinterpret_cast<const char *>(s);
  return parse_and_get_value(bytes, dt, val, path, log);
}
#endif
2722
2723 void operator=(Action a) { action = a; }
2724
2725 template <typename T> Definition &operator,(T fn) {
2726 operator=(fn);
2727 return *this;
2728 }
2729
2731 ignoreSemanticValue = true;
2732 return *this;
2733 }
2734
2735 void accept(Ope::Visitor &v) { holder_->accept(v); }
2736
2737 std::shared_ptr<Ope> get_core_operator() const { return holder_->ope_; }
2738
2739 bool is_token() const {
2740 std::call_once(is_token_init_, [this]() {
2742 });
2743 return is_token_;
2744 }
2745
2746 std::string name;
2747 const char *s_ = nullptr;
2748 std::pair<size_t, size_t> line_ = {1, 1};
2749
2751
2752 size_t id = 0;
2754 std::function<void(const Context &c, const char *s, size_t n, std::any &dt)>
2756 std::function<void(const Context &c, const char *s, size_t n, size_t matchlen,
2757 std::any &value, std::any &dt)>
2760 std::shared_ptr<Ope> whitespaceOpe;
2761 std::shared_ptr<Ope> wordOpe;
2763 bool is_macro = false;
2764 std::vector<std::string> params;
2765 bool disable_action = false;
2766 bool is_left_recursive = false;
2767 bool can_be_empty = false;
2768
2771 bool verbose_trace = false;
2774
2775 std::string error_message;
2776 bool no_ast_opt = false;
2777
2778 bool eoi_check = true;
2779
2780 // Per-rule packrat stats (optional, for profiling)
2781 mutable bool collect_packrat_stats = false;
2782 mutable std::vector<Context::PackratStats> packrat_stats_;
2783
2784private:
2785 friend class Reference;
2786 friend class ParserGenerator;
2787
2790
2792 std::call_once(definition_ids_init_, [&]() {
2794 holder_->accept(vis);
2795 if (whitespaceOpe) { whitespaceOpe->accept(vis); }
2796 if (wordOpe) { wordOpe->accept(vis); }
2797 definition_ids_.swap(vis.ids);
2798 });
2799 }
2800
2801 void initialize_packrat_filter() const;
2802
2803 Result parse_core(const char *s, size_t n, SemanticValues &vs, std::any &dt,
2804 const char *path, Log log) const {
2806
2807 std::shared_ptr<Ope> ope = holder_;
2808
2809 std::any trace_data;
2810 if (tracer_start) { tracer_start(trace_data); }
2811 auto se = scope_exit([&]() {
2812 if (tracer_end) { tracer_end(trace_data); }
2813 });
2814
2815 Context c(path, s, n, definition_ids_.size(), whitespaceOpe, wordOpe,
2817 verbose_trace, log);
2818
2820 packrat_stats_.resize(definition_ids_.size());
2821 c.packrat_stats = &packrat_stats_;
2822 }
2823
2826 if (!packrat_filter_.empty()) {
2827 c.packrat_rule_filter = &packrat_filter_;
2828 }
2829 }
2830
2831 size_t i = 0;
2832
2833 if (whitespaceOpe) {
2834 auto save_ignore_trace_state = c.ignore_trace_state;
2835 c.ignore_trace_state = !c.verbose_trace;
2836 auto se =
2837 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
2838
2839 auto len = whitespaceOpe->parse(s, n, vs, c, dt);
2840 if (fail(len)) { return Result{false, c.recovered, i, c.error_info}; }
2841
2842 i = len;
2843 }
2844
2845 auto len = ope->parse(s + i, n - i, vs, c, dt);
2846 auto ret = success(len);
2847 if (ret) {
2848 i += len;
2849 if (eoi_check) {
2850 if (i < n) {
2851 if (c.error_info.error_pos - c.s < s + i - c.s) {
2852 c.error_info.message_pos = s + i;
2853 c.error_info.message = "expected end of input";
2854 }
2855 ret = false;
2856 }
2857 }
2858 }
2859 return Result{ret, c.recovered, i, c.error_info};
2860 }
2861
2862 std::shared_ptr<Holder> holder_;
2863 mutable std::once_flag is_token_init_;
2864 mutable bool is_token_ = false;
2865 mutable std::once_flag assign_id_to_definition_init_;
2866 mutable std::once_flag definition_ids_init_;
2867 mutable std::unordered_map<void *, size_t> definition_ids_;
2868 mutable std::once_flag packrat_filter_init_;
2869 mutable std::vector<bool> packrat_filter_;
2870};
2871
2872/*
2873 * Implementations
2874 */
2875
2876inline size_t parse_literal(const char *s, size_t n, SemanticValues &vs,
2877 Context &c, std::any &dt, const std::string &lit,
2878 std::once_flag &init_is_word, bool &is_word,
2879 bool ignore_case, const std::string &lower_lit) {
2880 size_t i = 0;
2881 for (; i < lit.size(); i++) {
2882 if (i >= n ||
2883 (ignore_case ? (static_cast<char>(std::tolower(
2884 static_cast<unsigned char>(s[i]))) != lower_lit[i])
2885 : (s[i] != lit[i]))) {
2886 c.set_error_pos(s, lit.data());
2887 return static_cast<size_t>(-1);
2888 }
2889 }
2890
2891 // Word check
2892 if (c.wordOpe) {
2893 auto save_ignore_trace_state = c.ignore_trace_state;
2895 auto se =
2896 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
2897
2898 std::call_once(init_is_word, [&]() {
2899 SemanticValues dummy_vs;
2900 Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr,
2901 nullptr, nullptr, false, nullptr);
2902 std::any dummy_dt;
2903
2904 auto len =
2905 c.wordOpe->parse(lit.data(), lit.size(), dummy_vs, dummy_c, dummy_dt);
2906 is_word = success(len);
2907 });
2908
2909 if (is_word) {
2910 SemanticValues dummy_vs;
2911 Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr,
2912 nullptr, nullptr, false, nullptr);
2913 std::any dummy_dt;
2914
2915 NotPredicate ope(c.wordOpe);
2916 auto len = ope.parse(s + i, n - i, dummy_vs, dummy_c, dummy_dt);
2917 if (fail(len)) {
2918 c.set_error_pos(s, lit.data());
2919 return len;
2920 }
2921 i += len;
2922 }
2923 }
2924
2925 // Skip whitespace
2926 auto wl = c.skip_whitespace(s + i, n - i, vs, dt);
2927 if (fail(wl)) { return wl; }
2928 i += wl;
2929
2930 return i;
2931}
2932
2933inline std::pair<size_t, size_t> SemanticValues::line_info() const {
2934 assert(c_);
2935 return c_->line_info(sv_.data());
2936}
2937
2938inline void ErrorInfo::output_log(const Log &log, const char *s, size_t n) {
2939 if (message_pos) {
2942 auto line = line_info(s, message_pos);
2943 std::string msg;
2944 if (auto unexpected_token = heuristic_error_token(s, n, message_pos);
2945 !unexpected_token.empty()) {
2946 msg = replace_all(message, "%t", unexpected_token);
2947
2948 auto unexpected_char = unexpected_token.substr(
2949 0,
2950 codepoint_length(unexpected_token.data(), unexpected_token.size()));
2951
2952 msg = replace_all(msg, "%c", unexpected_char);
2953 } else {
2954 msg = message;
2955 }
2956 log(line.first, line.second, msg, label);
2957 }
2958 } else if (error_pos) {
2959 if (error_pos > last_output_pos) {
2961 auto line = line_info(s, error_pos);
2962
2963 std::string msg;
2964 if (expected_tokens.empty()) {
2965 msg = "syntax error.";
2966 } else {
2967 msg = "syntax error";
2968
2969 // unexpected token
2970 if (auto unexpected_token = heuristic_error_token(s, n, error_pos);
2971 !unexpected_token.empty()) {
2972 msg += ", unexpected '";
2973 msg += unexpected_token;
2974 msg += "'";
2975 }
2976
2977 auto first_item = true;
2978 size_t i = 0;
2979 while (i < expected_tokens.size()) {
2980 auto [error_literal, error_rule] = expected_tokens[i];
2981
2982 // Skip rules start with '_'
2983 if (!(error_rule && error_rule->name[0] == '_')) {
2984 msg += (first_item ? ", expecting " : ", ");
2985 if (error_literal) {
2986 msg += "'";
2987 msg += error_literal;
2988 msg += "'";
2989 } else {
2990 msg += "<" + error_rule->name + ">";
2991 if (label.empty()) { label = error_rule->name; }
2992 }
2993 first_item = false;
2994 }
2995
2996 i++;
2997 }
2998 msg += ".";
2999 }
3000 log(line.first, line.second, msg, label);
3001 }
3002 }
3003}
3004
3005inline size_t Context::skip_whitespace(const char *a_s, size_t n,
3006 SemanticValues &vs, std::any &dt) {
3007 if (in_token_boundary_count || !whitespaceOpe) { return 0; }
3008 auto save = ignore_trace_state;
3010 auto se = scope_exit([&]() { ignore_trace_state = save; });
3011 return whitespaceOpe->parse(a_s, n, vs, *this, dt);
3012}
3013
3014inline void Context::set_error_pos(const char *a_s, const char *literal) {
3015 if (log) {
3016 if (error_info.error_pos <= a_s) {
3017 if (error_info.error_pos < a_s || !error_info.keep_previous_token) {
3018 error_info.error_pos = a_s;
3019 error_info.expected_tokens.clear();
3020 }
3021
3022 const char *error_literal = nullptr;
3023 const Definition *error_rule = nullptr;
3024
3025 if (literal) {
3026 error_literal = literal;
3027 } else if (!rule_stack.empty()) {
3028 auto rule = rule_stack.back();
3029 auto ope = rule->get_core_operator();
3030 if (auto token = FindLiteralToken::token(*ope);
3031 token && token[0] != '\0') {
3032 error_literal = token;
3033 }
3034 }
3035
3036 for (auto r : rule_stack) {
3037 error_rule = r;
3038 if (r->is_token()) { break; }
3039 }
3040
3041 if (error_literal || error_rule) {
3042 error_info.add(error_literal, error_rule);
3043 }
3044 }
3045 }
3046}
3047
3048inline void Context::trace_enter(const Ope &ope, const char *a_s, size_t n,
3049 const SemanticValues &vs, std::any &dt) {
3050 trace_ids.push_back(next_trace_id++);
3051 tracer_enter(ope, a_s, n, vs, *this, dt, trace_data);
3052}
3053
3054inline void Context::trace_leave(const Ope &ope, const char *a_s, size_t n,
3055 const SemanticValues &vs, std::any &dt,
3056 size_t len) {
3057 tracer_leave(ope, a_s, n, vs, *this, dt, len, trace_data);
3058 trace_ids.pop_back();
3059}
3060
3061inline bool Context::is_traceable(const Ope &ope) const {
3062 if (tracer_enter && tracer_leave) {
3063 if (ignore_trace_state) { return false; }
3064 return !dynamic_cast<const peg::Reference *>(&ope);
3065 }
3066 return false;
3067}
3068
3069inline size_t Ope::parse(const char *s, size_t n, SemanticValues &vs,
3070 Context &c, std::any &dt) const {
3071 if (c.is_traceable(*this)) {
3072 c.trace_enter(*this, s, n, vs, dt);
3073 auto len = parse_core(s, n, vs, c, dt);
3074 c.trace_leave(*this, s, n, vs, dt, len);
3075 return len;
3076 }
3077 return parse_core(s, n, vs, c, dt);
3078}
3079
3080inline size_t Dictionary::parse_core(const char *s, size_t n,
3081 SemanticValues &vs, Context &c,
3082 std::any &dt) const {
3083 size_t id;
3084 auto i = trie_.match(s, n, id);
3085
3086 if (i == 0) {
3087 c.set_error_pos(s);
3088 return static_cast<size_t>(-1);
3089 }
3090
3091 vs.choice_count_ = trie_.items_count();
3092 vs.choice_ = id;
3093
3094 // Word check
3095 if (c.wordOpe) {
3096 auto save_ignore_trace_state = c.ignore_trace_state;
3098 auto se =
3099 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
3100
3101 {
3102 SemanticValues dummy_vs;
3103 Context dummy_c(nullptr, c.s, c.l, 0, nullptr, nullptr, false, nullptr,
3104 nullptr, nullptr, false, nullptr);
3105 std::any dummy_dt;
3106
3107 NotPredicate ope(c.wordOpe);
3108 auto len = ope.parse(s + i, n - i, dummy_vs, dummy_c, dummy_dt);
3109 if (fail(len)) {
3110 c.set_error_pos(s);
3111 return len;
3112 }
3113 i += len;
3114 }
3115 }
3116
3117 // Skip whitespace
3118 auto wl = c.skip_whitespace(s + i, n - i, vs, dt);
3119 if (fail(wl)) { return wl; }
3120 i += wl;
3121
3122 return i;
3123}
3124
3125inline size_t LiteralString::parse_core(const char *s, size_t n,
3126 SemanticValues &vs, Context &c,
3127 std::any &dt) const {
3128 return parse_literal(s, n, vs, c, dt, lit_, init_is_word_, is_word_,
3130}
3131
3132inline size_t TokenBoundary::parse_core(const char *s, size_t n,
3133 SemanticValues &vs, Context &c,
3134 std::any &dt) const {
3135 auto save_ignore_trace_state = c.ignore_trace_state;
3137 auto se =
3138 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
3139
3140 size_t len;
3141 {
3143 auto se = scope_exit([&]() { c.in_token_boundary_count--; });
3144 len = ope_->parse(s, n, vs, c, dt);
3145 }
3146
3147 if (success(len)) {
3148 vs.tokens.emplace_back(std::string_view(s, len));
3149
3150 auto wl = c.skip_whitespace(s + len, n - len, vs, dt);
3151 if (fail(wl)) { return wl; }
3152 len += wl;
3153 }
3154 return len;
3155}
3156
3157inline size_t Holder::parse_core(const char *s, size_t n, SemanticValues &vs,
3158 Context &c, std::any &dt) const {
3159 if (!ope_) {
3160 throw std::logic_error("Uninitialized definition ope was used...");
3161 }
3162
3163 // Macro reference
3164 if (outer_->is_macro) {
3165 c.rule_stack.push_back(outer_);
3166 auto len = ope_->parse(s, n, vs, c, dt);
3167 c.rule_stack.pop_back();
3168 return len;
3169 }
3170
3171 size_t len;
3172 std::any val;
3173
3174 // Shared parse body: invokes enter/leave callbacks, parses the rule's
3175 // operator, handles actions/predicates/errors, and calls reduce.
3176 // Returns {parse_len, parse_val}.
3177 auto do_parse = [&]() {
3178 size_t parse_len;
3179 std::any parse_val;
3180
3181 if (outer_->enter) { outer_->enter(c, s, n, dt); }
3182 auto &chvs = c.push_semantic_values_scope();
3183 auto se = scope_exit([&]() {
3185 if (outer_->leave) { outer_->leave(c, s, n, parse_len, parse_val, dt); }
3186 });
3187
3188 c.rule_stack.push_back(outer_);
3189 parse_len = ope_->parse(s, n, chvs, c, dt);
3190 c.rule_stack.pop_back();
3191
3192 if (success(parse_len)) {
3193 chvs.sv_ = std::string_view(s, parse_len);
3194 chvs.name_ = outer_->name;
3195
3196 auto ope_ptr = ope_.get();
3197 if (ope_ptr->is_token_boundary) {
3198 ope_ptr = static_cast<const peg::TokenBoundary *>(ope_ptr)->ope_.get();
3199 }
3200 if (!ope_ptr->is_choice_like) {
3201 chvs.choice_count_ = 0;
3202 chvs.choice_ = 0;
3203 }
3204
3205 std::string msg;
3206 std::any predicate_data;
3207 if (outer_->predicate) {
3208 if (!outer_->predicate(chvs, dt, msg, predicate_data)) {
3209 if (c.log && !msg.empty() && c.error_info.message_pos < s) {
3210 c.error_info.message_pos = s;
3211 c.error_info.message = msg;
3212 c.error_info.label = outer_->name;
3213 }
3214 parse_len = static_cast<size_t>(-1);
3215 }
3216 }
3217
3218 if (success(parse_len)) {
3219 if (!c.recovered) { parse_val = reduce(chvs, dt, predicate_data); }
3220 } else {
3221 if (c.log && !msg.empty() && c.error_info.message_pos < s) {
3222 c.error_info.message_pos = s;
3223 c.error_info.message = msg;
3224 c.error_info.label = outer_->name;
3225 }
3226 }
3227 } else {
3228 if (c.log && !outer_->error_message.empty() &&
3229 c.error_info.message_pos < s) {
3230 c.error_info.message_pos = s;
3231 c.error_info.message = outer_->error_message;
3232 c.error_info.label = outer_->name;
3233 }
3234 }
3235
3236 return std::make_pair(parse_len, std::move(parse_val));
3237 };
3238
3239 if (outer_->is_left_recursive) {
3240 auto lr_key = std::make_pair(outer_, s);
3241
3242 // Check LR memo first
3243 auto it = c.lr_memo.find(lr_key);
3244 if (it != c.lr_memo.end()) {
3245 if (success(it->second.len)) {
3246 len = it->second.len;
3247 val = it->second.val;
3248 } else {
3249 len = static_cast<size_t>(-1);
3250 }
3251 // Record that this rule's lr_memo was accessed.
3252 // Any LR rule currently seeding will know we're in its cycle.
3253 c.lr_refs_hit.insert(outer_);
3254 } else {
3255 // Seed with FAIL
3256 c.lr_memo[lr_key] = {static_cast<size_t>(-1), {}};
3257
3258 // Mark as active seed (protects our lr_memo from inner growers)
3259 c.lr_active_seeds.insert(lr_key);
3260 auto seed_guard = scope_exit([&]() { c.lr_active_seeds.erase(lr_key); });
3261
3262 // Track which LR rules are referenced during our parse
3263 // to identify cycle members
3264 auto saved_refs = std::move(c.lr_refs_hit);
3265 c.lr_refs_hit.clear();
3266
3267 // Initial parse (self-references will hit the FAIL seed)
3268 auto [initial_len, initial_val] = do_parse();
3269
3270 // Rules whose lr_memo was hit during our parse are in our cycle.
3271 // If we detected cycle members, we ourselves are also part of
3272 // the cycle, so add self — this lets parent seeders see us as
3273 // a transitive cycle member.
3274 auto cycle_rules = c.lr_refs_hit;
3275 if (!cycle_rules.empty()) { cycle_rules.insert(outer_); }
3276
3277 // Restore parent's refs and propagate cycle info upward
3278 c.lr_refs_hit = std::move(saved_refs);
3279 c.lr_refs_hit.insert(cycle_rules.begin(), cycle_rules.end());
3280
3281 if (!success(initial_len)) {
3282 // Keep FAIL in lr_memo so we don't re-seed
3283 len = static_cast<size_t>(-1);
3284 } else {
3285 // Got initial seed, now grow
3286 len = initial_len;
3287 val = std::move(initial_val);
3288 c.lr_memo[lr_key] = {len, val};
3289
3290 while (true) {
3291 // Clear this rule's packrat cache
3292 c.clear_packrat_cache(s, outer_->id);
3293
3294 // Clear lr_memo for cycle-dependent rules at this position,
3295 // but NOT for rules currently in their own seeding phase
3296 // (lr_active_seeds) — those are outer growers we must not
3297 // interfere with.
3298 for (auto memo_it = c.lr_memo.begin(); memo_it != c.lr_memo.end();) {
3299 if (memo_it->first.second == s && memo_it->first.first != outer_ &&
3300 cycle_rules.count(memo_it->first.first) &&
3301 !c.lr_active_seeds.count(memo_it->first)) {
3302 memo_it = c.lr_memo.erase(memo_it);
3303 } else {
3304 ++memo_it;
3305 }
3306 }
3307
3308 auto [new_len, new_val] = do_parse();
3309
3310 if (!success(new_len) || new_len <= len) {
3311 break; // No improvement, done growing
3312 }
3313
3314 len = new_len;
3315 val = std::move(new_val);
3316 c.lr_memo[lr_key] = {len, val};
3317 }
3318 }
3319
3320 // Write final result to packrat cache (lr_memo entry is kept as
3321 // the primary lookup for LR rules at this position)
3322 if (success(len)) { c.write_packrat_cache(s, outer_->id, len, val); }
3323 }
3324 } else {
3325 if (c.enablePackratParsing) {
3326 // Packrat cache acts as re-entry guard (pre-registered as
3327 // failure before fn is called).
3328 c.packrat(s, outer_->id, len, val, [&](std::any &a_val) {
3329 auto [parse_len, parse_val] = do_parse();
3330 len = parse_len;
3331 if (success(len)) { a_val = std::move(parse_val); }
3332 });
3333 } else {
3334 // Without packrat, use lr_memo as re-entry guard to prevent
3335 // stack overflow from undetected left recursion.
3336 auto guard_key = std::make_pair(outer_, s);
3337 if (c.lr_memo.count(guard_key)) {
3338 len = static_cast<size_t>(-1);
3339 } else {
3340 c.lr_memo[guard_key] = {static_cast<size_t>(-1), {}};
3341 auto [parse_len, parse_val] = do_parse();
3342 len = parse_len;
3343 val = std::move(parse_val);
3344 c.lr_memo.erase(guard_key);
3345 }
3346 }
3347 }
3348
3349 if (success(len)) {
3350 if (!outer_->ignoreSemanticValue) {
3351 vs.emplace_back(std::move(val));
3352 vs.tags.emplace_back(str2tag(outer_->name));
3353 }
3354 }
3355
3356 return len;
3357}
3358
3359inline std::any Holder::reduce(SemanticValues &vs, std::any &dt,
3360 const std::any &predicate_data) const {
3361 if (outer_->action && !outer_->disable_action) {
3362 return outer_->action(vs, dt, predicate_data);
3363 } else if (vs.empty()) {
3364 return std::any();
3365 } else {
3366 return std::move(vs.front());
3367 }
3368}
3369
3370inline const std::string &Holder::name() const { return outer_->name; }
3371
3372inline const std::string &Holder::trace_name() const {
3373 std::call_once(trace_name_init_,
3374 [this]() { trace_name_ = "[" + outer_->name + "]"; });
3375 return trace_name_;
3376}
3377
3378inline size_t Reference::parse_core(const char *s, size_t n, SemanticValues &vs,
3379 Context &c, std::any &dt) const {
3380 auto save_ignore_trace_state = c.ignore_trace_state;
3381 if (rule_ && rule_->ignoreSemanticValue) {
3383 }
3384 auto se =
3385 scope_exit([&]() { c.ignore_trace_state = save_ignore_trace_state; });
3386
3387 if (rule_) {
3388 // Reference rule
3389 if (rule_->is_macro) {
3390 // Macro
3391 FindReference vis(c.top_args(), c.rule_stack.back()->params);
3392
3393 // Collect arguments
3394 std::vector<std::shared_ptr<Ope>> args;
3395 for (const auto &arg : args_) {
3396 arg->accept(vis);
3397 args.emplace_back(std::move(vis.found_ope));
3398 }
3399
3400 c.push_args(std::move(args));
3401 auto se = scope_exit([&]() { c.pop_args(); });
3402 return rule_->holder_->parse(s, n, vs, c, dt);
3403 } else {
3404 // Definition
3405 c.push_args(std::vector<std::shared_ptr<Ope>>());
3406 auto se2 = scope_exit([&]() { c.pop_args(); });
3407 return rule_->holder_->parse(s, n, vs, c, dt);
3408 }
3409 } else {
3410 // Reference parameter in macro
3411 const auto &args = c.top_args();
3412 return args[iarg_]->parse(s, n, vs, c, dt);
3413 }
3414}
3415
3416inline std::shared_ptr<Ope> Reference::get_core_operator() const {
3417 return rule_->holder_;
3418}
3419
3420inline size_t BackReference::parse_core(const char *s, size_t n,
3421 SemanticValues &vs, Context &c,
3422 std::any &dt) const {
3423 for (auto it = c.capture_entries.rbegin(); it != c.capture_entries.rend();
3424 ++it) {
3425 if (it->first == name_) {
3426 const auto &lit = it->second;
3427 std::once_flag init_is_word;
3428 auto is_word = false;
3429 static const std::string empty;
3430 return parse_literal(s, n, vs, c, dt, lit, init_is_word, is_word, false,
3431 empty);
3432 }
3433 }
3434
3435 c.error_info.message_pos = s;
3436 c.error_info.message = "undefined back reference '$" + name_ + "'...";
3437 return static_cast<size_t>(-1);
3438}
3439
3440inline Definition &
3442 if (rule_.is_macro) {
3443 // Reference parameter in macro
3444 const auto &args = c.top_args();
3445 auto iarg = dynamic_cast<Reference &>(*binop_).iarg_;
3446 auto arg = args[iarg];
3447 return *dynamic_cast<Reference &>(*arg).rule_;
3448 }
3449
3450 return *dynamic_cast<Reference &>(*binop_).rule_;
3451}
3452
3453inline size_t PrecedenceClimbing::parse_expression(const char *s, size_t n,
3454 SemanticValues &vs,
3455 Context &c, std::any &dt,
3456 size_t min_prec) const {
3457 auto len = atom_->parse(s, n, vs, c, dt);
3458 if (fail(len)) { return len; }
3459
3460 std::string tok;
3461 auto &rule = get_reference_for_binop(c);
3462 auto action = std::move(rule.action);
3463
3464 rule.action = [&](SemanticValues &vs2, std::any &dt2,
3465 const std::any &predicate_data2) {
3466 tok = vs2.token();
3467 if (action) {
3468 return action(vs2, dt2, predicate_data2);
3469 } else if (!vs2.empty()) {
3470 return vs2[0];
3471 }
3472 return std::any();
3473 };
3474 auto action_se = scope_exit([&]() { rule.action = std::move(action); });
3475
3476 auto i = len;
3477 while (i < n) {
3478 std::vector<std::any> save_values(vs.begin(), vs.end());
3479 auto save_tokens = vs.tokens;
3480
3481 auto chvs = c.push_semantic_values_scope();
3482 auto chlen = binop_->parse(s + i, n - i, chvs, c, dt);
3484
3485 if (fail(chlen)) { break; }
3486
3487 auto it = info_.find(tok);
3488 if (it == info_.end()) { break; }
3489
3490 auto level = std::get<0>(it->second);
3491 auto assoc = std::get<1>(it->second);
3492
3493 if (level < min_prec) { break; }
3494
3495 vs.emplace_back(std::move(chvs[0]));
3496 i += chlen;
3497
3498 auto next_min_prec = level;
3499 if (assoc == 'L') { next_min_prec = level + 1; }
3500
3501 chvs = c.push_semantic_values_scope();
3502 chlen = parse_expression(s + i, n - i, chvs, c, dt, next_min_prec);
3504
3505 if (fail(chlen)) {
3506 vs.assign(save_values.begin(), save_values.end());
3507 vs.tokens = save_tokens;
3508 i = chlen;
3509 break;
3510 }
3511
3512 vs.emplace_back(std::move(chvs[0]));
3513 i += chlen;
3514
3515 std::any val;
3516 if (rule_.action) {
3517 vs.sv_ = std::string_view(s, i);
3518 static const std::any empty_predicate_data;
3519 val = rule_.action(vs, dt, empty_predicate_data);
3520 } else if (!vs.empty()) {
3521 val = vs[0];
3522 }
3523 vs.clear();
3524 vs.emplace_back(std::move(val));
3525 }
3526
3527 return i;
3528}
3529
3530inline size_t Recovery::parse_core(const char *s, size_t n,
3531 SemanticValues & /*vs*/, Context &c,
3532 std::any & /*dt*/) const {
3533 const auto &rule = dynamic_cast<Reference &>(*ope_);
3534
3535 // Custom error message
3536 if (c.log) {
3537 auto label = dynamic_cast<Reference *>(rule.args_[0].get());
3538 if (label && !label->rule_->error_message.empty()) {
3539 c.error_info.message_pos = s;
3540 c.error_info.message = label->rule_->error_message;
3541 c.error_info.label = label->rule_->name;
3542 }
3543 }
3544
3545 // Recovery
3546 auto len = static_cast<size_t>(-1);
3547 {
3548 auto save_log = c.log;
3549 c.log = nullptr;
3550 auto se = scope_exit([&]() { c.log = save_log; });
3551
3552 SemanticValues dummy_vs;
3553 std::any dummy_dt;
3554
3555 len = rule.parse(s, n, dummy_vs, c, dummy_dt);
3556 }
3557
3558 if (success(len)) {
3559 c.recovered = true;
3560
3561 if (c.log) {
3562 c.error_info.output_log(c.log, c.s, c.l);
3563 c.error_info.clear();
3564 }
3565 }
3566
3567 // Cut
3568 if (!c.cut_stack.empty()) {
3569 c.cut_stack.back() = true;
3570
3571 if (c.cut_stack.size() == 1) {
3572 // TODO: Remove unneeded entries in packrat memoise table
3573 }
3574 }
3575
3576 return len;
3577}
3578
3579inline void Sequence::accept(Visitor &v) { v.visit(*this); }
3580inline void PrioritizedChoice::accept(Visitor &v) { v.visit(*this); }
3581inline void Repetition::accept(Visitor &v) { v.visit(*this); }
3582inline void AndPredicate::accept(Visitor &v) { v.visit(*this); }
3583inline void NotPredicate::accept(Visitor &v) { v.visit(*this); }
3584inline void Dictionary::accept(Visitor &v) { v.visit(*this); }
3585inline void LiteralString::accept(Visitor &v) { v.visit(*this); }
3586inline void CharacterClass::accept(Visitor &v) { v.visit(*this); }
3587inline void Character::accept(Visitor &v) { v.visit(*this); }
3588inline void AnyCharacter::accept(Visitor &v) { v.visit(*this); }
3589inline void CaptureScope::accept(Visitor &v) { v.visit(*this); }
3590inline void Capture::accept(Visitor &v) { v.visit(*this); }
3591inline void TokenBoundary::accept(Visitor &v) { v.visit(*this); }
3592inline void Ignore::accept(Visitor &v) { v.visit(*this); }
3593inline void User::accept(Visitor &v) { v.visit(*this); }
3594inline void WeakHolder::accept(Visitor &v) { v.visit(*this); }
3595inline void Holder::accept(Visitor &v) { v.visit(*this); }
3596inline void Reference::accept(Visitor &v) { v.visit(*this); }
3597inline void Whitespace::accept(Visitor &v) { v.visit(*this); }
3598inline void BackReference::accept(Visitor &v) { v.visit(*this); }
3599inline void PrecedenceClimbing::accept(Visitor &v) { v.visit(*this); }
3600inline void Recovery::accept(Visitor &v) { v.visit(*this); }
3601inline void Cut::accept(Visitor &v) { v.visit(*this); }
3602
3604 auto p = static_cast<void *>(ope.outer_);
3605 if (ids.count(p)) { return; }
3606 auto id = ids.size();
3607 ids[p] = id;
3608 ope.outer_->id = id;
3609 ope.ope_->accept(*this);
3610}
3611
3613 if (ope.rule_) {
3614 for (const auto &arg : ope.args_) {
3615 arg->accept(*this);
3616 }
3617 ope.rule_->accept(*this);
3618 }
3619}
3620
3622 ope.atom_->accept(*this);
3623 ope.binop_->accept(*this);
3624}
3625
3627 if (ope.is_macro_) {
3628 for (const auto &arg : ope.args_) {
3629 arg->accept(*this);
3630 }
3631 } else {
3632 has_rule_ = true;
3633 }
3634}
3635
3637 if (ope.is_macro_) {
3638 ope.rule_->accept(*this);
3639 for (const auto &arg : ope.args_) {
3640 arg->accept(*this);
3641 }
3642 }
3643}
3644
3646 result = ope.rule_ && ope.rule_->can_be_empty;
3647}
3648
3650 if (ope.name_ == name_) {
3651 error_s = ope.s_;
3652 } else if (!ope.rule_ && !macro_args_stack_.empty()) {
3653 // Macro parameter reference: resolve through nested macro arg
3654 // stacks (e.g. B(X) <- C(X) where X is itself a param ref).
3655 auto resolved = resolve_macro_arg(ope.iarg_);
3656 if (resolved) {
3657 resolved->accept(*this);
3658 if (done_ == false) { return; }
3659 }
3660 } else if (!refs_.count(ope.name_)) {
3661 refs_.insert(ope.name_);
3662 if (ope.rule_) {
3663 if (ope.is_macro_) { macro_args_stack_.push_back(&ope.args_); }
3664 ope.rule_->accept(*this);
3665 if (ope.is_macro_) { macro_args_stack_.pop_back(); }
3666 if (done_ == false) { return; }
3667 }
3668 }
3669 // If the referenced rule can match empty, don't mark as done —
3670 // the sequence may continue past this element to find LR.
3671 if (!ope.rule_ && !macro_args_stack_.empty()) {
3672 auto resolved = resolve_macro_arg(ope.iarg_);
3673 if (resolved) {
3675 resolved->accept(cbe);
3676 done_ = !cbe.result;
3677 } else {
3678 done_ = true;
3679 }
3680 } else {
3681 done_ = !(ope.rule_ && ope.rule_->can_be_empty);
3682 }
3683}
3684
3685inline std::shared_ptr<Ope>
3687 for (int i = static_cast<int>(macro_args_stack_.size()) - 1; i >= 0; i--) {
3688 auto &args = *macro_args_stack_[i];
3689 if (iarg >= args.size()) { return nullptr; }
3690 auto ref = dynamic_cast<Reference *>(args[iarg].get());
3691 if (ref && !ref->rule_) {
3692 // Another param ref — resolve using parent level's args
3693 iarg = ref->iarg_;
3694 continue;
3695 }
3696 return args[iarg];
3697 }
3698 return nullptr;
3699}
3700
3702 auto save_is_empty = false;
3703 const char *save_error_s = nullptr;
3704 std::string save_error_name;
3705
3706 auto it = ope.opes_.begin();
3707 while (it != ope.opes_.end()) {
3708 (*it)->accept(*this);
3709 if (!is_empty) {
3710 ++it;
3711 while (it != ope.opes_.end()) {
3713 (*it)->accept(vis);
3714 if (vis.has_error) {
3715 is_empty = true;
3716 error_s = vis.error_s;
3717 error_name = vis.error_name;
3718 }
3719 ++it;
3720 }
3721 return;
3722 }
3723
3724 save_is_empty = is_empty;
3725 save_error_s = error_s;
3726 save_error_name = error_name;
3727
3728 is_empty = false;
3729 error_name.clear();
3730 ++it;
3731 }
3732
3733 is_empty = save_is_empty;
3734 error_s = save_error_s;
3735 error_name = save_error_name;
3736}
3737
3739 auto it = std::find_if(refs_.begin(), refs_.end(),
3740 [&](const std::pair<const char *, std::string> &ref) {
3741 return ope.name_ == ref.second;
3742 });
3743 if (it != refs_.end()) { return; }
3744
3745 if (ope.rule_) {
3746 refs_.emplace_back(ope.s_, ope.name_);
3747 ope.rule_->accept(*this);
3748 refs_.pop_back();
3749 }
3750}
3751
3753 auto it = std::find_if(refs_.begin(), refs_.end(),
3754 [&](const std::pair<const char *, std::string> &ref) {
3755 return ope.name_ == ref.second;
3756 });
3757 if (it != refs_.end()) { return; }
3758
3759 if (ope.rule_) {
3760 auto it = has_error_cache_.find(ope.name_);
3761 if (it != has_error_cache_.end()) {
3762 has_error = it->second;
3763 } else {
3764 refs_.emplace_back(ope.s_, ope.name_);
3765 ope.rule_->accept(*this);
3766 refs_.pop_back();
3768 }
3769 }
3770
3771 if (ope.is_macro_) {
3772 for (const auto &arg : ope.args_) {
3773 arg->accept(*this);
3774 }
3775 }
3776}
3777
3779 auto it = std::find(params_.begin(), params_.end(), ope.name_);
3780 if (it != params_.end()) { return; }
3781
3782 if (!grammar_.count(ope.name_)) {
3783 error_s[ope.name_] = ope.s_;
3784 error_message[ope.name_] = "'" + ope.name_ + "' is not defined.";
3785 } else {
3786 if (!referenced.count(ope.name_)) { referenced.insert(ope.name_); }
3787 const auto &rule = grammar_.at(ope.name_);
3788 if (rule.is_macro) {
3789 if (!ope.is_macro_ || ope.args_.size() != rule.params.size()) {
3790 error_s[ope.name_] = ope.s_;
3791 error_message[ope.name_] = "incorrect number of arguments.";
3792 }
3793 } else if (ope.is_macro_) {
3794 error_s[ope.name_] = ope.s_;
3795 error_message[ope.name_] = "'" + ope.name_ + "' is not macro.";
3796 }
3797 for (const auto &arg : ope.args_) {
3798 arg->accept(*this);
3799 }
3800 }
3801}
3802
3804 if (!ope.rule_) {
3805 // Macro parameter reference — can't predict what it will match
3806 result_.any_char = true;
3807 return;
3808 }
3809 if (refs_.count(ope.name_)) { return; }
3810 refs_.insert(ope.name_);
3811 ope.rule_->accept(*this);
3812 if (!result_.first_rule && ope.rule_->is_token()) {
3813 result_.first_rule = ope.rule_;
3814 }
3815 refs_.erase(ope.name_);
3816}
3817
3819 if (!ope.rule_ || refs_.count(ope.name_)) { return; }
3820 refs_.insert(ope.name_);
3821 ope.rule_->accept(*this);
3822 refs_.erase(ope.name_);
3823}
3824
3826 ope.kw_guard_.reset();
3828 for (const auto &op : ope.opes_) {
3829 op->accept(*this);
3830 }
3831}
3832
3834 // Detect pattern: NotPredicate(Reference→PrioritizedChoice<literals>)
3835 // TokenBoundary(Sequence[CharacterClass,
3836 // Repetition(CharacterClass)])
3837 // This is the pattern used by: PlainIdentifier <- !ReservedKeyword
3838 // <[a-z_]i[a-z0-9_]i*>
3839 if (seq.opes_.size() != 2) { return; }
3840
3841 // Child 0 must be NotPredicate
3842 auto *not_pred = dynamic_cast<NotPredicate *>(seq.opes_[0].get());
3843 if (!not_pred) { return; }
3844
3845 // NotPredicate's child must be Reference to a rule
3846 auto *ref = dynamic_cast<Reference *>(not_pred->ope_.get());
3847 if (!ref || !ref->rule_) { return; }
3848
3849 // The referenced rule's inner operator (Holder) must contain
3850 // PrioritizedChoice
3851 auto *holder = dynamic_cast<Holder *>(ref->get_core_operator().get());
3852 if (!holder) { return; }
3853 auto *choice = dynamic_cast<PrioritizedChoice *>(holder->ope_.get());
3854 if (!choice) { return; }
3855
3856 // Extract keywords from PrioritizedChoice alternatives
3857 std::vector<std::string> exact_keywords;
3858 std::vector<std::string> prefix_keywords;
3859
3860 for (const auto &alt : choice->opes_) {
3861 auto *lit = dynamic_cast<LiteralString *>(alt.get());
3862 if (lit) {
3863 if (!lit->ignore_case_) { return; }
3864 exact_keywords.push_back(to_lower(lit->lit_));
3865 continue;
3866 }
3867 // Check for compound keyword (Sequence of LiteralStrings)
3868 auto *sub_seq = dynamic_cast<Sequence *>(alt.get());
3869 if (sub_seq && !sub_seq->opes_.empty()) {
3870 auto *first_lit = dynamic_cast<LiteralString *>(sub_seq->opes_[0].get());
3871 if (first_lit) {
3872 auto all_ignore_case_lits =
3873 std::all_of(sub_seq->opes_.begin(), sub_seq->opes_.end(),
3874 [](const auto &child) {
3875 auto *l = dynamic_cast<LiteralString *>(child.get());
3876 return l && l->ignore_case_;
3877 });
3878 if (all_ignore_case_lits) {
3879 prefix_keywords.push_back(to_lower(first_lit->lit_));
3880 continue;
3881 }
3882 }
3883 }
3884 // Unrecognized alternative — bail out
3885 return;
3886 }
3887
3888 if (exact_keywords.empty()) { return; }
3889
3890 // Child 1 must be TokenBoundary
3891 auto *tb = dynamic_cast<TokenBoundary *>(seq.opes_[1].get());
3892 if (!tb) { return; }
3893
3894 // TokenBoundary content: Sequence[CharacterClass, Repetition(CharacterClass)]
3895 // or just CharacterClass (single char identifier)
3896 CharacterClass *first_cc = nullptr;
3897 CharacterClass *rest_cc = nullptr;
3898
3899 auto *inner_seq = dynamic_cast<Sequence *>(tb->ope_.get());
3900 if (inner_seq && inner_seq->opes_.size() == 2) {
3901 first_cc = dynamic_cast<CharacterClass *>(inner_seq->opes_[0].get());
3902 auto *rep = dynamic_cast<Repetition *>(inner_seq->opes_[1].get());
3903 if (rep) { rest_cc = dynamic_cast<CharacterClass *>(rep->ope_.get()); }
3904 }
3905
3906 if (!first_cc || !rest_cc) { return; }
3907 if (!first_cc->is_ascii_only() || !rest_cc->is_ascii_only()) { return; }
3908
3909 // All conditions met — set up the fast path
3910 auto kw = std::make_unique<KeywordGuardData>();
3911 kw->identifier_first = first_cc->ascii_bitset();
3912 kw->identifier_rest = rest_cc->ascii_bitset();
3913
3914 // Compute keyword length range for early-out in hot path
3915 size_t min_len = SIZE_MAX, max_len = 0;
3916 for (const auto &k : exact_keywords) {
3917 min_len = std::min(min_len, k.size());
3918 max_len = std::max(max_len, k.size());
3919 }
3920 for (const auto &k : prefix_keywords) {
3921 min_len = std::min(min_len, k.size());
3922 max_len = std::max(max_len, k.size());
3923 }
3924 kw->min_keyword_len = min_len;
3925 kw->max_keyword_len = max_len;
3926
3927 kw->exact_keywords = std::move(exact_keywords);
3928 kw->prefix_keywords = std::move(prefix_keywords);
3929 seq.kw_guard_ = std::move(kw);
3930}
3931
3932// Compute which rules benefit from packrat memoization.
3933// A rule benefits if it's reachable from 2+ alternatives of the same
3934// PrioritizedChoice (backtracking will re-visit it at the same position).
3936 std::call_once(packrat_filter_init_, [&]() {
3937 auto def_count = definition_ids_.size();
3938 if (def_count == 0) { return; }
3939
3940 // Collect rule IDs reachable from an Ope subtree (bitvector indexed by
3941 // def_id)
3942 struct CollectReachableRules : public TraversalVisitor {
3944 std::vector<bool> reachable; // indexed by def_id
3945
3946 CollectReachableRules(size_t n) : reachable(n, false) {}
3947
3948 void visit(Holder &ope) override {
3949 auto id = ope.outer_->id;
3950 if (id < reachable.size()) { reachable[id] = true; }
3951 ope.ope_->accept(*this);
3952 }
3953 void visit(Reference &ope) override {
3954 if (ope.rule_ && ope.rule_->id < reachable.size() &&
3955 !reachable[ope.rule_->id]) {
3956 reachable[ope.rule_->id] = true;
3957 ope.rule_->accept(*this);
3958 }
3959 }
3960 };
3961
3962 // Find rules that benefit: reachable from 2+ alternatives of same choice
3963 std::vector<bool> benefits(def_count, false);
3964
3965 struct FindBacktrackRules : public TraversalVisitor {
3967 std::vector<bool> &benefits;
3968 size_t def_count;
3969 std::vector<bool> visited_rules; // indexed by def_id
3970
3971 FindBacktrackRules(std::vector<bool> &b, size_t n)
3972 : benefits(b), def_count(n), visited_rules(n, false) {}
3973
3974 void visit(PrioritizedChoice &ope) override {
3975 // For each alternative, collect reachable rules as bitvectors
3976 std::vector<std::vector<bool>> alt_reachable;
3977 for (auto &op : ope.opes_) {
3978 CollectReachableRules crr(def_count);
3979 op->accept(crr);
3980 alt_reachable.push_back(std::move(crr.reachable));
3981 }
3982
3983 // Mark rules reachable from 2+ alternatives
3984 for (size_t id = 0; id < def_count; id++) {
3985 size_t count = 0;
3986 for (auto &alt : alt_reachable) {
3987 if (alt[id]) { count++; }
3988 }
3989 if (count >= 2) { benefits[id] = true; }
3990 }
3991
3992 // Recurse into alternatives
3993 for (auto &op : ope.opes_) {
3994 op->accept(*this);
3995 }
3996 }
3997 void visit(Holder &ope) override {
3998 auto id = ope.outer_->id;
3999 if (id < visited_rules.size() && !visited_rules[id]) {
4000 visited_rules[id] = true;
4001 ope.ope_->accept(*this);
4002 }
4003 }
4004 void visit(Reference &ope) override {
4005 if (ope.rule_) { ope.rule_->accept(*this); }
4006 }
4007 };
4008
4009 FindBacktrackRules finder(benefits, def_count);
4010 holder_->accept(finder);
4011 if (whitespaceOpe) { whitespaceOpe->accept(finder); }
4012 if (wordOpe) { wordOpe->accept(finder); }
4013
4014 packrat_filter_ = std::move(benefits);
4015 });
4016}
4017
4019 // Check if the reference is a macro parameter
4020 auto found_param = false;
4021 for (size_t i = 0; i < params_.size(); i++) {
4022 const auto &param = params_[i];
4023 if (param == ope.name_) {
4024 ope.iarg_ = i;
4025 found_param = true;
4026 break;
4027 }
4028 }
4029
4030 // Check if the reference is a definition rule
4031 if (!found_param && grammar_.count(ope.name_)) {
4032 auto &rule = grammar_.at(ope.name_);
4033 ope.rule_ = &rule;
4034 }
4035
4036 for (const auto &arg : ope.args_) {
4037 arg->accept(*this);
4038 }
4039}
4040
4042 for (size_t i = 0; i < args_.size(); i++) {
4043 const auto &name = params_[i];
4044 if (name == ope.name_) {
4045 found_ope = args_[i];
4046 return;
4047 }
4048 }
4049 found_ope = ope.shared_from_this();
4050}
4051
4052/*-----------------------------------------------------------------------------
4053 * PEG parser generator
4054 *---------------------------------------------------------------------------*/
4055
4056using Rules = std::unordered_map<std::string, std::shared_ptr<Ope>>;
4057
4059public:
4061 std::shared_ptr<Grammar> grammar;
4062 std::string start;
4064 };
4065
4066 static ParserContext parse(const char *s, size_t n, const Rules &rules,
4067 Log log, std::string_view start,
4068 bool enable_left_recursion = true) {
4069 return get_instance().perform_core(s, n, rules, log, std::string(start),
4070 enable_left_recursion);
4071 }
4072
4073 // For debugging purpose
4074 static bool parse_test(const char *d, const char *s) {
4075 Data data;
4076 std::any dt = &data;
4077
4078 auto n = strlen(s);
4079 auto r = get_instance().g[d].parse(s, n, dt);
4080 return r.ret && r.len == n;
4081 }
4082
4083#if defined(__cpp_lib_char8_t)
4084 static bool parse_test(const char *d, const char8_t *s) {
4085 return parse_test(d, reinterpret_cast<const char *>(s));
4086 }
4087#endif
4088
4089private:
4091 static ParserGenerator instance;
4092 return instance;
4093 }
4094
4096 make_grammar();
4097 setup_actions();
4098 }
4099
4101 std::string type;
4102 std::any data;
4103 std::string_view sv;
4104 };
4105
4106 struct Data {
4107 std::shared_ptr<Grammar> grammar;
4108 std::string start;
4109 const char *start_pos = nullptr;
4110
4111 std::vector<std::pair<std::string, const char *>> duplicates_of_definition;
4112
4113 std::vector<std::pair<std::string, const char *>> duplicates_of_instruction;
4114 std::map<std::string, std::vector<Instruction>> instructions;
4115
4116 std::vector<std::pair<std::string, const char *>> undefined_back_references;
4117 std::vector<std::set<std::string_view>> captures_stack{{}};
4118
4119 std::set<std::string_view> captures_in_current_definition;
4121
4122 Data() : grammar(std::make_shared<Grammar>()) {}
4123 };
4124
4125 class SyntaxErrorException : public std::runtime_error {
4126 public:
4127 SyntaxErrorException(const char *what_arg, std::pair<size_t, size_t> r)
4128 : std::runtime_error(what_arg), r_(r) {}
4129
4130 std::pair<size_t, size_t> line_info() const { return r_; }
4131
4132 private:
4133 std::pair<size_t, size_t> r_;
4134 };
4135
4137 // Setup PEG syntax parser
4138 g["Grammar"] <= seq(g["Spacing"], oom(g["Definition"]), g["EndOfFile"]);
4139 g["Definition"] <=
4140 cho(seq(g["Ignore"], g["IdentCont"], g["Parameters"], g["LEFTARROW"],
4141 g["Expression"], opt(g["Instruction"])),
4142 seq(g["Ignore"], g["Identifier"], g["LEFTARROW"], g["Expression"],
4143 opt(g["Instruction"])));
4144 g["Expression"] <= seq(g["Sequence"], zom(seq(g["SLASH"], g["Sequence"])));
4145 g["Sequence"] <= zom(cho(g["CUT"], g["Prefix"]));
4146 g["Prefix"] <= seq(opt(cho(g["AND"], g["NOT"])), g["SuffixWithLabel"]);
4147 g["SuffixWithLabel"] <=
4148 seq(g["Suffix"], opt(seq(g["LABEL"], g["Identifier"])));
4149 g["Suffix"] <= seq(g["Primary"], opt(g["Loop"]));
4150 g["Loop"] <= cho(g["QUESTION"], g["STAR"], g["PLUS"], g["Repetition"]);
4151 g["Primary"] <= cho(seq(g["Ignore"], g["IdentCont"], g["Arguments"],
4152 npd(g["LEFTARROW"])),
4153 seq(g["Ignore"], g["Identifier"],
4154 npd(seq(opt(g["Parameters"]), g["LEFTARROW"]))),
4155 seq(g["OPEN"], g["Expression"], g["CLOSE"]),
4156 seq(g["BeginTok"], g["Expression"], g["EndTok"]),
4157 g["CapScope"],
4158 seq(g["BeginCap"], g["Expression"], g["EndCap"]),
4159 g["BackRef"], g["DictionaryI"], g["LiteralI"],
4160 g["Dictionary"], g["Literal"], g["NegatedClassI"],
4161 g["NegatedClass"], g["ClassI"], g["Class"], g["DOT"]);
4162
4163 g["Identifier"] <= seq(g["IdentCont"], g["Spacing"]);
4164 g["IdentCont"] <= tok(seq(g["IdentStart"], zom(g["IdentRest"])));
4165
4166 const static std::vector<std::pair<char32_t, char32_t>> range = {
4167 {0x0080, 0xFFFF}};
4168 g["IdentStart"] <= seq(npd(lit(u8(u8"↑"))), npd(lit(u8(u8"⇑"))),
4169 cho(cls("a-zA-Z_%"), cls(range)));
4170
4171 g["IdentRest"] <= cho(g["IdentStart"], cls("0-9"));
4172
4173 g["Dictionary"] <= seq(g["LiteralD"], oom(seq(g["PIPE"], g["LiteralD"])));
4174
4175 g["DictionaryI"] <=
4176 seq(g["LiteralID"], oom(seq(g["PIPE"], g["LiteralID"])));
4177
4178 auto lit_ope = cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))),
4179 cls("'"), g["Spacing"]),
4180 seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))),
4181 cls("\""), g["Spacing"]));
4182 g["Literal"] <= lit_ope;
4183 g["LiteralD"] <= lit_ope;
4184
4185 auto lit_case_ignore_ope =
4186 cho(seq(cls("'"), tok(zom(seq(npd(cls("'")), g["Char"]))), lit("'i"),
4187 g["Spacing"]),
4188 seq(cls("\""), tok(zom(seq(npd(cls("\"")), g["Char"]))), lit("\"i"),
4189 g["Spacing"]));
4190 g["LiteralI"] <= lit_case_ignore_ope;
4191 g["LiteralID"] <= lit_case_ignore_ope;
4192
4193 // NOTE: The original Brian Ford's paper uses 'zom' instead of 'oom'.
4194 g["Class"] <= seq(chr('['), npd(chr('^')),
4195 tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'),
4196 g["Spacing"]);
4197 g["ClassI"] <= seq(chr('['), npd(chr('^')),
4198 tok(oom(seq(npd(chr(']')), g["Range"]))), lit("]i"),
4199 g["Spacing"]);
4200
4201 g["NegatedClass"] <= seq(lit("[^"),
4202 tok(oom(seq(npd(chr(']')), g["Range"]))), chr(']'),
4203 g["Spacing"]);
4204 g["NegatedClassI"] <= seq(lit("[^"),
4205 tok(oom(seq(npd(chr(']')), g["Range"]))),
4206 lit("]i"), g["Spacing"]);
4207
4208 // NOTE: This is different from The original Brian Ford's paper, and this
4209 // modification allows us to specify `[+-]` as a valid char class.
4210 g["Range"] <=
4211 cho(seq(g["Char"], chr('-'), npd(chr(']')), g["Char"]), g["Char"]);
4212
4213 g["Char"] <=
4214 cho(seq(chr('\\'), cls("fnrtv'\"[]\\^-")),
4215 seq(chr('\\'), cls("0-3"), cls("0-7"), cls("0-7")),
4216 seq(chr('\\'), cls("0-7"), opt(cls("0-7"))),
4217 seq(lit("\\x"), cls("0-9a-fA-F"), opt(cls("0-9a-fA-F"))),
4218 seq(lit("\\u"),
4219 cho(seq(cho(seq(chr('0'), cls("0-9a-fA-F")), lit("10")),
4220 rep(cls("0-9a-fA-F"), 4, 4)),
4221 rep(cls("0-9a-fA-F"), 4, 5))),
4222 seq(npd(chr('\\')), dot()));
4223
4224 g["Repetition"] <=
4225 seq(g["BeginBracket"], g["RepetitionRange"], g["EndBracket"]);
4226 g["RepetitionRange"] <= cho(seq(g["Number"], g["COMMA"], g["Number"]),
4227 seq(g["Number"], g["COMMA"]), g["Number"],
4228 seq(g["COMMA"], g["Number"]));
4229 g["Number"] <= seq(oom(cls("0-9")), g["Spacing"]);
4230
4231 g["CapScope"] <= seq(g["BeginCapScope"], g["Expression"], g["EndCapScope"]);
4232
4233 g["LEFTARROW"] <= seq(cho(lit("<-"), lit(u8(u8"←"))), g["Spacing"]);
4234 ~g["SLASH"] <= seq(chr('/'), g["Spacing"]);
4235 ~g["PIPE"] <= seq(chr('|'), g["Spacing"]);
4236 g["AND"] <= seq(chr('&'), g["Spacing"]);
4237 g["NOT"] <= seq(chr('!'), g["Spacing"]);
4238 g["QUESTION"] <= seq(chr('?'), g["Spacing"]);
4239 g["STAR"] <= seq(chr('*'), g["Spacing"]);
4240 g["PLUS"] <= seq(chr('+'), g["Spacing"]);
4241 ~g["OPEN"] <= seq(chr('('), g["Spacing"]);
4242 ~g["CLOSE"] <= seq(chr(')'), g["Spacing"]);
4243 g["DOT"] <= seq(chr('.'), g["Spacing"]);
4244
4245 g["CUT"] <= seq(lit(u8(u8"↑")), g["Spacing"]);
4246 ~g["LABEL"] <= seq(cho(chr('^'), lit(u8(u8"⇑"))), g["Spacing"]);
4247
4248 ~g["Spacing"] <= zom(cho(g["Space"], g["Comment"]));
4249 g["Comment"] <= seq(chr('#'), zom(seq(npd(g["EndOfLine"]), dot())),
4250 opt(g["EndOfLine"]));
4251 g["Space"] <= cho(chr(' '), chr('\t'), g["EndOfLine"]);
4252 g["EndOfLine"] <= cho(lit("\r\n"), chr('\n'), chr('\r'));
4253 g["EndOfFile"] <= npd(dot());
4254
4255 ~g["BeginTok"] <= seq(chr('<'), g["Spacing"]);
4256 ~g["EndTok"] <= seq(chr('>'), g["Spacing"]);
4257
4258 ~g["BeginCapScope"] <= seq(chr('$'), chr('('), g["Spacing"]);
4259 ~g["EndCapScope"] <= seq(chr(')'), g["Spacing"]);
4260
4261 g["BeginCap"] <= seq(chr('$'), tok(g["IdentCont"]), chr('<'), g["Spacing"]);
4262 ~g["EndCap"] <= seq(chr('>'), g["Spacing"]);
4263
4264 g["BackRef"] <= seq(chr('$'), tok(g["IdentCont"]), g["Spacing"]);
4265
4266 g["IGNORE"] <= chr('~');
4267
4268 g["Ignore"] <= opt(g["IGNORE"]);
4269 g["Parameters"] <= seq(g["OPEN"], g["Identifier"],
4270 zom(seq(g["COMMA"], g["Identifier"])), g["CLOSE"]);
4271 g["Arguments"] <= seq(g["OPEN"], g["Expression"],
4272 zom(seq(g["COMMA"], g["Expression"])), g["CLOSE"]);
4273 ~g["COMMA"] <= seq(chr(','), g["Spacing"]);
4274
4275 // Instruction grammars
4276 g["Instruction"] <=
4277 seq(g["BeginBracket"],
4278 opt(seq(g["InstructionItem"], zom(seq(g["InstructionItemSeparator"],
4279 g["InstructionItem"])))),
4280 g["EndBracket"]);
4281 g["InstructionItem"] <=
4282 cho(g["PrecedenceClimbing"], g["ErrorMessage"], g["NoAstOpt"]);
4283 ~g["InstructionItemSeparator"] <= seq(chr(';'), g["Spacing"]);
4284
4285 ~g["SpacesZom"] <= zom(g["Space"]);
4286 ~g["SpacesOom"] <= oom(g["Space"]);
4287 ~g["BeginBracket"] <= seq(chr('{'), g["Spacing"]);
4288 ~g["EndBracket"] <= seq(chr('}'), g["Spacing"]);
4289
4290 // PrecedenceClimbing instruction
4291 g["PrecedenceClimbing"] <=
4292 seq(lit("precedence"), g["SpacesOom"], g["PrecedenceInfo"],
4293 zom(seq(g["SpacesOom"], g["PrecedenceInfo"])), g["SpacesZom"]);
4294 g["PrecedenceInfo"] <=
4295 seq(g["PrecedenceAssoc"],
4296 oom(seq(ign(g["SpacesOom"]), g["PrecedenceOpe"])));
4297 g["PrecedenceOpe"] <=
4298 cho(seq(cls("'"),
4299 tok(zom(seq(npd(cho(g["Space"], cls("'"))), g["Char"]))),
4300 cls("'")),
4301 seq(cls("\""),
4302 tok(zom(seq(npd(cho(g["Space"], cls("\""))), g["Char"]))),
4303 cls("\"")),
4304 tok(oom(seq(npd(cho(g["PrecedenceAssoc"], g["Space"], chr('}'))),
4305 dot()))));
4306 g["PrecedenceAssoc"] <= cls("LR");
4307
4308 // Error message instruction
4309 g["ErrorMessage"] <= seq(lit("error_message"), g["SpacesOom"],
4310 g["LiteralD"], g["SpacesZom"]);
4311
4312 // No Ast node optimization instruction
4313 g["NoAstOpt"] <= seq(lit("no_ast_opt"), g["SpacesZom"]);
4314
4315 // Set definition names
4316 for (auto &x : g) {
4317 x.second.name = x.first;
4318 }
4319 }
4320
// Semantic action for "Definition": registers one parsed rule in the grammar
// being built (`data.grammar`). Alternative 0 is a macro definition; any
// other alternative is a plain rule.
4322 g["Definition"] = [&](const SemanticValues &vs, std::any &dt) {
4323 auto &data = *std::any_cast<Data *>(dt);
4324
4325 auto is_macro = vs.choice() == 0;
// vs[0] is the ignore flag and vs[1] the rule name for both alternatives.
4326 auto ignore = std::any_cast<bool>(vs[0]);
4327 auto name = std::any_cast<std::string>(vs[1]);
4328
4329 std::vector<std::string> params;
4330 std::shared_ptr<Ope> ope;
4331 auto has_instructions = false;
4332
// The operator sits at vs[4] (macro) or vs[3] (plain rule); one extra
// trailing value means an instruction list is present.
4333 if (is_macro) {
4334 params = std::any_cast<std::vector<std::string>>(vs[2]);
4335 ope = std::any_cast<std::shared_ptr<Ope>>(vs[4]);
4336 if (vs.size() == 6) { has_instructions = true; }
4337 } else {
4338 ope = std::any_cast<std::shared_ptr<Ope>>(vs[3]);
4339 if (vs.size() == 5) { has_instructions = true; }
4340 }
4341
// Keep the first instruction of each type for this rule; later ones of the
// same type are recorded as duplicates together with their source position.
4342 if (has_instructions) {
4343 auto index = is_macro ? 5 : 4;
4344 std::unordered_set<std::string> types;
4345 for (const auto &instruction :
4346 std::any_cast<std::vector<Instruction>>(vs[index])) {
4347 const auto &type = instruction.type;
4348 if (types.find(type) == types.end()) {
4349 data.instructions[name].push_back(instruction);
4350 types.insert(instruction.type);
// Symbol instructions wrap the operator with tok() unless
// TokenChecker already reports it as a token.
4351 if (type == "declare_symbol" || type == "check_symbol") {
4352 if (!TokenChecker::is_token(*ope)) { ope = tok(ope); }
4353 }
4354 } else {
4355 data.duplicates_of_instruction.emplace_back(type,
4356 instruction.sv.data());
4357 }
4358 }
4359 }
4360
// Only the first definition of a name is installed; a repeated name is
// recorded in `duplicates_of_definition` instead of overwriting the rule.
4361 auto &grammar = *data.grammar;
4362 if (!grammar.count(name)) {
4363 auto &rule = grammar[name];
4364 rule <= ope;
4365 rule.name = name;
4366 rule.s_ = vs.sv().data();
4367 rule.line_ = line_info(vs.ss, rule.s_);
4368 rule.ignoreSemanticValue = ignore;
4369 rule.is_macro = is_macro;
4370 rule.params = params;
4371
// The first definition seen becomes the default start rule.
4372 if (data.start.empty()) {
4373 data.start = rule.name;
4374 data.start_pos = rule.s_;
4375 }
4376 } else {
4377 data.duplicates_of_definition.emplace_back(name, vs.sv().data());
4378 }
4379 };
4380
// Enter hook for "Definition": empties the set of capture names tracked for
// the current definition (consulted by the "BackRef" action).
4381 g["Definition"].enter = [](const Context & /*c*/, const char * /*s*/,
4382 size_t /*n*/, std::any &dt) {
4383 auto &data = *std::any_cast<Data *>(dt);
4384 data.captures_in_current_definition.clear();
4385 };
4386
4387 g["Expression"] = [&](const SemanticValues &vs) {
4388 if (vs.size() == 1) {
4389 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
4390 } else {
4391 std::vector<std::shared_ptr<Ope>> opes;
4392 for (auto i = 0u; i < vs.size(); i++) {
4393 opes.emplace_back(std::any_cast<std::shared_ptr<Ope>>(vs[i]));
4394 }
4395 const std::shared_ptr<Ope> ope =
4396 std::make_shared<PrioritizedChoice>(opes);
4397 return ope;
4398 }
4399 };
4400
4401 g["Sequence"] = [&](const SemanticValues &vs) {
4402 if (vs.empty()) {
4403 return npd(lit(""));
4404 } else if (vs.size() == 1) {
4405 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
4406 } else {
4407 std::vector<std::shared_ptr<Ope>> opes;
4408 for (const auto &x : vs) {
4409 opes.emplace_back(std::any_cast<std::shared_ptr<Ope>>(x));
4410 }
4411 const std::shared_ptr<Ope> ope = std::make_shared<Sequence>(opes);
4412 return ope;
4413 }
4414 };
4415
4416 g["Prefix"] = [&](const SemanticValues &vs) {
4417 std::shared_ptr<Ope> ope;
4418 if (vs.size() == 1) {
4419 ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
4420 } else {
4421 assert(vs.size() == 2);
4422 auto tok = std::any_cast<char>(vs[0]);
4423 ope = std::any_cast<std::shared_ptr<Ope>>(vs[1]);
4424 if (tok == '&') {
4425 ope = apd(ope);
4426 } else { // '!'
4427 ope = npd(ope);
4428 }
4429 }
4430 return ope;
4431 };
4432
// Semantic action for "SuffixWithLabel": without a label the operand is
// returned unchanged. With a label, the operand is combined (cho4label_)
// with a rec()-wrapped macro reference to RECOVER_DEFINITION_NAME whose
// single argument is a reference to the label identifier.
4433 g["SuffixWithLabel"] = [&](const SemanticValues &vs, std::any &dt) {
4434 auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
4435 if (vs.size() == 1) {
4436 return ope;
4437 } else {
4438 assert(vs.size() == 2);
4439 auto &data = *std::any_cast<Data *>(dt);
4440 const auto &ident = std::any_cast<std::string>(vs[1]);
// Plain (non-macro) reference to the label rule, with no arguments.
4441 auto label = ref(*data.grammar, ident, vs.sv().data(), false, {});
4442 auto recovery = rec(ref(*data.grammar, RECOVER_DEFINITION_NAME,
4443 vs.sv().data(), true, {label}));
4444 return cho4label_(ope, recovery);
4445 }
4446 };
4447
// Value produced by the "Loop" action and consumed by the "Suffix" action:
// the kind of repetition and, for Type::rep, the (first, second) bounds that
// are forwarded to rep().
4448 struct Loop {
4449 enum class Type { opt = 0, zom, oom, rep };
4450 Type type;
4451 std::pair<size_t, size_t> range;
4452 };
4453
4454 g["Suffix"] = [&](const SemanticValues &vs) {
4455 auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[0]);
4456 if (vs.size() == 1) {
4457 return ope;
4458 } else {
4459 assert(vs.size() == 2);
4460 auto loop = std::any_cast<Loop>(vs[1]);
4461 switch (loop.type) {
4462 case Loop::Type::opt: return opt(ope);
4463 case Loop::Type::zom: return zom(ope);
4464 case Loop::Type::oom: return oom(ope);
4465 default: // Regex-like repetition
4466 return rep(ope, loop.range.first, loop.range.second);
4467 }
4468 }
4469 };
4470
4471 g["Loop"] = [&](const SemanticValues &vs) {
4472 switch (vs.choice()) {
4473 case 0: // Option
4474 return Loop{Loop::Type::opt, std::pair<size_t, size_t>()};
4475 case 1: // Zero or More
4476 return Loop{Loop::Type::zom, std::pair<size_t, size_t>()};
4477 case 2: // One or More
4478 return Loop{Loop::Type::oom, std::pair<size_t, size_t>()};
4479 default: // Regex-like repetition
4480 return Loop{Loop::Type::rep,
4481 std::any_cast<std::pair<size_t, size_t>>(vs[0])};
4482 }
4483 };
4484
// Semantic action for "Primary": builds the operator for one primary
// expression, dispatching on the matched alternative.
4485 g["Primary"] = [&](const SemanticValues &vs, std::any &dt) {
4486 auto &data = *std::any_cast<Data *>(dt);
4487
4488 switch (vs.choice()) {
4489 case 0: // Macro Reference
4490 case 1: { // Reference
4491 auto is_macro = vs.choice() == 0;
4492 auto ignore = std::any_cast<bool>(vs[0]);
4493 const auto &ident = std::any_cast<std::string>(vs[1]);
4494
// Only macro references carry an argument list (vs[2]).
4495 std::vector<std::shared_ptr<Ope>> args;
4496 if (is_macro) {
4497 args = std::any_cast<std::vector<std::shared_ptr<Ope>>>(vs[2]);
4498 }
4499
4500 auto ope = ref(*data.grammar, ident, vs.sv().data(), is_macro, args);
// A reference to the recover definition is additionally wrapped with rec().
4501 if (ident == RECOVER_DEFINITION_NAME) { ope = rec(ope); }
4502
4503 if (ignore) {
4504 return ign(ope);
4505 } else {
4506 return ope;
4507 }
4508 }
4509 case 2: { // (Expression)
4510 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
4511 }
4512 case 3: { // TokenBoundary
4513 return tok(std::any_cast<std::shared_ptr<Ope>>(vs[0]));
4514 }
4515 case 4: { // CaptureScope
4516 return csc(std::any_cast<std::shared_ptr<Ope>>(vs[0]));
4517 }
4518 case 5: { // Capture
4519 const auto &name = std::any_cast<std::string_view>(vs[0]);
4520 auto ope = std::any_cast<std::shared_ptr<Ope>>(vs[1]);
4521
// Record the capture name in the innermost capture scope and in the set for
// the current definition; both are consulted by the "BackRef" action.
4522 data.captures_stack.back().insert(name);
4523 data.captures_in_current_definition.insert(name);
4524
// NOTE(review): `name` is a std::string_view copied into the closure;
// presumably it points into the grammar text, which would then have to
// outlive the capture operator - confirm against the element type of
// `capture_entries`.
4525 return cap(ope, [name](const char *a_s, size_t a_n, Context &c) {
4526 c.capture_entries.emplace_back(name, std::string(a_s, a_n));
4527 });
4528 }
4529 default: {
4530 return std::any_cast<std::shared_ptr<Ope>>(vs[0]);
4531 }
4532 }
4533 };
4534
// Semantic action for "IdentCont": returns the matched text as a std::string.
4535 g["IdentCont"] = [](const SemanticValues &vs) {
4536 return std::string(vs.sv().data(), vs.sv().length());
4537 };
4538
// Semantic actions for "Dictionary" / "DictionaryI": collect the child string
// values and build a dictionary operator via dic(). The two differ only in
// the boolean passed as the second argument (false vs. true).
// NOTE(review): the "I" suffix presumably means case-insensitive - confirm
// against dic().
4539 g["Dictionary"] = [](const SemanticValues &vs) {
4540 auto items = vs.transform<std::string>();
4541 return dic(items, false);
4542 };
4543 g["DictionaryI"] = [](const SemanticValues &vs) {
4544 auto items = vs.transform<std::string>();
4545 return dic(items, true);
4546 };
4547
// Semantic actions for the literal rules. Each takes the first token of the
// match and resolves its escape sequences.
// "Literal" and "LiteralI" wrap the result in an operator (lit / liti);
// "LiteralD" and "LiteralID" return the resolved text itself.
4548 g["Literal"] = [](const SemanticValues &vs) {
4549 const auto &tok = vs.tokens.front();
4550 return lit(resolve_escape_sequence(tok.data(), tok.size()));
4551 };
4552 g["LiteralI"] = [](const SemanticValues &vs) {
4553 const auto &tok = vs.tokens.front();
4554 return liti(resolve_escape_sequence(tok.data(), tok.size()));
4555 };
4556 g["LiteralD"] = [](const SemanticValues &vs) {
4557 auto &tok = vs.tokens.front();
4558 return resolve_escape_sequence(tok.data(), tok.size());
4559 };
4560 g["LiteralID"] = [](const SemanticValues &vs) {
4561 auto &tok = vs.tokens.front();
4562 return resolve_escape_sequence(tok.data(), tok.size());
4563 };
4564
// Semantic actions for the character-class rules. Each gathers the child
// (first, last) code point pairs produced by "Range" and builds a class
// operator: cls() for the plain forms, ncls() for the negated forms. The "I"
// variants pass `true` as an extra second argument.
4565 g["Class"] = [](const SemanticValues &vs) {
4566 auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
4567 return cls(ranges);
4568 };
4569 g["ClassI"] = [](const SemanticValues &vs) {
4570 auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
4571 return cls(ranges, true);
4572 };
4573 g["NegatedClass"] = [](const SemanticValues &vs) {
4574 auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
4575 return ncls(ranges);
4576 };
4577 g["NegatedClassI"] = [](const SemanticValues &vs) {
4578 auto ranges = vs.transform<std::pair<char32_t, char32_t>>();
4579 return ncls(ranges, true);
4580 };
4581 g["Range"] = [](const SemanticValues &vs) {
4582 switch (vs.choice()) {
4583 case 0: {
4584 auto s1 = std::any_cast<std::string>(vs[0]);
4585 auto s2 = std::any_cast<std::string>(vs[1]);
4586 auto cp1 = decode_codepoint(s1.data(), s1.length());
4587 auto cp2 = decode_codepoint(s2.data(), s2.length());
4588 if (cp1 > cp2) {
4589 throw SyntaxErrorException("characer range is out of order...",
4590 vs.line_info());
4591 }
4592 return std::pair(cp1, cp2);
4593 }
4594 case 1: {
4595 auto s = std::any_cast<std::string>(vs[0]);
4596 auto cp = decode_codepoint(s.data(), s.length());
4597 return std::pair(cp, cp);
4598 }
4599 }
4600 return std::pair<char32_t, char32_t>(0, 0);
4601 };
// Semantic action for "Char": returns the matched text with escape sequences
// resolved.
4602 g["Char"] = [](const SemanticValues &vs) {
4603 return resolve_escape_sequence(vs.sv().data(), vs.sv().length());
4604 };
4605
4606 g["RepetitionRange"] = [&](const SemanticValues &vs) {
4607 switch (vs.choice()) {
4608 case 0: { // Number COMMA Number
4609 auto min = std::any_cast<size_t>(vs[0]);
4610 auto max = std::any_cast<size_t>(vs[1]);
4611 return std::pair(min, max);
4612 }
4613 case 1: // Number COMMA
4614 return std::pair(std::any_cast<size_t>(vs[0]),
4615 std::numeric_limits<size_t>::max());
4616 case 2: { // Number
4617 auto n = std::any_cast<size_t>(vs[0]);
4618 return std::pair(n, n);
4619 }
4620 default: // COMMA Number
4621 return std::pair(std::numeric_limits<size_t>::min(),
4622 std::any_cast<size_t>(vs[0]));
4623 }
4624 };
// Semantic action for "Number": converts the matched token to size_t.
4625 g["Number"] = [&](const SemanticValues &vs) {
4626 return vs.token_to_number<size_t>();
4627 };
4628
// Enter/leave hooks for "CapScope": push a fresh, empty set of capture names
// on entry and pop it again on leave, so `captures_stack` mirrors the nesting
// of capture scopes while the grammar text is parsed.
4629 g["CapScope"].enter = [](const Context & /*c*/, const char * /*s*/,
4630 size_t /*n*/, std::any &dt) {
4631 auto &data = *std::any_cast<Data *>(dt);
4632 data.captures_stack.emplace_back();
4633 };
4634 g["CapScope"].leave = [](const Context & /*c*/, const char * /*s*/,
4635 size_t /*n*/, size_t /*matchlen*/,
4636 std::any & /*value*/, std::any &dt) {
4637 auto &data = *std::any_cast<Data *>(dt);
4638 data.captures_stack.pop_back();
4639 };
4640
// Semantic actions for the single-character operator rules: each returns the
// first character of the matched text (consumed as `char`, e.g. by "Prefix").
4641 g["AND"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
4642 g["NOT"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
4643 g["QUESTION"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
4644 g["STAR"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
4645 g["PLUS"] = [](const SemanticValues &vs) { return *vs.sv().data(); };
4646
// "DOT" and "CUT" ignore the matched text and return the operators built by
// dot() and cut().
4647 g["DOT"] = [](const SemanticValues & /*vs*/) { return dot(); };
4648
4649 g["CUT"] = [](const SemanticValues & /*vs*/) { return cut(); };
4650
// "BeginCap" returns its token; the "Primary" action reads it as the capture
// name (a std::string_view).
4651 g["BeginCap"] = [](const SemanticValues &vs) { return vs.token(); };
4652
// Semantic action for "BackRef": builds a back-reference operator for the
// token name, recording diagnostics and adjusting packrat parsing as needed.
4653 g["BackRef"] = [&](const SemanticValues &vs, std::any &dt) {
4654 auto &data = *std::any_cast<Data *>(dt);
4655
4656 // Undefined back reference check
4657 {
// Search the capture scopes from the innermost outwards.
4658 auto found = false;
4659 auto it = data.captures_stack.rbegin();
4660 while (it != data.captures_stack.rend()) {
4661 if (it->find(vs.token()) != it->end()) {
4662 found = true;
4663 break;
4664 }
4665 ++it;
4666 }
// Not found: remember it so perform_core can report the error later.
4667 if (!found) {
4668 auto ptr = vs.token().data() - 1; // include '$' symbol
4669 data.undefined_back_references.emplace_back(vs.token(), ptr);
4670 }
4671 }
4672
4673 // NOTE: Disable packrat parsing if a back reference is not defined in
4674 // captures in the current definition rule.
4675 if (data.captures_in_current_definition.find(vs.token()) ==
4676 data.captures_in_current_definition.end()) {
4677 data.enablePackratParsing = false;
4678 }
4679
4680 return bkr(vs.token_to_string());
4681 };
4682
// "Ignore": true when the rule produced at least one semantic value.
4683 g["Ignore"] = [](const SemanticValues &vs) { return vs.size() > 0; };
4684
// "Parameters": the child values collected as a vector of std::string (macro
// parameter names, read by the "Definition" action).
4685 g["Parameters"] = [](const SemanticValues &vs) {
4686 return vs.transform<std::string>();
4687 };
4688
// "Arguments": the child values collected as a vector of operators (macro
// arguments, read by the "Primary" action).
4689 g["Arguments"] = [](const SemanticValues &vs) {
4690 return vs.transform<std::shared_ptr<Ope>>();
4691 };
4692
4693 g["PrecedenceClimbing"] = [](const SemanticValues &vs) {
4695 size_t level = 1;
4696 for (const auto &v : vs) {
4697 auto tokens = std::any_cast<std::vector<std::string_view>>(v);
4698 auto assoc = tokens[0][0];
4699 for (size_t i = 1; i < tokens.size(); i++) {
4700 binOpeInfo[tokens[i]] = std::pair(level, assoc);
4701 }
4702 level++;
4703 }
4704 Instruction instruction;
4705 instruction.type = "precedence";
4706 instruction.data = binOpeInfo;
4707 instruction.sv = vs.sv();
4708 return instruction;
4709 };
// "PrecedenceInfo": the child tokens of one precedence level as a vector of
// std::string_view (consumed by the "PrecedenceClimbing" action).
4710 g["PrecedenceInfo"] = [](const SemanticValues &vs) {
4711 return vs.transform<std::string_view>();
4712 };
// "PrecedenceOpe" / "PrecedenceAssoc": each returns its matched token.
4713 g["PrecedenceOpe"] = [](const SemanticValues &vs) { return vs.token(); };
4714 g["PrecedenceAssoc"] = [](const SemanticValues &vs) { return vs.token(); };
4715
// Semantic action for "ErrorMessage": builds an "error_message" instruction
// whose data is the message string from vs[0]; `sv` keeps the matched text.
4716 g["ErrorMessage"] = [](const SemanticValues &vs) {
4717 Instruction instruction;
4718 instruction.type = "error_message";
4719 instruction.data = std::any_cast<std::string>(vs[0]);
4720 instruction.sv = vs.sv();
4721 return instruction;
4722 };
4723
// Semantic action for "NoAstOpt": builds a "no_ast_opt" instruction, which
// carries no data.
4724 g["NoAstOpt"] = [](const SemanticValues &vs) {
4725 Instruction instruction;
4726 instruction.type = "no_ast_opt";
4727 instruction.sv = vs.sv();
4728 return instruction;
4729 };
4730
// Semantic action for "Instruction": collects the child Instruction values
// into a vector (read back by the "Definition" action).
4731 g["Instruction"] = [](const SemanticValues &vs) {
4732 return vs.transform<Instruction>();
4733 };
4734 }
4735
4738 const char *s, Log log) {
// NOTE(review): the start of this signature is not part of the visible
// listing; judging by the call in perform_core this is presumably
// apply_precedence_instruction(rule, info, s, log) - confirm.
//
// Rewrites `rule` to use the operator built by pre(). The rule's core
// operator must have the shape
//   Sequence[ atom, Repetition( Sequence[ binop, atom1 ] ) ]
// where atom, binop and atom1 are References. Any failing reference
// dynamic_cast throws and is handled by the catch-all below.
// Returns true on success; on any mismatch it logs (when `log` is set) and
// returns false.
4739 try {
4740 auto &seq = dynamic_cast<Sequence &>(*rule.get_core_operator());
4741 auto atom = seq.opes_[0];
4742 auto &rep = dynamic_cast<Repetition &>(*seq.opes_[1]);
4743 auto &seq1 = dynamic_cast<Sequence &>(*rep.ope_);
4744 auto binop = seq1.opes_[0];
4745 auto atom1 = seq1.opes_[1];
4746
4747 auto atom_name = dynamic_cast<Reference &>(*atom).name_;
4748 auto binop_name = dynamic_cast<Reference &>(*binop).name_;
4749 auto atom1_name = dynamic_cast<Reference &>(*atom1).name_;
4750
// The repetition must satisfy is_zom(), both atoms must reference the same
// rule, and the operator rule must differ from the atom rule.
4751 if (!rep.is_zom() || atom_name != atom1_name || atom_name == binop_name) {
4752 if (log) {
4753 auto line = line_info(s, rule.s_);
4754 log(line.first, line.second,
4755 "'precedence' instruction cannot be applied to '" + rule.name +
4756 "'.",
4757 "");
4758 }
4759 return false;
4760 }
4761
// Replace the rule's operator and disable the rule's own action.
4762 rule.holder_->ope_ = pre(atom, binop, info, rule);
4763 rule.disable_action = true;
4764 } catch (...) {
// Reached when the operator tree does not have the expected shape.
4765 if (log) {
4766 auto line = line_info(s, rule.s_);
4767 log(line.first, line.second,
4768 "'precedence' instruction cannot be applied to '" + rule.name +
4769 "'.",
4770 "");
4771 }
4772 return false;
4773 }
4774 return true;
4775 }
4776
// Compiles the PEG grammar text `s` (length `n`) into a ParserContext.
//
// Steps, in order: parse the text with the meta-grammar `g` (its semantic
// actions fill `data`), merge the user-provided `rules`, report duplicate
// definitions / duplicate instructions / undefined back references, resolve
// the start rule, check and link references, compute `can_be_empty`, handle
// left recursion, detect infinite loops, wire up whitespace and word
// handling, apply per-rule instructions, and set up first-sets.
//
// s, n:            grammar text and its length.
// rules:           extra rules keyed by name; a leading '~' in the key marks
//                  the rule as ignoring its semantic value.
// log:             optional callback (line, column, message, label) used for
//                  every diagnostic.
// requested_start: name of the rule to start from; when empty, the first
//                  definition in the grammar text is used.
// enable_left_recursion: when true, left-recursive rules are only flagged
//                  (`is_left_recursive`); when false, they are errors.
//
// Returns {grammar, start rule name, packrat flag} on success and a
// default-constructed ParserContext (`{}`) on any error.
4777 ParserContext perform_core(const char *s, size_t n, const Rules &rules,
4778 Log log, std::string requested_start,
4779 bool enable_left_recursion = true) {
4780 Data data;
4781 auto &grammar = *data.grammar;
4782
4783 // Built-in macros
4784 {
4785 // `%recover`
4786 {
4787 auto &rule = grammar[RECOVER_DEFINITION_NAME];
4788 rule <= ref(grammar, "x", "", false, {});
4789 rule.name = RECOVER_DEFINITION_NAME;
4790 rule.s_ = "[native]";
4791 rule.ignoreSemanticValue = true;
4792 rule.is_macro = true;
4793 rule.params = {"x"};
4794 }
4795 }
4796
// Parse the grammar text. Semantic actions may throw SyntaxErrorException
// (e.g. the "Range" action), which is reported like a parse failure.
4797 try {
4798 std::any dt = &data;
4799 auto r = g["Grammar"].parse(s, n, dt, nullptr, log);
4800
4801 if (!r.ret) {
4802 if (log) {
4803 if (r.error_info.message_pos) {
4804 auto line = line_info(s, r.error_info.message_pos);
4805 log(line.first, line.second, r.error_info.message,
4806 r.error_info.label);
4807 } else {
4808 auto line = line_info(s, r.error_info.error_pos);
4809 log(line.first, line.second, "syntax error", r.error_info.label);
4810 }
4811 }
4812 return {};
4813 }
4814 } catch (const SyntaxErrorException &e) {
4815 if (log) {
4816 auto line = e.line_info();
4817 log(line.first, line.second, e.what(), "");
4818 }
4819 return {};
4820 }
4821
4822 // User provided rules
4823 for (auto [user_name, user_rule] : rules) {
4824 auto name = user_name;
4825 auto ignore = false;
4826 if (!name.empty() && name[0] == '~') {
4827 ignore = true;
4828 name.erase(0, 1);
4829 }
4830 if (!name.empty()) {
4831 auto &rule = grammar[name];
4832 rule <= user_rule;
4833 rule.name = name;
4834 rule.ignoreSemanticValue = ignore;
4835 }
4836 }
4837
// The three checks below log every problem found before giving up, so one
// run reports all duplicates and undefined back references together.
4838 // Check duplicated definitions
4839 auto ret = true;
4840
4841 if (!data.duplicates_of_definition.empty()) {
4842 for (const auto &[name, ptr] : data.duplicates_of_definition) {
4843 if (log) {
4844 auto line = line_info(s, ptr);
4845 log(line.first, line.second,
4846 "the definition '" + name + "' is already defined.", "");
4847 }
4848 }
4849 ret = false;
4850 }
4851
4852 // Check duplicated instructions
4853 if (!data.duplicates_of_instruction.empty()) {
4854 for (const auto &[type, ptr] : data.duplicates_of_instruction) {
4855 if (log) {
4856 auto line = line_info(s, ptr);
4857 log(line.first, line.second,
4858 "the instruction '" + type + "' is already defined.", "");
4859 }
4860 }
4861 ret = false;
4862 }
4863
4864 // Check undefined back references
4865 if (!data.undefined_back_references.empty()) {
4866 for (const auto &[name, ptr] : data.undefined_back_references) {
4867 if (log) {
4868 auto line = line_info(s, ptr);
4869 log(line.first, line.second,
4870 "the back reference '" + name + "' is undefined.", "");
4871 }
4872 }
4873 ret = false;
4874 }
4875
4876 // Set root definition
4877 auto start = data.start;
4878
4879 if (!requested_start.empty()) {
4880 if (grammar.count(requested_start)) {
4881 start = requested_start;
4882 } else {
4883 if (log) {
4884 auto line = line_info(s, s);
4885 log(line.first, line.second,
4886 "the specified start rule '" + requested_start +
4887 "' is undefined.",
4888 "");
4889 }
4890 ret = false;
4891 }
4892 }
4893
4894 if (!ret) { return {}; }
4895
4896 auto &start_rule = grammar[start];
4897
4898 // Check if the start rule has ignore operator
4899 {
4900 if (start_rule.ignoreSemanticValue) {
4901 if (log) {
4902 auto line = line_info(s, start_rule.s_);
4903 log(line.first, line.second,
4904 "ignore operator cannot be applied to '" + start_rule.name + "'.",
4905 "");
4906 }
4907 ret = false;
4908 }
4909 }
4910
4911 if (!ret) { return {}; }
4912
4913 // Check missing definitions
// NOTE(review): the embedded listing numbers jump from 4914 to 4918, so
// entries of this initializer appear to be missing from the listing -
// confirm against the real header.
4914 auto referenced = std::unordered_set<std::string>{
4918 start_rule.name,
4919 };
4920
4921 for (auto &[_, rule] : grammar) {
4922 ReferenceChecker vis(grammar, rule.params);
4923 rule.accept(vis);
4924 referenced.insert(vis.referenced.begin(), vis.referenced.end());
4925 for (const auto &[name, ptr] : vis.error_s) {
4926 if (log) {
4927 auto line = line_info(s, ptr);
4928 log(line.first, line.second, vis.error_message[name], "");
4929 }
4930 ret = false;
4931 }
4932 }
4933
// Unreferenced rules are only reported; they do not change `ret`.
4934 for (auto &[name, rule] : grammar) {
4935 if (!referenced.count(name)) {
4936 if (log) {
4937 auto line = line_info(s, rule.s_);
4938 auto msg = "'" + name + "' is not referenced.";
4939 log(line.first, line.second, msg, "");
4940 }
4941 }
4942 }
4943
4944 if (!ret) { return {}; }
4945
4946 // Link references
4947 for (auto &x : grammar) {
4948 auto &rule = x.second;
4949 LinkReferences vis(grammar, rule.params);
4950 rule.accept(vis);
4951 }
4952
4953 // Compute can_be_empty for each rule (fixed-point iteration)
4954 {
4955 bool changed = true;
4956 while (changed) {
4957 changed = false;
4958 for (auto &[name, rule] : grammar) {
// NOTE(review): `vis` is used below without a visible declaration (listing
// line 4959 is absent) - the visitor's declaration must be restored from the
// real header.
4960 rule.accept(vis);
4961 if (vis.result != rule.can_be_empty) {
4962 rule.can_be_empty = vis.result;
4963 changed = true;
4964 }
4965 }
4966 }
4967 }
4968
4969 // Check left recursion
4970 if (enable_left_recursion) {
4971 for (auto &[name, rule] : grammar) {
4972 DetectLeftRecursion vis(name);
4973 rule.accept(vis);
4974 if (vis.error_s) { rule.is_left_recursive = true; }
4975 }
4976 } else {
4977 ret = true;
4978
4979 for (auto &[name, rule] : grammar) {
4980 DetectLeftRecursion vis(name);
4981 rule.accept(vis);
4982 if (vis.error_s) {
4983 if (log) {
4984 auto line = line_info(s, vis.error_s);
4985 log(line.first, line.second, "'" + name + "' is left recursive.",
4986 "");
4987 }
4988 ret = false;
4989 }
4990 }
4991
4992 if (!ret) { return {}; }
4993 }
4994
4995 // Check infinite loop
4996 if (detect_infiniteLoop(data, start_rule, log, s)) { return {}; }
4997
4998 // Automatic whitespace skipping
4999 if (grammar.count(WHITESPACE_DEFINITION_NAME)) {
// Rules whose core operator passes IsLiteralToken::check are re-wrapped
// with tok().
5000 for (auto &x : grammar) {
5001 auto &rule = x.second;
5002 auto ope = rule.get_core_operator();
5003 if (IsLiteralToken::check(*ope)) { rule <= tok(ope); }
5004 }
5005
5006 auto &rule = grammar[WHITESPACE_DEFINITION_NAME];
5007 start_rule.whitespaceOpe = wsp(rule.get_core_operator());
5008
5009 if (detect_infiniteLoop(data, rule, log, s)) { return {}; }
5010 }
5011
5012 // Word expression
5013 if (grammar.count(WORD_DEFINITION_NAME)) {
5014 auto &rule = grammar[WORD_DEFINITION_NAME];
5015 start_rule.wordOpe = rule.get_core_operator();
5016
5017 if (detect_infiniteLoop(data, rule, log, s)) { return {}; }
5018 }
5019
5020 // Apply instructions
5021 for (const auto &[name, instructions] : data.instructions) {
5022 auto &rule = grammar[name];
5023
5024 for (const auto &instruction : instructions) {
5025 if (instruction.type == "precedence") {
5026 const auto &info =
5027 std::any_cast<PrecedenceClimbing::BinOpeInfo>(instruction.data);
5028
5029 if (!apply_precedence_instruction(rule, info, s, log)) { return {}; }
5030 } else if (instruction.type == "error_message") {
5031 rule.error_message = std::any_cast<std::string>(instruction.data);
5032 } else if (instruction.type == "no_ast_opt") {
5033 rule.no_ast_opt = true;
5034 }
5035 }
5036 }
5037
5038 // Setup First-Set and ISpan optimizations
5039 for (auto &x : grammar) {
5040 SetupFirstSets vis;
5041 x.second.accept(vis);
5042 }
5043
5044 return {data.grammar, start, data.enablePackratParsing};
5045 }
5046
5047 bool detect_infiniteLoop(const Data &data, Definition &rule, const Log &log,
5048 const char *s) const {
5049 std::vector<std::pair<const char *, std::string>> refs;
5050 std::unordered_map<std::string, bool> has_error_cache;
5051 DetectInfiniteLoop vis(data.start_pos, rule.name, refs, has_error_cache);
5052 rule.accept(vis);
5053 if (vis.has_error) {
5054 if (log) {
5055 auto line = line_info(s, vis.error_s);
5056 log(line.first, line.second,
5057 "infinite loop is detected in '" + vis.error_name + "'.", "");
5058 }
5059 return true;
5060 }
5061 return false;
5062 }
5063
5065};
5066
5067/*-----------------------------------------------------------------------------
5068 * AST
5069 *---------------------------------------------------------------------------*/
5070
// AST node template. `Annotation` is a user-supplied base class whose members
// are inherited by every node. A node records its source location, rule name,
// matched range and choice information, plus either a token or child nodes.
//
// NOTE(review): in this listing the three constructors below stop partway
// through their member-initializer lists (listing lines 5077-5080, 5086-5089
// and 5095-5099 are absent), and the declaration at listing line 5112 is
// absent too (presumably `original_choice_count`, which ast_to_s_core reads).
// Restore them from the real header before relying on this text.
5071 template <typename Annotation> struct AstBase : public Annotation {
// Constructor taking child `nodes`.
5072 AstBase(const char *path, size_t line, size_t column, const char *name,
5073 const std::vector<std::shared_ptr<AstBase>> &nodes,
5074 size_t position = 0, size_t length = 0, size_t choice_count = 0,
5075 size_t choice = 0)
5076 : path(path ? path : ""), line(line), column(column), name(name),
5081
// Constructor taking a `token`.
5082 AstBase(const char *path, size_t line, size_t column, const char *name,
5083 const std::string_view &token, size_t position = 0, size_t length = 0,
5084 size_t choice_count = 0, size_t choice = 0)
5085 : path(path ? path : ""), line(line), column(column), name(name),
5090
// Constructor that copies `ast` while taking new "original" name, range and
// choice values; used by AstOptimizer::optimize when collapsing a node into
// its only child.
5091 AstBase(const AstBase &ast, const char *original_name, size_t position = 0,
5092 size_t length = 0, size_t original_choice_count = 0,
5093 size_t original_choice = 0)
5094 : path(ast.path), line(ast.line), column(ast.column), name(ast.name),
5100 token(ast.token), nodes(ast.nodes), parent(ast.parent) {}
5101
// Source location of the node; a null `path` argument is stored as "".
5102 const std::string path;
5103 const size_t line = 1;
5104 const size_t column = 1;
5105
// Rule name and matched range, followed by the choice information and the
// "original" counterparts.
5106 const std::string name;
5107 size_t position;
5108 size_t length;
5109 const size_t choice_count;
5110 const size_t choice;
5111 const std::string original_name;
5113 const size_t original_choice;
5114 const unsigned int tag;
5115 const unsigned int original_tag;
5116
// Token payload; token_to_string() asserts `is_token`.
5117 const bool is_token;
5118 const std::string_view token;
5119
// Children are owned through shared_ptr; the parent link is weak.
5120 std::vector<std::shared_ptr<AstBase<Annotation>>> nodes;
5121 std::weak_ptr<AstBase<Annotation>> parent;
5122
// Returns a copy of the token text. Asserts that this node is a token.
5123 std::string token_to_string() const {
5124 assert(is_token);
5125 return std::string(token);
5126 }
5127
// Converts the token text to a number of type T via token_to_number_.
5128 template <typename T> T token_to_number() const {
5129 return token_to_number_<T>(token);
5130 }
5131 };
5132
// Appends a textual rendering of the tree rooted at `ptr` to `s`.
//
// Each node occupies one line: indentation for `level`, then "- label (token)"
// for token nodes or "+ label" for inner nodes. The label is the original
// rule name, followed by "/<choice>" when the node has an original choice
// count, and by "[<name>]" when the current name differs from the original.
// When `fn` is set, its result for the node (called with level + 1) is
// appended right after the node's line. Children follow at level + 1.
template <typename T>
void ast_to_s_core(const std::shared_ptr<T> &ptr, std::string &s, int level,
                   std::function<std::string(const T &ast, int level)> fn) {
  const T &node = *ptr;

  // Indentation: one step per nesting level.
  for (int remaining = level; remaining > 0; --remaining) {
    s += " ";
  }

  // Build the display label.
  std::string label = node.original_name;
  if (node.original_choice_count > 0) {
    label += "/";
    label += std::to_string(node.original_choice);
  }
  if (node.name != node.original_name) {
    label += "[";
    label += node.name;
    label += "]";
  }

  if (node.is_token) {
    s += "- ";
    s += label;
    s += " (";
    s += node.token;
    s += ")\n";
  } else {
    s += "+ ";
    s += label;
    s += "\n";
  }

  const int child_level = level + 1;
  if (fn) { s += fn(node, child_level); }

  for (const auto &child : node.nodes) {
    ast_to_s_core(child, s, child_level, fn);
  }
}
5157
// Renders the tree rooted at `ptr` as text and returns it.
// `fn`, when provided, is invoked for every node and its result is inserted
// after that node's line; see ast_to_s_core for the line format.
template <typename T>
std::string
ast_to_s(const std::shared_ptr<T> &ptr,
         std::function<std::string(const T &ast, int level)> fn = nullptr) {
  std::string rendered;
  ast_to_s_core(ptr, rendered, 0, fn);
  return rendered;
}
5166
// mode:  true  -> every node is a collapse candidate except those named in
//                 `rules`;
//        false -> only the nodes named in `rules` are collapse candidates.
// rules: rule names that act as exceptions to (or as the whole of) the
//        candidate set, depending on `mode`.
5168 AstOptimizer(bool mode, const std::vector<std::string> &rules = {})
5169 : mode_(mode), rules_(rules) {}
5170
5171 template <typename T>
5172 std::shared_ptr<T> optimize(std::shared_ptr<T> original,
5173 std::shared_ptr<T> parent = nullptr) {
5174 auto found =
5175 std::find(rules_.begin(), rules_.end(), original->name) != rules_.end();
5176 auto opt = mode_ ? !found : found;
5177
5178 if (opt && original->nodes.size() == 1) {
5179 auto child = optimize(original->nodes[0], parent);
5180 auto ast = std::make_shared<T>(*child, original->name.data(),
5181 original->position, original->length,
5182 original->choice_count, original->choice);
5183 for (auto &node : ast->nodes) {
5184 node->parent = ast;
5185 }
5186 return ast;
5187 }
5188
5189 auto ast = std::make_shared<T>(*original);
5190 ast->parent = parent;
5191 ast->nodes.clear();
5192 for (const auto &node : original->nodes) {
5193 auto child = optimize(node, ast);
5194 ast->nodes.push_back(child);
5195 }
5196 return ast;
5197 }
5198
5199 private:
// Selects how `rules_` is interpreted in optimize(): with true, listed names
// are excluded from collapsing; with false, only listed names are collapsed.
5200 const bool mode_;
// Rule names consulted by optimize().
5201 const std::vector<std::string> rules_;
5202};
5203
// Empty annotation type: AstBase<EmptyType> adds no user members to a node.
5204 struct EmptyType {};
// Default AST node type (also the default template argument of
// add_ast_action).
5205 using Ast = AstBase<EmptyType>;
5206
5207template <typename T = Ast> void add_ast_action(Definition &rule) {
5208 rule.action = [&](const SemanticValues &vs) {
5209 auto line = vs.line_info();
5210
5211 if (rule.is_token()) {
5212 return std::make_shared<T>(
5213 vs.path, line.first, line.second, rule.name.data(), vs.token(),
5214 std::distance(vs.ss, vs.sv().data()), vs.sv().length(),
5215 vs.choice_count(), vs.choice());
5216 }
5217
5218 auto ast =
5219 std::make_shared<T>(vs.path, line.first, line.second, rule.name.data(),
5220 vs.transform<std::shared_ptr<T>>(),
5221 std::distance(vs.ss, vs.sv().data()),
5222 vs.sv().length(), vs.choice_count(), vs.choice());
5223
5224 for (auto &node : ast->nodes) {
5225 node->parent = ast;
5226 }
5227 return ast;
5228 };
5229}
5230
// Preprocessor helpers.
// PEG_EXPAND forces one more expansion pass over its arguments.
// PEG_CONCAT pastes two tokens as written; PEG_CONCAT2 expands its arguments
// first and then pastes them.
5231 #define PEG_EXPAND(...) __VA_ARGS__
5232 #define PEG_CONCAT(a, b) a##b
5233 #define PEG_CONCAT2(a, b) PEG_CONCAT(a, b)
5234
// PEG_PICK expands to its 101st argument (a100) and discards the rest.
5235 #define PEG_PICK( \
5236 a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, \
5237 a17, a18, a19, a20, a21, a22, a23, a24, a25, a26, a27, a28, a29, a30, a31, \
5238 a32, a33, a34, a35, a36, a37, a38, a39, a40, a41, a42, a43, a44, a45, a46, \
5239 a47, a48, a49, a50, a51, a52, a53, a54, a55, a56, a57, a58, a59, a60, a61, \
5240 a62, a63, a64, a65, a66, a67, a68, a69, a70, a71, a72, a73, a74, a75, a76, \
5241 a77, a78, a79, a80, a81, a82, a83, a84, a85, a86, a87, a88, a89, a90, a91, \
5242 a92, a93, a94, a95, a96, a97, a98, a99, a100, ...) \
5243 a100
5244
// PEG_COUNT(args...) expands to the number of arguments (1 to 100).
// The arguments are followed by the descending list 100..0 and passed to
// PEG_PICK; k leading arguments shift the list so that the value k lands in
// the 101st position.
5245 #define PEG_COUNT(...) \
5246 PEG_EXPAND(PEG_PICK( \
5247 __VA_ARGS__, 100, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, \
5248 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, \
5249 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \
5250 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, \
5251 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, \
5252 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0))
5253
// PEG_DEF_1(r) declares a peg::Definition variable named `r`, sets its name
// to the identifier's spelling, and installs the default AST-building action.
5254 #define PEG_DEF_1(r) \
5255 peg::Definition r; \
5256 r.name = #r; \
5257 peg::add_ast_action(r);
5258
// PEG_DEF_N(r1, ...) for N >= 2: applies PEG_DEF_1 to the first name and
// hands the remaining N-1 names to PEG_DEF_(N-1). Each step is wrapped in
// PEG_EXPAND so that the nested macro call is expanded again.
5259 #define PEG_DEF_2(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_1(__VA_ARGS__))
5260 #define PEG_DEF_3(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_2(__VA_ARGS__))
5261 #define PEG_DEF_4(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_3(__VA_ARGS__))
5262 #define PEG_DEF_5(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_4(__VA_ARGS__))
5263 #define PEG_DEF_6(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_5(__VA_ARGS__))
5264 #define PEG_DEF_7(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_6(__VA_ARGS__))
5265 #define PEG_DEF_8(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_7(__VA_ARGS__))
5266 #define PEG_DEF_9(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_8(__VA_ARGS__))
5267 #define PEG_DEF_10(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_9(__VA_ARGS__))
5268 #define PEG_DEF_11(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_10(__VA_ARGS__))
5269 #define PEG_DEF_12(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_11(__VA_ARGS__))
5270 #define PEG_DEF_13(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_12(__VA_ARGS__))
5271 #define PEG_DEF_14(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_13(__VA_ARGS__))
5272 #define PEG_DEF_15(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_14(__VA_ARGS__))
5273 #define PEG_DEF_16(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_15(__VA_ARGS__))
5274 #define PEG_DEF_17(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_16(__VA_ARGS__))
5275 #define PEG_DEF_18(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_17(__VA_ARGS__))
5276 #define PEG_DEF_19(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_18(__VA_ARGS__))
5277 #define PEG_DEF_20(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_19(__VA_ARGS__))
5278 #define PEG_DEF_21(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_20(__VA_ARGS__))
5279 #define PEG_DEF_22(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_21(__VA_ARGS__))
5280 #define PEG_DEF_23(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_22(__VA_ARGS__))
5281 #define PEG_DEF_24(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_23(__VA_ARGS__))
5282 #define PEG_DEF_25(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_24(__VA_ARGS__))
5283 #define PEG_DEF_26(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_25(__VA_ARGS__))
5284 #define PEG_DEF_27(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_26(__VA_ARGS__))
5285 #define PEG_DEF_28(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_27(__VA_ARGS__))
5286 #define PEG_DEF_29(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_28(__VA_ARGS__))
5287 #define PEG_DEF_30(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_29(__VA_ARGS__))
5288 #define PEG_DEF_31(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_30(__VA_ARGS__))
5289 #define PEG_DEF_32(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_31(__VA_ARGS__))
5290 #define PEG_DEF_33(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_32(__VA_ARGS__))
5291 #define PEG_DEF_34(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_33(__VA_ARGS__))
5292 #define PEG_DEF_35(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_34(__VA_ARGS__))
5293 #define PEG_DEF_36(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_35(__VA_ARGS__))
5294 #define PEG_DEF_37(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_36(__VA_ARGS__))
5295 #define PEG_DEF_38(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_37(__VA_ARGS__))
5296 #define PEG_DEF_39(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_38(__VA_ARGS__))
5297 #define PEG_DEF_40(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_39(__VA_ARGS__))
5298 #define PEG_DEF_41(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_40(__VA_ARGS__))
5299 #define PEG_DEF_42(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_41(__VA_ARGS__))
5300 #define PEG_DEF_43(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_42(__VA_ARGS__))
5301 #define PEG_DEF_44(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_43(__VA_ARGS__))
5302 #define PEG_DEF_45(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_44(__VA_ARGS__))
5303 #define PEG_DEF_46(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_45(__VA_ARGS__))
5304 #define PEG_DEF_47(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_46(__VA_ARGS__))
5305 #define PEG_DEF_48(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_47(__VA_ARGS__))
5306 #define PEG_DEF_49(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_48(__VA_ARGS__))
5307 #define PEG_DEF_50(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_49(__VA_ARGS__))
5308 #define PEG_DEF_51(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_50(__VA_ARGS__))
5309 #define PEG_DEF_52(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_51(__VA_ARGS__))
5310 #define PEG_DEF_53(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_52(__VA_ARGS__))
5311 #define PEG_DEF_54(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_53(__VA_ARGS__))
5312 #define PEG_DEF_55(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_54(__VA_ARGS__))
5313 #define PEG_DEF_56(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_55(__VA_ARGS__))
5314 #define PEG_DEF_57(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_56(__VA_ARGS__))
5315 #define PEG_DEF_58(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_57(__VA_ARGS__))
5316 #define PEG_DEF_59(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_58(__VA_ARGS__))
5317 #define PEG_DEF_60(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_59(__VA_ARGS__))
5318 #define PEG_DEF_61(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_60(__VA_ARGS__))
5319 #define PEG_DEF_62(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_61(__VA_ARGS__))
5320 #define PEG_DEF_63(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_62(__VA_ARGS__))
5321 #define PEG_DEF_64(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_63(__VA_ARGS__))
5322 #define PEG_DEF_65(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_64(__VA_ARGS__))
5323 #define PEG_DEF_66(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_65(__VA_ARGS__))
5324 #define PEG_DEF_67(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_66(__VA_ARGS__))
5325 #define PEG_DEF_68(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_67(__VA_ARGS__))
5326 #define PEG_DEF_69(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_68(__VA_ARGS__))
5327 #define PEG_DEF_70(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_69(__VA_ARGS__))
5328 #define PEG_DEF_71(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_70(__VA_ARGS__))
5329 #define PEG_DEF_72(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_71(__VA_ARGS__))
5330 #define PEG_DEF_73(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_72(__VA_ARGS__))
5331 #define PEG_DEF_74(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_73(__VA_ARGS__))
5332 #define PEG_DEF_75(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_74(__VA_ARGS__))
5333 #define PEG_DEF_76(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_75(__VA_ARGS__))
5334 #define PEG_DEF_77(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_76(__VA_ARGS__))
5335 #define PEG_DEF_78(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_77(__VA_ARGS__))
5336 #define PEG_DEF_79(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_78(__VA_ARGS__))
5337 #define PEG_DEF_80(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_79(__VA_ARGS__))
5338 #define PEG_DEF_81(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_80(__VA_ARGS__))
5339 #define PEG_DEF_82(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_81(__VA_ARGS__))
5340 #define PEG_DEF_83(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_82(__VA_ARGS__))
5341 #define PEG_DEF_84(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_83(__VA_ARGS__))
5342 #define PEG_DEF_85(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_84(__VA_ARGS__))
5343 #define PEG_DEF_86(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_85(__VA_ARGS__))
5344 #define PEG_DEF_87(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_86(__VA_ARGS__))
5345 #define PEG_DEF_88(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_87(__VA_ARGS__))
5346#define PEG_DEF_89(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_88(__VA_ARGS__))
5347#define PEG_DEF_90(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_89(__VA_ARGS__))
5348#define PEG_DEF_91(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_90(__VA_ARGS__))
5349#define PEG_DEF_92(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_91(__VA_ARGS__))
5350#define PEG_DEF_93(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_92(__VA_ARGS__))
5351#define PEG_DEF_94(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_93(__VA_ARGS__))
5352#define PEG_DEF_95(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_94(__VA_ARGS__))
5353#define PEG_DEF_96(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_95(__VA_ARGS__))
5354#define PEG_DEF_97(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_96(__VA_ARGS__))
5355#define PEG_DEF_98(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_97(__VA_ARGS__))
5356#define PEG_DEF_99(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_98(__VA_ARGS__))
5357#define PEG_DEF_100(r1, ...) PEG_EXPAND(PEG_DEF_1(r1) PEG_DEF_99(__VA_ARGS__))
5358
// Dispatches to the PEG_DEF_N macro whose N is produced by
// PEG_COUNT(__VA_ARGS__) and invokes it with the same argument list.
// PEG_EXPAND, PEG_CONCAT2 and PEG_COUNT are defined earlier in this file.
// NOTE(review): PEG_COUNT is presumably an argument counter and PEG_CONCAT2 a
// token-pasting helper; the PEG_DEF_N ladder visible here stops at N = 100, so
// longer argument lists are presumably unsupported - confirm.
#define AST_DEFINITIONS(...)                                                   \
  PEG_EXPAND(PEG_CONCAT2(PEG_DEF_, PEG_COUNT(__VA_ARGS__))(__VA_ARGS__))
5361
5362/*-----------------------------------------------------------------------------
5363 * parser
5364 *---------------------------------------------------------------------------*/
5365
5366class parser {
5367public:
5368 parser() = default;
5369
5370 parser(const char *s, size_t n, const Rules &rules,
5371 std::string_view start = {}) {
5372 load_grammar(s, n, rules, start);
5373 }
5374
5375 parser(const char *s, size_t n, std::string_view start = {})
5376 : parser(s, n, Rules(), start) {}
5377
5378 parser(std::string_view sv, const Rules &rules, std::string_view start = {})
5379 : parser(sv.data(), sv.size(), rules, start) {}
5380
5381 parser(std::string_view sv, std::string_view start = {})
5382 : parser(sv.data(), sv.size(), Rules(), start) {}
5383
5384#if defined(__cpp_lib_char8_t)
5385 parser(std::u8string_view sv, const Rules &rules, std::string_view start = {})
5386 : parser(reinterpret_cast<const char *>(sv.data()), sv.size(), rules,
5387 start) {}
5388
5389 parser(std::u8string_view sv, std::string_view start = {})
5390 : parser(reinterpret_cast<const char *>(sv.data()), sv.size(), Rules(),
5391 start) {}
5392#endif
5393
5394 operator bool() const { return grammar_ != nullptr; }
5395
5396 bool load_grammar(const char *s, size_t n, const Rules &rules,
5397 std::string_view start = {}) {
5398 auto cxt =
5399 ParserGenerator::parse(s, n, rules, log_, start, enableLeftRecursion_);
5400 grammar_ = cxt.grammar;
5401 start_ = cxt.start;
5402 enablePackratParsing_ = cxt.enablePackratParsing;
5403 return grammar_ != nullptr;
5404 }
5405
5406 bool load_grammar(const char *s, size_t n, std::string_view start = {}) {
5407 return load_grammar(s, n, Rules(), start);
5408 }
5409
5410 bool load_grammar(std::string_view sv, const Rules &rules,
5411 std::string_view start = {}) {
5412 return load_grammar(sv.data(), sv.size(), rules, start);
5413 }
5414
5415 bool load_grammar(std::string_view sv, std::string_view start = {}) {
5416 return load_grammar(sv.data(), sv.size(), Rules(), start);
5417 }
5418
5419 bool parse_n(const char *s, size_t n, const char *path = nullptr) const {
5420 if (grammar_ != nullptr) {
5421 const auto &rule = (*grammar_)[start_];
5422 auto result = rule.parse(s, n, path, log_);
5423 return post_process(s, n, result);
5424 }
5425 return false;
5426 }
5427
5428 bool parse_n(const char *s, size_t n, std::any &dt,
5429 const char *path = nullptr) const {
5430 if (grammar_ != nullptr) {
5431 const auto &rule = (*grammar_)[start_];
5432 auto result = rule.parse(s, n, dt, path, log_);
5433 return post_process(s, n, result);
5434 }
5435 return false;
5436 }
5437
5438 template <typename T>
5439 bool parse_n(const char *s, size_t n, T &val,
5440 const char *path = nullptr) const {
5441 if (grammar_ != nullptr) {
5442 const auto &rule = (*grammar_)[start_];
5443 auto result = rule.parse_and_get_value(s, n, val, path, log_);
5444 return post_process(s, n, result);
5445 }
5446 return false;
5447 }
5448
5449 template <typename T>
5450 bool parse_n(const char *s, size_t n, std::any &dt, T &val,
5451 const char *path = nullptr) const {
5452 if (grammar_ != nullptr) {
5453 const auto &rule = (*grammar_)[start_];
5454 auto result = rule.parse_and_get_value(s, n, dt, val, path, log_);
5455 return post_process(s, n, result);
5456 }
5457 return false;
5458 }
5459
5460 bool parse(std::string_view sv, const char *path = nullptr) const {
5461 return parse_n(sv.data(), sv.size(), path);
5462 }
5463
5464 bool parse(std::string_view sv, std::any &dt,
5465 const char *path = nullptr) const {
5466 return parse_n(sv.data(), sv.size(), dt, path);
5467 }
5468
5469 template <typename T>
5470 bool parse(std::string_view sv, T &val, const char *path = nullptr) const {
5471 return parse_n(sv.data(), sv.size(), val, path);
5472 }
5473
5474 template <typename T>
5475 bool parse(std::string_view sv, std::any &dt, T &val,
5476 const char *path = nullptr) const {
5477 return parse_n(sv.data(), sv.size(), dt, val, path);
5478 }
5479
5480#if defined(__cpp_lib_char8_t)
5481 bool parse(std::u8string_view sv, const char *path = nullptr) const {
5482 return parse_n(reinterpret_cast<const char *>(sv.data()), sv.size(), path);
5483 }
5484
5485 bool parse(std::u8string_view sv, std::any &dt,
5486 const char *path = nullptr) const {
5487 return parse_n(reinterpret_cast<const char *>(sv.data()), sv.size(), dt,
5488 path);
5489 }
5490
5491 template <typename T>
5492 bool parse(std::u8string_view sv, T &val, const char *path = nullptr) const {
5493 return parse_n(reinterpret_cast<const char *>(sv.data()), sv.size(), val,
5494 path);
5495 }
5496
5497 template <typename T>
5498 bool parse(std::u8string_view sv, std::any &dt, T &val,
5499 const char *path = nullptr) const {
5500 return parse_n(reinterpret_cast<const char *>(sv.data()), sv.size(), dt,
5501 val, path);
5502 }
5503#endif
5504
5505 Definition &operator[](const char *s) { return (*grammar_)[s]; }
5506
5507 const Definition &operator[](const char *s) const { return (*grammar_)[s]; }
5508
5509 const Grammar &get_grammar() const { return *grammar_; }
5510
5512 if (grammar_ != nullptr) {
5513 auto &rule = (*grammar_)[start_];
5514 rule.eoi_check = false;
5515 }
5516 }
5517
5518 void enable_left_recursion(bool enable = true) {
5519 enableLeftRecursion_ = enable;
5520 }
5521
5523 if (grammar_ != nullptr) {
5524 auto &rule = (*grammar_)[start_];
5525 rule.enablePackratParsing = enablePackratParsing_;
5526 }
5527 }
5528
5529 void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave) {
5530 if (grammar_ != nullptr) {
5531 auto &rule = (*grammar_)[start_];
5532 rule.tracer_enter = tracer_enter;
5533 rule.tracer_leave = tracer_leave;
5534 }
5535 }
5536
5537 void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave,
5538 TracerStartOrEnd tracer_start,
5539 TracerStartOrEnd tracer_end) {
5540 if (grammar_ != nullptr) {
5541 auto &rule = (*grammar_)[start_];
5542 rule.tracer_enter = tracer_enter;
5543 rule.tracer_leave = tracer_leave;
5544 rule.tracer_start = tracer_start;
5545 rule.tracer_end = tracer_end;
5546 }
5547 }
5548
5549 void set_verbose_trace(bool verbose_trace) {
5550 if (grammar_ != nullptr) {
5551 auto &rule = (*grammar_)[start_];
5552 rule.verbose_trace = verbose_trace;
5553 }
5554 }
5555
5556 template <typename T = Ast> parser &enable_ast() {
5557 for (auto &[_, rule] : *grammar_) {
5558 if (!rule.action) { add_ast_action<T>(rule); }
5559 }
5560 return *this;
5561 }
5562
5563 template <typename T>
5564 std::shared_ptr<T> optimize_ast(std::shared_ptr<T> ast,
5565 bool opt_mode = true) const {
5566 return AstOptimizer(opt_mode, get_no_ast_opt_rules()).optimize(ast);
5567 }
5568
5569 void set_logger(Log log) { log_ = log; }
5570
5572 std::function<void(size_t line, size_t col, const std::string &msg)>
5573 log) {
5574 log_ = [log](size_t line, size_t col, const std::string &msg,
5575 const std::string & /*rule*/) { log(line, col, msg); };
5576 }
5577
5578private:
5579 bool post_process(const char *s, size_t n, Definition::Result &r) const {
5580 if (log_ && !r.ret) { r.error_info.output_log(log_, s, n); }
5581 return r.ret && !r.recovered;
5582 }
5583
5584 std::vector<std::string> get_no_ast_opt_rules() const {
5585 std::vector<std::string> rules;
5586 for (auto &[name, rule] : *grammar_) {
5587 if (rule.no_ast_opt) { rules.push_back(name); }
5588 }
5589 return rules;
5590 }
5591
5592 std::shared_ptr<Grammar> grammar_;
5593 std::string start_;
5597};
5598
5599/*-----------------------------------------------------------------------------
5600 * enable_tracing
5601 *---------------------------------------------------------------------------*/
5602
5603inline void enable_tracing(parser &parser, std::ostream &os) {
5605 [&](auto &ope, auto s, auto, auto &, auto &c, auto &, auto &trace_data) {
5606 auto prev_pos = std::any_cast<size_t>(trace_data);
5607 auto pos = static_cast<size_t>(s - c.s);
5608 auto backtrack = (pos < prev_pos ? "*" : "");
5609 std::string indent;
5610 auto level = c.trace_ids.size() - 1;
5611 while (level--) {
5612 indent += "│";
5613 }
5614 std::string name;
5615 {
5616 name = peg::TraceOpeName::get(const_cast<peg::Ope &>(ope));
5617
5618 auto lit = dynamic_cast<const peg::LiteralString *>(&ope);
5619 if (lit) { name += " '" + peg::escape_characters(lit->lit_) + "'"; }
5620 }
5621 os << "E " << pos + 1 << backtrack << "\t" << indent << "┌" << name
5622 << " #" << c.trace_ids.back() << std::endl;
5623 trace_data = static_cast<size_t>(pos);
5624 },
5625 [&](auto &ope, auto s, auto, auto &sv, auto &c, auto &, auto len,
5626 auto &) {
5627 auto pos = static_cast<size_t>(s - c.s);
5628 if (len != static_cast<size_t>(-1)) { pos += len; }
5629 std::string indent;
5630 auto level = c.trace_ids.size() - 1;
5631 while (level--) {
5632 indent += "│";
5633 }
5634 auto ret = len != static_cast<size_t>(-1) ? "└o " : "└x ";
5635 auto name = peg::TraceOpeName::get(const_cast<peg::Ope &>(ope));
5636 std::stringstream choice;
5637 if (sv.choice_count() > 0) {
5638 choice << " " << sv.choice() << "/" << sv.choice_count();
5639 }
5640 std::string token;
5641 if (!sv.tokens.empty()) {
5642 token += ", token '";
5643 token += sv.tokens[0];
5644 token += "'";
5645 }
5646 std::string matched;
5647 if (peg::success(len) &&
5648 peg::TokenChecker::is_token(const_cast<peg::Ope &>(ope))) {
5649 matched = ", match '" + peg::escape_characters(s, len) + "'";
5650 }
5651 os << "L " << pos + 1 << "\t" << indent << ret << name << " #"
5652 << c.trace_ids.back() << choice.str() << token << matched
5653 << std::endl;
5654 },
5655 [&](auto &trace_data) { trace_data = static_cast<size_t>(0); },
5656 [&](auto &) {});
5657}
5658
5659/*-----------------------------------------------------------------------------
5660 * enable_profiling
5661 *---------------------------------------------------------------------------*/
5662
5663inline void enable_profiling(parser &parser, std::ostream &os) {
5664 struct Stats {
5665 struct Item {
5666 std::string name;
5667 size_t success;
5668 size_t fail;
5669 };
5670 std::vector<Item> items;
5671 std::map<std::string, size_t> index;
5672 size_t total = 0;
5673 std::chrono::steady_clock::time_point start;
5674 };
5675
5677 [&](auto &ope, auto, auto, auto &, auto &, auto &, std::any &trace_data) {
5678 if (auto holder = dynamic_cast<const peg::Holder *>(&ope)) {
5679 auto &stats = *std::any_cast<Stats *>(trace_data);
5680
5681 auto &name = holder->name();
5682 if (stats.index.find(name) == stats.index.end()) {
5683 stats.index[name] = stats.index.size();
5684 stats.items.push_back({name, 0, 0});
5685 }
5686 stats.total++;
5687 }
5688 },
5689 [&](auto &ope, auto, auto, auto &, auto &, auto &, auto len,
5690 std::any &trace_data) {
5691 if (auto holder = dynamic_cast<const peg::Holder *>(&ope)) {
5692 auto &stats = *std::any_cast<Stats *>(trace_data);
5693
5694 auto &name = holder->name();
5695 auto index = stats.index[name];
5696 auto &stat = stats.items[index];
5697 if (len != static_cast<size_t>(-1)) {
5698 stat.success++;
5699 } else {
5700 stat.fail++;
5701 }
5702
5703 if (index == 0) {
5704 auto end = std::chrono::steady_clock::now();
5705 auto nano = std::chrono::duration_cast<std::chrono::microseconds>(
5706 end - stats.start)
5707 .count();
5708 auto sec = nano / 1000000.0;
5709 os << "duration: " << sec << "s (" << nano << "µs)" << std::endl
5710 << std::endl;
5711
5712 char buff[BUFSIZ];
5713 size_t total_success = 0;
5714 size_t total_fail = 0;
5715 for (auto &[name, success, fail] : stats.items) {
5716 total_success += success;
5717 total_fail += fail;
5718 }
5719
5720 os << " id total % success fail "
5721 "definition"
5722 << std::endl;
5723
5724 auto grand_total = total_success + total_fail;
5725 snprintf(buff, BUFSIZ, "%4s %10zu %5s %10zu %10zu %s", "",
5726 grand_total, "", total_success, total_fail,
5727 "Total counters");
5728 os << buff << std::endl;
5729
5730 snprintf(buff, BUFSIZ, "%4s %10s %5s %10.2f %10.2f %s", "", "",
5731 "", total_success * 100.0 / grand_total,
5732 total_fail * 100.0 / grand_total, "% success/fail");
5733 os << buff << std::endl << std::endl;
5734 ;
5735
5736 size_t id = 0;
5737 for (auto &[name, success, fail] : stats.items) {
5738 auto total = success + fail;
5739 auto ratio = total * 100.0 / stats.total;
5740 snprintf(buff, BUFSIZ, "%4zu %10zu %5.2f %10zu %10zu %s", id,
5741 total, ratio, success, fail, name.c_str());
5742 os << buff << std::endl;
5743 id++;
5744 }
5745 }
5746 }
5747 },
5748 [&](auto &trace_data) {
5749 auto stats = new Stats{};
5750 stats->start = std::chrono::steady_clock::now();
5751 trace_data = stats;
5752 },
5753 [&](auto &trace_data) {
5754 auto stats = std::any_cast<Stats *>(trace_data);
5755 delete stats;
5756 });
5757}
5758} // namespace peg
Definition peglib.h:641
void operator=(F fn)
Definition peglib.h:646
Action()=default
Action(Action &&rhs)=default
Fty make_adaptor(F fn)
Definition peglib.h:660
std::function< std::any(SemanticValues &vs, std::any &dt, const std::any &predicate_data)> Fty
Definition peglib.h:657
std::any operator()(SemanticValues &vs, std::any &dt, const std::any &predicate_data) const
Definition peglib.h:651
Fty fn_
Definition peglib.h:676
Action(F fn)
Definition peglib.h:645
Action & operator=(const Action &rhs)=default
Definition peglib.h:1374
AndPredicate(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1376
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1378
std::shared_ptr< Ope > ope_
Definition peglib.h:1392
void accept(Visitor &v) override
Definition peglib.h:3582
Definition peglib.h:1586
size_t parse_core(const char *s, size_t n, SemanticValues &, Context &c, std::any &) const override
Definition peglib.h:1588
void accept(Visitor &v) override
Definition peglib.h:3588
Definition peglib.h:1768
std::string name_
Definition peglib.h:1779
BackReference(const std::string &name)
Definition peglib.h:1772
BackReference(std::string &&name)
Definition peglib.h:1770
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:3420
void accept(Visitor &v) override
Definition peglib.h:3598
Definition peglib.h:1601
void accept(Visitor &v) override
Definition peglib.h:3589
CaptureScope(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1603
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1605
std::shared_ptr< Ope > ope_
Definition peglib.h:1615
Definition peglib.h:1618
MatchAction match_action_
Definition peglib.h:1635
std::function< void(const char *s, size_t n, Context &c)> MatchAction
Definition peglib.h:1620
std::shared_ptr< Ope > ope_
Definition peglib.h:1634
void accept(Visitor &v) override
Definition peglib.h:3590
Capture(const std::shared_ptr< Ope > &ope, MatchAction ma)
Definition peglib.h:1622
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1625
Definition peglib.h:1458
bool negated_
Definition peglib.h:1553
bool ignore_case_
Definition peglib.h:1554
std::vector< std::pair< char32_t, char32_t > > ranges_
Definition peglib.h:1552
CharacterClass(const std::string &s, bool negated, bool ignore_case)
Definition peglib.h:1460
void setup_ascii_bitset()
Definition peglib.h:1534
size_t parse_core(const char *s, size_t n, SemanticValues &, Context &c, std::any &) const override
Definition peglib.h:1487
bool in_range(const std::pair< char32_t, char32_t > &range, char32_t cp) const
Definition peglib.h:1524
CharacterClass(const std::vector< std::pair< char32_t, char32_t > > &ranges, bool negated, bool ignore_case)
Definition peglib.h:1480
bool is_ascii_only_
Definition peglib.h:1556
bool is_ascii_only() const
Definition peglib.h:1520
friend struct ComputeFirstSet
Definition peglib.h:1518
const std::bitset< 256 > & ascii_bitset() const
Definition peglib.h:1521
void accept(Visitor &v) override
Definition peglib.h:3586
std::bitset< 256 > ascii_bitset_
Definition peglib.h:1555
Definition peglib.h:1559
Character(char32_t ch)
Definition peglib.h:1561
char32_t ch_
Definition peglib.h:1582
void accept(Visitor &v) override
Definition peglib.h:3587
size_t parse_core(const char *s, size_t n, SemanticValues &, Context &c, std::any &) const override
Definition peglib.h:1563
Definition peglib.h:810
size_t in_token_boundary_count
Definition peglib.h:825
std::vector< Definition * > rule_stack
Definition peglib.h:822
void trace_leave(const Ope &ope, const char *a_s, size_t n, const SemanticValues &vs, std::any &dt, size_t len)
Definition peglib.h:3054
std::vector< std::pair< std::string_view, std::string > > capture_entries
Definition peglib.h:832
std::once_flag source_line_index_init_
Definition peglib.h:1073
std::shared_ptr< Ope > wordOpe
Definition peglib.h:830
std::set< std::pair< const Definition *, const char * > > lr_active_seeds
Definition peglib.h:857
void trace_enter(const Ope &ope, const char *a_s, size_t n, const SemanticValues &vs, std::any &dt)
Definition peglib.h:3048
TracerEnter tracer_enter
Definition peglib.h:882
std::vector< bool > cut_stack
Definition peglib.h:834
size_t skip_whitespace(const char *a_s, size_t n, SemanticValues &vs, std::any &dt)
Definition peglib.h:3005
const std::vector< std::shared_ptr< Ope > > & top_args() const
Definition peglib.h:1004
std::vector< bool > cache_success
Definition peglib.h:839
std::set< const Definition * > lr_refs_hit
Definition peglib.h:853
Context operator=(const Context &)=delete
std::shared_ptr< Ope > whitespaceOpe
Definition peglib.h:827
void clear_packrat_cache(const char *pos, size_t def_id)
Definition peglib.h:859
std::map< std::pair< size_t, size_t >, std::tuple< size_t, std::any > > cache_values
Definition peglib.h:842
const size_t def_count
Definition peglib.h:836
std::map< std::pair< const Definition *, const char * >, LRMemo > lr_memo
Definition peglib.h:849
Context(Context &&)=delete
Context(const char *path, const char *s, size_t l, size_t def_count, std::shared_ptr< Ope > whitespaceOpe, std::shared_ptr< Ope > wordOpe, bool enablePackratParsing, TracerEnter tracer_enter, TracerLeave tracer_leave, std::any trace_data, bool verbose_trace, Log log)
Definition peglib.h:889
const bool verbose_trace
Definition peglib.h:885
Log log
Definition peglib.h:887
std::vector< PackratStats > * packrat_stats
Definition peglib.h:918
const char * s
Definition peglib.h:813
size_t next_trace_id
Definition peglib.h:1070
SemanticValues & push_semantic_values_scope()
Definition peglib.h:973
void pop_semantic_values_scope()
Definition peglib.h:995
bool is_traceable(const Ope &ope) const
Definition peglib.h:3061
const char * path
Definition peglib.h:812
std::any trace_data
Definition peglib.h:884
TracerLeave tracer_leave
Definition peglib.h:883
Snapshot snapshot(const SemanticValues &vs) const
Definition peglib.h:1019
std::vector< size_t > trace_ids
Definition peglib.h:1071
void write_packrat_cache(const char *pos, size_t def_id, size_t len, const std::any &val)
Definition peglib.h:870
size_t value_stack_size
Definition peglib.h:820
const size_t l
Definition peglib.h:814
void push_args(std::vector< std::shared_ptr< Ope > > &&args)
Definition peglib.h:998
void pop_args()
Definition peglib.h:1002
ErrorInfo error_info
Definition peglib.h:816
std::vector< bool > cache_registered
Definition peglib.h:838
bool in_whitespace
Definition peglib.h:828
std::pair< size_t, size_t > line_info(const char *cur) const
Definition peglib.h:1051
std::vector< size_t > source_line_index
Definition peglib.h:1074
bool recovered
Definition peglib.h:817
std::vector< std::shared_ptr< SemanticValues > > value_stack
Definition peglib.h:819
~Context()
Definition peglib.h:904
void packrat(const char *a_s, size_t def_id, size_t &len, std::any &val, T fn)
Definition peglib.h:926
const bool enablePackratParsing
Definition peglib.h:837
void rollback(SemanticValues &vs, const Snapshot &snap)
Definition peglib.h:1024
Context(const Context &)=delete
void set_error_pos(const char *a_s, const char *literal=nullptr)
Definition peglib.h:3014
bool ignore_trace_state
Definition peglib.h:1072
const std::vector< bool > * packrat_rule_filter
Definition peglib.h:923
std::vector< std::vector< std::shared_ptr< Ope > > > args_stack
Definition peglib.h:823
Definition peglib.h:1822
void accept(Visitor &v) override
Definition peglib.h:3601
size_t parse_core(const char *, size_t, SemanticValues &, Context &c, std::any &) const override
Definition peglib.h:1824
Definition peglib.h:2575
std::shared_ptr< Ope > wordOpe
Definition peglib.h:2761
bool is_macro
Definition peglib.h:2763
bool ignoreSemanticValue
Definition peglib.h:2759
bool eoi_check
Definition peglib.h:2778
Predicate predicate
Definition peglib.h:2750
Definition & operator<=(const std::shared_ptr< Ope > &ope)
Definition peglib.h:2599
std::function< void(const Context &c, const char *s, size_t n, size_t matchlen, std::any &value, std::any &dt)> leave
Definition peglib.h:2758
void initialize_packrat_filter() const
Definition peglib.h:3935
Definition()
Definition peglib.h:2584
Result parse(const char *s, std::any &dt, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2623
bool is_left_recursive
Definition peglib.h:2766
bool disable_action
Definition peglib.h:2765
TracerEnter tracer_enter
Definition peglib.h:2769
std::vector< std::string > params
Definition peglib.h:2764
std::once_flag packrat_filter_init_
Definition peglib.h:2868
Definition & operator~()
Definition peglib.h:2730
Result parse(const char *s, size_t n, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2604
bool enablePackratParsing
Definition peglib.h:2762
std::pair< size_t, size_t > line_
Definition peglib.h:2748
friend class ParserGenerator
Definition peglib.h:2786
std::vector< bool > packrat_filter_
Definition peglib.h:2869
std::once_flag is_token_init_
Definition peglib.h:2863
TracerStartOrEnd tracer_end
Definition peglib.h:2773
std::vector< Context::PackratStats > packrat_stats_
Definition peglib.h:2782
std::once_flag definition_ids_init_
Definition peglib.h:2866
bool collect_packrat_stats
Definition peglib.h:2781
bool no_ast_opt
Definition peglib.h:2776
Definition & operator,(T fn)
Definition peglib.h:2725
friend class Reference
Definition peglib.h:2785
void operator=(Action a)
Definition peglib.h:2723
Result parse_and_get_value(const char *s, size_t n, std::any &dt, T &val, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2650
Result parse(const char *s, size_t n, std::any &dt, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2617
std::unordered_map< void *, size_t > definition_ids_
Definition peglib.h:2867
Result parse_and_get_value(const char *s, size_t n, T &val, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2630
std::once_flag assign_id_to_definition_init_
Definition peglib.h:2865
TracerLeave tracer_leave
Definition peglib.h:2770
size_t id
Definition peglib.h:2752
bool can_be_empty
Definition peglib.h:2767
std::shared_ptr< Ope > whitespaceOpe
Definition peglib.h:2760
bool is_token() const
Definition peglib.h:2739
Result parse_and_get_value(const char *s, T &val, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2643
Definition & operator=(Definition &&rhs)
bool is_token_
Definition peglib.h:2864
Definition(const Definition &rhs)
Definition peglib.h:2586
Result parse_core(const char *s, size_t n, SemanticValues &vs, std::any &dt, const char *path, Log log) const
Definition peglib.h:2803
Definition & operator=(const Definition &rhs)
void accept(Ope::Visitor &v)
Definition peglib.h:2735
bool verbose_trace
Definition peglib.h:2771
std::string name
Definition peglib.h:2746
std::shared_ptr< Holder > holder_
Definition peglib.h:2862
std::string error_message
Definition peglib.h:2775
std::function< void(const Context &c, const char *s, size_t n, std::any &dt)> enter
Definition peglib.h:2755
Result parse_and_get_value(const char *s, std::any &dt, T &val, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2662
TracerStartOrEnd tracer_start
Definition peglib.h:2772
Action action
Definition peglib.h:2753
Result parse(const char *s, const char *path=nullptr, Log log=nullptr) const
Definition peglib.h:2611
void initialize_definition_ids() const
Definition peglib.h:2791
std::shared_ptr< Ope > get_core_operator() const
Definition peglib.h:2737
Definition(const std::shared_ptr< Ope > &ope)
Definition peglib.h:2590
const char * s_
Definition peglib.h:2747
Definition peglib.h:1417
Dictionary(const std::vector< std::string > &v, bool ignore_case)
Definition peglib.h:1419
void accept(Visitor &v) override
Definition peglib.h:3584
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:3080
Trie trie_
Definition peglib.h:1429
Definition peglib.h:1701
Holder(Definition *outer)
Definition peglib.h:1703
const std::string & name() const
Definition peglib.h:3370
Definition * outer_
Definition peglib.h:1717
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:3157
const std::string & trace_name() const
Definition peglib.h:3372
void accept(Visitor &v) override
Definition peglib.h:3595
std::string trace_name_
Definition peglib.h:1719
friend class Definition
Definition peglib.h:1721
std::once_flag trace_name_init_
Definition peglib.h:1718
std::any reduce(SemanticValues &vs, std::any &dt, const std::any &predicate_data) const
Definition peglib.h:3359
std::shared_ptr< Ope > ope_
Definition peglib.h:1716
Definition peglib.h:1652
void accept(Visitor &v) override
Definition peglib.h:3592
Ignore(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1654
std::shared_ptr< Ope > ope_
Definition peglib.h:1665
size_t parse_core(const char *s, size_t n, SemanticValues &, Context &c, std::any &dt) const override
Definition peglib.h:1656
Definition peglib.h:1433
void accept(Visitor &v) override
Definition peglib.h:3585
bool ignore_case_
Definition peglib.h:1451
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:3125
std::string lower_lit_
Definition peglib.h:1452
std::string lit_
Definition peglib.h:1450
LiteralString(std::string &&s, bool ignore_case)
Definition peglib.h:1435
std::once_flag init_is_word_
Definition peglib.h:1453
bool is_word_
Definition peglib.h:1454
LiteralString(const std::string &s, bool ignore_case)
Definition peglib.h:1440
Definition peglib.h:1395
std::shared_ptr< Ope > ope_
Definition peglib.h:1414
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1399
NotPredicate(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1397
void accept(Visitor &v) override
Definition peglib.h:3583
Definition peglib.h:1080
bool is_choice_like
Definition peglib.h:1092
bool is_token_boundary
Definition peglib.h:1091
virtual ~Ope()=default
size_t parse(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const
Definition peglib.h:3069
virtual size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const =0
virtual void accept(Visitor &v)=0
std::pair< size_t, size_t > r_
Definition peglib.h:4133
std::pair< size_t, size_t > line_info() const
Definition peglib.h:4130
SyntaxErrorException(const char *what_arg, std::pair< size_t, size_t > r)
Definition peglib.h:4127
ParserContext perform_core(const char *s, size_t n, const Rules &rules, Log log, std::string requested_start, bool enable_left_recursion=true)
Definition peglib.h:4777
bool apply_precedence_instruction(Definition &rule, const PrecedenceClimbing::BinOpeInfo &info, const char *s, Log log)
Definition peglib.h:4736
void make_grammar()
Definition peglib.h:4136
Grammar g
Definition peglib.h:5064
ParserGenerator()
Definition peglib.h:4095
static bool parse_test(const char *d, const char *s)
Definition peglib.h:4074
static ParserContext parse(const char *s, size_t n, const Rules &rules, Log log, std::string_view start, bool enable_left_recursion=true)
Definition peglib.h:4066
bool detect_infiniteLoop(const Data &data, Definition &rule, const Log &log, const char *s) const
Definition peglib.h:5047
static ParserGenerator & get_instance()
Definition peglib.h:4090
void setup_actions()
Definition peglib.h:4321
Definition peglib.h:1782
std::shared_ptr< Ope > atom_
Definition peglib.h:1798
std::map< std::string_view, std::pair< size_t, char > > BinOpeInfo
Definition peglib.h:1784
PrecedenceClimbing(const std::shared_ptr< Ope > &atom, const std::shared_ptr< Ope > &binop, const BinOpeInfo &info, const Definition &rule)
Definition peglib.h:1786
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1791
const Definition & rule_
Definition peglib.h:1801
std::shared_ptr< Ope > binop_
Definition peglib.h:1799
Definition & get_reference_for_binop(Context &c) const
Definition peglib.h:3441
BinOpeInfo info_
Definition peglib.h:1800
void accept(Visitor &v) override
Definition peglib.h:3599
size_t parse_expression(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt, size_t min_prec) const
Definition peglib.h:3453
Definition peglib.h:679
Predicate()=default
bool operator()(const SemanticValues &vs, const std::any &dt, std::string &msg, std::any &predicate_data) const
Definition peglib.h:689
Predicate & operator=(const Predicate &rhs)=default
Fty make_adaptor(F fn)
Definition peglib.h:698
std::function< bool(const SemanticValues &vs, const std::any &dt, std::string &msg, std::any &predicate_data)> Fty
Definition peglib.h:695
Fty fn_
Definition peglib.h:710
Predicate(Predicate &&rhs)=default
Predicate(F fn)
Definition peglib.h:683
void operator=(F fn)
Definition peglib.h:684
Definition peglib.h:1210
PrioritizedChoice(bool for_label, const Args &...args)
Definition peglib.h:1213
size_t size() const
Definition peglib.h:1287
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1227
void accept(Visitor &v) override
Definition peglib.h:3580
bool for_label_
Definition peglib.h:1290
std::vector< std::shared_ptr< Ope > > opes_
Definition peglib.h:1289
PrioritizedChoice(const std::vector< std::shared_ptr< Ope > > &opes)
Definition peglib.h:1218
std::vector< FirstSet > first_sets_
Definition peglib.h:1291
PrioritizedChoice(std::vector< std::shared_ptr< Ope > > &&opes)
Definition peglib.h:1222
Definition peglib.h:1810
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:3530
void accept(Visitor &v) override
Definition peglib.h:3600
Recovery(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1812
std::shared_ptr< Ope > ope_
Definition peglib.h:1819
Definition peglib.h:1726
const std::string name_
Definition peglib.h:1741
Definition * rule_
Definition peglib.h:1747
std::shared_ptr< Ope > get_core_operator() const
Definition peglib.h:3416
const char * s_
Definition peglib.h:1742
void accept(Visitor &v) override
Definition peglib.h:3596
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:3378
const bool is_macro_
Definition peglib.h:1744
const std::vector< std::shared_ptr< Ope > > args_
Definition peglib.h:1745
size_t iarg_
Definition peglib.h:1748
const Grammar & grammar_
Definition peglib.h:1740
Reference(const Grammar &grammar, const std::string &name, const char *s, bool is_macro, const std::vector< std::shared_ptr< Ope > > &args)
Definition peglib.h:1728
Definition peglib.h:1294
static std::shared_ptr< Repetition > zom(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1353
static std::shared_ptr< Repetition > opt(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1363
bool is_zom() const
Definition peglib.h:1349
const std::bitset< 256 > * span_bitset_
Definition peglib.h:1370
Repetition(const std::shared_ptr< Ope > &ope, size_t min, size_t max)
Definition peglib.h:1296
std::shared_ptr< Ope > ope_
Definition peglib.h:1367
size_t max_
Definition peglib.h:1369
void accept(Visitor &v) override
Definition peglib.h:3581
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1299
size_t min_
Definition peglib.h:1368
static std::shared_ptr< Repetition > oom(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1358
Definition peglib.h:1112
Sequence(std::vector< std::shared_ptr< Ope > > &&opes)
Definition peglib.h:1118
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1120
std::unique_ptr< KeywordGuardData > kw_guard_
Definition peglib.h:1145
std::vector< std::shared_ptr< Ope > > opes_
Definition peglib.h:1141
std::optional< size_t > parse_keyword_guarded(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const
Definition peglib.h:1148
void accept(Visitor &v) override
Definition peglib.h:3579
Sequence(const Args &...args)
Definition peglib.h:1115
Sequence(const std::vector< std::shared_ptr< Ope > > &opes)
Definition peglib.h:1117
friend struct SetupFirstSets
Definition peglib.h:1144
Definition peglib.h:1638
void accept(Visitor &v) override
Definition peglib.h:3591
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:3132
std::shared_ptr< Ope > ope_
Definition peglib.h:1649
TokenBoundary(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1640
Definition peglib.h:394
size_t max_len_
Definition peglib.h:464
std::map< std::string, Info, std::less<> > dic_
Definition peglib.h:460
size_t match(const char *text, size_t text_len, size_t &id) const
Definition peglib.h:418
friend struct ComputeFirstSet
Definition peglib.h:449
Trie(const std::vector< std::string > &items, bool ignore_case)
Definition peglib.h:396
size_t size() const
Definition peglib.h:446
bool ignore_case_
Definition peglib.h:462
size_t items_count_
Definition peglib.h:463
size_t items_count() const
Definition peglib.h:447
Definition peglib.h:1671
std::function< size_t(const char *s, size_t n, SemanticValues &vs, std::any &dt)> fn_
Definition peglib.h:1682
void accept(Visitor &v) override
Definition peglib.h:3593
User(Parser fn)
Definition peglib.h:1673
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &, std::any &dt) const override
Definition peglib.h:1674
Definition peglib.h:1685
WeakHolder(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1687
void accept(Visitor &v) override
Definition peglib.h:3594
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1689
std::weak_ptr< Ope > weak_
Definition peglib.h:1698
Definition peglib.h:1751
std::shared_ptr< Ope > ope_
Definition peglib.h:1765
size_t parse_core(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt) const override
Definition peglib.h:1755
void accept(Visitor &v) override
Definition peglib.h:3597
Whitespace(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1753
Definition peglib.h:5366
parser(const char *s, size_t n, std::string_view start={})
Definition peglib.h:5375
Log log_
Definition peglib.h:5596
bool enablePackratParsing_
Definition peglib.h:5595
parser(const char *s, size_t n, const Rules &rules, std::string_view start={})
Definition peglib.h:5370
bool parse_n(const char *s, size_t n, std::any &dt, T &val, const char *path=nullptr) const
Definition peglib.h:5450
std::string start_
Definition peglib.h:5593
const Grammar & get_grammar() const
Definition peglib.h:5509
std::shared_ptr< Grammar > grammar_
Definition peglib.h:5592
bool parse_n(const char *s, size_t n, std::any &dt, const char *path=nullptr) const
Definition peglib.h:5428
void set_logger(Log log)
Definition peglib.h:5569
bool enableLeftRecursion_
Definition peglib.h:5594
bool load_grammar(std::string_view sv, std::string_view start={})
Definition peglib.h:5415
void enable_packrat_parsing()
Definition peglib.h:5522
parser & enable_ast()
Definition peglib.h:5556
parser(std::string_view sv, const Rules &rules, std::string_view start={})
Definition peglib.h:5378
void disable_eoi_check()
Definition peglib.h:5511
bool load_grammar(const char *s, size_t n, std::string_view start={})
Definition peglib.h:5406
std::shared_ptr< T > optimize_ast(std::shared_ptr< T > ast, bool opt_mode=true) const
Definition peglib.h:5564
void set_verbose_trace(bool verbose_trace)
Definition peglib.h:5549
void enable_left_recursion(bool enable=true)
Definition peglib.h:5518
parser()=default
const Definition & operator[](const char *s) const
Definition peglib.h:5507
bool parse_n(const char *s, size_t n, T &val, const char *path=nullptr) const
Definition peglib.h:5439
bool parse(std::string_view sv, std::any &dt, const char *path=nullptr) const
Definition peglib.h:5464
parser(std::string_view sv, std::string_view start={})
Definition peglib.h:5381
void set_logger(std::function< void(size_t line, size_t col, const std::string &msg)> log)
Definition peglib.h:5571
bool load_grammar(const char *s, size_t n, const Rules &rules, std::string_view start={})
Definition peglib.h:5396
void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave)
Definition peglib.h:5529
bool parse(std::string_view sv, std::any &dt, T &val, const char *path=nullptr) const
Definition peglib.h:5475
bool post_process(const char *s, size_t n, Definition::Result &r) const
Definition peglib.h:5579
std::vector< std::string > get_no_ast_opt_rules() const
Definition peglib.h:5584
bool load_grammar(std::string_view sv, const Rules &rules, std::string_view start={})
Definition peglib.h:5410
bool parse(std::string_view sv, T &val, const char *path=nullptr) const
Definition peglib.h:5470
Definition & operator[](const char *s)
Definition peglib.h:5505
bool parse_n(const char *s, size_t n, const char *path=nullptr) const
Definition peglib.h:5419
bool parse(std::string_view sv, const char *path=nullptr) const
Definition peglib.h:5460
void enable_trace(TracerEnter tracer_enter, TracerLeave tracer_leave, TracerStartOrEnd tracer_start, TracerStartOrEnd tracer_end)
Definition peglib.h:5537
Definition peglib.h:506
Definition filter_string.h:27
std::string escape_characters(const char *s, size_t n)
Definition peglib.h:221
size_t parse_literal(const char *s, size_t n, SemanticValues &vs, Context &c, std::any &dt, const std::string &lit, std::once_flag &init_is_word, bool &is_word, bool ignore_case, const std::string &lower_lit)
Definition peglib.h:2876
static const char * WORD_DEFINITION_NAME
Definition peglib.h:2569
const char * u8(const T *s)
Definition peglib.h:213
std::shared_ptr< Ope > ref(const Grammar &grammar, const std::string &name, const char *s, bool is_macro, const std::vector< std::shared_ptr< Ope > > &args)
Definition peglib.h:1938
size_t encode_codepoint(char32_t cp, char *buff)
Definition peglib.h:114
std::shared_ptr< Ope > cut()
Definition peglib.h:1963
std::shared_ptr< Ope > tok(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1923
std::shared_ptr< Ope > csc(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1914
bool decode_codepoint(const char *s8, size_t l, size_t &bytes, char32_t &cp)
Definition peglib.h:151
size_t codepoint_count(const char *s8, size_t l)
Definition peglib.h:100
std::shared_ptr< Ope > apd(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1867
std::pair< int, size_t > parse_octal_number(const char *s, size_t n, size_t i)
Definition peglib.h:278
std::u32string decode(const char *s8, size_t l)
Definition peglib.h:200
std::pair< size_t, size_t > line_info(const char *start, const char *cur)
Definition peglib.h:474
std::shared_ptr< Ope > rec(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1959
std::function< size_t(const char *s, size_t n, SemanticValues &vs, std::any &dt)> Parser
Definition peglib.h:1668
std::shared_ptr< Ope > cls(const std::string &s)
Definition peglib.h:1888
std::shared_ptr< Ope > wsp(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1944
bool fail(size_t len)
Definition peglib.h:718
T token_to_number_(std::string_view sv)
Definition peglib.h:367
std::shared_ptr< Ope > pre(const std::shared_ptr< Ope > &atom, const std::shared_ptr< Ope > &binop, const PrecedenceClimbing::BinOpeInfo &info, const Definition &rule)
Definition peglib.h:1952
bool is_digit(char c, int &v)
Definition peglib.h:259
std::shared_ptr< Ope > dic(const std::vector< std::string > &v, bool ignore_case)
Definition peglib.h:1875
std::shared_ptr< Ope > liti(std::string &&s)
Definition peglib.h:1884
std::shared_ptr< Ope > ign(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1927
std::unordered_map< std::string, Definition > Grammar
Definition peglib.h:1724
std::function< void( const Ope &ope, const char *s, size_t n, const SemanticValues &vs, const Context &c, const std::any &dt, size_t, std::any &trace_data)> TracerLeave
Definition peglib.h:804
size_t codepoint_length(const char *s8, size_t l)
Definition peglib.h:84
static const char * WHITESPACE_DEFINITION_NAME
Definition peglib.h:2568
std::string resolve_escape_sequence(const char *s, size_t n)
Definition peglib.h:289
std::shared_ptr< Ope > lit(std::string &&s)
Definition peglib.h:1880
std::shared_ptr< Ope > cho4label_(Args &&...args)
Definition peglib.h:1845
std::shared_ptr< Ope > dot()
Definition peglib.h:1912
std::function< void( const Ope &name, const char *s, size_t n, const SemanticValues &vs, const Context &c, const std::any &dt, std::any &trace_data)> TracerEnter
Definition peglib.h:800
std::unordered_map< std::string, std::shared_ptr< Ope > > Rules
Definition peglib.h:4056
void enable_profiling(parser &parser, std::ostream &os)
Definition peglib.h:5663
std::shared_ptr< Ope > chr(char32_t dt)
Definition peglib.h:1908
bool is_hex(char c, int &v)
Definition peglib.h:245
std::string ast_to_s(const std::shared_ptr< T > &ptr, std::function< std::string(const T &ast, int level)> fn=nullptr)
Definition peglib.h:5160
std::function< void(size_t line, size_t col, const std::string &msg, const std::string &rule)> Log
Definition peglib.h:723
std::shared_ptr< Ope > opt(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1858
std::function< void(std::any &trace_data)> TracerStartOrEnd
Definition peglib.h:808
std::shared_ptr< Ope > oom(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1854
std::shared_ptr< Ope > cho(Args &&...args)
Definition peglib.h:1840
std::shared_ptr< Ope > rep(const std::shared_ptr< Ope > &ope, size_t min, size_t max)
Definition peglib.h:1862
constexpr unsigned int str2tag_core(const char *s, size_t l, unsigned int h)
Definition peglib.h:495
std::shared_ptr< Ope > npd(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1871
void ast_to_s_core(const std::shared_ptr< T > &ptr, std::string &s, int level, std::function< std::string(const T &ast, int level)> fn)
Definition peglib.h:5134
constexpr unsigned int str2tag(std::string_view sv)
Definition peglib.h:502
std::shared_ptr< Ope > seq(Args &&...args)
Definition peglib.h:1836
bool success(size_t len)
Definition peglib.h:716
std::shared_ptr< Ope > ncls(const std::string &s)
Definition peglib.h:1898
std::pair< int, size_t > parse_hex_number(const char *s, size_t n, size_t i)
Definition peglib.h:267
std::shared_ptr< Ope > cap(const std::shared_ptr< Ope > &ope, Capture::MatchAction ma)
Definition peglib.h:1918
void add_ast_action(Definition &rule)
Definition peglib.h:5207
AstBase< EmptyType > Ast
Definition filter_string.h:30
static const char * RECOVER_DEFINITION_NAME
Definition peglib.h:2570
std::any call(F fn, Args &&...args)
Definition peglib.h:616
std::shared_ptr< Ope > zom(const std::shared_ptr< Ope > &ope)
Definition peglib.h:1850
std::shared_ptr< Ope > bkr(std::string &&name)
Definition peglib.h:1948
std::shared_ptr< Ope > usr(std::function< size_t(const char *s, size_t n, SemanticValues &vs, std::any &dt)> fn)
Definition peglib.h:1932
std::string to_lower(std::string s)
Definition peglib.h:383
void enable_tracing(parser &parser, std::ostream &os)
Definition peglib.h:5603
#define CPPPEGLIB_HEURISTIC_ERROR_TOKEN_MAX_CHAR_COUNT
Definition peglib.h:15
Definition clipboard_testing.h:11
Definition peglib.h:2058
void visit(Holder &ope) override
Definition peglib.h:3603
std::unordered_map< void *, size_t > ids
Definition peglib.h:2065
Definition peglib.h:5071
const size_t column
Definition peglib.h:5104
AstBase(const AstBase &ast, const char *original_name, size_t position=0, size_t length=0, size_t original_choice_count=0, size_t original_choice=0)
Definition peglib.h:5091
std::weak_ptr< AstBase< EmptyType > > parent
Definition peglib.h:5121
AstBase(const char *path, size_t line, size_t column, const char *name, const std::string_view &token, size_t position=0, size_t length=0, size_t choice_count=0, size_t choice=0)
Definition peglib.h:5082
const std::string name
Definition peglib.h:5106
T token_to_number() const
Definition peglib.h:5128
const bool is_token
Definition peglib.h:5117
const size_t line
Definition peglib.h:5103
size_t length
Definition peglib.h:5108
const unsigned int original_tag
Definition peglib.h:5115
const size_t choice
Definition peglib.h:5110
size_t position
Definition peglib.h:5107
const size_t original_choice_count
Definition peglib.h:5112
const std::string_view token
Definition peglib.h:5118
std::vector< std::shared_ptr< AstBase< EmptyType > > > nodes
Definition peglib.h:5120
const size_t choice_count
Definition peglib.h:5109
const size_t original_choice
Definition peglib.h:5113
const std::string path
Definition peglib.h:5102
std::string token_to_string() const
Definition peglib.h:5123
AstBase(const char *path, size_t line, size_t column, const char *name, const std::vector< std::shared_ptr< AstBase > > &nodes, size_t position=0, size_t length=0, size_t choice_count=0, size_t choice=0)
Definition peglib.h:5072
const unsigned int tag
Definition peglib.h:5114
const std::string original_name
Definition peglib.h:5111
Definition peglib.h:5167
const bool mode_
Definition peglib.h:5200
const std::vector< std::string > rules_
Definition peglib.h:5201
std::shared_ptr< T > optimize(std::shared_ptr< T > original, std::shared_ptr< T > parent=nullptr)
Definition peglib.h:5172
AstOptimizer(bool mode, const std::vector< std::string > &rules={})
Definition peglib.h:5168
Definition peglib.h:2190
void visit(Repetition &ope) override
Definition peglib.h:2209
void visit(BackReference &) override
Definition peglib.h:2219
void visit(Cut &) override
Definition peglib.h:2220
void visit(LiteralString &ope) override
Definition peglib.h:2213
void visit(NotPredicate &) override
Definition peglib.h:2211
bool result
Definition peglib.h:2193
void visit(Dictionary &) override
Definition peglib.h:2212
void visit(User &) override
Definition peglib.h:2217
void visit(Character &) override
Definition peglib.h:2215
void visit(AndPredicate &) override
Definition peglib.h:2210
void visit(Sequence &ope) override
Definition peglib.h:2195
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2202
void visit(AnyCharacter &) override
Definition peglib.h:2216
void visit(CharacterClass &) override
Definition peglib.h:2214
Definition peglib.h:2432
void visit(User &) override
Definition peglib.h:2524
void visit(BackReference &) override
Definition peglib.h:2526
void visit(AndPredicate &) override
Definition peglib.h:2466
void visit(NotPredicate &) override
Definition peglib.h:2467
void visit(Cut &) override
Definition peglib.h:2527
void visit(LiteralString &ope) override
Definition peglib.h:2480
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2453
void visit(Repetition &ope) override
Definition peglib.h:2462
std::unordered_set< std::string > refs_
Definition peglib.h:2532
void visit(Dictionary &ope) override
Definition peglib.h:2468
void visit(AnyCharacter &) override
Definition peglib.h:2523
void visit(CharacterClass &ope) override
Definition peglib.h:2493
FirstSet result_
Definition peglib.h:2529
void visit(Character &ope) override
Definition peglib.h:2516
void visit(Sequence &ope) override
Definition peglib.h:2435
Definition peglib.h:845
std::any val
Definition peglib.h:847
size_t len
Definition peglib.h:846
Definition peglib.h:914
size_t misses
Definition peglib.h:916
size_t hits
Definition peglib.h:915
Definition peglib.h:1009
std::string_view sv_sv
Definition peglib.h:1013
size_t sv_tags_size
Definition peglib.h:1011
size_t sv_tokens_size
Definition peglib.h:1012
size_t capture_size
Definition peglib.h:1016
size_t choice
Definition peglib.h:1015
size_t choice_count
Definition peglib.h:1014
size_t sv_size
Definition peglib.h:1010
Definition peglib.h:2577
bool ret
Definition peglib.h:2578
size_t len
Definition peglib.h:2580
ErrorInfo error_info
Definition peglib.h:2581
bool recovered
Definition peglib.h:2579
Definition peglib.h:2264
void visit(Repetition &ope) override
Definition peglib.h:2290
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2284
DetectInfiniteLoop(std::vector< std::pair< const char *, std::string > > &refs, std::unordered_map< std::string, bool > &has_error_cache)
Definition peglib.h:2274
std::unordered_map< std::string, bool > & has_error_cache_
Definition peglib.h:2311
void visit(Sequence &ope) override
Definition peglib.h:2278
bool has_error
Definition peglib.h:2305
std::vector< std::pair< const char *, std::string > > & refs_
Definition peglib.h:2310
const char * error_s
Definition peglib.h:2306
DetectInfiniteLoop(const char *s, const std::string &name, std::vector< std::pair< const char *, std::string > > &refs, std::unordered_map< std::string, bool > &has_error_cache)
Definition peglib.h:2267
std::string error_name
Definition peglib.h:2307
Definition peglib.h:2132
bool done_
Definition peglib.h:2186
void visit(AnyCharacter &) override
Definition peglib.h:2173
void visit(Sequence &ope) override
Definition peglib.h:2137
void visit(AndPredicate &ope) override
Definition peglib.h:2161
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2148
void visit(BackReference &) override
Definition peglib.h:2176
const char * error_s
Definition peglib.h:2179
void visit(Repetition &ope) override
Definition peglib.h:2157
std::unordered_set< std::string > refs_
Definition peglib.h:2185
void visit(Cut &) override
Definition peglib.h:2177
void visit(Character &) override
Definition peglib.h:2172
std::string name_
Definition peglib.h:2184
void visit(LiteralString &ope) override
Definition peglib.h:2170
void visit(CharacterClass &) override
Definition peglib.h:2171
DetectLeftRecursion(const std::string &name)
Definition peglib.h:2135
void visit(Dictionary &) override
Definition peglib.h:2169
void visit(NotPredicate &ope) override
Definition peglib.h:2165
std::vector< const std::vector< std::shared_ptr< Ope > > * > macro_args_stack_
Definition peglib.h:2187
void visit(User &) override
Definition peglib.h:2174
std::shared_ptr< Ope > resolve_macro_arg(size_t iarg) const
Definition peglib.h:3686
Definition peglib.h:5204
Definition peglib.h:731
std::vector< std::pair< const char *, const Definition * > > expected_tokens
Definition peglib.h:733
std::string replace_all(std::string str, const std::string &from, const std::string &to) const
Definition peglib.h:784
void clear()
Definition peglib.h:740
const char * message_pos
Definition peglib.h:734
int cast_char(char c) const
Definition peglib.h:757
std::string heuristic_error_token(const char *s, size_t n, const char *pos) const
Definition peglib.h:759
const char * last_output_pos
Definition peglib.h:737
void output_log(const Log &log, const char *s, size_t n)
Definition peglib.h:2938
bool keep_previous_token
Definition peglib.h:738
void add(const char *error_literal, const Definition *error_rule)
Definition peglib.h:747
std::string message
Definition peglib.h:735
std::string label
Definition peglib.h:736
const char * error_pos
Definition peglib.h:732
Definition peglib.h:2113
void visit(LiteralString &ope) override
Definition peglib.h:2116
static const char * token(Ope &ope)
Definition peglib.h:2122
void visit(TokenBoundary &ope) override
Definition peglib.h:2117
const char * token_
Definition peglib.h:2129
void visit(Ignore &ope) override
Definition peglib.h:2118
void visit(Recovery &ope) override
Definition peglib.h:2120
Definition peglib.h:2345
const std::vector< std::string > & params_
Definition peglib.h:2426
void visit(Repetition &ope) override
Definition peglib.h:2368
void visit(WeakHolder &ope) override
Definition peglib.h:2405
void visit(Character &ope) override
Definition peglib.h:2387
void visit(CharacterClass &ope) override
Definition peglib.h:2384
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2360
void visit(Ignore &ope) override
Definition peglib.h:2401
void visit(Cut &ope) override
Definition peglib.h:2420
void visit(AnyCharacter &ope) override
Definition peglib.h:2388
void visit(LiteralString &ope) override
Definition peglib.h:2381
void visit(Recovery &ope) override
Definition peglib.h:2416
void visit(Holder &ope) override
Definition peglib.h:2406
const std::vector< std::shared_ptr< Ope > > & args_
Definition peglib.h:2425
void visit(Dictionary &ope) override
Definition peglib.h:2380
void visit(Whitespace &ope) override
Definition peglib.h:2408
void visit(TokenBoundary &ope) override
Definition peglib.h:2397
std::shared_ptr< Ope > found_ope
Definition peglib.h:2422
void visit(NotPredicate &ope) override
Definition peglib.h:2376
FindReference(const std::vector< std::shared_ptr< Ope > > &args, const std::vector< std::string > &params)
Definition peglib.h:2348
void visit(Sequence &ope) override
Definition peglib.h:2352
void visit(AndPredicate &ope) override
Definition peglib.h:2372
void visit(Capture &ope) override
Definition peglib.h:2393
void visit(CaptureScope &ope) override
Definition peglib.h:2389
void visit(PrecedenceClimbing &ope) override
Definition peglib.h:2412
Definition peglib.h:1192
const char * first_literal
Definition peglib.h:1198
void merge(const FirstSet &other)
Definition peglib.h:1202
bool any_char
Definition peglib.h:1197
std::bitset< 256 > chars
Definition peglib.h:1195
bool can_be_empty
Definition peglib.h:1196
const Definition * first_rule
Definition peglib.h:1199
Definition peglib.h:2223
std::string error_name
Definition peglib.h:2253
void visit(Sequence &ope) override
Definition peglib.h:3701
bool is_empty
Definition peglib.h:2251
void visit(Repetition &ope) override
Definition peglib.h:2237
const char * error_s
Definition peglib.h:2252
void visit(NotPredicate &) override
Definition peglib.h:2245
std::vector< std::pair< const char *, std::string > > & refs_
Definition peglib.h:2260
void visit(LiteralString &ope) override
Definition peglib.h:2246
void visit(AndPredicate &) override
Definition peglib.h:2244
std::unordered_map< std::string, bool > & has_error_cache_
Definition peglib.h:2261
void set_error()
Definition peglib.h:2256
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2231
HasEmptyElement(std::vector< std::pair< const char *, std::string > > &refs, std::unordered_map< std::string, bool > &has_error_cache)
Definition peglib.h:2226
Definition peglib.h:2068
void visit(Dictionary &) override
Definition peglib.h:2078
void visit(LiteralString &) override
Definition peglib.h:2079
bool result_
Definition peglib.h:2088
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2071
static bool check(Ope &ope)
Definition peglib.h:2081
Definition peglib.h:1097
std::bitset< 256 > identifier_rest
Definition peglib.h:1099
std::bitset< 256 > identifier_first
Definition peglib.h:1098
size_t max_keyword_len
Definition peglib.h:1103
size_t min_keyword_len
Definition peglib.h:1102
std::vector< std::string > exact_keywords
Definition peglib.h:1100
std::vector< std::string > prefix_keywords
Definition peglib.h:1101
static bool matches_any(const std::vector< std::string > &keywords, std::string_view input)
Definition peglib.h:1105
Definition peglib.h:2332
const std::vector< std::string > & params_
Definition peglib.h:2342
Grammar & grammar_
Definition peglib.h:2341
void visit(Reference &ope) override
Definition peglib.h:4018
LinkReferences(Grammar &grammar, const std::vector< std::string > &params)
Definition peglib.h:2335
Definition peglib.h:1968
virtual void visit(WeakHolder &)
Definition peglib.h:1985
virtual void visit(TokenBoundary &)
Definition peglib.h:1982
virtual void visit(Repetition &)
Definition peglib.h:1972
virtual void visit(Dictionary &)
Definition peglib.h:1975
virtual void visit(Character &)
Definition peglib.h:1978
virtual ~Visitor()
Definition peglib.h:1969
virtual void visit(AndPredicate &)
Definition peglib.h:1973
virtual void visit(LiteralString &)
Definition peglib.h:1976
virtual void visit(Reference &)
Definition peglib.h:1987
virtual void visit(CharacterClass &)
Definition peglib.h:1977
virtual void visit(PrioritizedChoice &)
Definition peglib.h:1971
virtual void visit(Ignore &)
Definition peglib.h:1983
virtual void visit(PrecedenceClimbing &)
Definition peglib.h:1990
virtual void visit(CaptureScope &)
Definition peglib.h:1980
virtual void visit(Sequence &)
Definition peglib.h:1970
virtual void visit(Holder &)
Definition peglib.h:1986
virtual void visit(Capture &)
Definition peglib.h:1981
virtual void visit(NotPredicate &)
Definition peglib.h:1974
virtual void visit(BackReference &)
Definition peglib.h:1989
virtual void visit(Cut &)
Definition peglib.h:1992
virtual void visit(AnyCharacter &)
Definition peglib.h:1979
virtual void visit(Whitespace &)
Definition peglib.h:1988
virtual void visit(Recovery &)
Definition peglib.h:1991
virtual void visit(User &)
Definition peglib.h:1984
Definition peglib.h:4106
Data()
Definition peglib.h:4122
std::vector< std::pair< std::string, const char * > > duplicates_of_definition
Definition peglib.h:4111
bool enablePackratParsing
Definition peglib.h:4120
std::map< std::string, std::vector< Instruction > > instructions
Definition peglib.h:4114
std::string start
Definition peglib.h:4108
std::vector< std::pair< std::string, const char * > > duplicates_of_instruction
Definition peglib.h:4113
const char * start_pos
Definition peglib.h:4109
std::set< std::string_view > captures_in_current_definition
Definition peglib.h:4119
std::vector< std::pair< std::string, const char * > > undefined_back_references
Definition peglib.h:4116
std::shared_ptr< Grammar > grammar
Definition peglib.h:4107
std::vector< std::set< std::string_view > > captures_stack
Definition peglib.h:4117
Definition peglib.h:4100
std::any data
Definition peglib.h:4102
std::string type
Definition peglib.h:4101
std::string_view sv
Definition peglib.h:4103
Definition peglib.h:4060
std::shared_ptr< Grammar > grammar
Definition peglib.h:4061
bool enablePackratParsing
Definition peglib.h:4063
std::string start
Definition peglib.h:4062
Definition peglib.h:2314
std::unordered_set< std::string > referenced
Definition peglib.h:2325
std::unordered_map< std::string, const char * > error_s
Definition peglib.h:2323
const std::vector< std::string > & params_
Definition peglib.h:2329
std::unordered_map< std::string, std::string > error_message
Definition peglib.h:2324
ReferenceChecker(const Grammar &grammar, const std::vector< std::string > &params)
Definition peglib.h:2317
const Grammar & grammar_
Definition peglib.h:2328
void visit(Reference &ope) override
Definition peglib.h:3778
Definition peglib.h:519
std::pair< size_t, size_t > line_info() const
Definition peglib.h:2933
std::string token_to_string(size_t id=0) const
Definition peglib.h:554
std::vector< std::string_view > tokens
Definition peglib.h:545
size_t choice_
Definition peglib.h:609
Context * c_
Definition peglib.h:606
std::string name_
Definition peglib.h:610
friend class Holder
Definition peglib.h:603
friend class Sequence
Definition peglib.h:600
std::string_view token(size_t id=0) const
Definition peglib.h:547
SemanticValues()=default
std::string_view sv() const
Definition peglib.h:528
std::string_view sv_
Definition peglib.h:607
size_t choice() const
Definition peglib.h:542
T token_to_number() const
Definition peglib.h:558
const char * ss
Definition peglib.h:525
size_t choice_count_
Definition peglib.h:608
friend class Dictionary
Definition peglib.h:599
size_t choice_count() const
Definition peglib.h:539
std::vector< T > transform(size_t beg=0, size_t end=static_cast< size_t >(-1)) const
Definition peglib.h:564
const char * path
Definition peglib.h:524
std::vector< unsigned int > tags
Definition peglib.h:533
const std::string & name() const
Definition peglib.h:531
friend class Repetition
Definition peglib.h:602
friend class PrecedenceClimbing
Definition peglib.h:604
SemanticValues(Context *c)
Definition peglib.h:521
friend class Context
Definition peglib.h:598
friend class PrioritizedChoice
Definition peglib.h:601
Definition peglib.h:2535
void setup_keyword_guarded_identifier(Sequence &ope)
Definition peglib.h:3833
void visit(Repetition &ope) override
Definition peglib.h:2553
void visit(Sequence &ope) override
Definition peglib.h:3825
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2541
std::unordered_set< std::string > refs_
Definition peglib.h:2562
Definition peglib.h:2091
bool has_rule_
Definition peglib.h:2110
bool has_token_boundary_
Definition peglib.h:2109
void visit(TokenBoundary &) override
Definition peglib.h:2094
void visit(WeakHolder &) override
Definition peglib.h:2097
void visit(NotPredicate &) override
Definition peglib.h:2096
void visit(AndPredicate &) override
Definition peglib.h:2095
static bool is_token(Ope &ope)
Definition peglib.h:2100
Definition peglib.h:2021
void visit(Recovery &) override
Definition peglib.h:2045
void visit(Cut &) override
Definition peglib.h:2046
void visit(Holder &ope) override
Definition peglib.h:2040
void visit(User &) override
Definition peglib.h:2038
void visit(NotPredicate &) override
Definition peglib.h:2028
void visit(TokenBoundary &) override
Definition peglib.h:2036
void visit(LiteralString &) override
Definition peglib.h:2030
void visit(AnyCharacter &) override
Definition peglib.h:2033
void visit(Whitespace &) override
Definition peglib.h:2042
void visit(WeakHolder &) override
Definition peglib.h:2039
void visit(Repetition &) override
Definition peglib.h:2026
void visit(Character &) override
Definition peglib.h:2032
void visit(CharacterClass &) override
Definition peglib.h:2031
void visit(Reference &) override
Definition peglib.h:2041
static std::string get(Ope &ope)
Definition peglib.h:2048
const char * name_
Definition peglib.h:2055
void visit(Capture &) override
Definition peglib.h:2035
void visit(CaptureScope &) override
Definition peglib.h:2034
void visit(PrecedenceClimbing &) override
Definition peglib.h:2044
void visit(Sequence &) override
Definition peglib.h:2024
void visit(PrioritizedChoice &) override
Definition peglib.h:2025
void visit(Ignore &) override
Definition peglib.h:2037
void visit(AndPredicate &) override
Definition peglib.h:2027
void visit(BackReference &) override
Definition peglib.h:2043
void visit(Dictionary &) override
Definition peglib.h:2029
Definition peglib.h:1995
void visit(TokenBoundary &ope) override
Definition peglib.h:2012
void visit(Recovery &ope) override
Definition peglib.h:2017
void visit(PrioritizedChoice &ope) override
Definition peglib.h:2002
void visit(Capture &ope) override
Definition peglib.h:2011
void visit(Whitespace &ope) override
Definition peglib.h:2016
void visit(PrecedenceClimbing &ope) override
Definition peglib.h:2018
void visit(Repetition &ope) override
Definition peglib.h:2007
void visit(CaptureScope &ope) override
Definition peglib.h:2010
void visit(AndPredicate &ope) override
Definition peglib.h:2008
void visit(Sequence &ope) override
Definition peglib.h:1997
void visit(WeakHolder &ope) override
Definition peglib.h:2014
void visit(NotPredicate &ope) override
Definition peglib.h:2009
void visit(Ignore &ope) override
Definition peglib.h:2013
void visit(Holder &ope) override
Definition peglib.h:2015
Definition peglib.h:452
bool match
Definition peglib.h:454
size_t id
Definition peglib.h:455
bool done
Definition peglib.h:453
Definition peglib.h:630
Definition peglib.h:55
bool execute_on_destruction
Definition peglib.h:77
scope_exit(scope_exit &&rhs)
Definition peglib.h:59
EF exit_function
Definition peglib.h:76
~scope_exit()
Definition peglib.h:65
scope_exit(EF &&f)
Definition peglib.h:56
scope_exit(const scope_exit &)=delete
void operator=(const scope_exit &)=delete
scope_exit & operator=(scope_exit &&)=delete
void release()
Definition peglib.h:69