From eeac21fb347a8f49edadf90d8f9e0c7ed3011a0b Mon Sep 17 00:00:00 2001 From: Stephane Del Pino <stephane.delpino44@gmail.com> Date: Sat, 15 May 2021 19:30:07 +0200 Subject: [PATCH] git subrepo clone --branch=master git@github.com:taocpp/PEGTL.git packages/PEGTL subrepo: subdir: "packages/PEGTL" merged: "c131c2e2a" upstream: origin: "git@github.com:taocpp/PEGTL.git" branch: "master" commit: "c131c2e2a" git-subrepo: version: "0.4.3" origin: "git@github.com:ingydotnet/git-subrepo.git" commit: "2f68596" --- .../PEGTL/.github/workflows/clang-analyze.yml | 2 +- .../PEGTL/.github/workflows/clang-tidy.yml | 2 +- packages/PEGTL/.github/workflows/linux.yml | 56 +- .../PEGTL/.github/workflows/no-exceptions.yml | 4 +- .../PEGTL/.github/workflows/sanitizer.yml | 2 +- packages/PEGTL/.gitrepo | 6 +- packages/PEGTL/README.md | 1 + packages/PEGTL/doc/Actions-and-States.md | 67 +- packages/PEGTL/doc/Changelog.md | 6 + packages/PEGTL/doc/Errors-and-Exceptions.md | 2 +- packages/PEGTL/doc/Grammar-Analysis.md | 2 +- packages/PEGTL/doc/Parse-Tree.md | 5 + packages/PEGTL/doc/README.md | 2 + packages/PEGTL/doc/Rule-Reference.md | 4 +- .../PEGTL/include/tao/pegtl/buffer_input.hpp | 7 +- .../include/tao/pegtl/contrib/analyze.hpp | 4 +- .../include/tao/pegtl/contrib/check_bytes.hpp | 55 ++ .../include/tao/pegtl/contrib/instantiate.hpp | 2 +- .../contrib/internal/set_stack_guard.hpp | 6 +- .../contrib/internal/vector_stack_guard.hpp | 6 +- .../include/tao/pegtl/contrib/limit_bytes.hpp | 88 +++ .../include/tao/pegtl/contrib/limit_depth.hpp | 83 +++ .../PEGTL/include/tao/pegtl/contrib/skip.hpp | 59 -- .../tao/pegtl/internal/file_mapper_posix.hpp | 8 +- .../tao/pegtl/internal/file_mapper_win32.hpp | 12 +- .../tao/pegtl/internal/file_reader.hpp | 4 +- .../include/tao/pegtl/internal/marker.hpp | 12 +- .../include/tao/pegtl/internal/ranges.hpp | 51 +- .../PEGTL/include/tao/pegtl/memory_input.hpp | 29 +- .../PEGTL/include/tao/pegtl/mmap_input.hpp | 8 +- packages/PEGTL/include/tao/pegtl/parse.hpp | 
52 +- .../PEGTL/include/tao/pegtl/parse_error.hpp | 5 + .../PEGTL/include/tao/pegtl/read_input.hpp | 4 +- .../PEGTL/include/tao/pegtl/string_input.hpp | 8 +- packages/PEGTL/include/tao/pegtl/version.hpp | 4 +- .../PEGTL/src/example/pegtl/CMakeLists.txt | 2 +- .../PEGTL/src/example/pegtl/expression.cpp | 613 ++++++++++++++++++ .../PEGTL/src/example/pegtl/json_classes.hpp | 4 +- .../PEGTL/src/example/pegtl/json_parse.cpp | 14 +- packages/PEGTL/src/example/pegtl/skipper.cpp | 88 --- .../PEGTL/src/example/pegtl/token_input.cpp | 8 +- packages/PEGTL/src/test/pegtl/CMakeLists.txt | 3 + packages/PEGTL/src/test/pegtl/check_bytes.cpp | 50 ++ .../src/test/pegtl/contrib_instantiate.cpp | 4 +- packages/PEGTL/src/test/pegtl/limit_bytes.cpp | 50 ++ packages/PEGTL/src/test/pegtl/limit_depth.cpp | 50 ++ 46 files changed, 1244 insertions(+), 310 deletions(-) create mode 100644 packages/PEGTL/include/tao/pegtl/contrib/check_bytes.hpp create mode 100644 packages/PEGTL/include/tao/pegtl/contrib/limit_bytes.hpp create mode 100644 packages/PEGTL/include/tao/pegtl/contrib/limit_depth.hpp delete mode 100644 packages/PEGTL/include/tao/pegtl/contrib/skip.hpp create mode 100644 packages/PEGTL/src/example/pegtl/expression.cpp delete mode 100644 packages/PEGTL/src/example/pegtl/skipper.cpp create mode 100644 packages/PEGTL/src/test/pegtl/check_bytes.cpp create mode 100644 packages/PEGTL/src/test/pegtl/limit_bytes.cpp create mode 100644 packages/PEGTL/src/test/pegtl/limit_depth.cpp diff --git a/packages/PEGTL/.github/workflows/clang-analyze.yml b/packages/PEGTL/.github/workflows/clang-analyze.yml index 2226ea7b0..523b10556 100644 --- a/packages/PEGTL/.github/workflows/clang-analyze.yml +++ b/packages/PEGTL/.github/workflows/clang-analyze.yml @@ -4,7 +4,7 @@ on: [push, pull_request] jobs: clang-analyze: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 diff --git a/packages/PEGTL/.github/workflows/clang-tidy.yml b/packages/PEGTL/.github/workflows/clang-tidy.yml 
index 41e1003c4..4da8af6ff 100644 --- a/packages/PEGTL/.github/workflows/clang-tidy.yml +++ b/packages/PEGTL/.github/workflows/clang-tidy.yml @@ -4,7 +4,7 @@ on: [push, pull_request] jobs: clang-tidy: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 diff --git a/packages/PEGTL/.github/workflows/linux.yml b/packages/PEGTL/.github/workflows/linux.yml index ba30d3d98..139982af1 100644 --- a/packages/PEGTL/.github/workflows/linux.yml +++ b/packages/PEGTL/.github/workflows/linux.yml @@ -8,16 +8,14 @@ jobs: fail-fast: false matrix: compiler: - - g++-7 - - g++-8 - g++-9 - g++-10 - - clang++-8 - clang++-9 - clang++-10 + - clang++-11 build_type: [Debug, Release] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest env: CXX: ${{ matrix.compiler }} @@ -36,22 +34,34 @@ jobs: - working-directory: build/ run: ctest --output-on-failure - linux-gcc-extra: + linux-old: strategy: fail-fast: false matrix: - flags: ["-fno-rtti"] + compiler: + - g++-7 + - g++-8 + - clang++-6.0 + - clang++-7 + - clang++-8 build_type: [Debug, Release] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest + + env: + CXX: ${{ matrix.compiler }} steps: - uses: actions/checkout@v2 + - run: sudo apt-get update + + - run: sudo apt-get install -y ${{ matrix.compiler }} + - run: cmake -E make_directory build - working-directory: build/ - run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_CXX_FLAGS="${{ matrix.flags }}" + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - working-directory: build/ run: cmake --build . 
@@ -59,17 +69,14 @@ jobs: - working-directory: build/ run: ctest --output-on-failure - linux-clang-extra: + linux-gcc-extra: strategy: fail-fast: false matrix: - flags: ["-fno-rtti", "-fms-extensions"] + flags: ["-fno-rtti"] build_type: [Debug, Release] - runs-on: ubuntu-20.04 - - env: - CXX: clang++ + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 @@ -85,32 +92,25 @@ jobs: - working-directory: build/ run: ctest --output-on-failure - clang-conanio: + linux-clang-extra: strategy: fail-fast: false matrix: - image: - # List: https://github.com/conan-io/conan-docker-tools - - clang50 - - clang60 - - clang7 - - clang9-x86 - - clang11 + flags: ["-fno-rtti", "-fms-extensions"] build_type: [Debug, Release] - container: - image: conanio/${{ matrix.image }} - options: --user root - runs-on: ubuntu-latest + env: + CXX: clang++ + steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v2 - run: cmake -E make_directory build - working-directory: build/ - run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + run: cmake $GITHUB_WORKSPACE -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DCMAKE_CXX_FLAGS="${{ matrix.flags }}" - working-directory: build/ run: cmake --build . 
diff --git a/packages/PEGTL/.github/workflows/no-exceptions.yml b/packages/PEGTL/.github/workflows/no-exceptions.yml index 3dd3d74b8..eeb964f25 100644 --- a/packages/PEGTL/.github/workflows/no-exceptions.yml +++ b/packages/PEGTL/.github/workflows/no-exceptions.yml @@ -7,10 +7,10 @@ jobs: strategy: fail-fast: false matrix: - compiler: [g++-10, clang++-10] + compiler: [g++, clang++] build_type: [Debug, Release] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest env: CXX: ${{ matrix.compiler }} diff --git a/packages/PEGTL/.github/workflows/sanitizer.yml b/packages/PEGTL/.github/workflows/sanitizer.yml index 1f478cb06..61d61aebd 100644 --- a/packages/PEGTL/.github/workflows/sanitizer.yml +++ b/packages/PEGTL/.github/workflows/sanitizer.yml @@ -10,7 +10,7 @@ jobs: cxx: [g++, clang++] sanitizer: [address, undefined] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest env: CXX: ${{ matrix.cxx }} diff --git a/packages/PEGTL/.gitrepo b/packages/PEGTL/.gitrepo index 393af709f..d60e16e83 100644 --- a/packages/PEGTL/.gitrepo +++ b/packages/PEGTL/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = git@github.com:taocpp/PEGTL.git branch = master - commit = 57f8ebe0045d7e35cbb251536146a57bc0cf9db5 - parent = 0a259f7e3e4fe2364b8d45b641c7f48ff3bc7341 - cmdver = 0.4.3 + commit = c131c2e2aad67037285ef39d11ec4f1d28d4fc73 + parent = 2f2fa0e22bd114f44f78c5bee89bc13bd0959d1d method = merge + cmdver = 0.4.3 diff --git a/packages/PEGTL/README.md b/packages/PEGTL/README.md index 64d090c99..e613c97f9 100644 --- a/packages/PEGTL/README.md +++ b/packages/PEGTL/README.md @@ -112,6 +112,7 @@ In appreciation of all contributions here are the people that have [directly con [<img alt="pauloscustodio" src="https://avatars.githubusercontent.com/u/70773" width="120">](https://github.com/pauloscustodio) [<img alt="pleroux0" src="https://avatars.githubusercontent.com/u/39619854" width="120">](https://github.com/pleroux0) [<img alt="quadfault" src="https://avatars.githubusercontent.com/u/30195320" 
width="120">](https://github.com/quadfault) +[<img alt="quarticcat" src="https://avatars.githubusercontent.com/u/70888415" width="120">](https://github.com/quarticcat) [<img alt="ras0219" src="https://avatars.githubusercontent.com/u/533828" width="120">](https://github.com/ras0219) [<img alt="redmercury" src="https://avatars.githubusercontent.com/u/4424222" width="120">](https://github.com/redmercury) [<img alt="robertcampion" src="https://avatars.githubusercontent.com/u/4220569" width="120">](https://github.com/robertcampion) diff --git a/packages/PEGTL/doc/Actions-and-States.md b/packages/PEGTL/doc/Actions-and-States.md index 951720b7d..4c8101682 100644 --- a/packages/PEGTL/doc/Actions-and-States.md +++ b/packages/PEGTL/doc/Actions-and-States.md @@ -24,6 +24,7 @@ When an action is *applied*, the corresponding function receives the *states*, a * [Changing Actions and States](#changing-actions-and-states) * [Match](#match) * [Nothing](#nothing) +* [Backtracking](#backtracking) * [Troubleshooting](#troubleshooting) * [Boolean Return](#boolean-return) * [State Mismatch](#state-mismatch) @@ -285,7 +286,7 @@ std::string unescape( const std::string& escaped ) At the end of the parsing run, the complete unescaped string can be found in the aptly named variable `unescaped`. -A more complete example of how to unescape strings can be found in `src/examples/pegtl/unescape.cpp`. +A more complete example of how to unescape strings can be found in `src/example/pegtl/unescape.cpp`. ## Specialising @@ -506,8 +507,72 @@ For example when a class `b` is derived from `change_state`, it also gains that At this point `b` is allowed to either have or not have an `apply()` or `apply0()`. By letting `b` also derive from one of the three mentioned classes, the `maybe_nothing` will be ignored and `b` will be checked to have or not have the functions as dictated by the respective additional base class. 
+## Backtracking + +Sometimes there can be *backtracking* during a parsing run which can lead to Actions being called in places where their effects are undesired. +While it might be intuitively clear what backtracking is, for the purpose of the following discussion we give a slightly more formal definition. + +We speak of *backtracking* across a rule `S` when there is a rule `R` of which `S` is a (direct or indirect) sub-rule and during a parsing run +1. `R` returns local failure after +2. `S` succeeded and its success is a requirement for the success of `R` and +3. it is "still possible" for the top-level grammar rule of the parsing run to succeed. + +In this case the input will have been rewound to the point at which `R` was attempted to match and all effects of `S` on the Input will have been undone, however, and this is the subject of this section, any action attached to `S` will have been already performed without there being an automatic "undo". + +#### The AAC-Problem + +In some cases it is easy to rewrite the grammar in a way that prevents backtracking. +This simultaneously removes the issue of having to undo actions and improves parsing performance. + +The prototypical case for which such a rewrite can be done is `R = sor< seq< A, B >, seq< A, C > >` where `A`, `B` and `C` are arbitrary rules. 
+ +If during a parsing run there are actions attached to `A` and `C`, and the input matches `seq< A, C >` but not `seq< A, B >`, then the action for `A` will be called *twice* before the action for `C`, which gives this problem its "AAC" name, given that what happens is: + +* Begin `sor< seq< A, B >, seq< A, C > >` +* Begin `seq< A, B >` +* Begin `A` +* Success `A` with action called +* Begin `B` +* Failure `B` +* Failure `seq< A, B >` +* Begin `seq< A, C >` +* Begin `A` at the same position as the begin `A` above +* Success `A` with action called again on the same input +* Begin `C` +* Success `C` +* Success `seq< A, C >` +* Success `sor< seq< A, B >, seq< A, C > >` + +#### Rewriting + +In practice the structure of the rule might be more complicated than the pure AAC-problem which will make it harder to recognise the pattern. +One solution is to rewrite `R` as `R' = seq< A, sor< B, C > >` where of course any action for `A` will be called at most once for every successful match of `R'`. + +#### Manual Undo + +Another solution is to undo the effects of the Action attached to `A` in case the encompassing `seq< A, B >` (or `seq< A, C >`) fails. + +The advantage of this approach is that the implementation of the Action for `A` can pretend that it is only called when really needed. +The disadvantage is that there is no function on the Action that is called in the case of failure which requires the user to either write a custom `match()` function in the Action for `seq< A, B >` or to implement the `failure()` function in a custom [Control class](Control-and-Debug.md). + +#### Manual Commit + +A further solution is to let the Action for `A` perform its job "to the side", and only "commit" the effects to the target data structure in the Action for `seq< A, B >`. 
+ +For example if the Action attached to `A` takes the matched portion of the Input as `std::string` and appends it to `std::vector< std::string >` one could change said Action for `A` to only fill some temporary string in one of the States, and create an Action for `seq< A, B >` that, after it is called on success of that rule, appends the aforementioned temporary string to the target vector. + +#### Looking Ahead + +When everything else fails and a quick-and-dirty solution to Actions being called too often in the presence of backtracking is required and/or performance is not of prime importance it is relatively easy to solve the problem by employing the infinite look-ahead capability of PEGs. + +When backtracking across `S` is a problem because an Action attached to `S` can be called when `S` succeeds even though there is a higher-up rule `R` that can still fail then simply replace `R` with `seq< at< R >, R >` in the grammar. + +Remembering that `at` disables all Actions explains how this solves the problem; we first verify without Actions that `R` will indeed match at this point and only then match `R` again with Actions enabled. + ## Troubleshooting +The following lists a couple of frequently encountered Action-related errors and how to fix them. + ### Boolean Return Actions returning `bool` are an advanced use case that should be used with caution. diff --git a/packages/PEGTL/doc/Changelog.md b/packages/PEGTL/doc/Changelog.md index d1b57f131..cc194318f 100644 --- a/packages/PEGTL/doc/Changelog.md +++ b/packages/PEGTL/doc/Changelog.md @@ -1,5 +1,11 @@ # Changelog +## 3.2.1 + +**Not yet released** + +* Added an optional limiter to guard against infinite recursion. 
+ ## 3.2.0 Released 2021-01-15 diff --git a/packages/PEGTL/doc/Errors-and-Exceptions.md b/packages/PEGTL/doc/Errors-and-Exceptions.md index f080c0917..7b8f018d8 100644 --- a/packages/PEGTL/doc/Errors-and-Exceptions.md +++ b/packages/PEGTL/doc/Errors-and-Exceptions.md @@ -162,7 +162,7 @@ This is often insufficient and one would like to provide more meaningful error m A practical technique to provide customised error messages for all `must<>` error points uses the `must_if<>` helper. -For an example of this method see `src/examples/pegtl/json_errors.hpp`, where all errors that might occur in the supplied JSON grammar are customised like this: +For an example of this method see `src/example/pegtl/json_errors.hpp`, where all errors that might occur in the supplied JSON grammar are customised like this: ```c++ template< typename > inline constexpr const char* error_message = nullptr; diff --git a/packages/PEGTL/doc/Grammar-Analysis.md b/packages/PEGTL/doc/Grammar-Analysis.md index 964adb9c1..629982d8e 100644 --- a/packages/PEGTL/doc/Grammar-Analysis.md +++ b/packages/PEGTL/doc/Grammar-Analysis.md @@ -56,7 +56,7 @@ This support automatically extends to all custom rules built "the usual way" via For true custom rules, i.e. rules that implement their own `match()` function, the following steps need to be taken for them to work with the grammar analysis. -1. The rule needs a `rule_t` that, usually for true custom rules, is a type alias for the grammar rule itself. +1. The rule needs a [`rule_t`](Meta-Data-and-Visit.md#rule-type) that, usually for true custom rules, is a type alias for the grammar rule itself. 2. 
There needs to be a specialisation of the `analyze_traits<>` for the custom rule, with an additional first template parameter: Assuming a custom rule like the following diff --git a/packages/PEGTL/doc/Parse-Tree.md b/packages/PEGTL/doc/Parse-Tree.md index 039208294..baa3b24a7 100644 --- a/packages/PEGTL/doc/Parse-Tree.md +++ b/packages/PEGTL/doc/Parse-Tree.md @@ -18,6 +18,7 @@ It provides the basic infrastructure to build a parse tree that * [Transformers](#transformers) * [`tao::pegtl::parse_tree::node`](#taopegtlparse_treenode) * [Custom Node Class](#custom-node-class) +* [Requirements](#requirements) ## Full Parse Tree @@ -235,4 +236,8 @@ struct my_node }; ``` +## Requirements + +The parse tree uses a rule's meta data supplied by [`subs_t`](Meta-Data-and-Visit.md#sub-rules) for internal optimizations. + Copyright (c) 2018-2021 Dr. Colin Hirsch and Daniel Frey diff --git a/packages/PEGTL/doc/README.md b/packages/PEGTL/doc/README.md index 09b6fbcf1..7c028ccae 100644 --- a/packages/PEGTL/doc/README.md +++ b/packages/PEGTL/doc/README.md @@ -44,6 +44,7 @@ * [Changing Actions and States](Actions-and-States.md#changing-actions-and-states) * [Match](Actions-and-States.md#match) * [Nothing](Actions-and-States.md#nothing) + * [Backtracking](Actions-and-States.md#backtracking) * [Troubleshooting](Actions-and-States.md#troubleshooting) * [Boolean Return](Actions-and-States.md#boolean-return) * [State Mismatch](Actions-and-States.md#state-mismatch) @@ -103,6 +104,7 @@ * [Transformer](Parse-Tree.md#transformer) * [`tao::pegtl::parse_tree::node`](Parse-Tree.md#taopegtlparse_treenode) * [Custom Node Class](Parse-Tree.md#custom-node-class) + * [Requirements](Parse-Tree.md#requirements) * [Meta Data and Visit](Meta-Data-and-Visit.md) * [Internals](Meta-Data-and-Visit.md#internals) * [Rule Type](Meta-Data-and-Visit.md#rule-type) diff --git a/packages/PEGTL/doc/Rule-Reference.md b/packages/PEGTL/doc/Rule-Reference.md index a39700edf..79dd46ccf 100644 --- 
a/packages/PEGTL/doc/Rule-Reference.md +++ b/packages/PEGTL/doc/Rule-Reference.md @@ -100,7 +100,7 @@ These rules are in namespace `tao::pegtl`. * Enables all actions (if any). * [Meta data] and [implementation] mapping: - `enable<>::rule_t` is `internal::success` - - `enable< R >::rule_t` is `internal::enable<, R >` + - `enable< R >::rule_t` is `internal::enable< R >` - `enable< R >::subs_t` is `type_list< R >` - `enable< R... >::rule_t` is `internal::enable< internal::seq< R... > >` - `enable< R... >::subs_t` is `type_list< internal::seq< R... > >` @@ -368,6 +368,8 @@ Note that the `true` template parameter to `internal::if_must` corresponds to th - `rematch< R, S... >::rule_t` is `internal::rematch< R, S... >` - `rematch< R, S... >::subs_t` is `type_list< R, S... >` +Note that the `S` do *not* need to match *all* of the input matched by `R` (which is why `minus` uses `eof` in its implementation). + ###### `rep< Num, R... >` * Matches `seq< R... >` for `Num` times without checking for further matches. 
diff --git a/packages/PEGTL/include/tao/pegtl/buffer_input.hpp b/packages/PEGTL/include/tao/pegtl/buffer_input.hpp index 0003442b0..60d39d349 100644 --- a/packages/PEGTL/include/tao/pegtl/buffer_input.hpp +++ b/packages/PEGTL/include/tao/pegtl/buffer_input.hpp @@ -66,8 +66,8 @@ namespace TAO_PEGTL_NAMESPACE ~buffer_input() = default; - void operator=( const buffer_input& ) = delete; - void operator=( buffer_input&& ) = delete; + buffer_input& operator=( const buffer_input& ) = delete; + buffer_input& operator=( buffer_input&& ) = delete; [[nodiscard]] bool empty() { @@ -216,6 +216,9 @@ namespace TAO_PEGTL_NAMESPACE iterator_t m_current; char* m_end; const Source m_source; + + public: + std::size_t private_depth = 0; }; } // namespace TAO_PEGTL_NAMESPACE diff --git a/packages/PEGTL/include/tao/pegtl/contrib/analyze.hpp b/packages/PEGTL/include/tao/pegtl/contrib/analyze.hpp index 586710735..0bfbd18c2 100644 --- a/packages/PEGTL/include/tao/pegtl/contrib/analyze.hpp +++ b/packages/PEGTL/include/tao/pegtl/contrib/analyze.hpp @@ -44,8 +44,8 @@ namespace TAO_PEGTL_NAMESPACE ~analyze_cycles_impl() = default; - void operator=( analyze_cycles_impl&& ) = delete; - void operator=( const analyze_cycles_impl& ) = delete; + analyze_cycles_impl& operator=( analyze_cycles_impl&& ) = delete; + analyze_cycles_impl& operator=( const analyze_cycles_impl& ) = delete; [[nodiscard]] std::size_t problems() { diff --git a/packages/PEGTL/include/tao/pegtl/contrib/check_bytes.hpp b/packages/PEGTL/include/tao/pegtl/contrib/check_bytes.hpp new file mode 100644 index 000000000..84333c7d7 --- /dev/null +++ b/packages/PEGTL/include/tao/pegtl/contrib/check_bytes.hpp @@ -0,0 +1,55 @@ +// Copyright (c) 2021 Dr. 
Colin Hirsch and Daniel Frey +// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ + +#ifndef TAO_PEGTL_CONTRIB_CHECK_BYTES_HPP +#define TAO_PEGTL_CONTRIB_CHECK_BYTES_HPP + +#include "../apply_mode.hpp" +#include "../config.hpp" +#include "../match.hpp" +#include "../nothing.hpp" +#include "../rewind_mode.hpp" + +#if defined( __cpp_exceptions ) +#include "../parse_error.hpp" +#else +#include <cstdio> +#include <exception> +#endif + +namespace TAO_PEGTL_NAMESPACE +{ + template< std::size_t Maximum > + struct check_bytes + : maybe_nothing + { + template< typename Rule, + pegtl::apply_mode A, + pegtl::rewind_mode M, + template< typename... > + class Action, + template< typename... > + class Control, + typename ParseInput, + typename... States > + static bool match( ParseInput& in, States&&... st ) + { + const auto* start = in.current(); + if( TAO_PEGTL_NAMESPACE::match< Rule, A, M, Action, Control >( in, st... ) ) { + if( std::size_t( in.current() - start ) > Maximum ) { +#if defined( __cpp_exceptions ) + throw TAO_PEGTL_NAMESPACE::parse_error( "maximum allowed rule consumption exceeded", in ); +#else + std::fputs( "maximum allowed rule consumption exceeded\n", stderr ); + std::terminate(); +#endif + } + return true; + } + return false; + } + }; + +} // namespace TAO_PEGTL_NAMESPACE + +#endif diff --git a/packages/PEGTL/include/tao/pegtl/contrib/instantiate.hpp b/packages/PEGTL/include/tao/pegtl/contrib/instantiate.hpp index c04db3f54..ebf6e7f1c 100644 --- a/packages/PEGTL/include/tao/pegtl/contrib/instantiate.hpp +++ b/packages/PEGTL/include/tao/pegtl/contrib/instantiate.hpp @@ -26,7 +26,7 @@ namespace TAO_PEGTL_NAMESPACE class Control, typename ParseInput, typename... States > - [[nodiscard]] static bool match( ParseInput& in, States&... st ) + [[nodiscard]] static bool match( ParseInput& in, States&&... st ) { const T t( static_cast< const ParseInput& >( in ), st... 
); return TAO_PEGTL_NAMESPACE::match< Rule, A, M, Action, Control >( in, st... ); diff --git a/packages/PEGTL/include/tao/pegtl/contrib/internal/set_stack_guard.hpp b/packages/PEGTL/include/tao/pegtl/contrib/internal/set_stack_guard.hpp index 94df74cab..c0071e959 100644 --- a/packages/PEGTL/include/tao/pegtl/contrib/internal/set_stack_guard.hpp +++ b/packages/PEGTL/include/tao/pegtl/contrib/internal/set_stack_guard.hpp @@ -12,7 +12,7 @@ namespace TAO_PEGTL_NAMESPACE::internal { template< typename... Cs > - class set_stack_guard + class [[nodiscard]] set_stack_guard { public: template< typename... Ts > @@ -24,8 +24,8 @@ namespace TAO_PEGTL_NAMESPACE::internal set_stack_guard( set_stack_guard&& ) = delete; set_stack_guard( const set_stack_guard& ) = delete; - void operator=( set_stack_guard&& ) = delete; - void operator=( const set_stack_guard& ) = delete; + set_stack_guard& operator=( set_stack_guard&& ) = delete; + set_stack_guard& operator=( const set_stack_guard& ) = delete; ~set_stack_guard() { diff --git a/packages/PEGTL/include/tao/pegtl/contrib/internal/vector_stack_guard.hpp b/packages/PEGTL/include/tao/pegtl/contrib/internal/vector_stack_guard.hpp index 146da0f1a..4b0cfbe8b 100644 --- a/packages/PEGTL/include/tao/pegtl/contrib/internal/vector_stack_guard.hpp +++ b/packages/PEGTL/include/tao/pegtl/contrib/internal/vector_stack_guard.hpp @@ -12,7 +12,7 @@ namespace TAO_PEGTL_NAMESPACE::internal { template< typename... Cs > - class vector_stack_guard + class [[nodiscard]] vector_stack_guard { public: template< typename... 
Ts > @@ -25,8 +25,8 @@ namespace TAO_PEGTL_NAMESPACE::internal vector_stack_guard( vector_stack_guard&& ) = delete; vector_stack_guard( const vector_stack_guard& ) = delete; - void operator=( vector_stack_guard&& ) = delete; - void operator=( const vector_stack_guard& ) = delete; + vector_stack_guard& operator=( vector_stack_guard&& ) = delete; + vector_stack_guard& operator=( const vector_stack_guard& ) = delete; ~vector_stack_guard() { diff --git a/packages/PEGTL/include/tao/pegtl/contrib/limit_bytes.hpp b/packages/PEGTL/include/tao/pegtl/contrib/limit_bytes.hpp new file mode 100644 index 000000000..cea99fc6f --- /dev/null +++ b/packages/PEGTL/include/tao/pegtl/contrib/limit_bytes.hpp @@ -0,0 +1,88 @@ +// Copyright (c) 2021 Dr. Colin Hirsch and Daniel Frey +// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ + +#ifndef TAO_PEGTL_CONTRIB_LIMIT_BYTES_HPP +#define TAO_PEGTL_CONTRIB_LIMIT_BYTES_HPP + +#include <algorithm> + +#include "../apply_mode.hpp" +#include "../config.hpp" +#include "../match.hpp" +#include "../nothing.hpp" +#include "../rewind_mode.hpp" + +#if defined( __cpp_exceptions ) +#include "../parse_error.hpp" +#else +#include <cstdio> +#include <exception> +#endif + +namespace TAO_PEGTL_NAMESPACE +{ + namespace internal + { + template< std::size_t Maximum, typename MemoryInput > + struct [[nodiscard]] bytes_guard + { + MemoryInput& m_in; + const char* m_end; + + explicit bytes_guard( MemoryInput& in_in ) noexcept + : m_in( in_in ), + m_end( in_in.end() ) + { + m_in.private_set_end( m_in.begin() + std::min( m_in.size(), Maximum ) ); + } + + bytes_guard( bytes_guard&& ) = delete; + bytes_guard( const bytes_guard& ) = delete; + + ~bytes_guard() + { + m_in.private_set_end( m_end ); + } + + bytes_guard& operator=( bytes_guard&& ) = delete; + bytes_guard& operator=( const bytes_guard& ) = delete; + }; + + // C++17 does not allow for partial deduction guides. 
+ + } // namespace internal + + template< std::size_t Maximum > + struct limit_bytes + : maybe_nothing + { + template< typename Rule, + apply_mode A, + rewind_mode M, + template< typename... > + class Action, + template< typename... > + class Control, + typename ParseInput, + typename... States > + [[nodiscard]] static bool match( ParseInput& in, States&&... st ) + { + internal::bytes_guard< Maximum, ParseInput > bg( in ); + if( TAO_PEGTL_NAMESPACE::match< Rule, A, M, Action, Control >( in, st... ) ) { + if( in.empty() && ( bg.m_end != in.current() ) ) { +#if defined( __cpp_exceptions ) + throw TAO_PEGTL_NAMESPACE::parse_error( "maximum allowed rule consumption reached", in ); +#else + std::fputs( "maximum allowed rule consumption reached\n", stderr ); + std::terminate(); +#endif + } + return true; + } + return false; + } + }; + +} // namespace TAO_PEGTL_NAMESPACE + +#endif diff --git a/packages/PEGTL/include/tao/pegtl/contrib/limit_depth.hpp b/packages/PEGTL/include/tao/pegtl/contrib/limit_depth.hpp new file mode 100644 index 000000000..447bebff7 --- /dev/null +++ b/packages/PEGTL/include/tao/pegtl/contrib/limit_depth.hpp @@ -0,0 +1,83 @@ +// Copyright (c) 2021 Dr. 
Colin Hirsch and Daniel Frey +// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ + +#ifndef TAO_PEGTL_CONTRIB_LIMIT_DEPTH_HPP +#define TAO_PEGTL_CONTRIB_LIMIT_DEPTH_HPP + +#include "../apply_mode.hpp" +#include "../config.hpp" +#include "../match.hpp" +#include "../nothing.hpp" +#include "../rewind_mode.hpp" + +#if defined( __cpp_exceptions ) +#include "../parse_error.hpp" +#else +#include <cstdio> +#include <exception> +#endif + +namespace TAO_PEGTL_NAMESPACE +{ + namespace internal + { + struct [[nodiscard]] depth_guard + { + std::size_t& m_depth; + + explicit depth_guard( std::size_t& depth ) noexcept + : m_depth( depth ) + { + ++m_depth; + } + + depth_guard( depth_guard&& ) = delete; + depth_guard( const depth_guard& ) = delete; + + ~depth_guard() + { + --m_depth; + } + + depth_guard& operator=( depth_guard&& ) = delete; + depth_guard& operator=( const depth_guard& ) = delete; + }; + + } // namespace internal + + template< std::size_t Maximum > + struct limit_depth + : maybe_nothing + { + template< typename Rule, + apply_mode A, + rewind_mode M, + template< typename... > + class Action, + template< typename... > + class Control, + typename ParseInput, + typename... States > + [[nodiscard]] static bool match( ParseInput& in, States&&... st ) + { + if constexpr( Control< Rule >::enable ) { + const internal::depth_guard dg( in.private_depth ); + if( in.private_depth > Maximum ) { +#if defined( __cpp_exceptions ) + throw TAO_PEGTL_NAMESPACE::parse_error( "maximum parser rule nesting depth exceeded", in ); +#else + std::fputs( "maximum parser rule nesting depth exceeded\n", stderr ); + std::terminate(); +#endif + } + return TAO_PEGTL_NAMESPACE::match< Rule, A, M, Action, Control >( in, st... ); + } + else { + return TAO_PEGTL_NAMESPACE::match< Rule, A, M, Action, Control >( in, st... 
); + } + } + }; + +} // namespace TAO_PEGTL_NAMESPACE + +#endif diff --git a/packages/PEGTL/include/tao/pegtl/contrib/skip.hpp b/packages/PEGTL/include/tao/pegtl/contrib/skip.hpp deleted file mode 100644 index 8aea252e8..000000000 --- a/packages/PEGTL/include/tao/pegtl/contrib/skip.hpp +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright (c) 2021 Dr. Colin Hirsch and Daniel Frey -// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ - -#ifndef TAO_PEGTL_CONTRIB_SKIP_HPP -#define TAO_PEGTL_CONTRIB_SKIP_HPP - -#include "../apply_mode.hpp" -#include "../config.hpp" -#include "../match.hpp" -#include "../normal.hpp" -#include "../nothing.hpp" -#include "../rewind_mode.hpp" - -namespace TAO_PEGTL_NAMESPACE -{ - // this is currently experimental and may change at any time - template< typename How, typename Where, template< typename... > class Base = normal > - struct skip - { - template< typename Rule > - struct control - : Base< Rule > - { - template< apply_mode A, - rewind_mode M, - template< typename... > - class Action, - template< typename... > - class Control, - typename ParseInput, - typename... States > - [[nodiscard]] static bool match( ParseInput& in, States&&... st ) - { - // TODO: if we only skip after but not before the actual rule, - // we would not need this marker. - auto m = in.template mark< M >(); - - // TODO: different conditions for before/after skipping? - if( Where::template value< Rule > ) { - // TODO: assert on result to be successful? - (void)TAO_PEGTL_NAMESPACE::match< How, apply_mode::nothing, M, nothing, normal >( in ); - } - - const bool result = Base< Rule >::template match< A, M, Action, Control >( in, st... ); - - // TODO: different conditions for before/after skipping? - if( result && Where::template value< Rule > ) { - // TODO: assert on result to be successful? 
- (void)TAO_PEGTL_NAMESPACE::match< How, apply_mode::nothing, M, nothing, normal >( in ); - } - - return m( result ); - } - }; - }; - -} // namespace TAO_PEGTL_NAMESPACE - -#endif diff --git a/packages/PEGTL/include/tao/pegtl/internal/file_mapper_posix.hpp b/packages/PEGTL/include/tao/pegtl/internal/file_mapper_posix.hpp index ad8147a7d..0c60e3a4a 100644 --- a/packages/PEGTL/include/tao/pegtl/internal/file_mapper_posix.hpp +++ b/packages/PEGTL/include/tao/pegtl/internal/file_mapper_posix.hpp @@ -38,8 +38,8 @@ namespace TAO_PEGTL_NAMESPACE::internal ::close( m_fd ); } - void operator=( const file_opener& ) = delete; - void operator=( file_opener&& ) = delete; + file_opener& operator=( const file_opener& ) = delete; + file_opener& operator=( file_opener&& ) = delete; [[nodiscard]] std::size_t size() const { @@ -118,8 +118,8 @@ namespace TAO_PEGTL_NAMESPACE::internal ::munmap( const_cast< char* >( m_data ), m_size ); } - void operator=( const file_mapper& ) = delete; - void operator=( file_mapper&& ) = delete; + file_mapper& operator=( const file_mapper& ) = delete; + file_mapper& operator=( file_mapper&& ) = delete; [[nodiscard]] bool empty() const noexcept { diff --git a/packages/PEGTL/include/tao/pegtl/internal/file_mapper_win32.hpp b/packages/PEGTL/include/tao/pegtl/internal/file_mapper_win32.hpp index a8a471fd9..73c27dec9 100644 --- a/packages/PEGTL/include/tao/pegtl/internal/file_mapper_win32.hpp +++ b/packages/PEGTL/include/tao/pegtl/internal/file_mapper_win32.hpp @@ -52,8 +52,8 @@ namespace TAO_PEGTL_NAMESPACE::internal ::CloseHandle( m_handle ); } - void operator=( const file_opener& ) = delete; - void operator=( file_opener&& ) = delete; + file_opener& operator=( const file_opener& ) = delete; + file_opener& operator=( file_opener&& ) = delete; [[nodiscard]] std::size_t size() const { @@ -134,8 +134,8 @@ namespace TAO_PEGTL_NAMESPACE::internal ::CloseHandle( m_handle ); } - void operator=( const win32_file_mapper& ) = delete; - void operator=( 
win32_file_mapper&& ) = delete; + win32_file_mapper& operator=( const win32_file_mapper& ) = delete; + win32_file_mapper& operator=( win32_file_mapper&& ) = delete; const size_t m_size; const HANDLE m_handle; @@ -202,8 +202,8 @@ namespace TAO_PEGTL_NAMESPACE::internal ::UnmapViewOfFile( LPCVOID( m_data ) ); } - void operator=( const file_mapper& ) = delete; - void operator=( file_mapper&& ) = delete; + file_mapper& operator=( const file_mapper& ) = delete; + file_mapper& operator=( file_mapper&& ) = delete; [[nodiscard]] bool empty() const noexcept { diff --git a/packages/PEGTL/include/tao/pegtl/internal/file_reader.hpp b/packages/PEGTL/include/tao/pegtl/internal/file_reader.hpp index 7b846e1bf..7c2b05494 100644 --- a/packages/PEGTL/include/tao/pegtl/internal/file_reader.hpp +++ b/packages/PEGTL/include/tao/pegtl/internal/file_reader.hpp @@ -78,8 +78,8 @@ namespace TAO_PEGTL_NAMESPACE::internal ~file_reader() = default; - void operator=( const file_reader& ) = delete; - void operator=( file_reader&& ) = delete; + file_reader& operator=( const file_reader& ) = delete; + file_reader& operator=( file_reader&& ) = delete; [[nodiscard]] std::size_t size() const { diff --git a/packages/PEGTL/include/tao/pegtl/internal/marker.hpp b/packages/PEGTL/include/tao/pegtl/internal/marker.hpp index 4beb9293a..a9ffa824f 100644 --- a/packages/PEGTL/include/tao/pegtl/internal/marker.hpp +++ b/packages/PEGTL/include/tao/pegtl/internal/marker.hpp @@ -10,7 +10,7 @@ namespace TAO_PEGTL_NAMESPACE::internal { template< typename Iterator, rewind_mode M > - class marker + class [[nodiscard]] marker { public: static constexpr rewind_mode next_rewind_mode = M; @@ -23,8 +23,8 @@ namespace TAO_PEGTL_NAMESPACE::internal ~marker() = default; - void operator=( const marker& ) = delete; - void operator=( marker&& ) = delete; + marker& operator=( const marker& ) = delete; + marker& operator=( marker&& ) = delete; [[nodiscard]] bool operator()( const bool result ) const noexcept { @@ -33,7 +33,7 @@ 
namespace TAO_PEGTL_NAMESPACE::internal }; template< typename Iterator > - class marker< Iterator, rewind_mode::required > + class [[nodiscard]] marker< Iterator, rewind_mode::required > { public: static constexpr rewind_mode next_rewind_mode = rewind_mode::active; @@ -53,8 +53,8 @@ namespace TAO_PEGTL_NAMESPACE::internal } } - void operator=( const marker& ) = delete; - void operator=( marker&& ) = delete; + marker& operator=( const marker& ) = delete; + marker& operator=( marker&& ) = delete; [[nodiscard]] bool operator()( const bool result ) noexcept { diff --git a/packages/PEGTL/include/tao/pegtl/internal/ranges.hpp b/packages/PEGTL/include/tao/pegtl/internal/ranges.hpp index 1af27d755..bf57db611 100644 --- a/packages/PEGTL/include/tao/pegtl/internal/ranges.hpp +++ b/packages/PEGTL/include/tao/pegtl/internal/ranges.hpp @@ -6,6 +6,8 @@ #include "../config.hpp" +#include <utility> + #include "bump_help.hpp" #include "enable_control.hpp" #include "failure.hpp" @@ -16,37 +18,12 @@ namespace TAO_PEGTL_NAMESPACE::internal { - template< typename Char, Char... Cs > - struct ranges_impl; - - template< typename Char > - struct ranges_impl< Char > - { - [[nodiscard]] static constexpr bool test( const Char /*unused*/ ) noexcept - { - return false; - } - }; - - template< typename Char, Char Eq > - struct ranges_impl< Char, Eq > - { - [[nodiscard]] static constexpr bool test( const Char c ) noexcept - { - return c == Eq; - } - }; - - template< typename Char, Char Lo, Char Hi, Char... Cs > - struct ranges_impl< Char, Lo, Hi, Cs... > + template< typename Char, Char Lo, Char Hi > + constexpr bool validate_range( Char c ) noexcept { - static_assert( Lo <= Hi, "invalid range detected" ); - - [[nodiscard]] static constexpr bool test( const Char c ) noexcept - { - return ( ( Lo <= c ) && ( c <= Hi ) ) || ranges_impl< Char, Cs... 
>::test( c ); - } - }; + static_assert( Lo <= Hi, "invalid range" ); + return ( Lo <= c ) && ( c <= Hi ); + } template< typename Peek, typename Peek::data_t... Cs > struct ranges @@ -57,9 +34,21 @@ namespace TAO_PEGTL_NAMESPACE::internal using rule_t = ranges; using subs_t = empty_list; + template< std::size_t... Is > + [[nodiscard]] static constexpr bool test( std::index_sequence< Is... > /*unused*/, const data_t c ) noexcept + { + constexpr const data_t cs[] = { Cs... }; + if constexpr( sizeof...( Cs ) % 2 == 0 ) { + return ( validate_range< data_t, cs[ 2 * Is ], cs[ 2 * Is + 1 ] >( c ) || ... ); + } + else { + return ( validate_range< data_t, cs[ 2 * Is ], cs[ 2 * Is + 1 ] >( c ) || ... ) || ( c == cs[ sizeof...( Cs ) - 1 ] ); + } + } + [[nodiscard]] static constexpr bool test( const data_t c ) noexcept { - return ranges_impl< data_t, Cs... >::test( c ); + return test( std::make_index_sequence< sizeof...( Cs ) / 2 >(), c ); } template< int Eol > diff --git a/packages/PEGTL/include/tao/pegtl/memory_input.hpp b/packages/PEGTL/include/tao/pegtl/memory_input.hpp index 675e8b32f..7cdc9f9f8 100644 --- a/packages/PEGTL/include/tao/pegtl/memory_input.hpp +++ b/packages/PEGTL/include/tao/pegtl/memory_input.hpp @@ -62,8 +62,8 @@ namespace TAO_PEGTL_NAMESPACE ~memory_input_base() = default; - memory_input_base operator=( const memory_input_base& ) = delete; - memory_input_base operator=( memory_input_base&& ) = delete; + memory_input_base& operator=( const memory_input_base& ) = delete; + memory_input_base& operator=( memory_input_base&& ) = delete; [[nodiscard]] const char* current() const noexcept { @@ -124,13 +124,17 @@ namespace TAO_PEGTL_NAMESPACE m_current.byte = in_byte; m_current.line = in_line; m_current.column = in_column; + private_depth = 0; } protected: const char* const m_begin; iterator_t m_current; - const char* const m_end; + const char* m_end; const Source m_source; + + public: + std::size_t private_depth = 0; }; template< typename Eol, typename Source > 
@@ -160,8 +164,8 @@ namespace TAO_PEGTL_NAMESPACE ~memory_input_base() = default; - memory_input_base operator=( const memory_input_base& ) = delete; - memory_input_base operator=( memory_input_base&& ) = delete; + memory_input_base& operator=( const memory_input_base& ) = delete; + memory_input_base& operator=( memory_input_base&& ) = delete; [[nodiscard]] const char* current() const noexcept { @@ -208,13 +212,17 @@ namespace TAO_PEGTL_NAMESPACE void restart() { m_current = m_begin.data; + private_depth = 0; } protected: const internal::iterator m_begin; iterator_t m_current; - const char* const m_end; + const char* m_end; const Source m_source; + + public: + std::size_t private_depth = 0; }; } // namespace internal @@ -268,8 +276,8 @@ namespace TAO_PEGTL_NAMESPACE ~memory_input() = default; - memory_input operator=( const memory_input& ) = delete; - memory_input operator=( memory_input&& ) = delete; + memory_input& operator=( const memory_input& ) = delete; + memory_input& operator=( memory_input&& ) = delete; [[nodiscard]] const Source& source() const noexcept { @@ -355,6 +363,11 @@ namespace TAO_PEGTL_NAMESPACE const char* b = begin_of_line( p ); return std::string_view( b, static_cast< std::size_t >( end_of_line( p ) - b ) ); } + + void private_set_end( const char* new_end ) noexcept + { + this->m_end = new_end; + } }; template< typename... 
Ts > diff --git a/packages/PEGTL/include/tao/pegtl/mmap_input.hpp b/packages/PEGTL/include/tao/pegtl/mmap_input.hpp index 1bd4dce19..2362b6c76 100644 --- a/packages/PEGTL/include/tao/pegtl/mmap_input.hpp +++ b/packages/PEGTL/include/tao/pegtl/mmap_input.hpp @@ -42,8 +42,8 @@ namespace TAO_PEGTL_NAMESPACE ~mmap_holder() = default; - void operator=( const mmap_holder& ) = delete; - void operator=( mmap_holder&& ) = delete; + mmap_holder& operator=( const mmap_holder& ) = delete; + mmap_holder& operator=( mmap_holder&& ) = delete; }; } // namespace internal @@ -67,8 +67,8 @@ namespace TAO_PEGTL_NAMESPACE ~mmap_input() = default; - void operator=( const mmap_input& ) = delete; - void operator=( mmap_input&& ) = delete; + mmap_input& operator=( const mmap_input& ) = delete; + mmap_input& operator=( mmap_input&& ) = delete; }; template< typename... Ts > diff --git a/packages/PEGTL/include/tao/pegtl/parse.hpp b/packages/PEGTL/include/tao/pegtl/parse.hpp index 9634655c5..3888fb3fc 100644 --- a/packages/PEGTL/include/tao/pegtl/parse.hpp +++ b/packages/PEGTL/include/tao/pegtl/parse.hpp @@ -4,7 +4,7 @@ #ifndef TAO_PEGTL_PARSE_HPP #define TAO_PEGTL_PARSE_HPP -#include <utility> +#include <type_traits> #include "apply_mode.hpp" #include "config.hpp" @@ -16,17 +16,20 @@ namespace TAO_PEGTL_NAMESPACE { - template< typename Rule, - template< typename... > class Action = nothing, - template< typename... > class Control = normal, - apply_mode A = apply_mode::action, - rewind_mode M = rewind_mode::required, - typename ParseInput, - typename... States > - auto parse( ParseInput&& in, States&&... st ) + namespace internal { - return Control< Rule >::template match< A, M, Action, Control >( in, st... 
); - } + [[nodiscard]] inline auto get_position( const position& p ) noexcept( std::is_nothrow_copy_constructible_v< position > ) + { + return p; + } + + template< typename ParseInput > + [[nodiscard]] position get_position( const ParseInput& in ) noexcept( noexcept( position( in.position() ) ) ) + { + return in.position(); + } + + } // namespace internal template< typename Rule, template< typename... > class Action = nothing, @@ -35,46 +38,31 @@ namespace TAO_PEGTL_NAMESPACE rewind_mode M = rewind_mode::required, typename ParseInput, typename... States > - auto parse_nested( position op, ParseInput&& in, States&&... st ) + auto parse( ParseInput&& in, States&&... st ) { -#if defined( __cpp_exceptions ) - try { - return parse< Rule, Action, Control, A, M >( in, st... ); - } - catch( parse_error& e ) { - e.add_position( std::move( op ) ); - throw; - } -#else - (void)op; - return parse< Rule, Action, Control, A, M >( in, st... ); -#endif + return Control< Rule >::template match< A, M, Action, Control >( in, st... ); } - // NOTE: The oi.position() in the version below can be expensive for lazy - // inputs, which is why the version below does not simply call the version - // above with said oi.position() as first parameter. - template< typename Rule, template< typename... > class Action = nothing, template< typename... > class Control = normal, apply_mode A = apply_mode::action, rewind_mode M = rewind_mode::required, - typename OuterInput, + typename Outer, typename ParseInput, typename... States > - auto parse_nested( const OuterInput& oi, ParseInput&& in, States&&... st ) + auto parse_nested( const Outer& o, ParseInput&& in, States&&... st ) { #if defined( __cpp_exceptions ) try { return parse< Rule, Action, Control, A, M >( in, st... ); } catch( parse_error& e ) { - e.add_position( oi.position() ); + e.add_position( internal::get_position( o ) ); throw; } #else - (void)oi; + (void)o; return parse< Rule, Action, Control, A, M >( in, st... 
); #endif } diff --git a/packages/PEGTL/include/tao/pegtl/parse_error.hpp b/packages/PEGTL/include/tao/pegtl/parse_error.hpp index f471fb420..df62ce681 100644 --- a/packages/PEGTL/include/tao/pegtl/parse_error.hpp +++ b/packages/PEGTL/include/tao/pegtl/parse_error.hpp @@ -107,6 +107,11 @@ namespace TAO_PEGTL_NAMESPACE } m_impl->add_position( std::move( p ) ); } + + void add_position( const position& p ) + { + add_position( position( p ) ); + } }; } // namespace TAO_PEGTL_NAMESPACE diff --git a/packages/PEGTL/include/tao/pegtl/read_input.hpp b/packages/PEGTL/include/tao/pegtl/read_input.hpp index 7b748d093..019c9cd20 100644 --- a/packages/PEGTL/include/tao/pegtl/read_input.hpp +++ b/packages/PEGTL/include/tao/pegtl/read_input.hpp @@ -42,8 +42,8 @@ namespace TAO_PEGTL_NAMESPACE ~read_input() = default; - void operator=( const read_input& ) = delete; - void operator=( read_input&& ) = delete; + read_input& operator=( const read_input& ) = delete; + read_input& operator=( read_input&& ) = delete; }; template< typename... Ts > diff --git a/packages/PEGTL/include/tao/pegtl/string_input.hpp b/packages/PEGTL/include/tao/pegtl/string_input.hpp index 2d83a6334..43349eb6f 100644 --- a/packages/PEGTL/include/tao/pegtl/string_input.hpp +++ b/packages/PEGTL/include/tao/pegtl/string_input.hpp @@ -30,8 +30,8 @@ namespace TAO_PEGTL_NAMESPACE ~string_holder() = default; - void operator=( const string_holder& ) = delete; - void operator=( string_holder&& ) = delete; + string_holder& operator=( const string_holder& ) = delete; + string_holder& operator=( string_holder&& ) = delete; }; } // namespace internal @@ -52,8 +52,8 @@ namespace TAO_PEGTL_NAMESPACE ~string_input() = default; - void operator=( const string_input& ) = delete; - void operator=( string_input&& ) = delete; + string_input& operator=( const string_input& ) = delete; + string_input& operator=( string_input&& ) = delete; }; template< typename... 
Ts > diff --git a/packages/PEGTL/include/tao/pegtl/version.hpp b/packages/PEGTL/include/tao/pegtl/version.hpp index 4d460c536..2ffc6b953 100644 --- a/packages/PEGTL/include/tao/pegtl/version.hpp +++ b/packages/PEGTL/include/tao/pegtl/version.hpp @@ -4,10 +4,10 @@ #ifndef TAO_PEGTL_VERSION_HPP #define TAO_PEGTL_VERSION_HPP -#define TAO_PEGTL_VERSION "3.2.0" +#define TAO_PEGTL_VERSION "3.2.1" #define TAO_PEGTL_VERSION_MAJOR 3 #define TAO_PEGTL_VERSION_MINOR 2 -#define TAO_PEGTL_VERSION_PATCH 0 +#define TAO_PEGTL_VERSION_PATCH 1 #endif diff --git a/packages/PEGTL/src/example/pegtl/CMakeLists.txt b/packages/PEGTL/src/example/pegtl/CMakeLists.txt index 46ffeb068..64bb7380d 100644 --- a/packages/PEGTL/src/example/pegtl/CMakeLists.txt +++ b/packages/PEGTL/src/example/pegtl/CMakeLists.txt @@ -8,6 +8,7 @@ set(example_sources csv1.cpp csv2.cpp dynamic_match.cpp + expression.cpp hello_world.cpp indent_aware.cpp json_analyze.cpp @@ -29,7 +30,6 @@ set(example_sources peg2pegtl.cpp proto3.cpp recover.cpp - skipper.cpp s_expression.cpp sum.cpp symbol_table.cpp diff --git a/packages/PEGTL/src/example/pegtl/expression.cpp b/packages/PEGTL/src/example/pegtl/expression.cpp new file mode 100644 index 000000000..5d490f93f --- /dev/null +++ b/packages/PEGTL/src/example/pegtl/expression.cpp @@ -0,0 +1,613 @@ +// Copyright (c) 2021 Dr. Colin Hirsch and Daniel Frey +// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ + +#if !defined( __cpp_exceptions ) +#include <iostream> +int main() +{ + std::cerr << "Exception support required, example unavailable." 
<< std::endl; + return 1; +} +#else + +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <cstring> +#include <iomanip> +#include <iostream> +#include <stdexcept> +#include <tuple> +#include <variant> +#include <vector> + +#include <tao/pegtl.hpp> + +namespace TAO_PEGTL_NAMESPACE::expression +{ + // Expression parsing with prefix, postfix and infix operators, ternary + // operator and a couple of other special cases supported. + + // The handling of operator precedences with left and right binding power is + // based on https://github.com/matklad/minipratt/blob/master/src/bin/pratt.rs + + // It correctly recognises all operators with their precedence and associativity, + // however is still very much work-in-progress regarding a lot of details... + + // TODO: Fix missing whitespace-skip before infix/postfix operators. + // TODO: Decide whether to use must everywhere or nowhere? + // TODO: Decide whether to suppress actions for sub-rules. + // TODO: Finalise the event-style interface or change to fake actions or actions with ops? + // TODO: Decide on where to use config vs. where to use grammar template parameters. + // TODO: Choose customisation points vs. copy-n-paste customisation. + // TODO: Constexpr-ify where possible with C++20. 
+ + namespace internal + { + struct prefix_info + { + prefix_info( const std::string_view n, const std::uint8_t pbp ) noexcept + : name( n ), + prefix_binding_power( pbp ) + { + assert( pbp ); + } + + std::string name; + + std::uint8_t prefix_binding_power; + }; + + struct infix_postfix_info + { + infix_postfix_info( const std::string_view n, const std::uint8_t lbp, const std::uint8_t rbp = 0 ) noexcept + : infix_postfix_info( n, std::string_view(), lbp, rbp ) + {} + + infix_postfix_info( const std::string_view n, const std::string_view o, const std::uint8_t lbp, const std::uint8_t rbp = 0 ) noexcept + : name( n ), + other( o ), + left_binding_power( lbp ), + right_binding_power( rbp ) + { + if( right_binding_power > 0 ) { + assert( std::min( left_binding_power, right_binding_power ) & 1 ); + assert( 2 * std::min( left_binding_power, right_binding_power ) + 1 == left_binding_power + right_binding_power ); + } + assert( left_binding_power > 0 ); + } + + [[nodiscard]] bool is_infix() const noexcept + { + return right_binding_power != 0; + } + + [[nodiscard]] bool is_postfix() const noexcept + { + return right_binding_power == 0; + } + + std::string name; + std::string other; // Used for the ':' of the ternary operator etc. 
+ + std::uint8_t left_binding_power; + std::uint8_t right_binding_power; + }; + + template< typename ParseInput > + [[nodiscard]] bool match_string_view( ParseInput& in, const std::string_view sv ) + { + if( in.size( sv.size() ) >= sv.size() ) { + if( std::memcmp( in.current(), sv.data(), sv.size() ) == 0 ) { + in.bump( sv.size() ); + return true; + } + } + return false; + } + + template< typename ParseInput, typename OperatorInfo > + [[nodiscard]] const OperatorInfo* match_prefix( ParseInput& in, const std::size_t max_length, const std::vector< OperatorInfo >& ops ) + { + const std::size_t max = std::min( max_length, in.size( max_length ) ); + for( std::string op( in.current(), max ); !op.empty(); op.pop_back() ) { + if( const auto i = std::find_if( ops.begin(), ops.end(), [ = ]( const OperatorInfo& info ) { return info.name == op; } ); i != ops.end() ) { + in.bump( op.size() ); + return &*i; + } + } + return nullptr; + } + + template< typename ParseInput, typename OperatorInfo > + [[nodiscard]] const OperatorInfo* match_infix_postfix( ParseInput& in, const std::size_t max_length, const std::vector< OperatorInfo >& ops, const std::uint8_t min_precedence ) + { + const std::size_t max = std::min( max_length, in.size( max_length ) ); + for( std::string op( in.current(), max ); !op.empty(); op.pop_back() ) { + if( const auto i = std::find_if( ops.begin(), ops.end(), [ = ]( const OperatorInfo& info ) { return info.name == op; } ); ( i != ops.end() ) && ( i->left_binding_power >= min_precedence ) ) { + in.bump( op.size() ); + return &*i; + } + } + return nullptr; + } + + template< typename T > + [[nodiscard]] std::vector< T > sorted_operator_vector( const std::initializer_list< T >& t ) + { + std::vector< T > v{ t }; + const auto less = []( const auto& l, const auto& r ) { return l.name < r.name; }; + std::sort( v.begin(), v.end(), less ); + return v; + } + + struct operator_maps + { + // clang-format off + operator_maps() + : prefix( sorted_operator_vector( { + 
prefix_info( "!", 80 ), + prefix_info( "+", 80 ), + prefix_info( "-", 80 ), + prefix_info( "~", 80 ), + prefix_info( "*", 80 ), + prefix_info( "&", 80 ), + prefix_info( "++", 80 ), + prefix_info( "--", 80 ) + } ) ), + infix_postfix( sorted_operator_vector( { + infix_postfix_info( "::", 99, 100 ), // Special: Followed by identifier (or template-space-identifer, which we don't support yet). + infix_postfix_info( ".*", 37, 38 ), + infix_postfix_info( "->*", 37, 38 ), + infix_postfix_info( "*", 35, 36 ), + infix_postfix_info( "/", 35, 36 ), + infix_postfix_info( "%", 35, 36 ), + infix_postfix_info( "+", 33, 34 ), + infix_postfix_info( "-", 33, 34 ), + infix_postfix_info( "<<", 31, 32 ), + infix_postfix_info( ">>", 31, 32 ), + infix_postfix_info( "<=>", 29, 30 ), + infix_postfix_info( "<", 27, 28 ), + infix_postfix_info( "<=", 27, 28 ), + infix_postfix_info( ">", 27, 28 ), + infix_postfix_info( ">=", 27, 28 ), + infix_postfix_info( "==", 25, 26 ), + infix_postfix_info( "!=", 25, 26 ), + infix_postfix_info( "&", 23, 24 ), + infix_postfix_info( "^", 21, 22 ), + infix_postfix_info( "|", 19, 20 ), + infix_postfix_info( "&&", 17, 18 ), + infix_postfix_info( "||", 15, 16 ), + infix_postfix_info( "?", ":", 14, 13 ), // Special: Ternary operator. + infix_postfix_info( "=", 12, 11 ), + infix_postfix_info( "+=", 12, 11 ), + infix_postfix_info( "-=", 12, 11 ), + infix_postfix_info( "*=", 12, 11 ), + infix_postfix_info( "/=", 12, 11 ), + infix_postfix_info( "%=", 12, 11 ), + infix_postfix_info( "<<=", 12, 11 ), + infix_postfix_info( ">>=", 12, 11 ), + infix_postfix_info( "&=", 12, 11 ), + infix_postfix_info( "^=", 12, 11 ), + infix_postfix_info( "|=", 12, 11 ), + // infix_postfix_info( ",", 9, 10 ), // TODO: Enable, but forbid in function argument list. + infix_postfix_info( "[", "]", 90 ), // Special: Argument list. + infix_postfix_info( "(", ")", 90 ), // Special: Argument list. + infix_postfix_info( ".", 90 ), // Special: Followed by identifier. 
+ infix_postfix_info( "->", 90 ), // Special: Followed by identifier. + infix_postfix_info( "++", 90 ), + infix_postfix_info( "--", 90 ) + } ) ), + max_prefix_length( std::max_element( prefix.begin(), prefix.end(), []( const auto& l, const auto& r ) { return l.name.size() < r.name.size(); } )->name.size() ), + max_infix_postfix_length( std::max_element( infix_postfix.begin(), infix_postfix.end(), []( const auto& l, const auto& r ) { return l.name.size() < r.name.size(); } )->name.size() ) + { + // These are C++20 operators with the correct associativity and relative precedence, however some are still missing: + // TODO: Compound literal (C99), _Alignof (C11), Functional cast, sizeof, co_await, co_yield, throw, new, new[], delete, delete[], C-style casts. + } + // clang-format on + + const std::vector< prefix_info > prefix; + const std::vector< infix_postfix_info > infix_postfix; + + const std::size_t max_prefix_length; + const std::size_t max_infix_postfix_length; + }; + + struct string_view_rule + { + template< apply_mode A, + rewind_mode M, + template< typename... > + class Action, + template< typename... > + class Control, + typename ParseInput > + [[nodiscard]] static bool match( ParseInput& in, const std::string_view sv ) noexcept( noexcept( match_string_view( in, sv ) ) ) + { + return match_string_view( in, sv ); + } + }; + + struct comment + : seq< one< '#' >, until< eolf > > + {}; + + struct ignored + : sor< space, comment > + {}; + + template< typename Literal, typename Identifier > + struct expression; + + template< typename Literal, typename Identifier > + struct bracket_expression + { + template< apply_mode A, + rewind_mode M, + template< typename... > + class Action, + template< typename... 
> + class Control, + typename ParseInput, + typename Result, + typename Config > + [[nodiscard]] static bool match( ParseInput& in, Result& res, const Config& cfg, const std::uint8_t /*unused*/ ) + { + return Control< if_must< one< '(' >, star< ignored >, expression< Literal, Identifier >, star< ignored >, one< ')' > > >::template match< A, M, Action, Control >( in, res, cfg, 0 ); + } + }; + + template< typename Literal, typename Identifier > + struct prefix_expression + { + template< apply_mode A, + rewind_mode M, + template< typename... > + class Action, + template< typename... > + class Control, + typename ParseInput, + typename Result, + typename Config > + [[nodiscard]] static bool match( ParseInput& in, Result& res, const Config& cfg, const std::uint8_t /*unused*/ ) + { + if( const auto* info = match_prefix( in, cfg.max_prefix_length, cfg.prefix ) ) { + (void)Control< must< star< ignored >, expression< Literal, Identifier > > >::template match< A, M, Action, Control >( in, res, cfg, info->prefix_binding_power ); + if constexpr( A == apply_mode::action ) { + res.prefix( info->name ); + } + return true; + } + return false; + } + }; + + template< typename Literal, typename Identifier > + struct infix_postfix_expression + { + template< apply_mode A, + rewind_mode M, + template< typename... > + class Action, + template< typename... > + class Control, + typename ParseInput, + typename Result, + typename Config > + [[nodiscard]] static bool match( ParseInput& in, Result& res, const Config& cfg, const std::uint8_t min ) + { + if( const auto* info = match_infix_postfix( in, cfg.max_infix_postfix_length, cfg.infix_postfix, min ) ) { + if( info->name == "?" 
) { + (void)Control< must< star< ignored >, expression< Literal, Identifier > > >::template match< A, M, Action, Control >( in, res, cfg, 0 ); + (void)Control< must< star< ignored >, string_view_rule > >::template match< A, M, Action, Control >( in, info->other ); + (void)Control< must< star< ignored >, expression< Literal, Identifier > > >::template match< A, M, Action, Control >( in, res, cfg, info->right_binding_power ); + if constexpr( A == apply_mode::action ) { + res.ternary( info->name, info->other ); + } + return true; + } + if( ( info->name == "." ) || ( info->name == "::" ) || ( info->name == "->" ) ) { + (void)Control< must< star< ignored >, Identifier > >::template match< A, M, Action, Control >( in, res, cfg, 0 ); + if constexpr( A == apply_mode::action ) { + res.infix( info->name ); + } + return true; + } + if( ( info->name == "(" ) || ( info->name == "[" ) ) { + const std::size_t size = res.term_stack.size(); // TODO: Determine number of arguments without relying on res!? 
+ (void)Control< must< star< ignored >, opt< list_must< expression< Literal, Identifier >, one< ',' >, ignored > > > >::template match< A, M, Action, Control >( in, res, cfg, 0 ); + (void)Control< must< star< ignored >, string_view_rule > >::template match< A, M, Action, Control >( in, info->other ); + if constexpr( A == apply_mode::action ) { + res.call( info->name, info->other, res.term_stack.size() - size ); + } + return true; + } + if( info->is_infix() ) { + (void)Control< must< star< ignored >, expression< Literal, Identifier > > >::template match< A, M, Action, Control >( in, res, cfg, info->right_binding_power ); + if constexpr( A == apply_mode::action ) { + res.infix( info->name ); + } + return true; + } + if( info->is_postfix() ) { + if constexpr( A == apply_mode::action ) { + res.postfix( info->name ); + } + return true; + } + } + return false; + } + }; + + template< typename Literal, typename Identifier > + struct first_expression + : sor< Literal, Identifier, bracket_expression< Literal, Identifier >, prefix_expression< Literal, Identifier > > + {}; + + template< typename Literal, typename Identifier > + struct expression + : seq< first_expression< Literal, Identifier >, star< infix_postfix_expression< Literal, Identifier > > > + {}; + + } // namespace internal + + template< typename Literal, typename Identifier > + struct grammar + { + using rule_t = grammar; + using subs_t = type_list< internal::expression< Literal, Identifier > >; + + template< apply_mode A, + rewind_mode M, + template< typename... > + class Action, + template< typename... > + class Control, + typename ParseInput, + typename Result > + [[nodiscard]] static bool match( ParseInput& in, Result& res ) + { + const internal::operator_maps cfg; + return match< A, M, Action, Control >( in, res, cfg ); + } + + template< apply_mode A, + rewind_mode M, + template< typename... > + class Action, + template< typename... 
> + class Control, + typename ParseInput, + typename Result, + typename Config > + [[nodiscard]] static bool match( ParseInput& in, Result& res, const Config& cfg ) + { + return Control< internal::expression< Literal, Identifier > >::template match< A, M, Action, Control >( in, res, cfg, 0 ); + } + }; + +} // namespace TAO_PEGTL_NAMESPACE::expression + +namespace application +{ + namespace pegtl = TAO_PEGTL_NAMESPACE; + + struct term_t; + + using tuple_t = std::tuple< std::string, std::vector< term_t > >; + using variant_t = std::variant< std::int64_t, std::string, tuple_t >; + + struct term_t + { + explicit term_t( const std::int64_t l ) noexcept + : variant( l ) + {} + + explicit term_t( std::string&& s ) noexcept + : variant( std::move( s ) ) + {} + + explicit term_t( variant_t&& v ) noexcept + : variant( std::move( v ) ) + {} + + variant_t variant; + }; + + [[nodiscard]] inline std::string operator+( const char* l, const std::string_view r ) + { + return std::string( l ) + " '" + std::string( r ) + "'"; + } + + struct result + { + void infix( const std::string_view op ) + { + assert( term_stack.size() >= 2 ); + { + variant_t tmp = tuple_t( "infix" + op, { std::move( term_stack.at( term_stack.size() - 2 ) ), std::move( term_stack.at( term_stack.size() - 1 ) ) } ); + term_stack.pop_back(); + term_stack.back().variant = std::move( tmp ); + } + assert( string_stack.size() >= 2 ); + { + std::string tmp = "( " + string_stack.at( string_stack.size() - 2 ) + " " + std::string( op ) + " " + string_stack.at( string_stack.size() - 1 ) + " )"; + string_stack.pop_back(); + string_stack.back() = std::move( tmp ); + } + } + + void prefix( const std::string_view op ) + { + assert( term_stack.size() >= 1 ); // NOLINT(readability-container-size-empty) + { + variant_t tmp = tuple_t( "prefix" + op, { std::move( term_stack.at( term_stack.size() - 1 ) ) } ); + term_stack.back().variant = std::move( tmp ); + } + assert( string_stack.size() >= 1 ); // 
NOLINT(readability-container-size-empty) + { + std::string tmp = std::string( op ) + "( " + string_stack.at( string_stack.size() - 1 ) + " )"; + string_stack.back() = std::move( tmp ); + } + } + + void postfix( const std::string_view op ) + { + assert( term_stack.size() >= 1 ); // NOLINT(readability-container-size-empty) + { + variant_t tmp = tuple_t( "postfix" + op, { std::move( term_stack.at( term_stack.size() - 1 ) ) } ); + term_stack.back().variant = std::move( tmp ); + } + assert( string_stack.size() >= 1 ); // NOLINT(readability-container-size-empty) + { + std::string tmp = "( " + string_stack.at( string_stack.size() - 1 ) + " )" + std::string( op ); + string_stack.back() = std::move( tmp ); + } + } + + void ternary( const std::string_view op, const std::string_view o2 ) + { + assert( term_stack.size() >= 2 ); + { + variant_t tmp = tuple_t( "ternary", { std::move( term_stack.at( term_stack.size() - 3 ) ), std::move( term_stack.at( term_stack.size() - 2 ) ), std::move( term_stack.at( term_stack.size() - 1 ) ) } ); + term_stack.pop_back(); + term_stack.pop_back(); + term_stack.back().variant = std::move( tmp ); + } + assert( string_stack.size() >= 2 ); + { + std::string tmp = "( " + string_stack.at( string_stack.size() - 3 ) + " " + std::string( op ) + " " + string_stack.at( string_stack.size() - 2 ) + " " + std::string( o2 ) + " " + string_stack.at( string_stack.size() - 1 ) + " )"; + string_stack.pop_back(); + string_stack.pop_back(); + string_stack.back() = std::move( tmp ); + } + } + + void call( const std::string_view op, const std::string_view o2, const std::size_t args ) + { + assert( term_stack.size() > args ); + { + variant_t tmp = tuple_t( "call '" + std::string( op ) + std::string( o2 ) + "'", std::vector< term_t >( term_stack.end() - args - 1, term_stack.end() ) ); + for( std::size_t i = 0; i < args; ++i ) { + term_stack.pop_back(); + } + term_stack.back().variant = ( std::move( tmp ) ); + } + assert( string_stack.size() > args ); + { + std::string 
tmp = *( string_stack.end() - args - 1 ) + std::string( op ) + " "; + for( std::size_t i = 0; i < args; ++i ) { + if( i > 0 ) { + tmp += ", "; + } + tmp += *( string_stack.end() - args + i ); + } + tmp += " " + std::string( o2 ); + string_stack.resize( string_stack.size() - args ); + string_stack.back() = std::move( tmp ); + } + } + + void number( const std::int64_t l ) + { + term_stack.emplace_back( l ); + string_stack.emplace_back( std::to_string( l ) ); + } + + void identifier( const std::string& id ) + { + term_stack.emplace_back( id ); + string_stack.emplace_back( id ); + } + + std::vector< term_t > term_stack; + std::vector< std::string > string_stack; + }; + + inline std::ostream& operator<<( std::ostream& o, const term_t& t ); + + inline std::ostream& operator<<( std::ostream& o, const tuple_t& t ) + { + o << "{ " << std::get< 0 >( t ); + for( const auto& res : std::get< 1 >( t ) ) { + o << " " << res; + } + o << " }"; + return o; + } + + inline std::ostream& operator<<( std::ostream& o, const variant_t& v ) + { + std::visit( [ & ]( const auto& t ) { o << t; }, v ); + return o; + } + + inline std::ostream& operator<<( std::ostream& o, const term_t& t ) + { + o << t.variant; + return o; + } + + struct literal + : pegtl::plus< pegtl::digit > + {}; + + struct grammar + : pegtl::must< pegtl::expression::grammar< literal, pegtl::identifier >, pegtl::eof > + {}; + + template< typename Rule > + struct action + : pegtl::nothing< Rule > + {}; + + template<> + struct action< literal > + { + template< typename Input, typename... States > + static void apply( const Input& in, result& res, States&&... /*unused*/ ) + { + res.number( std::stoll( in.string() ) ); + } + }; + + template<> + struct action< pegtl::identifier > + { + template< typename Input, typename... States > + static void apply( const Input& in, result& res, States&&... 
/*unused*/ ) + { + res.identifier( in.string() ); + } + }; + +} // namespace application + +int main( int argc, char** argv ) +{ + // if( TAO_PEGTL_NAMESPACE::analyze< application::grammar >() != 0 ) { + // return 1; + // } + for( int i = 1; i < argc; ++i ) { + TAO_PEGTL_NAMESPACE::argv_input in( argv, i ); + try { + application::result res; + TAO_PEGTL_NAMESPACE::parse< application::grammar, application::action >( in, res ); + std::cout << "Input: " << argv[ i ] << std::endl; + assert( res.term_stack.size() == 1 ); + assert( res.string_stack.size() == 1 ); + std::cout << "Result: " << res.string_stack.at( 0 ) << std::endl; + std::cout << "Result: " << res.term_stack.at( 0 ) << std::endl; + } + catch( const TAO_PEGTL_NAMESPACE::parse_error& e ) { + const auto p = e.positions().front(); + std::cerr << e.what() << '\n' + << in.line_at( p ) << '\n' + << std::setw( p.column ) << '^' << '\n'; + } + } + return 0; +} + +#endif diff --git a/packages/PEGTL/src/example/pegtl/json_classes.hpp b/packages/PEGTL/src/example/pegtl/json_classes.hpp index c9f26de16..3e69515b0 100644 --- a/packages/PEGTL/src/example/pegtl/json_classes.hpp +++ b/packages/PEGTL/src/example/pegtl/json_classes.hpp @@ -35,8 +35,8 @@ namespace example json_base( const json_base& ) = delete; json_base( json_base&& ) = delete; - void operator=( const json_base& ) = delete; - void operator=( json_base&& ) = delete; + json_base& operator=( const json_base& ) = delete; + json_base& operator=( json_base&& ) = delete; virtual void stream( std::ostream& ) const = 0; diff --git a/packages/PEGTL/src/example/pegtl/json_parse.cpp b/packages/PEGTL/src/example/pegtl/json_parse.cpp index f3c572d12..240a8898c 100644 --- a/packages/PEGTL/src/example/pegtl/json_parse.cpp +++ b/packages/PEGTL/src/example/pegtl/json_parse.cpp @@ -6,6 +6,7 @@ #include <tao/pegtl.hpp> #include <tao/pegtl/contrib/json.hpp> +#include <tao/pegtl/contrib/limit_depth.hpp> #include <tao/pegtl/contrib/trace.hpp> #include "json_errors.hpp" @@ -16,6 
+17,15 @@ namespace example { using grammar = pegtl::seq< pegtl::json::text, pegtl::eof >; + template< typename > + struct action + {}; + + template<> + struct action< pegtl::json::value > + : pegtl::limit_depth< 42 > + {}; + } // namespace example int main( int argc, char** argv ) // NOLINT(bugprone-exception-escape) @@ -30,7 +40,7 @@ int main( int argc, char** argv ) // NOLINT(bugprone-exception-escape) pegtl::argv_input in( argv, 1 ); #if defined( __cpp_exceptions ) try { - pegtl::parse< example::grammar, pegtl::nothing, example::control >( in ); + pegtl::parse< example::grammar, example::action, example::control >( in ); } catch( const pegtl::parse_error& e ) { const auto p = e.positions().front(); @@ -40,7 +50,7 @@ int main( int argc, char** argv ) // NOLINT(bugprone-exception-escape) return 1; } #else - if( !pegtl::parse< example::grammar, pegtl::nothing, example::control >( in ) ) { + if( !pegtl::parse< example::grammar, example::action, example::control >( in ) ) { std::cerr << "error occurred" << std::endl; return 1; } diff --git a/packages/PEGTL/src/example/pegtl/skipper.cpp b/packages/PEGTL/src/example/pegtl/skipper.cpp deleted file mode 100644 index fd11200bb..000000000 --- a/packages/PEGTL/src/example/pegtl/skipper.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) 2021 Dr. 
Colin Hirsch and Daniel Frey -// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ - -#include <iostream> -#include <string> - -#include <tao/pegtl.hpp> -#include <tao/pegtl/contrib/skip.hpp> - -namespace pegtl = TAO_PEGTL_NAMESPACE; - -namespace demo -{ - // define your grammar without concerning yourself with skipping whitespace - - // clang-format off - struct key : pegtl::identifier {}; - struct value : pegtl::identifier {}; - struct assign : pegtl::one< '=' > {}; - struct grammar : pegtl::seq< key, assign, value, pegtl::eof > {}; - // clang-format on - - // define your actions as usual - - template< typename Rule > - struct action - {}; - - template<> - struct action< key > - { - template< typename ActionInput > - static void apply( const ActionInput& in ) - { - std::cout << "key: '" << in.string() << "'\n"; - } - }; - - template<> - struct action< value > - { - template< typename ActionInput > - static void apply( const ActionInput& in ) - { - std::cout << "value: '" << in.string() << "'\n"; - } - }; - - // now specify how and where to skip whitespace - - using skip_how = pegtl::star< pegtl::sor< pegtl::space, pegtl::eol > >; - - // clang-format off - template< typename > inline constexpr bool where = false; - template<> inline constexpr bool where< key > = true; - template<> inline constexpr bool where< value > = true; - // clang-format on - - // as 'skip<>' can not take 'where' as a template parameter directly, we need to wrap it. - struct skip_where - { - // when to skip - template< typename Rule > - static constexpr auto value = where< Rule >; - }; - - template< typename Rule > - using control = pegtl::skip< skip_how, skip_where >::control< Rule >; - - // The above is a first step, the helper (skip<>) may change in the future - // to allow separate skipping before and after and also support different skip - // rules for each rule/set-of-rules. Also, a more convenient way to specify - // the whole thing. 
- -} // namespace demo - -int main( int argc, char** argv ) // NOLINT(bugprone-exception-escape) -{ - if( argc > 1 ) { - pegtl::argv_input in( argv, 1 ); - if( pegtl::parse< demo::grammar, demo::action, demo::control >( in ) ) { - std::cout << "success!" << std::endl; - } - else { - std::cerr << "failure." << std::endl; - } - } -} diff --git a/packages/PEGTL/src/example/pegtl/token_input.cpp b/packages/PEGTL/src/example/pegtl/token_input.cpp index 6f03dad7e..7d2795a3f 100644 --- a/packages/PEGTL/src/example/pegtl/token_input.cpp +++ b/packages/PEGTL/src/example/pegtl/token_input.cpp @@ -35,8 +35,8 @@ namespace TAO_PEGTL_NAMESPACE ~token_action_input() = default; - token_action_input operator=( const token_action_input& ) = delete; - token_action_input operator=( token_action_input&& ) = delete; + token_action_input& operator=( const token_action_input& ) = delete; + token_action_input& operator=( token_action_input&& ) = delete; [[nodiscard]] const iterator_t& iterator() const noexcept { @@ -101,8 +101,8 @@ namespace TAO_PEGTL_NAMESPACE ~token_parse_input() = default; - token_parse_input operator=( const token_parse_input& ) = delete; - token_parse_input operator=( token_parse_input&& ) = delete; + token_parse_input& operator=( const token_parse_input& ) = delete; + token_parse_input& operator=( token_parse_input&& ) = delete; void discard() const noexcept {} diff --git a/packages/PEGTL/src/test/pegtl/CMakeLists.txt b/packages/PEGTL/src/test/pegtl/CMakeLists.txt index 889b0520a..6bfacb4a7 100644 --- a/packages/PEGTL/src/test/pegtl/CMakeLists.txt +++ b/packages/PEGTL/src/test/pegtl/CMakeLists.txt @@ -23,6 +23,7 @@ set(test_sources change_action_and_states.cpp change_state.cpp change_states.cpp + check_bytes.cpp contains.cpp contrib_alphabet.cpp contrib_analyze.cpp @@ -65,6 +66,8 @@ set(test_sources internal_endian.cpp internal_file_mapper.cpp internal_file_opener.cpp + limit_bytes.cpp + limit_depth.cpp parse_error.cpp pegtl_string_t.cpp position.cpp diff --git 
a/packages/PEGTL/src/test/pegtl/check_bytes.cpp b/packages/PEGTL/src/test/pegtl/check_bytes.cpp new file mode 100644 index 000000000..b773abed9 --- /dev/null +++ b/packages/PEGTL/src/test/pegtl/check_bytes.cpp @@ -0,0 +1,50 @@ +// Copyright (c) 2021 Dr. Colin Hirsch and Daniel Frey +// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ + +#include <tao/pegtl/contrib/check_bytes.hpp> + +#include "test.hpp" + +namespace TAO_PEGTL_NAMESPACE +{ + struct test_rule + : star< alpha > + {}; + + struct test_grammar + : seq< test_rule, eof > + {}; + + template< typename Rule > + struct test_action + : nothing< Rule > + {}; + + template<> + struct test_action< test_rule > + : check_bytes< 5 > + {}; + + void unit_test() + { + memory_input<> i1( "aaa", __FUNCTION__ ); + const auto r1 = pegtl::parse< test_grammar >( i1 ); + TAO_PEGTL_TEST_ASSERT( r1 ); + + memory_input<> i2( "aaaaaaaaaaa", __FUNCTION__ ); + const auto r2 = pegtl::parse< test_grammar >( i2 ); + TAO_PEGTL_TEST_ASSERT( r2 ); + + memory_input<> i3( "aaa", __FUNCTION__ ); + const auto r3 = pegtl::parse< test_grammar, test_action >( i3 ); + TAO_PEGTL_TEST_ASSERT( r3 ); + +#if defined( __cpp_exceptions ) + memory_input<> i4( "aaaaaaaaaaa", __FUNCTION__ ); + TAO_PEGTL_TEST_THROWS( pegtl::parse< test_grammar, test_action >( i4 ) ); +#endif + } + +} // namespace TAO_PEGTL_NAMESPACE + +#include "main.hpp" diff --git a/packages/PEGTL/src/test/pegtl/contrib_instantiate.cpp b/packages/PEGTL/src/test/pegtl/contrib_instantiate.cpp index 5c6c268cb..fc85dec75 100644 --- a/packages/PEGTL/src/test/pegtl/contrib_instantiate.cpp +++ b/packages/PEGTL/src/test/pegtl/contrib_instantiate.cpp @@ -32,8 +32,8 @@ namespace TAO_PEGTL_NAMESPACE dtor = true; } - void operator=( test_class&& ) = delete; - void operator=( const test_class& ) = delete; + test_class& operator=( test_class&& ) = delete; + test_class& operator=( const test_class& ) = delete; }; using test_grammar = sor< alpha, digit >; diff --git 
a/packages/PEGTL/src/test/pegtl/limit_bytes.cpp b/packages/PEGTL/src/test/pegtl/limit_bytes.cpp new file mode 100644 index 000000000..94e66c3f6 --- /dev/null +++ b/packages/PEGTL/src/test/pegtl/limit_bytes.cpp @@ -0,0 +1,50 @@ +// Copyright (c) 2021 Dr. Colin Hirsch and Daniel Frey +// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ + +#include <tao/pegtl/contrib/limit_bytes.hpp> + +#include "test.hpp" + +namespace TAO_PEGTL_NAMESPACE +{ + struct test_rule + : star< alpha > + {}; + + struct test_grammar + : seq< test_rule, eof > + {}; + + template< typename Rule > + struct test_action + : nothing< Rule > + {}; + + template<> + struct test_action< test_rule > + : limit_bytes< 5 > + {}; + + void unit_test() + { + memory_input<> i1( "aaa", __FUNCTION__ ); + const auto r1 = pegtl::parse< test_grammar >( i1 ); + TAO_PEGTL_TEST_ASSERT( r1 ); + + memory_input<> i2( "aaaaaaaaaaa", __FUNCTION__ ); + const auto r2 = pegtl::parse< test_grammar >( i2 ); + TAO_PEGTL_TEST_ASSERT( r2 ); + + memory_input<> i3( "aaa", __FUNCTION__ ); + const auto r3 = pegtl::parse< test_grammar, test_action >( i3 ); + TAO_PEGTL_TEST_ASSERT( r3 ); + +#if defined( __cpp_exceptions ) + memory_input<> i4( "aaaaaaaaaaa", __FUNCTION__ ); + TAO_PEGTL_TEST_THROWS( pegtl::parse< test_grammar, test_action >( i4 ) ); +#endif + } + +} // namespace TAO_PEGTL_NAMESPACE + +#include "main.hpp" diff --git a/packages/PEGTL/src/test/pegtl/limit_depth.cpp b/packages/PEGTL/src/test/pegtl/limit_depth.cpp new file mode 100644 index 000000000..8fb1b2713 --- /dev/null +++ b/packages/PEGTL/src/test/pegtl/limit_depth.cpp @@ -0,0 +1,50 @@ +// Copyright (c) 2021 Dr. 
Colin Hirsch and Daniel Frey +// Please see LICENSE for license or visit https://github.com/taocpp/PEGTL/ + +#include <tao/pegtl/contrib/limit_depth.hpp> + +#include "test.hpp" + +namespace TAO_PEGTL_NAMESPACE +{ + struct test_recursive + : seq< alpha, opt< test_recursive > > + {}; + + struct test_grammar + : seq< test_recursive, eof > + {}; + + template< typename Rule > + struct test_action + : nothing< Rule > + {}; + + template<> + struct test_action< test_recursive > + : limit_depth< 5 > + {}; + + void unit_test() + { + memory_input<> i1( "aaa", __FUNCTION__ ); + const auto r1 = pegtl::parse< test_grammar >( i1 ); + TAO_PEGTL_TEST_ASSERT( r1 ); + + memory_input<> i2( "aaaaaaaaaaa", __FUNCTION__ ); + const auto r2 = pegtl::parse< test_grammar >( i2 ); + TAO_PEGTL_TEST_ASSERT( r2 ); + + memory_input<> i3( "aaa", __FUNCTION__ ); + const auto r3 = pegtl::parse< test_grammar, test_action >( i3 ); + TAO_PEGTL_TEST_ASSERT( r3 ); + +#if defined( __cpp_exceptions ) + memory_input<> i4( "aaaaaaaaaaa", __FUNCTION__ ); + TAO_PEGTL_TEST_THROWS( pegtl::parse< test_grammar, test_action >( i4 ) ); +#endif + } + +} // namespace TAO_PEGTL_NAMESPACE + +#include "main.hpp" -- GitLab