|
17 | 17 |
|
18 | 18 | from .exceptions import ConfigurationError, assert_config, UnexpectedInput
|
19 | 19 | from .utils import Serialize, SerializeMemoizer, FS, logger, TextOrSlice
|
20 |
| -from .load_grammar import load_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource, sha256_digest |
| 20 | +from .load_grammar import load_grammar, _deserialize_grammar, FromPackageLoader, Grammar, verify_used_files, PackageResource, sha256_digest |
21 | 21 | from .tree import Tree
|
22 | 22 | from .common import LexerConf, ParserConf, _ParserArgType, _LexerArgType
|
23 | 23 |
|
@@ -56,6 +56,7 @@ class LarkOptions(Serialize):
|
56 | 56 | propagate_positions: Union[bool, str]
|
57 | 57 | maybe_placeholders: bool
|
58 | 58 | cache: Union[bool, str]
|
| 59 | + cache_grammar: bool |
59 | 60 | regex: bool
|
60 | 61 | g_regex_flags: int
|
61 | 62 | keep_all_tokens: bool
|
@@ -99,6 +100,10 @@ class LarkOptions(Serialize):
|
99 | 100 | - When ``False``, does nothing (default)
|
100 | 101 | - When ``True``, caches to a temporary file in the local directory
|
101 | 102 | - When given a string, caches to the path pointed by the string
|
| 103 | + cache_grammar |
| 104 | + For use with ``cache`` option. When ``True``, the unanalyzed grammar is also included in the cache. |
| 105 | + Useful for classes that require the ``Lark.grammar`` to be present (e.g. Reconstructor). |
| 106 | + (default= ``False``) |
102 | 107 | regex
|
103 | 108 | When True, uses the ``regex`` module instead of the stdlib ``re``.
|
104 | 109 | g_regex_flags
|
@@ -165,6 +170,7 @@ class LarkOptions(Serialize):
|
165 | 170 | 'keep_all_tokens': False,
|
166 | 171 | 'tree_class': None,
|
167 | 172 | 'cache': False,
|
| 173 | + 'cache_grammar': False, |
168 | 174 | 'postlex': None,
|
169 | 175 | 'parser': 'earley',
|
170 | 176 | 'lexer': 'auto',
|
@@ -211,6 +217,9 @@ def __init__(self, options_dict: Dict[str, Any]) -> None:
|
211 | 217 | raise ConfigurationError('Cannot specify an embedded transformer when using the Earley algorithm. '
|
212 | 218 | 'Please use your transformer on the resulting parse tree, or use a different algorithm (i.e. LALR)')
|
213 | 219 |
|
| 220 | + if self.cache_grammar and not self.cache: |
| 221 | + raise ConfigurationError('cache_grammar cannot be set when cache is disabled') |
| 222 | + |
214 | 223 | if o:
|
215 | 224 | raise ConfigurationError("Unknown options: %s" % o.keys())
|
216 | 225 |
|
@@ -264,8 +273,12 @@ class Lark(Serialize):
|
264 | 273 | parser: 'ParsingFrontend'
|
265 | 274 | terminals: Collection[TerminalDef]
|
266 | 275 |
|
| 276 | + __serialize_fields__ = ['parser', 'rules', 'options'] |
| 277 | + |
267 | 278 | def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
|
268 | 279 | self.options = LarkOptions(options)
|
| 280 | + if self.options.cache_grammar: |
| 281 | + self.__serialize_fields__.append('grammar') |
269 | 282 | re_module: types.ModuleType
|
270 | 283 |
|
271 | 284 | # Set regex or re module
|
@@ -327,7 +340,9 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
|
327 | 340 | # specific reason - we just want a username.
|
328 | 341 | username = "unknown"
|
329 | 342 |
|
330 |
| - cache_fn = tempfile.gettempdir() + "/.lark_cache_%s_%s_%s_%s.tmp" % (username, cache_sha256, *sys.version_info[:2]) |
| 343 | + |
| 344 | + cache_fn = tempfile.gettempdir() + "/.lark_%s_%s_%s_%s_%s.tmp" % ( |
| 345 | + "cache_grammar" if self.options.cache_grammar else "cache", username, cache_sha256, *sys.version_info[:2]) |
331 | 346 |
|
332 | 347 | old_options = self.options
|
333 | 348 | try:
|
@@ -454,8 +469,6 @@ def __init__(self, grammar: 'Union[Grammar, str, IO[str]]', **options) -> None:
|
454 | 469 | if __doc__:
|
455 | 470 | __doc__ += "\n\n" + LarkOptions.OPTIONS_DOC
|
456 | 471 |
|
457 |
| - __serialize_fields__ = 'parser', 'rules', 'options' |
458 |
| - |
459 | 472 | def _build_lexer(self, dont_ignore: bool=False) -> BasicLexer:
|
460 | 473 | lexer_conf = self.lexer_conf
|
461 | 474 | if dont_ignore:
|
@@ -531,6 +544,8 @@ def _load(self: _T, f: Any, **kwargs) -> _T:
|
531 | 544 |
|
532 | 545 | assert memo_json
|
533 | 546 | memo = SerializeMemoizer.deserialize(memo_json, {'Rule': Rule, 'TerminalDef': TerminalDef}, {})
|
| 547 | + if 'grammar' in data: |
| 548 | + self.grammar = _deserialize_grammar(data['grammar'], memo) |
534 | 549 | options = dict(data['options'])
|
535 | 550 | if (set(kwargs) - _LOAD_ALLOWED_OPTIONS) & set(LarkOptions._defaults):
|
536 | 551 | raise ConfigurationError("Some options are not allowed when loading a Parser: {}"
|
|
0 commit comments