# Source listing metadata: 369 lines, 11 KiB, Python.
# This library has been modified by Therefore Games for use with Unprompted.
|
|
import re
|
|
|
|
# Version string of this library.
__version__ = "5.4.0"

# Global handler registry filled by the `register` decorator. Each keyword maps
# to a `(handler, endword, preprocessor)` tuple.
global_keywords = dict()

# Closing keywords of every globally-registered block-scoped shortcode.
global_endwords = set()

# Loop-control flags shared by all nodes while rendering. An atomic "continue"
# shortcode raises both flags and an atomic "break" shortcode raises only
# `global_did_break`; while `global_did_break` is set, nodes render as "".
# Nothing in this part of the module resets them.
global_did_continue = False
global_did_break = False
|
|
|
|
|
|
# Decorator function for globally registering shortcode handlers.
|
|
def register(keyword, endword=None, preprocessor=None):
    """Build a decorator that registers a handler in the global registry.

    Args:
        keyword: Tag keyword the handler is stored under in `global_keywords`.
        endword: Closing keyword. When truthy it is also added to
            `global_endwords`.
        preprocessor: Optional callable stored next to the handler.

    Returns:
        A decorator that records `(handler, endword, preprocessor)` under
        `keyword` and hands the handler back unchanged.
    """
    entry_tail = (endword, preprocessor)

    def decorate(handler):
        global_keywords[keyword] = (handler, *entry_tail)
        if endword:
            global_endwords.add(endword)
        return handler

    return decorate
|
|
|
|
|
|
# ------------------- #
|
|
# Exception Classes #
|
|
# ------------------- #
|
|
|
|
|
|
# Base class for all exceptions raised by the library.
|
|
class ShortcodeError(Exception):
    """Common base class of every exception raised by this library."""
|
|
|
|
|
|
# Raised if the parser detects invalid shortcode syntax.
|
|
class ShortcodeSyntaxError(ShortcodeError):
    """Signals that the parser met invalid shortcode syntax."""
|
|
|
|
|
|
# Raised if a handler function throws an error.
|
|
class ShortcodeRenderingError(ShortcodeError):
    """Signals that a shortcode handler function raised an error."""
|
|
|
|
|
|
# ----------- #
|
|
# AST Nodes #
|
|
# ----------- #
|
|
|
|
|
|
# Input text is parsed into a tree of Node instances.
|
|
class Node:
    """Generic tree node; input text is parsed into a tree of these."""

    def __init__(self):
        # Child nodes, in the order they were appended.
        self.children = list()

    def render(self, context):
        """Render every child with `context` and concatenate the results."""
        pieces = [child.render(context) for child in self.children]
        return "".join(pieces)
|
|
|
|
|
|
# Represents ordinary text not enclosed in tag delimiters.
|
|
class Text(Node):
    """Node holding ordinary text that is not enclosed in tag delimiters."""

    def __init__(self, text):
        # The literal text this node emits when rendered.
        self.text = text

    def render(self, context):
        """Return the stored text, or "" while `global_did_break` is set."""
        return "" if global_did_break else self.text
|
|
|
|
|
|
# Base class for atomic and block-scoped shortcodes.
|
|
class Shortcode(Node):
    """Base class for atomic and block-scoped shortcodes.

    Stores the originating token, the handler, the optional preprocessor and
    the positional/keyword arguments parsed from the tag text.
    """

    # Regex for parsing the shortcode's arguments. Capture groups:
    #   1    key of a quoted `key="value"` / `key='value'` pair (optional)
    #   2    the quoted value including its quote characters
    #   3/4  content of a double-quoted / single-quoted value
    #   5/6  key and value of an unquoted `key=value` pair
    #   7    a bare positional word
    re_args = re.compile(r"""
        (?:([^\s'"=]+)=)?
        (
            "((?:[^\\"]|\\.)*)"
            |
            '((?:[^\\']|\\.)*)'
        )
        |
        ([^\s'"=]+)=(\S+)
        |
        (\S+)
    """, re.VERBOSE)

    def __init__(self, token, handler_function, preprocessor=None):
        """Bind the shortcode to its token and callables.

        Args:
            token: Tag token; the part of `token.text` that follows
                `token.keyword` is parsed as the argument string.
            handler_function: Callable invoked when the shortcode is rendered.
            preprocessor: Optional callable stored as `self.preprocess`.
        """
        self.token = token
        self.handler = handler_function
        self.pargs, self.kwargs = self.parse_args(token.text[len(token.keyword):])
        self.children = []
        self.preprocess = preprocessor

    def parse_args(self, argstring):
        """Split an argument string into positional and keyword arguments.

        Quoted values are returned without their surrounding quotes; backslash
        sequences inside them are left untouched. An empty quoted value
        (`""` or `''`) yields the empty string rather than `None`.

        Args:
            argstring: Raw text following the shortcode keyword.

        Returns:
            A `(pargs, kwargs)` tuple: a list of positional values and a dict
            of keyword values, both in order of appearance.
        """
        pargs, kwargs = [], {}
        for match in self.re_args.finditer(argstring):
            (quoted_key, quoted, double_quoted, single_quoted,
             bare_key, bare_value, bare_word) = match.groups()

            if quoted is not None:
                key = quoted_key
                # Compare against None: the content of an empty quoted value
                # is "", which is falsy but still a real value.
                value = double_quoted if double_quoted is not None else single_quoted
            elif bare_key is not None:
                key, value = bare_key, bare_value
            else:
                pargs.append(bare_word)
                continue

            if key:
                kwargs[key] = value
            else:
                pargs.append(value)
        return pargs, kwargs
|
|
|
|
|
|
# An atomic shortcode is a shortcode with no closing tag.
|
|
class AtomicShortcode(Shortcode):
    """A shortcode with no closing tag.

    Exceptions raised by the handler or the preprocessor are wrapped in a
    ShortcodeRenderingError; the original exception stays available through
    the wrapper's `__cause__` attribute.
    """

    def render(self, context):
        """Render the shortcode by calling its handler.

        A blocked token renders as its raw tag text. The "break" and
        "continue" keywords raise the module-level loop-control flags, and
        while `global_did_break` is set the handler is not called and "" is
        returned.

        Raises:
            ShortcodeRenderingError: If the handler raises.
        """
        global global_did_break, global_did_continue
        token = self.token
        if token.blocked:
            return token.raw_text

        keyword = token.keyword
        if keyword == "continue":
            global_did_continue = True
        if keyword in ("break", "continue"):
            global_did_break = True

        if global_did_break:
            return ""

        try:
            output = self.handler(keyword, self.pargs, self.kwargs, context)
            return str(output)
        except Exception as ex:
            msg = (
                "An exception was raised while rendering the "
                f"'{token.keyword}' shortcode in line {token.line_number}."
            )
            raise ShortcodeRenderingError(msg) from ex

    def render_preprocess(self, context):
        """Call the preprocessor and return whatever it returns.

        Raises:
            ShortcodeRenderingError: If the preprocessor raises.
        """
        token = self.token
        try:
            return self.preprocess(token.keyword, self.pargs, self.kwargs, context)
        except Exception as ex:
            msg = (
                "An exception was raised while pre-processing the "
                f"'{token.keyword}' shortcode in line {token.line_number}."
            )
            raise ShortcodeRenderingError(msg) from ex
|
|
|
|
|
|
# A block-scoped shortcode is a shortcode with a closing tag.
|
|
class BlockShortcode(Shortcode):
    """A shortcode with a closing tag.

    Exceptions raised by the handler or the preprocessor are wrapped in a
    ShortcodeRenderingError; the original exception stays available through
    the wrapper's `__cause__` attribute.
    """

    def render(self, context):
        """Render the children, then pass the result to the handler.

        A blocked token renders as its raw tag text, and while
        `global_did_break` is set "" is returned; in both cases neither the
        children nor the handler are evaluated.

        Raises:
            ShortcodeRenderingError: If the handler raises.
        """
        token = self.token
        if token.blocked:
            return token.raw_text
        if global_did_break:
            return ""

        rendered_children = [child.render(context) for child in self.children]
        content = "".join(rendered_children)
        try:
            output = self.handler(token.keyword, self.pargs, self.kwargs, context, content)
            return str(output)
        except Exception as ex:
            msg = (
                "An exception was raised while rendering the "
                f"'{token.keyword}' shortcode in line {token.line_number}."
            )
            raise ShortcodeRenderingError(msg) from ex

    def run_preprocess(self, context):
        """Call the preprocessor and return whatever it returns.

        Raises:
            ShortcodeRenderingError: If the preprocessor raises.
        """
        token = self.token
        try:
            return self.preprocess(token.keyword, self.pargs, self.kwargs, context)
        except Exception as ex:
            msg = (
                "An exception was raised while pre-processing the "
                f"'{token.keyword}' shortcode in line {token.line_number}."
            )
            raise ShortcodeRenderingError(msg) from ex
|
|
|
|
|
|
# -------- #
|
|
# Parser #
|
|
# -------- #
|
|
|
|
|
|
# A Parser instance parses input text and renders shortcodes. A single Parser
|
|
# instance can parse an unlimited number of input strings. Note that the parse()
|
|
# method accepts an optional arbitrary context object which it passes on to each
|
|
# shortcode's handler function.
|
|
#
|
|
# If the `inherit_globals` parameter is true, the parser will inherit a copy of
|
|
# the set of globally-registered shortcodes at the moment of instantiation.
|
|
#
|
|
# If `ignore_unknown` is true, unknown shortcodes are ignored. If this parameter
|
|
# is false (the default), unknown shortcodes cause an error.
|
|
class Parser:
    """Parses input text and renders the shortcodes found in it.

    One instance can parse any number of input strings. `parse()` accepts an
    optional context object that is passed on to every handler and
    preprocessor.
    """

    def __init__(self, start='[%', end='%]', esc='\\', inherit_globals=True, ignore_unknown=False):
        """Configure delimiters and the shortcode registry.

        Args:
            start: Opening tag delimiter.
            end: Closing tag delimiter.
            esc: Escape string. It is removed from the rendered output.
            inherit_globals: If true, start from a copy of the globally
                registered shortcodes as they are at instantiation time.
            ignore_unknown: If true, unknown tags are emitted verbatim instead
                of raising ShortcodeSyntaxError.
        """
        self.start = start
        self.end = end
        # The escape marker is the escape string on its own (not esc + start).
        self.esc_start = esc
        self.keywords = global_keywords.copy() if inherit_globals else {}
        self.endwords = global_endwords.copy() if inherit_globals else set()
        self.ignore_unknown = ignore_unknown
        # Number of nested blocks whose content is currently passed through
        # unparsed; reset at the start of every parse() call.
        self.blocking_depth = 0

    def register(self, func, keyword, endword=None, preprocessor=None):
        """Register a handler on this parser instance only.

        Args:
            func: Handler function.
            keyword: Tag keyword the handler is stored under.
            endword: Closing keyword; a truthy value makes the shortcode
                block-scoped.
            preprocessor: Optional callable invoked while parsing.
        """
        self.keywords[keyword] = (func, endword, preprocessor)
        if endword:
            self.endwords.add(endword)

    def parse(self, text, context=None):
        """Parse `text`, render all shortcodes and return the result.

        Text that does not contain the opening delimiter is returned
        unchanged. Otherwise the rendered output is returned with every
        occurrence of the escape string removed.

        Args:
            text: Input string.
            context: Arbitrary object forwarded to handlers and preprocessors.

        Raises:
            ShortcodeSyntaxError: On an unexpected, empty, unknown or unclosed
                tag.
            ShortcodeRenderingError: If a handler or preprocessor raises.
        """
        if self.start not in text:
            return text

        stack = [Node()]
        # Closing keywords still owed, innermost last.
        expecting = []
        # Opening tokens of those same blocks, kept in step with `expecting`.
        # The node stack cannot be used for error reporting because blocks
        # opened at blocking depth >= 2 are never pushed onto it.
        open_tokens = []
        self.blocking_depth = 0

        lexer = Lexer(text, self.start, self.end, self.esc_start)
        for token in lexer.tokenize():
            if self.blocking_depth > 0:
                token.blocked = True

            if token.type == "TEXT":
                stack[-1].children.append(Text(token.text))
            elif token.keyword in self.keywords:
                # Hardcoded bypass for multiline comments: while the innermost
                # open block is the one registered under "##", every
                # recognised shortcode tag is dropped.
                if len(expecting) > 0 and "##" in self.keywords and expecting[-1] == self.keywords["##"][1]:
                    continue

                handler, endword, preprocessor = self.keywords[token.keyword]
                if endword:
                    node = BlockShortcode(token, handler, preprocessor)

                    if self.blocking_depth:
                        self.blocking_depth += 1
                    elif preprocessor:
                        # The preprocessor's result, cast to int, is the
                        # amount of blocking depth this block adds.
                        added_depth = int(node.run_preprocess(context))
                        self.blocking_depth += added_depth

                    expecting.append(endword)
                    open_tokens.append(token)
                    stack[-1].children.append(node)
                    if self.blocking_depth < 2:
                        stack.append(node)
                else:
                    node = AtomicShortcode(token, handler, preprocessor)
                    if preprocessor:
                        node.render_preprocess(context)
                    stack[-1].children.append(node)
            elif token.keyword in self.endwords:
                if len(expecting) == 0:
                    msg = f"Unexpected '{token.keyword}' tag in line {token.line_number}."
                    raise ShortcodeSyntaxError(msg)
                elif token.keyword == expecting[-1]:
                    if self.blocking_depth > 0:
                        self.blocking_depth -= 1
                    expecting.pop()
                    open_tokens.pop()

                    if self.blocking_depth > 0:
                        # Still inside blocked content: keep the tag verbatim.
                        stack[-1].children.append(Text(token.raw_text))
                    else:
                        stack.pop()

                elif token.blocked:
                    stack[-1].children.append(Text(token.raw_text))
                else:
                    msg = f"Unexpected '{token.keyword}' tag in line {token.line_number}. "
                    msg += f"The shortcode parser was expecting a closing '{expecting[-1]}' tag."
                    raise ShortcodeSyntaxError(msg)
            elif token.keyword == '':
                msg = f"Empty shortcode tag in line {token.line_number}."
                raise ShortcodeSyntaxError(msg)
            elif self.ignore_unknown:
                stack[-1].children.append(Text(token.raw_text))
            else:
                msg = f"Unrecognised shortcode tag '{token.keyword}' "
                msg += f"in line {token.line_number}."
                raise ShortcodeSyntaxError(msg)

        if expecting:
            # Report the tag that actually owes the missing closing tag.
            token = open_tokens[-1]
            msg = f"Unexpected end of document. The shortcode parser was "
            msg += f"expecting a closing '{expecting[-1]}' tag to close the "
            msg += f"'{token.keyword}' tag opened in line {token.line_number}."
            raise ShortcodeSyntaxError(msg)

        return stack.pop().render(context).replace(self.esc_start, "")
|
|
|
|
|
|
# ------- #
|
|
# Lexer #
|
|
# ------- #
|
|
|
|
|
|
class Token:
    """A lexical unit produced by the Lexer: plain text or a shortcode tag."""

    def __init__(self, token_type, token_text, raw_text, line_number):
        """Store the token fields and derive its keyword.

        Args:
            token_type: "TEXT" or "TAG".
            token_text: Token content; for tags, the stripped text between the
                delimiters.
            raw_text: The text exactly as it appeared in the input (for tags,
                including the delimiters).
            line_number: Line on which the token starts.
        """
        words = token_text.split()
        # First whitespace-separated word, or '' if the text has none.
        self.keyword = words[0] if words else ''
        self.type = token_type
        self.text = token_text
        self.raw_text = raw_text
        self.line_number = line_number
        # Set by the parser for tokens that lie inside blocked content.
        self.blocked = False

    def __str__(self):
        return f"({self.type}, {repr(self.text)}, {self.line_number})"


class Lexer:
    """Splits input text into TEXT and TAG tokens."""

    def __init__(self, text, start, end, esc_start):
        """Prepare a lexer for `text`.

        Args:
            text: Input string.
            start: Opening tag delimiter.
            end: Closing tag delimiter.
            esc_start: Escape string; an empty string disables escaping.
        """
        self.text = text
        self.start = start
        self.end = end
        self.esc_start = esc_start
        self.tokens = []
        self.index = 0
        self.line_number = 1

    def match(self, target):
        """Return True if `target` occurs at the current position."""
        return self.text.startswith(target, self.index)

    def advance(self):
        """Move one character forward, counting a newline if one is passed."""
        if self.text[self.index] == '\n':
            self.line_number += 1
        self.index += 1

    def _at_escape(self):
        """Return True if a non-empty escape string starts here."""
        # An empty escape string would match at every position and keep any
        # tag from ever being recognised, so it is treated as "no escaping".
        return bool(self.esc_start) and self.match(self.esc_start)

    def tokenize(self):
        """Consume the remaining input and return the list of tokens."""
        while self.index < len(self.text):
            if self._at_escape():
                self.read_escaped_tag_delimiter()
            elif self.match(self.start):
                self.read_tag()
            else:
                self.read_text()
        return self.tokens

    def read_escaped_tag_delimiter(self):
        """Emit the escape string plus the character after it as a TEXT token.

        The escape string may be any length. An escape at the very end of the
        input is emitted on its own, and an escaped newline is counted in
        `line_number`.
        """
        start_index = self.index
        start_line_number = self.line_number
        self.index += len(self.esc_start)
        if self.index < len(self.text):
            # Consume the protected character through advance() so that an
            # escaped newline still bumps the line counter.
            self.advance()
        text = self.text[start_index:self.index]
        self.tokens.append(Token("TEXT", text, text, start_line_number))

    def read_tag(self):
        """Read one tag, from its opening to its closing delimiter.

        Raises:
            ShortcodeSyntaxError: If the input ends before the closing
                delimiter is found.
        """
        self.index += len(self.start)
        start_index = self.index
        start_line_number = self.line_number
        while self.index < len(self.text):
            if self.match(self.end):
                text = self.text[start_index:self.index].strip()
                raw_text = self.text[start_index - len(self.start):self.index + len(self.end)]
                self.tokens.append(Token("TAG", text, raw_text, start_line_number))
                self.index += len(self.end)
                return
            self.advance()
        msg = f"Unclosed shortcode tag. The tag was opened in line {start_line_number}. Partial text processed: {self.text[start_index:self.index]}"
        raise ShortcodeSyntaxError(msg)

    def read_text(self):
        """Read plain text up to the next escape string, tag or end of input."""
        start_index = self.index
        start_line_number = self.line_number
        while self.index < len(self.text):
            if self._at_escape() or self.match(self.start):
                break
            self.advance()
        text = self.text[start_index:self.index]
        self.tokens.append(Token("TEXT", text, text, start_line_number))
|