# unprompted/lib_unprompted/shortcodes.py
# (369 lines, 11 KiB, Python)

# This library has been modified by Therefore Games for use with Unprompted.
import re
__version__ = "5.4.0"
# Globally-registered handler functions indexed by keyword. Each value is the
# (handler, endword, preprocessor) tuple stored by `register()`.
global_keywords = {}
# The set of all end-words for globally-registered block-scoped shortcodes.
global_endwords = set()
# Set to True when a `break` or `continue` shortcode is rendered. While it is
# True, Text nodes and shortcodes render as an empty string. Nothing in the
# visible code resets it -- presumably the caller or a handler does (TODO confirm).
global_did_break = False
# Set to True (together with `global_did_break`) when a `continue` shortcode
# is rendered. It is only written, never read, in the visible code.
global_did_continue = False
# Decorator factory for globally registering shortcode handlers.
def register(keyword, endword=None, preprocessor=None):
    """Return a decorator that adds a handler to the global registry.

    Args:
        keyword: Shortcode keyword the handler is registered under.
        endword: Closing-tag keyword. A truthy value is also recorded in
            `global_endwords`.
        preprocessor: Optional callable stored alongside the handler.

    Returns:
        A decorator that stores `(func, endword, preprocessor)` under
        `global_keywords[keyword]` and hands `func` back unchanged.
    """
    def decorator(func):
        entry = (func, endword, preprocessor)
        global_keywords[keyword] = entry
        if endword:
            global_endwords.add(endword)
        return func

    return decorator
# ------------------- #
# Exception Classes #
# ------------------- #
class ShortcodeError(Exception):
    """Base class for all exceptions raised by the library."""
class ShortcodeSyntaxError(ShortcodeError):
    """Raised if the parser detects invalid shortcode syntax."""
class ShortcodeRenderingError(ShortcodeError):
    """Raised if a handler function throws an error."""
# ----------- #
# AST Nodes #
# ----------- #
# Input text is parsed into a tree of Node instances.
class Node:
    """Tree node; a plain Node only groups its children.

    Attributes:
        children: Child nodes in document order.
    """

    def __init__(self):
        self.children = []

    def render(self, context):
        """Render every child in order and return the concatenated text."""
        pieces = [child.render(context) for child in self.children]
        return ''.join(pieces)
# Represents ordinary text not enclosed in tag delimiters.
class Text(Node):
    """Leaf node holding literal text that lies outside any tag.

    Attributes:
        text: The literal text.
    """

    def __init__(self, text):
        # Node.__init__ is not called, so a Text node has no `children` list;
        # the render() override below never needs one.
        self.text = text

    def render(self, context):
        """Return the text, or "" while `global_did_break` is set."""
        return "" if global_did_break else self.text
# Base class for atomic and block-scoped shortcodes.
class Shortcode(Node):
    """Common state and argument parsing for shortcode nodes.

    Attributes:
        token: The tag token this node was built from.
        handler: Callable invoked when the node is rendered.
        pargs: Positional arguments parsed from the tag text.
        kwargs: Keyword arguments parsed from the tag text.
        children: Child nodes (filled in by the parser for block shortcodes).
        preprocess: Optional preprocessor callable, or None.
    """

    # Regex for parsing the shortcode's arguments. The pattern text is kept
    # verbatim. Capture groups:
    #   1: optional key in front of a quoted value
    #   2: the quoted value including its quotes
    #   3: content of a double-quoted value
    #   4: content of a single-quoted value
    #   5, 6: key and value of an unquoted key=value pair
    #   7: a bare word
    re_args = re.compile(r"""
(?:([^\s'"=]+)=)?
(
"((?:[^\\"]|\\.)*)"
|
'((?:[^\\']|\\.)*)'
)
|
([^\s'"=]+)=(\S+)
|
(\S+)
""", re.VERBOSE)

    def __init__(self, token, handler_function, preprocessor=None):
        super().__init__()  # provides the empty `children` list
        self.token = token
        self.handler = handler_function
        # Everything after the keyword is the argument string.
        self.pargs, self.kwargs = self.parse_args(token.text[len(token.keyword):])
        self.preprocess = preprocessor

    def parse_args(self, argstring):
        """Split an argument string into positional and keyword arguments.

        Args:
            argstring: The tag text that follows the keyword.

        Returns:
            A `(pargs, kwargs)` tuple: a list of positional values and a dict
            of keyword values. Quoted values have their quotes removed;
            backslash escapes inside them are left untouched.
        """
        pargs, kwargs = [], {}
        for match in self.re_args.finditer(argstring):
            quoted_key, quoted, double, single, bare_key, bare_value, word = match.groups()
            if not (quoted or bare_key):
                pargs.append(word)
                continue
            key = quoted_key or bare_key
            # Test against None rather than truthiness: an empty quoted value
            # ("" or '') matches as the empty string and must stay an empty
            # string instead of collapsing to None.
            if double is not None:
                value = double
            elif single is not None:
                value = single
            else:
                value = bare_value
            if key:
                kwargs[key] = value
            else:
                pargs.append(value)
        return pargs, kwargs
# An atomic shortcode is a shortcode with no closing tag.
class AtomicShortcode(Shortcode):
    """Shortcode node without a closing tag.

    Exceptions raised by the handler or the preprocessor are wrapped in a
    ShortcodeRenderingError; the original exception stays reachable through
    the new exception's __cause__ attribute.
    """

    def render(self, context):
        """Return the handler's output converted to a string.

        A blocked token is emitted as its raw tag text. The `break` and
        `continue` keywords set the module-level flags, and while
        `global_did_break` is set nothing is rendered and the handler is not
        called.
        """
        global global_did_break, global_did_continue
        token = self.token
        if token.blocked:
            return token.raw_text

        if token.keyword in ("break", "continue"):
            if token.keyword == "continue":
                global_did_continue = True
            global_did_break = True

        if global_did_break:
            return ""

        try:
            output = self.handler(token.keyword, self.pargs, self.kwargs, context)
            return str(output)
        except Exception as ex:
            raise ShortcodeRenderingError(
                f"An exception was raised while rendering the "
                f"'{self.token.keyword}' shortcode in line {self.token.line_number}."
            ) from ex

    def render_preprocess(self, context):
        """Call the preprocessor with this tag's arguments and return its result."""
        try:
            return self.preprocess(self.token.keyword, self.pargs, self.kwargs, context)
        except Exception as ex:
            raise ShortcodeRenderingError(
                f"An exception was raised while pre-processing the "
                f"'{self.token.keyword}' shortcode in line {self.token.line_number}."
            ) from ex
# A block-scoped shortcode is a shortcode with a closing tag.
class BlockShortcode(Shortcode):
    """Shortcode node that wraps content between an opening and a closing tag.

    Exceptions raised by the handler or the preprocessor are wrapped in a
    ShortcodeRenderingError; the original exception stays reachable through
    the new exception's __cause__ attribute.
    """

    def render(self, context):
        """Render the children, then pass their text to the handler.

        A blocked token is emitted as its raw tag text, and nothing is
        rendered while `global_did_break` is set on entry.
        """
        token = self.token
        if token.blocked:
            return token.raw_text
        if global_did_break:
            return ""

        # Children are rendered outside the try block, so their own errors
        # propagate without being wrapped a second time.
        content = ''.join([child.render(context) for child in self.children])
        try:
            output = self.handler(token.keyword, self.pargs, self.kwargs, context, content)
            return str(output)
        except Exception as ex:
            raise ShortcodeRenderingError(
                f"An exception was raised while rendering the "
                f"'{self.token.keyword}' shortcode in line {self.token.line_number}."
            ) from ex

    def run_preprocess(self, context):
        """Call the preprocessor with this tag's arguments and return its result."""
        try:
            return self.preprocess(self.token.keyword, self.pargs, self.kwargs, context)
        except Exception as ex:
            raise ShortcodeRenderingError(
                f"An exception was raised while pre-processing the "
                f"'{self.token.keyword}' shortcode in line {self.token.line_number}."
            ) from ex
# -------- #
# Parser #
# -------- #
# A Parser instance parses input text and renders shortcodes. A single Parser
# instance can parse an unlimited number of input strings.
class Parser:
    """Parse text containing shortcode tags and render the result.

    Args:
        start: Opening tag delimiter.
        end: Closing tag delimiter.
        esc: Escape string. The character that follows it is treated as
            literal text by the lexer.
        inherit_globals: If true, the parser starts with a copy of the
            globally-registered shortcodes as they are at instantiation.
        ignore_unknown: If true, unknown shortcodes are emitted as their raw
            text. If false (the default), they raise ShortcodeSyntaxError.
    """

    def __init__(self, start='[%', end='%]', esc='\\', inherit_globals=True, ignore_unknown=False):
        self.start = start
        self.end = end
        # Only the escape string itself is matched (the `+ start` suffix is
        # disabled), so it escapes whatever character follows it.
        self.esc_start = esc  # + start
        self.keywords = global_keywords.copy() if inherit_globals else {}
        self.endwords = global_endwords.copy() if inherit_globals else set()
        self.ignore_unknown = ignore_unknown
        # Greater than 0 while the parser is inside a block whose content is
        # kept as raw text.
        self.blocking_depth = 0

    def register(self, func, keyword, endword=None, preprocessor=None):
        """Register a handler on this parser only.

        Args:
            func: Handler callable.
            keyword: Shortcode keyword.
            endword: Closing-tag keyword for a block-scoped shortcode.
            preprocessor: Optional callable run while parsing.
        """
        self.keywords[keyword] = (func, endword, preprocessor)
        if endword:
            self.endwords.add(endword)

    def parse(self, text, context=None):
        """Parse `text`, render every shortcode and return the output string.

        Args:
            text: Input text.
            context: Arbitrary object passed on to every handler and
                preprocessor.

        Returns:
            The rendered text with every occurrence of the escape string
            removed. Text that does not contain the start delimiter is
            returned unchanged, escape strings included.

        Raises:
            ShortcodeSyntaxError: On an unexpected or missing closing tag, an
                empty tag, or an unknown tag when `ignore_unknown` is false.
            ShortcodeRenderingError: If a handler or preprocessor raises.
        """
        if self.start not in text:
            return text

        stack = [Node()]
        # `expecting` holds the end-word of every open block, innermost last;
        # `openers` holds the opening token of each of those blocks. Blocked
        # nested blocks are not pushed on `stack`, so `stack` alone cannot
        # tell which tag an outstanding end-word belongs to.
        expecting = []
        openers = []
        self.blocking_depth = 0
        lexer = Lexer(text, self.start, self.end, self.esc_start)

        for token in lexer.tokenize():
            if self.blocking_depth > 0:
                token.blocked = True

            if token.type == "TEXT":
                stack[-1].children.append(Text(token.text))

            elif token.keyword in self.keywords:
                # Hardcoded bypass for multiline comments
                if expecting and "##" in self.keywords and expecting[-1] == self.keywords["##"][1]:
                    continue
                handler, endword, preprocessor = self.keywords[token.keyword]
                if endword:
                    node = BlockShortcode(token, handler, preprocessor)
                    if self.blocking_depth:
                        # Already blocked: only track the nesting depth.
                        self.blocking_depth += 1
                    elif preprocessor:
                        # The preprocessor's result, as an int, decides
                        # whether this block starts blocking.
                        added_depth = int(node.run_preprocess(context))
                        self.blocking_depth += added_depth
                    expecting.append(endword)
                    openers.append(token)
                    stack[-1].children.append(node)
                    if self.blocking_depth < 2:
                        stack.append(node)
                else:
                    node = AtomicShortcode(token, handler, preprocessor)
                    if preprocessor:
                        node.render_preprocess(context)
                    stack[-1].children.append(node)

            elif token.keyword in self.endwords:
                if not expecting:
                    msg = f"Unexpected '{token.keyword}' tag in line {token.line_number}."
                    raise ShortcodeSyntaxError(msg)
                elif token.keyword == expecting[-1]:
                    if self.blocking_depth > 0:
                        self.blocking_depth -= 1
                    expecting.pop()
                    openers.pop()
                    if self.blocking_depth > 0:
                        # Still blocked: the closed block was never pushed
                        # on the stack, so keep its closing tag as text.
                        stack[-1].children.append(Text(token.raw_text))
                    else:
                        stack.pop()
                elif token.blocked:
                    stack[-1].children.append(Text(token.raw_text))
                else:
                    msg = f"Unexpected '{token.keyword}' tag in line {token.line_number}. "
                    msg += f"The shortcode parser was expecting a closing '{expecting[-1]}' tag."
                    raise ShortcodeSyntaxError(msg)

            elif token.keyword == '':
                msg = f"Empty shortcode tag in line {token.line_number}."
                raise ShortcodeSyntaxError(msg)

            elif self.ignore_unknown:
                stack[-1].children.append(Text(token.raw_text))

            else:
                msg = f"Unrecognised shortcode tag '{token.keyword}' "
                msg += f"in line {token.line_number}."
                raise ShortcodeSyntaxError(msg)

        if expecting:
            # Report the tag that actually opened the innermost unclosed
            # block. The top of `stack` may be an outer block, or the root
            # node, which has no token at all.
            token = openers[-1]
            msg = f"Unexpected end of document. The shortcode parser was "
            msg += f"expecting a closing '{expecting[-1]}' tag to close the "
            msg += f"'{token.keyword}' tag opened in line {token.line_number}."
            raise ShortcodeSyntaxError(msg)

        return stack.pop().render(context).replace(self.esc_start, "")
# ------- #
# Lexer #
# ------- #
class Token:
    """A lexed chunk of input: plain text or the inside of a tag.

    Attributes:
        keyword: First whitespace-separated word of the text, or '' if the
            text has no words.
        type: Token type string (the lexer uses "TEXT" and "TAG").
        text: Token text.
        raw_text: The text exactly as it appeared in the input.
        line_number: Line on which the token starts.
        blocked: False on creation; the parser sets it for tokens that fall
            inside a blocking shortcode.
    """

    def __init__(self, token_type, token_text, raw_text, line_number):
        self.keyword = (token_text.split() or [''])[0]
        self.type = token_type
        self.text = token_text
        self.raw_text = raw_text
        self.line_number = line_number
        self.blocked = False

    def __str__(self):
        return "({}, {!r}, {})".format(self.type, self.text, self.line_number)
class Lexer:
    """Split input text into TEXT and TAG tokens.

    Args:
        text: Input text.
        start: Opening tag delimiter.
        end: Closing tag delimiter.
        esc_start: Escape string. It and the single character after it are
            emitted together as one TEXT token.

    Attributes:
        tokens: Tokens produced so far.
        index: Current position in `text`.
        line_number: 1-based line number of the current position.
    """

    def __init__(self, text, start, end, esc_start):
        self.text = text
        self.start = start
        self.end = end
        self.esc_start = esc_start
        self.tokens = []
        self.index = 0
        self.line_number = 1

    def match(self, target):
        """Return True if `target` occurs at the current position."""
        return self.text.startswith(target, self.index)

    def advance(self):
        """Move one character forward, counting newlines."""
        if self.text[self.index] == '\n':
            self.line_number += 1
        self.index += 1

    def tokenize(self):
        """Consume the remaining text and return the list of tokens.

        Raises:
            ShortcodeSyntaxError: If a tag is opened but never closed.
        """
        while self.index < len(self.text):
            if self.match(self.esc_start):
                self.read_escaped_tag_delimiter()
            elif self.match(self.start):
                self.read_tag()
            else:
                self.read_text()
        return self.tokens

    def read_escaped_tag_delimiter(self):
        """Emit the escape string plus the following character as TEXT."""
        start_line_number = self.line_number
        after_escape = self.index + len(self.esc_start)
        # Slicing (not indexing) yields '' when the escape string is the very
        # last thing in the text, instead of raising IndexError.
        next_char = self.text[after_escape:after_escape + 1]
        if next_char == '\n':
            # This path bypasses advance(), so count the newline here.
            self.line_number += 1
        self.index = after_escape + len(next_char)
        escaped = self.esc_start + next_char
        self.tokens.append(Token("TEXT", escaped, escaped, start_line_number))

    def read_tag(self):
        """Read one tag, from the start delimiter through the end delimiter.

        Raises:
            ShortcodeSyntaxError: If the end delimiter is never found.
        """
        self.index += len(self.start)
        start_index = self.index
        start_line_number = self.line_number
        while self.index < len(self.text):
            if self.match(self.end):
                text = self.text[start_index:self.index].strip()
                # The raw text keeps both delimiters.
                raw_text = self.text[start_index - len(self.start):self.index + len(self.end)]
                self.tokens.append(Token("TAG", text, raw_text, start_line_number))
                self.index += len(self.end)
                return
            self.advance()
        msg = f"Unclosed shortcode tag. The tag was opened in line {start_line_number}. Partial text processed: {self.text[start_index:self.index]}"
        raise ShortcodeSyntaxError(msg)

    def read_text(self):
        """Read plain text up to the next escape string or start delimiter."""
        start_index = self.index
        start_line_number = self.line_number
        while self.index < len(self.text):
            if self.match(self.esc_start) or self.match(self.start):
                break
            self.advance()
        text = self.text[start_index:self.index]
        self.tokens.append(Token("TEXT", text, text, start_line_number))