# Expand source code
# Browse git
"""
# Semantics of search templates
"""
import types
import re
from .relations import add_K_Relations, add_F_Relations, add_V_Relations
from .syntax import reTp, kRe, deContext
# SEMANTIC ANALYSIS OF SEARCH TEMPLATE ###
def _reportBadSemantics(searchExe):
    """Report the search template and every collected semantic error.

    Shows the outer template, then each line of `searchExe.searchLines`
    prefixed by its (offset-corrected) line number, then each entry of
    `searchExe.badSemantics`. Entries carrying a line number are prefixed
    with `line N:`; entries whose line number is `None` are shown as is.
    All output goes through the TF `error` function with the message cache
    of `searchExe`.
    """
    error = searchExe.api.TF.error
    _msgCache = searchExe._msgCache
    offset = searchExe.offset

    searchExe.showOuterTemplate(_msgCache)
    for (i, line) in enumerate(searchExe.searchLines):
        error(f"{i + offset:>2} {line}", tm=False, cache=_msgCache)
    for (ln, eline) in searchExe.badSemantics:
        txt = eline if ln is None else f"line {ln + offset}: {eline}"
        error(txt, tm=False, cache=_msgCache)


def semantics(searchExe):
    """Run the semantic analysis of a search template.

    Does nothing when `searchExe.good` is already false. Otherwise it resets
    `searchExe.badSemantics`, runs `_grammar` and, only if that left
    `searchExe.good` true, `_validation`. If either phase cleared
    `searchExe.good`, the template and the collected errors are reported.

    Parameters
    ----------
    searchExe: object
        The search execution object; it is read and modified in place.

    Returns
    -------
    None
    """
    if not searchExe.good:
        return

    searchExe.badSemantics = []

    _grammar(searchExe)
    # validation needs the structures that _grammar only stores on success
    if searchExe.good:
        _validation(searchExe)

    if not searchExe.good:
        _reportBadSemantics(searchExe)
def _grammar(searchExe):
    """Turn the tokens of a search template into query nodes and query edges.

    Reads `searchExe.tokens` (dicts with at least the keys `ln` and `kind`;
    `kind` is one of `atom`, `feat`, `rel`) and builds:

    * `qnames`: name -> index of the query node carrying that name;
    * `qnodes`: list of tuples `(otype, features, src, quantifiers)`;
    * `qedges`: list of tuples `(fromNode, operator, toNode)`;
    * `nodeLine` / `edgeLine`: index of node / edge -> template line number.

    Problems are appended to `searchExe.badSemantics` as `(line, message)`.
    When no problem was found the results are stored on `searchExe`
    (`qnames`, `qnodes`, `qedgesRaw`, `nodeLine`, `edgeLine`);
    otherwise `searchExe.good` is set to `False` and nothing is stored.
    """
    prevKind = None
    good = True
    qnames = {}
    qnodes = []
    qedges = []
    edgeLine = {}
    nodeLine = {}
    nTokens = len(searchExe.tokens)

    def tokenSort(t):
        # "rel" tokens get their line number shifted by the number of tokens,
        # all other tokens sort by line number
        # NOTE(review): this puts every "rel" token after all other tokens
        # only if line numbers are smaller than the token count - confirm
        return (nTokens + t["ln"]) if t["kind"] == "rel" else t["ln"]

    tokens = sorted(searchExe.tokens, key=tokenSort)

    # atomStack is a stack of qnodes with their indent levels
    # such that every next member is one level deeper
    # and every member is the last qnode encountered at that level
    # The stack is implemented as a dict,
    # keyed by the indent, and valued by the qnode
    atomStack = {}

    for token in tokens:
        i = token["ln"]
        kind = token["kind"]
        if kind == "atom":
            if "quantifiers" in token:
                token["quantifiers"] = [
                    deContext(q, token["name"]) for q in token["quantifiers"]
                ]
            indent = token["indent"]
            op = token["op"]
            if "name" in token:
                # a token with a name introduces a new query node
                name = token["name"]
                otype = token["otype"]
                features = token["features"]
                src = token.get("src", "")
                quantifiers = token.get("quantifiers", [])
                qnodes.append((otype, features, src, quantifiers))
                q = len(qnodes) - 1
                nodeLine[q] = i
                # unnamed atoms get a name derived from their line number
                name = f":{i}" if name == "" else name
                qnames[name] = q
            # NOTE(review): for a token without "name" no new qnode is made,
            # so below `q` still refers to the most recently created qnode
            if len(atomStack) == 0:
                # first atom: must be unindented and without operator
                if indent > 0:
                    searchExe.badSemantics.append(
                        (i, f"Unexpected indent: {indent}, expected 0")
                    )
                    good = False
                if op is not None:
                    searchExe.badSemantics.append(
                        (i, "Lonely relation: not allowed at outermost level")
                    )
                    good = False
                if "name" in token:
                    atomStack[0] = q
            else:
                # stack entries as (indent, qnode), shallowest first
                atomNest = sorted(atomStack.items(), key=lambda x: x[0])
                top = atomNest[-1]
                if indent == top[0]:
                    # sibling of previous atom
                    if len(atomNest) > 1:
                        if "name" in token:
                            # take the qnode of the subtop of the
                            # atomStack, if there is one
                            qedges.append((q, "]]", atomNest[-2][1]))
                            edgeLine[len(qedges) - 1] = i
                            if op is not None:
                                qedges.append((top[1], op, q))
                                edgeLine[len(qedges) - 1] = i
                        else:
                            # lonely operator:
                            # left is previous atom, right is parent atom
                            qedges.append((top[1], op, atomNest[-2][1]))
                            edgeLine[len(qedges) - 1] = i
                    else:
                        # sibling at the outermost level: there is no parent
                        if op is not None:
                            qedges.append((top[1], op, q))
                            edgeLine[len(qedges) - 1] = i
                elif indent > top[0]:
                    if "name" in token:
                        # child of previous atom
                        qedges.append((q, "]]", top[1]))
                        edgeLine[len(qedges) - 1] = i
                        if op is not None:
                            qedges.append((top[1], op, q))
                            edgeLine[len(qedges) - 1] = i
                    else:
                        searchExe.badSemantics.append(
                            (i, "Lonely relation: not allowed as first child")
                        )
                        good = False
                else:
                    # outdent action:
                    # look up the proper parent in the stack
                    if indent in atomStack:
                        parents = [at[1] for at in atomNest if at[0] < indent]
                        if "name" in token:
                            if op is not None:
                                # relate to the previous atom at this indent
                                qedges.append((atomStack[indent], op, q))
                                edgeLine[len(qedges) - 1] = i
                        if len(parents) != 0:  # if not already at outermost level
                            if "name" in token:
                                qedges.append((q, "]]", parents[-1]))
                                edgeLine[len(qedges) - 1] = i
                            else:
                                # connect previous sibling to parent
                                qedges.append((atomStack[indent], op, parents[-1]))
                                edgeLine[len(qedges) - 1] = i
                        # forget all deeper levels
                        removeKeys = [at[0] for at in atomNest if at[0] > indent]
                        for rk in removeKeys:
                            del atomStack[rk]
                    else:
                        # parent cannot be found: indentation error
                        searchExe.badSemantics.append(
                            (
                                i,
                                "Unexpected indent: {}, expected one of {}".format(
                                    indent,
                                    ", ".join(
                                        str(at[0]) for at in atomNest if at[0] < indent
                                    ),
                                ),
                            )
                        )
                        good = False
                # the current qnode becomes the last one seen at this indent
                atomStack[indent] = q
        elif kind == "feat":
            # a feature line extends the features of the last query node
            features = token["features"]
            if prevKind is not None and prevKind not in {"atom", "feat"}:
                searchExe.badSemantics.append(
                    (i, f'Features after {prevKind}: "{features}"')
                )
                good = False
            else:
                if len(qnodes):
                    qnodes[-1][1].update(features)
        elif kind == "rel":
            # a relation line connects two named query nodes
            fName = token["f"]
            tName = token["t"]
            op = token["op"]
            f = qnames.get(fName, None)
            t = qnames.get(tName, None)
            namesGood = True
            for (q, n) in ((f, fName), (t, tName)):
                if q is None:
                    searchExe.badSemantics.append(
                        (i, f'Relation with undefined name: "{n}"')
                    )
                    namesGood = False
            if not namesGood:
                good = False
            else:
                qedges.append((f, op, t))
                edgeLine[len(qedges) - 1] = i
        prevKind = kind

    # resolve names when used in atoms
    # an atom whose otype is the name of another atom gets the otype of
    # that atom, plus an "=" edge to it (these edges get no edgeLine entry)
    for (q, qdata) in enumerate(qnodes):
        otype = qdata[0]
        referQ = qnames.get(otype, None)
        if referQ is not None:
            referOtype = qnodes[referQ][0]
            qnodes[q] = (referOtype, *qdata[1:])
            qedges.append((q, "=", referQ))

    if good:
        searchExe.qnames = qnames
        searchExe.qnodes = qnodes
        searchExe.qedgesRaw = qedges
        searchExe.nodeLine = nodeLine
        searchExe.edgeLine = edgeLine
    else:
        searchExe.good = False
def _validateFeature(
searchExe,
q,
fName,
features,
missingFeatures,
wrongValues,
hasValues={},
asEdge=False,
):
values = features[fName]
fSet = "edges" if asEdge else "nodes"
if fName not in searchExe.api.TF.featureSets[fSet]:
missingFeatures.setdefault(fName, []).append(q)
else:
if asEdge:
doValues = searchExe.api.TF.features[fName].edgeValues
if not doValues and values is not True:
hasValues.setdefault(fName, {}).setdefault(values, []).append(q)
return
requiredType = searchExe.api.TF.features[fName].dataType
if values is True:
return
elif values is None:
return
elif isinstance(values, types.FunctionType):
if requiredType == "str":
wrongValues.setdefault(fName, {}).setdefault(values, []).append(q)
elif isinstance(values, reTp):
if requiredType == "int":
wrongValues.setdefault(fName, {}).setdefault(values, []).append(q)
else:
valuesCast = set()
if requiredType == "int":
(ident, values) = values
if type(values) is not bool:
for val in values:
try:
valCast = int(val)
except Exception:
valCast = val
wrongValues.setdefault(fName, {}).setdefault(
val, []
).append(q)
valuesCast.add(valCast)
features[fName] = (ident, frozenset(valuesCast))
def _validation(searchExe):
    """Validate the query nodes and edges against the loaded dataset.

    Works on `searchExe.qnodes` and `searchExe.qedgesRaw` and checks:

    * the object types of the query nodes (a type `.` and names of custom
      sets in `searchExe.sets` are accepted);
    * the features in node conditions and in edge relations, and their values;
    * the data types of features used in feature-comparison relations;
    * whether every relational operator is a known relation.

    Parametrized relations are registered through `add_K_Relations`,
    `add_F_Relations` and `add_V_Relations` before the operators are
    looked up in `searchExe.relationFromName`.

    Problems are appended to `searchExe.badSemantics`. Problems found in
    node conditions are reported with the template line of the node,
    problems found in relations with the template line of the edge.

    `searchExe.qedges` is always set. If all is well, the features that are
    used are loaded by `searchExe.api.ensureLoaded`; otherwise
    `searchExe.good` is set to `False`.
    """
    levels = searchExe.api.C.levels.data
    otypes = {x[0] for x in levels}
    qnodes = searchExe.qnodes
    nodeLine = searchExe.nodeLine
    edgeMap = searchExe.edgeMap
    nodeMap = searchExe.nodeMap
    edgeLine = searchExe.edgeLine
    relationFromName = searchExe.relationFromName
    offset = searchExe.offset

    def lineRefs(nodeIdxs=(), edgeIdxs=()):
        # node indices and edge indices live in different line tables
        lines = [nodeLine[q] + offset for q in nodeIdxs]
        lines.extend(edgeLine[e] + offset for e in edgeIdxs)
        return ", ".join(str(ln) for ln in lines)

    good = True

    # check the object types of atoms

    otypesGood = True
    sets = searchExe.sets
    for (q, qdata) in enumerate(qnodes):
        otype = qdata[0]
        if otype == ".":
            continue
        if sets is not None and otype in sets:
            continue
        if otype not in otypes:
            searchExe.badSemantics.append(
                (nodeLine[q], f'Unknown object type: "{otype}"')
            )
            otypesGood = False
    if not otypesGood:
        searchExe.badSemantics.append(
            (
                None,
                "Valid object types are: {}".format(", ".join(x[0] for x in levels)),
            )
        )
        if sets is not None:
            searchExe.badSemantics.append(
                (
                    None,
                    "Or choose a custom set from: {}".format(", ".join(sorted(sets))),
                )
            )
        good = False

    # check the feature names of feature specs
    # and check the types of their values
    # node problems are keyed by node index, edge problems by edge index,
    # so they are collected in separate dictionaries

    missingFeatures = {}
    wrongValues = {}
    missingEdgeFeatures = {}
    wrongEdgeValues = {}
    wrongTypes = {}
    hasValues = {}

    for (q, qdata) in enumerate(qnodes):
        features = qdata[1]
        for fName in sorted(features):
            _validateFeature(
                searchExe, q, fName, features, missingFeatures, wrongValues
            )

    # check the relational operator token in edges
    # and replace them by an index
    # in the relations list of known relations
    qedges = []
    edgesGood = True

    # relations may have a variable number k in them (k-nearness, etc.)
    # make an entry in the relation map for each value of k
    addRels = {}
    for (f, op, t) in searchExe.qedgesRaw:
        if (
            isinstance(op, tuple)
            or (op[0] == "-" and op[-1] == ">")
            or (op[0] == "<" and op[-1] == "-")
            or (op[0] == "<" and op[-1] == ">")
            or (op[0] == "." and op[-1] == ".")
        ):
            continue
        match = kRe.findall(op)
        if match:
            (pre, k, post) = match[0]
            opNameK = f"{pre}k{post}"
            addRels.setdefault(opNameK, set()).add(int(k))
    if not missingFeatures and not wrongValues:
        add_K_Relations(searchExe, addRels)

    # relations may have one or two node features f,g in them (feature-comparison)
    # make an entry in the relation map for each value of (f, g)
    fOneRe = re.compile(r"^\.([^=#<>]+)\.$")
    fBothRe = re.compile(r"^\.([^=#<>]+)([=#<>])(.*)\.$")
    fMatchRe = re.compile(r"^\.([^~]+)~(.*?)~([^~]+)\.$")

    def checkTypes(e, fNames, requiredType):
        # record the features whose data type does not fit the comparison
        # NOTE(review): a feature name that is not in TF.features raises
        # KeyError here, as it did before - confirm whether that can occur
        for fName in fNames:
            fType = searchExe.api.TF.features[fName].dataType
            if fType != requiredType:
                wrongTypes.setdefault(fName, {}).setdefault(fType, set()).add(e)

    addRels = {}
    for (e, (f, op, t)) in enumerate(searchExe.qedgesRaw):
        if isinstance(op, tuple):
            continue
        match = fMatchRe.findall(op)
        if match:
            (fF, r, gF) = match[0]
            addRels.setdefault(".f~r~g.", set()).add(((f, fF), r, (t, gF)))
            checkTypes(e, (fF, gF), "str")
            continue
        match = fBothRe.findall(op)
        if match:
            (fF, r, gF) = match[0]
            addRels.setdefault(f".f{r}g.", set()).add(((f, fF), (t, gF)))
            if r in {"<", ">"}:
                checkTypes(e, (fF, gF), "int")
            continue
        match = fOneRe.findall(op)
        if match:
            fF = match[0]
            addRels.setdefault(".f.", set()).add(((f, fF), (t, fF)))
    if not missingFeatures and not wrongValues:
        add_F_Relations(searchExe, addRels)

    # edge relations may have a value spec in them
    # make an entry in the relation map for each value spec
    addRels = {}
    for (e, (f, op, t)) in enumerate(searchExe.qedgesRaw):
        if not isinstance(op, tuple):
            continue
        (opName, opFeatures) = op
        for eName in sorted(opFeatures):
            _validateFeature(
                searchExe,
                e,
                eName,
                opFeatures,
                missingEdgeFeatures,
                wrongEdgeValues,
                hasValues,
                asEdge=True,
            )
            addRels.setdefault(opName, set()).add((eName, opFeatures[eName]))
    if not (
        missingFeatures or wrongValues or missingEdgeFeatures or wrongEdgeValues
    ):
        add_V_Relations(searchExe, addRels)

    # now look up each particular relation in the relation map
    for (e, (f, op, t)) in enumerate(searchExe.qedgesRaw):
        theOp = op[0] if isinstance(op, tuple) else op
        rela = relationFromName.get(theOp, None)
        if rela is None:
            searchExe.badSemantics.append((edgeLine[e], f'Unknown relation: "{theOp}"'))
            edgesGood = False
        qedges.append((f, rela, t))
    if not edgesGood:
        searchExe.badSemantics.append(
            (None, f"Allowed relations:\n{searchExe.relationLegend}")
        )
        good = False

    # report error found above

    if missingFeatures or missingEdgeFeatures:
        for fName in sorted(set(missingFeatures) | set(missingEdgeFeatures)):
            searchExe.badSemantics.append(
                (
                    None,
                    'Missing feature "{}" in line(s) {}'.format(
                        fName,
                        lineRefs(
                            missingFeatures.get(fName, ()),
                            missingEdgeFeatures.get(fName, ()),
                        ),
                    ),
                )
            )
        good = False

    if hasValues:
        # hasValues only ever contains edge indices
        for (fName, wrongs) in sorted(hasValues.items()):
            searchExe.badSemantics.append(
                (None, f'Feature "{fName}" cannot have values:')
            )
            for (val, es) in sorted(wrongs.items()):
                searchExe.badSemantics.append(
                    (
                        None,
                        ' "{}" superfluous: line(s) {}'.format(
                            val, lineRefs(edgeIdxs=es)
                        ),
                    )
                )
        good = False

    if wrongValues or wrongEdgeValues:
        for fName in sorted(set(wrongValues) | set(wrongEdgeValues)):
            searchExe.badSemantics.append(
                (None, f'Feature "{fName}" has wrong values:')
            )
            nodeWrongs = wrongValues.get(fName, {})
            edgeWrongs = wrongEdgeValues.get(fName, {})
            for val in sorted(set(nodeWrongs) | set(edgeWrongs)):
                searchExe.badSemantics.append(
                    (
                        None,
                        ' "{}" is not a number: line(s) {}'.format(
                            val,
                            lineRefs(nodeWrongs.get(val, ()), edgeWrongs.get(val, ())),
                        ),
                    )
                )
        good = False

    if wrongTypes:
        # wrongTypes only ever contains edge indices
        for (fName, wrongs) in sorted(wrongTypes.items()):
            searchExe.badSemantics.append((None, f'Feature "{fName}" has wrong type:'))
            for (val, es) in sorted(wrongs.items()):
                searchExe.badSemantics.append(
                    (
                        None,
                        ' "{}" is the wrong type: line(s) {}'.format(
                            val, lineRefs(edgeIdxs=sorted(es))
                        ),
                    )
                )
        good = False

    searchExe.qedges = qedges

    # determine which node and edge features are not yet loaded,
    # and load them
    eFeatsUsed = set()
    for (f, rela, t) in qedges:
        efName = edgeMap.get(rela, (None,))[0]
        if efName is not None:
            eFeatsUsed.add(efName)
    nFeatsUsed = set()
    for (n, qdata) in enumerate(qnodes):
        nFeatsUsed.update(qdata[1])
        if n in nodeMap:
            nFeatsUsed |= nodeMap[n]

    if good:
        searchExe.api.ensureLoaded(eFeatsUsed | nFeatsUsed)
    else:
        searchExe.good = False