Module tf.search.spin

Search preprocessing

Expand source code Browse git
"""
# Search preprocessing
"""

import types
from random import randrange
from inspect import signature

from .syntax import (
    reTp,
    cleanParent,
    QWHERE,
    QWITHOUT,
    QWITH,
    QHAVE,
    QOR,
    QEND,
)
from ..core.helpers import project

# SPINNING ###


def _spinAtom(searchExe, q):
    F = searchExe.api.F
    Fs = searchExe.api.Fs
    qnodes = searchExe.qnodes
    sets = searchExe.sets

    (otype, features, src, quantifiers) = qnodes[q]
    featureList = sorted(features.items())
    yarn = set()
    nodeSet = sets[otype] if sets is not None and otype in sets else F.otype.s(otype)
    for n in nodeSet:
        good = True
        for (ft, val) in featureList:
            fval = Fs(ft).v(n)
            if val is None:
                if fval is not None:
                    good = False
                    break
            elif val is True:
                if fval is None:
                    good = False
                    break
            elif isinstance(val, types.FunctionType):
                if not val(fval):
                    good = False
                    break
            elif isinstance(val, reTp):
                if fval is None or not val.search(fval):
                    good = False
                    break
            else:
                (ident, val) = val
                if ident is None and val is True:
                    pass
                elif ident:
                    if fval not in val:
                        good = False
                        break
                else:
                    if fval in val:
                        good = False
                        break
        if good:
            yarn.add(n)
    if quantifiers:
        for quantifier in quantifiers:
            yarn = _doQuantifier(searchExe, yarn, src, quantifier)
    searchExe.yarns[q] = yarn


def _doQuantifier(searchExe, yarn, atom, quantifier):
    from .searchexe import SearchExe

    (quKind, quTemplates, parentName, ln) = quantifier
    TF = searchExe.api.TF
    info = TF.info
    _msgCache = searchExe._msgCache
    indent = TF.indent
    showQuantifiers = searchExe.showQuantifiers
    silent = searchExe.silent
    level = searchExe.level
    universe = yarn
    cleanAtom = cleanParent(atom, parentName)
    offset = searchExe.offset + ln

    if showQuantifiers:
        indent(level=level + 1, reset=True)
        info(f'"Quantifier on "{cleanAtom}"', cache=_msgCache)

    if quKind == QWITHOUT:
        queryN = "\n".join((cleanAtom, quTemplates[0]))
        exe = SearchExe(
            searchExe.api,
            queryN,
            outerTemplate=searchExe.outerTemplate,
            quKind=quKind,
            level=level + 1,
            offset=offset,
            sets=searchExe.sets,
            shallow=True,
            showQuantifiers=showQuantifiers,
            silent=silent,
            _msgCache=_msgCache,
            setInfo=searchExe.setInfo,
        )
        if showQuantifiers:
            indent(level=level + 2, reset=True)
            info(f"{quKind}\n{queryN}\n{QEND}", tm=False, cache=_msgCache)
        noResults = exe.search()
        resultYarn = universe - noResults
        if showQuantifiers:
            indent(level=level + 2)
            info(f"{len(noResults)} nodes to exclude", cache=_msgCache)
    elif quKind == QWHERE:
        # compute the atom+antecedent:
        #   as result tuples
        queryA = "\n".join((cleanAtom, quTemplates[0]))
        exe = SearchExe(
            searchExe.api,
            queryA,
            outerTemplate=searchExe.outerTemplate,
            quKind=quKind,
            offset=offset,
            level=level + 1,
            sets=searchExe.sets,
            shallow=False,
            showQuantifiers=showQuantifiers,
            silent=silent,
            _msgCache=_msgCache,
            setInfo=searchExe.setInfo,
        )
        if showQuantifiers:
            indent(level=level + 2, reset=True)
            info(f"{quKind}\n{queryA}", tm=False, cache=_msgCache)
        aResultTuples = exe.search(limit=0)
        if showQuantifiers:
            indent(level=level + 2)
            info(f"{len(aResultTuples)} matching nodes", cache=_msgCache)
        if not aResultTuples:
            resultYarn = yarn
        else:
            sizeA = len(aResultTuples[0])

            # compute the atom+antecedent+consequent:
            #   as shallow result tuples (same length as atom+antecedent)
            queryAH = "\n".join((cleanAtom, *quTemplates))
            offset += len(quTemplates[0].split("\n"))
            exe = SearchExe(
                searchExe.api,
                queryAH,
                outerTemplate=searchExe.outerTemplate,
                quKind=QHAVE,
                offset=offset,
                level=level + 1,
                sets=searchExe.sets,
                shallow=sizeA,
                showQuantifiers=showQuantifiers,
                silent=silent,
                _msgCache=_msgCache,
                setInfo=searchExe.setInfo,
            )
            if showQuantifiers:
                indent(level=level + 2, reset=True)
                info(f"{QHAVE}\n{queryAH}\n{QEND}", tm=False, cache=_msgCache)
            ahResults = exe.search()
            if showQuantifiers:
                indent(level=level + 2)
                info(f"{len(ahResults)} matching nodes", cache=_msgCache)

            # determine the shallow tuples that correspond to
            #   atom+antecedent but not consequent
            #   and then take the projection to their first components
            resultsAnotH = project(set(aResultTuples) - ahResults, 1)
            if showQuantifiers:
                indent(level=level + 2)
                info(
                    f"{len(resultsAnotH)} match antecedent but not consequent",
                    cache=_msgCache,
                )

            # now have the atoms that do NOT qualify:
            #   we subtract them from the universe
            resultYarn = universe - resultsAnotH
    elif quKind == QWITH:
        # compute the atom+alternative for all alternatives and union them
        resultYarn = set()
        nAlts = len(quTemplates)
        for (i, alt) in enumerate(quTemplates):
            queryAlt = "\n".join((cleanAtom, alt))
            exe = SearchExe(
                searchExe.api,
                queryAlt,
                outerTemplate=searchExe.outerTemplate,
                quKind=quKind if i == 0 else QOR,
                offset=offset,
                level=level + 1,
                sets=searchExe.sets,
                shallow=True,
                showQuantifiers=showQuantifiers,
                silent=silent,
                _msgCache=_msgCache,
                setInfo=searchExe.setInfo,
            )
            offset += len(alt.split("\n")) + 1
            if showQuantifiers:
                indent(level=level + 2, reset=True)
                info(
                    (f"{quKind if i == 0 else QOR}\n{queryAlt}"),
                    tm=False,
                    cache=_msgCache,
                )
            altResults = exe.search()
            altResults &= universe
            nAlt = len(altResults)
            nYarn = len(resultYarn)
            resultYarn |= altResults
            nNew = len(resultYarn)
            if showQuantifiers:
                indent(level=level + 2)
                info(f"adding {nAlt} to {nYarn} yields {nNew} nodes", cache=_msgCache)
                if i == nAlts - 1:
                    info(QEND, tm=False, cache=_msgCache)
    if showQuantifiers:
        indent(level=level + 1)
        info(f"reduction from {len(yarn)} to {len(resultYarn)} nodes", cache=_msgCache)
        indent(level=0)
    return resultYarn


def spinAtoms(searchExe):
    qnodes = searchExe.qnodes
    for q in range(len(qnodes)):
        _spinAtom(searchExe, q)


def estimateSpreads(searchExe, both=False):
    TRY_LIMIT_F = searchExe.perfParams["tryLimitFrom"]
    TRY_LIMIT_T = searchExe.perfParams["tryLimitTo"]
    qnodes = searchExe.qnodes
    relations = searchExe.relations
    converse = searchExe.converse
    qedges = searchExe.qedges
    yarns = searchExe.yarns

    spreadsC = {}
    spreads = {}

    for (e, (f, rela, t)) in enumerate(qedges):
        tasks = [(f, rela, t, 1)]
        if both:
            tasks.append((t, converse[rela], f, -1))
        for (tf, trela, tt, dir) in tasks:
            s = relations[trela]["spin"]
            yarnF = yarns[tf]
            yarnT = yarns[tt]
            dest = spreads if dir == 1 else spreadsC
            if type(s) is float:
                # fixed estimates
                dest[e] = len(yarnT) * s
                continue
            yarnF = list(yarnF)
            yarnT = yarns[tt]
            yarnFl = len(yarnF)
            if yarnFl < TRY_LIMIT_F:
                triesn = yarnF
            else:
                triesn = {yarnF[randrange(yarnFl)] for n in range(TRY_LIMIT_F)}

            if len(triesn) == 0:
                dest[e] = 0
            else:
                r = relations[trela]["func"](qnodes[tf][0], qnodes[tt][0])
                nparams = len(signature(r).parameters)
                totalSpread = 0
                if nparams == 1:
                    for n in triesn:
                        mFromN = {m for m in r(n) or () if m in yarnT}
                        totalSpread += len(mFromN)
                else:
                    yarnTl = len(yarnT)
                    yarnTL = list(yarnT)
                    for n in triesn:
                        triesm = (
                            yarnT
                            if yarnTl < TRY_LIMIT_T
                            else set(
                                yarnTL[randrange(yarnTl)] for m in range(TRY_LIMIT_T)
                            )
                        )
                        if len(triesm) == 0:
                            thisSpread = 0
                        else:
                            thisSpread = 0
                            for m in triesm:
                                if r(n, m):
                                    thisSpread += 1
                            thisSpread = thisSpread / len(triesm)
                        totalSpread += yarnTl * thisSpread
                dest[e] = totalSpread / len(triesn)
    searchExe.spreads = spreads
    searchExe.spreadsC = spreadsC


def _chooseEdge(searchExe):
    qedges = searchExe.qedges
    yarns = searchExe.yarns
    spreads = searchExe.spreads
    yarnSize = {}
    for (e, (f, rela, t)) in enumerate(qedges):
        if searchExe.uptodate[e]:
            continue
        yarnFl = len(yarns[f])
        yarnTl = len(yarns[t])
        yarnSize[e] = yarnFl * yarnTl * spreads[e]
    firstEdge = sorted(yarnSize.items(), key=lambda x: x[1])[0][0]
    return firstEdge


def _spinEdge(searchExe, e):
    YARN_RATIO = searchExe.perfParams["yarnRatio"]
    qnodes = searchExe.qnodes
    relations = searchExe.relations
    yarns = searchExe.yarns
    spreads = searchExe.spreads
    qedges = searchExe.qedges
    uptodate = searchExe.uptodate

    (f, rela, t) = qedges[e]
    yarnF = yarns[f]
    yarnT = yarns[t]
    uptodate[e] = True

    # if the yarns around an edge are big,
    # and the spread of the relation is
    # also big, spinning costs an enormous amount of time,
    # and will not help in reducing the search space.
    # condition for skipping: spread times length from-yarn >= SPIN_LIMIT
    yarnFl = len(yarnF)
    yarnTl = len(yarnT)
    thisYarnRatio = (
        max((yarnFl / yarnTl, yarnTl / yarnFl)) / spreads[e]
        if yarnFl and yarnTl and spreads[e]
        else -YARN_RATIO
    )
    if thisYarnRatio < YARN_RATIO:
        return False
    # if spreads[e] * len(yarnF) >= SPIN_LIMIT:
    #   return False

    # for some basic relations we know that spinning is useless
    s = relations[rela]["spin"]
    if type(s) is float:
        return False

    # for other basic relations we have an optimized spin function
    # if type(s) is types.FunctionType:
    if isinstance(s, types.FunctionType):
        (newYarnF, newYarnT) = s(qnodes[f][0], qnodes[t][0])(yarnF, yarnT)
    else:
        r = relations[rela]["func"](qnodes[f][0], qnodes[t][0])
        nparams = len(signature(r).parameters)
        newYarnF = set()
        newYarnT = set()

        if nparams == 1:
            for n in yarnF:
                found = False
                for m in r(n):
                    if m not in yarnT:
                        continue
                    newYarnT.add(m)
                    found = True
                if found:
                    newYarnF.add(n)
        else:
            for n in yarnF:
                found = False
                for m in yarnT:
                    if r(n, m):
                        newYarnT.add(m)
                        found = True
                if found:
                    newYarnF.add(n)

    affectedF = len(newYarnF) != len(yarns[f])
    affectedT = len(newYarnT) != len(yarns[t])

    uptodate[e] = True
    for (oe, (of, orela, ot)) in enumerate(qedges):
        if oe == e:
            continue
        if (affectedF and f in {of, ot}) or (affectedT and t in {of, ot}):
            uptodate[oe] = False
    searchExe.yarns[f] = newYarnF
    searchExe.yarns[t] = newYarnT

    return affectedF or affectedT


def spinEdges(searchExe):
    qnodes = searchExe.qnodes
    qedges = searchExe.qedges
    yarns = searchExe.yarns
    uptodate = searchExe.uptodate
    thinned = {}

    estimateSpreads(searchExe, both=True)

    for e in range(len(qedges)):
        uptodate[e] = False
    it = 0
    while 1:
        if min(len(yarns[q]) for q in range(len(qnodes))) == 0:
            break
        if all(uptodate[e] for e in range(len(qedges))):
            break
        e = _chooseEdge(searchExe)
        (f, rela, t) = qedges[e]
        affected = _spinEdge(searchExe, e)
        if affected:
            thinned[e] = 1
        it += 1
    searchExe.thinned = thinned

Functions

def estimateSpreads(searchExe, both=False)
Expand source code Browse git
def estimateSpreads(searchExe, both=False):
    TRY_LIMIT_F = searchExe.perfParams["tryLimitFrom"]
    TRY_LIMIT_T = searchExe.perfParams["tryLimitTo"]
    qnodes = searchExe.qnodes
    relations = searchExe.relations
    converse = searchExe.converse
    qedges = searchExe.qedges
    yarns = searchExe.yarns

    spreadsC = {}
    spreads = {}

    for (e, (f, rela, t)) in enumerate(qedges):
        tasks = [(f, rela, t, 1)]
        if both:
            tasks.append((t, converse[rela], f, -1))
        for (tf, trela, tt, dir) in tasks:
            s = relations[trela]["spin"]
            yarnF = yarns[tf]
            yarnT = yarns[tt]
            dest = spreads if dir == 1 else spreadsC
            if type(s) is float:
                # fixed estimates
                dest[e] = len(yarnT) * s
                continue
            yarnF = list(yarnF)
            yarnT = yarns[tt]
            yarnFl = len(yarnF)
            if yarnFl < TRY_LIMIT_F:
                triesn = yarnF
            else:
                triesn = {yarnF[randrange(yarnFl)] for n in range(TRY_LIMIT_F)}

            if len(triesn) == 0:
                dest[e] = 0
            else:
                r = relations[trela]["func"](qnodes[tf][0], qnodes[tt][0])
                nparams = len(signature(r).parameters)
                totalSpread = 0
                if nparams == 1:
                    for n in triesn:
                        mFromN = {m for m in r(n) or () if m in yarnT}
                        totalSpread += len(mFromN)
                else:
                    yarnTl = len(yarnT)
                    yarnTL = list(yarnT)
                    for n in triesn:
                        triesm = (
                            yarnT
                            if yarnTl < TRY_LIMIT_T
                            else set(
                                yarnTL[randrange(yarnTl)] for m in range(TRY_LIMIT_T)
                            )
                        )
                        if len(triesm) == 0:
                            thisSpread = 0
                        else:
                            thisSpread = 0
                            for m in triesm:
                                if r(n, m):
                                    thisSpread += 1
                            thisSpread = thisSpread / len(triesm)
                        totalSpread += yarnTl * thisSpread
                dest[e] = totalSpread / len(triesn)
    searchExe.spreads = spreads
    searchExe.spreadsC = spreadsC
def spinAtoms(searchExe)
Expand source code Browse git
def spinAtoms(searchExe):
    qnodes = searchExe.qnodes
    for q in range(len(qnodes)):
        _spinAtom(searchExe, q)
def spinEdges(searchExe)
Expand source code Browse git
def spinEdges(searchExe):
    qnodes = searchExe.qnodes
    qedges = searchExe.qedges
    yarns = searchExe.yarns
    uptodate = searchExe.uptodate
    thinned = {}

    estimateSpreads(searchExe, both=True)

    for e in range(len(qedges)):
        uptodate[e] = False
    it = 0
    while 1:
        if min(len(yarns[q]) for q in range(len(qnodes))) == 0:
            break
        if all(uptodate[e] for e in range(len(qedges))):
            break
        e = _chooseEdge(searchExe)
        (f, rela, t) = qedges[e]
        affected = _spinEdge(searchExe, e)
        if affected:
            thinned[e] = 1
        it += 1
    searchExe.thinned = thinned