Module tff.convert.iiif
Expand source code Browse git
from tf.core.files import (
writeJson,
fileOpen,
fileExists,
initTree,
dirExists,
dirRemove,
dirCopy,
dirContents,
stripExt,
)
from tf.core.generic import AttrDict
from tf.core.helpers import console, readCfg
from .helpers import getPageInfo, getImageLocations, getImageSizes
DS_STORE = ".DS_Store"
def fillinIIIF(data, **kwargs):
    """Fill in `{name}` place holders in a (possibly nested) template value.

    Parameters
    ----------
    data: any
        A string, or a list / dict whose members are processed recursively.
        Values of any other type (including subclasses of `str`, `list` and
        `dict`, because the type test is exact) are returned untouched.
    kwargs: dict
        The values to fill in. Each key `name` replaces every occurrence of
        `{name}` in a string.

    Returns
    -------
    any
        The filled in value. When a string consists of exactly one place holder
        whose value is an `int`, the integer itself is returned instead of its
        string representation.
    """
    kind = type(data)

    if kind is dict:
        return {key: fillinIIIF(value, **kwargs) for (key, value) in data.items()}

    if kind is list:
        return [fillinIIIF(member, **kwargs) for member in data]

    if kind is not str:
        return data

    result = data

    for name, value in kwargs.items():
        placeHolder = "{" + name + "}"

        # a string that is nothing but an integer place holder yields the integer
        if type(value) is int and result == placeHolder:
            return value

        result = result.replace(placeHolder, str(value))

    return result
def parseIIIF(settings, prod, selector, **kwargs):
    """Parse the iiif yml file and deliver a filled in section.

    The iiif.yml file contains switches and constants and macros which then are used
    to define IIIF things via templates.

    The top-level section `scans` contains instructions to define extra annotations
    on node types that need to refer to scans.
    This is only used for WATM generation.

    The top-level section `templates` contains fragments from which manifests can be
    constructed. This is only used in this module.

    This function fills in the switches, based on the parameter `prod`, then
    prepares the constants, then prepares the macros, and then uses it all
    to assemble either the `scans` section or the `templates` section; this
    choice is based on the parameter `selector`.

    The `settings` passed in are not modified: the switch values are merged into
    copies of the `constants` and `macros` sections.

    Parameters
    ----------
    settings: dict
        The contents of the iiif.yml file. The sections `constants`, `macros`
        and `switches` are optional, the section named by `selector` must be
        present.
    prod: string
        Either `prod` or `dev` or `preview`.
        This determines whether we fill in a production value or a develop value
        or a preview value for each of the settings mentioned in the `switches`
        section of the iiif.yml file.
        If there are switches, `prod` must be one of their keys,
        otherwise a `KeyError` is raised.
    selector: string
        Either `scans` or `templates`.
        Which top-level of sections we are going to grab out of the iiif.yml file.
    kwargs: dict
        Additional optional parameters to pass as key value pairs to
        the iiif config file. These values will be filled in for place holders
        of the form `[`*arg*`]`.

    Returns
    -------
    AttrDict
        The entries of the selected section, with macros (`<name>`),
        constants (`«name»`) and extra parameters (`[name]`) filled in.
    """

    def applySwitches(prod, constants, switches):
        # merge into a copy, so that the settings of the caller are left intact
        # and values of one `prod` mode cannot leak into a later call
        merged = dict(constants or {})

        if len(switches):
            merged.update(switches[prod])

        return merged

    def substituteConstants(data, macros, constants, kwargs):
        tpd = type(data)

        if tpd is str:
            # macros first, because they may expand to text with constants in it
            for k, v in macros.items():
                pattern = f"<{k}>"
                data = data.replace(pattern, str(v))

            for k, v in constants.items():
                pattern = f"«{k}»"

                # a string that is exactly one integer constant becomes that integer
                if type(v) is int and data == pattern:
                    data = v
                    break
                else:
                    data = data.replace(pattern, str(v))

            # only continue if the previous step has not produced an integer
            if type(data) is str:
                for k, v in kwargs.items():
                    pattern = f"[{k}]"

                    if type(v) is int and data == pattern:
                        data = v
                        break
                    else:
                        data = data.replace(pattern, str(v))

            return data

        if tpd is list:
            return [
                substituteConstants(item, macros, constants, kwargs) for item in data
            ]

        if tpd is dict:
            return {
                k: substituteConstants(v, macros, constants, kwargs)
                for (k, v) in data.items()
            }

        return data

    # a section that is present but empty is treated as absent
    switches = settings.get("switches") or {}
    constants = applySwitches(prod, settings.get("constants"), switches)
    macros = applySwitches(prod, settings.get("macros"), switches)

    return AttrDict(
        {
            x: substituteConstants(xText, macros, constants, kwargs)
            for (x, xText) in settings[selector].items()
        }
    )
class IIIF:
    """Generator of IIIF manifests for the page scans of a corpus.

    Constructing an object reads the iiif configuration, the image sizes,
    the page rotations and the page sequence. After that, call `manifests()`
    to write the manifest files.

    If the configuration cannot be read, the member `error` is set to True and
    all other methods return without doing anything.
    """

    def __init__(
        self,
        teiVersion,
        app,
        pageInfoDir,
        outputDir=None,
        prod="dev",
        silent=False,
        **kwargs,
    ):
        """Class for generating IIIF manifests.

        Parameters
        ----------
        teiVersion: string
            Subdirectory within the static directory.
            The manifests are generated in this subdirectory, which corresponds to
            the version of the TEI source.
        app: object
            A loaded TF data source
        pageInfoDir: string
            Directory where the files with page information are, especially the
            page sequence file.
        outputDir: string, optional None
            If present, manifests and logo will be generated in this directory.
            Otherwise a standard location is chosen: `static` at
            the top-level of the repo and within that `prod` or `dev` or `preview`
        prod: string, optional dev
            Whether the manifests are for production (`prod`) or development (`dev`)
            or preview (`preview`). Any other value is treated as `dev`.
        silent: boolean, optional False
            Whether to suppress output messages
        kwargs: dict
            Additional optional parameters to pass as key value pairs to
            the iiif config file. These values will be filled in for place holders
            of the form `[`*arg*`]`.
        """
        # validate once and use the validated value everywhere below, so that
        # the locations, the output directory and the templates all agree
        prod = prod if prod in {"prod", "dev", "preview"} else "dev"

        self.teiVersion = teiVersion
        self.app = app
        self.pageInfoDir = pageInfoDir
        self.prod = prod
        self.silent = silent
        self.error = False
        self.kwargs = kwargs

        # no version means: no extra path component
        teiVersionRep = f"/{teiVersion}" if teiVersion else ""

        F = app.api.F
        L = app.api.L

        locations = getImageLocations(app, prod, silent)
        repoLocation = locations.repoLocation
        self.scanDir = locations.scanDir
        self.thumbDir = locations.thumbDir
        scanRefDir = locations.scanRefDir
        self.scanRefDir = scanRefDir
        self.coversDir = locations.coversDir
        doCovers = locations.doCovers
        self.doCovers = doCovers

        outputDir = (
            f"{repoLocation}/static{teiVersionRep}/{prod}"
            if outputDir is None
            else outputDir
        )
        self.outputDir = outputDir
        self.manifestDir = f"{outputDir}/manifests"
        self.pagesDir = f"{scanRefDir}/pages"
        self.logoInDir = f"{scanRefDir}/logo"
        self.logoDir = f"{outputDir}/logo"

        if doCovers:
            self.coversHtmlIn = f"{repoLocation}/programs/covers.html"
            self.coversHtmlOut = f"{outputDir}/covers.html"

        (ok, settings) = readCfg(
            repoLocation, "iiif", "IIIF", verbose=-1 if silent else 1, plain=True
        )

        if not ok:
            self.error = True
            return

        self.settings = settings
        manifestLevel = settings.get("manifestLevel", "folder")
        self.console(f"Manifestlevel = {manifestLevel}")
        self.manifestLevel = manifestLevel

        self.templates = parseIIIF(settings, prod, "templates", **kwargs)

        # at folder level: a list of folder names;
        # otherwise: a list of pairs (folder name, list of file names)
        folders = (
            [F.folder.v(f) for f in F.otype.s("folder")]
            if manifestLevel == "folder"
            else [
                (F.folder.v(fo), [F.file.v(fi) for fi in L.d(fo, otype="file")])
                for fo in F.otype.s("folder")
            ]
        )

        self.getSizes()
        self.getRotations()
        self.getPageSeq()
        pages = self.pages
        self.folders = folders

        self.console("Collections:")

        if manifestLevel == "folder":
            for folder in folders:
                n = len(pages["pages"][folder])
                self.console(f"{folder:>5} with {n:>4} pages")
        else:
            for folder, files in folders:
                n = len(pages["pages"][folder])
                m = sum(len(x) for x in pages["pages"][folder].values())
                self.console(f"{folder:>10} with {n:>4} files and {m:>4} pages")

    def console(self, msg, **kwargs):
        """Print something to the output.

        This works exactly as `tf.core.helpers.console`

        When the silent member of the object is True, the message will be suppressed.
        """
        if not self.silent:
            console(msg, **kwargs)

    def getRotations(self):
        """Read the rotations of the pages into the member `rotateInfo`.

        The information is read from the file `rotation_pages.tsv` in the
        `scanRefDir` directory: a tab-separated file whose first line is
        skipped as a header and whose remaining lines have the page in the
        first field and an integer rotation in the second field.

        If the file does not exist, `rotateInfo` is an empty dict and a message
        is printed, also when the object is silent.
        """
        if self.error:
            return

        scanRefDir = self.scanRefDir
        rotateFile = f"{scanRefDir}/rotation_pages.tsv"

        rotateInfo = {}
        self.rotateInfo = rotateInfo

        if not fileExists(rotateFile):
            console(f"Rotation file not found: {rotateFile}")
            return

        with fileOpen(rotateFile) as rh:
            # skip the header line; the default prevents a crash on an empty file
            next(rh, None)

            for line in rh:
                line = line.rstrip("\n")

                # tolerate blank lines, e.g. at the end of the file
                if not line:
                    continue

                fields = line.split("\t")
                rotateInfo[fields[0]] = int(fields[1])

    def getSizes(self):
        """Store the result of `getImageSizes()` in the member `sizeInfo`."""
        if self.error:
            return

        self.sizeInfo = getImageSizes(self.scanRefDir, self.doCovers, self.silent)

    def getPageSeq(self):
        """Collect the page sequence and store it in the member `pages`.

        The page information comes from `getPageInfo()`. If covers are enabled,
        the sorted names (without extension) of the entries in the covers
        directory are stored in the member `covers` and added to the page
        information under the key `covers`.
        """
        if self.error:
            return

        manifestLevel = self.manifestLevel
        doCovers = self.doCovers
        zoneBased = self.settings.get("zoneBased", False)

        if doCovers:
            coversDir = self.coversDir
            # NOTE(review): the first member of the result of dirContents is
            # presumably the collection of file names - confirm in tf.core.files
            covers = sorted(
                # compare by value: an identity test on strings never excludes
                # the file name
                stripExt(f)
                for f in dirContents(coversDir)[0]
                if f != DS_STORE
            )
            self.covers = covers

        pageInfoDir = self.pageInfoDir

        pages = getPageInfo(pageInfoDir, zoneBased, manifestLevel)

        if doCovers:
            pages["covers"] = covers

        self.pages = pages

    def genPages(self, kind, folder=None, file=None):
        """Generate one manifest: for the covers, for a folder, or for a file.

        Parameters
        ----------
        kind: string
            Either `covers` or `pages`: the key in the page information
            and in the size information to work with.
        folder: string, optional None
            The folder whose pages must be put in the manifest.
            For kind `covers` the value `covers` is used as folder name.
        file: string, optional None
            The file within the folder whose pages must be put in the manifest;
            only relevant if the manifest level is not `folder`.

        Returns
        -------
        tuple or None
            The number of pages encountered and the number of distinct items
            that ended up in the manifest. Pages with the same name, size and
            rotation yield one item.
            The manifest is only written if there is at least one item.
            If the object is in error, None is returned.
        """
        if self.error:
            return

        manifestLevel = self.manifestLevel
        zoneBased = self.settings.get("zoneBased", False)

        templates = self.templates
        sizeInfo = self.sizeInfo[kind]
        rotateInfo = None if kind == "covers" else self.rotateInfo
        things = self.pages[kind]
        theseThings = things if folder is None else things.get(folder, None)

        if manifestLevel == "folder":
            thesePages = theseThings or []
        else:
            # a folder that is missing from the page info has no pages
            thesePages = (
                theseThings if file is None else (theseThings or {}).get(file, [])
            ) or []

        if kind == "covers":
            folder = "covers"

        pageItem = templates.coverItem if kind == "covers" else templates.pageItem

        itemsSeen = set()
        items = []
        nPages = 0

        for p in thesePages:
            nPages += 1

            if zoneBased:
                (p, region) = p
            else:
                region = "full"

            w, h = sizeInfo.get(p, (0, 0))
            rot = 0 if rotateInfo is None else rotateInfo.get(p, 0)

            key = (p, w, h, rot)

            # count every page, but generate an item only once
            if key in itemsSeen:
                continue

            itemsSeen.add(key)

            item = {
                k: fillinIIIF(
                    v,
                    folder=folder,
                    file=file,
                    page=p,
                    region=region,
                    width=w,
                    height=h,
                    rot=rot,
                )
                for (k, v) in pageItem.items()
            }
            items.append(item)

        pageSequence = (
            templates.coverSequence if kind == "covers" else templates.pageSequence
        )
        manifestDir = self.manifestDir

        data = {
            k: fillinIIIF(v, folder=folder, file=file)
            for (k, v) in pageSequence.items()
        }
        data["items"] = items

        nItems = len(items)

        if nItems:
            writeJson(
                data,
                asFile=(
                    f"{manifestDir}/{folder}.json"
                    if manifestLevel == "folder"
                    else f"{manifestDir}/{folder}/{file}.json"
                ),
            )

        return (nPages, nItems)

    def manifests(self):
        """Generate all manifests and copy the logo directory.

        The manifest directory is initialized afresh. If covers are enabled,
        the covers html file is copied to the output directory with the
        place holder `«server»` replaced by the server of the current `prod`
        mode, and a manifest for the covers is generated.
        Then a manifest is generated per folder, or per file within a folder,
        depending on the manifest level; at file level, a folder directory that
        ends up without items is removed again.

        Finally the logo directory is copied to the output directory; if it does
        not exist an error message is printed, also when the object is silent.
        """
        if self.error:
            return

        folders = self.folders
        manifestDir = self.manifestDir
        logoInDir = self.logoInDir
        logoDir = self.logoDir
        doCovers = self.doCovers
        manifestLevel = self.manifestLevel

        prod = self.prod
        settings = self.settings
        server = settings["switches"][prod]["server"]

        initTree(manifestDir, fresh=True)

        if doCovers:
            coversHtmlIn = self.coversHtmlIn
            coversHtmlOut = self.coversHtmlOut

            with fileOpen(coversHtmlIn) as fh:
                coversHtml = fh.read()

            coversHtml = coversHtml.replace("«server»", server)

            with fileOpen(coversHtmlOut, "w") as fh:
                fh.write(coversHtml)

            self.genPages("covers")

        # totals: pages, items, manifests
        p = 0
        i = 0
        m = 0

        if manifestLevel == "folder":
            for folder in folders:
                (thisP, thisI) = self.genPages("pages", folder=folder)

                p += thisP
                i += thisI

                if thisI:
                    m += 1
        else:
            for folder, files in folders:
                folderDir = f"{manifestDir}/{folder}"
                initTree(folderDir, fresh=True, gentle=False)

                folderI = 0

                for file in files:
                    (thisP, thisI) = self.genPages("pages", folder=folder, file=file)

                    p += thisP
                    i += thisI

                    if thisI:
                        m += 1

                    folderI += thisI

                # do not leave empty folder directories behind
                if folderI == 0:
                    dirRemove(folderDir)

        if dirExists(logoInDir):
            dirCopy(logoInDir, logoDir)
        else:
            console(f"Directory with logos not found: {logoInDir}", error=True)

        self.console(
            f"{m} IIIF manifests with {i} items for {p} pages generated in {manifestDir}"
        )
Functions
def fillinIIIF(data, **kwargs)
def parseIIIF(settings, prod, selector, **kwargs)
-
Parse the iiif yml file and deliver a filled in section.
The iiif.yml file contains switches and constants and macros which then are used to define IIIF things via templates.
The top-level section `scans` contains instructions to define extra annotations on node types that need to refer to scans. This is only used for WATM generation.
The top-level section `templates` contains fragments from which manifests can be constructed. This is only used in this module.
This function fills in the switches, based on the parameter `prod`, then prepares the constants, then prepares the macros, and then uses it all to assemble either the `scans` section or the `templates` section; this choice is based on the parameter `selector`.
Parameters
prod: string
- Either `prod` or `dev` or `preview`. This determines whether we fill in a production value or a development value or a preview value for each of the settings mentioned in the `switches` section of the iiif.yml file.
selector: string
- Either `scans` or `templates`. Which of the top-level sections we are going to grab out of the iiif.yml file.
kwargs: dict
- Additional optional parameters to pass as key value pairs to the iiif config file. These values will be filled in for placeholders of the form `[arg]`.
Classes
class IIIF (teiVersion, app, pageInfoDir, outputDir=None, prod='dev', silent=False, **kwargs)
-
Class for generating IIIF manifests.
Parameters
teiVersion: string
- Subdirectory within the static directory. The manifests are generated in this subdirectory, which corresponds to the version of the TEI source.
app: object
- A loaded TF data source
pageInfoDir: string
- Directory where the files with page information are, especially the page sequence file.
outputDir: string, optional None
- If present, manifests and logo will be generated in this directory. Otherwise a standard location is chosen: `static` at the top-level of the repo and within that `prod` or `dev` or `preview`.
prod: string, optional dev
- Whether the manifests are for production (`prod`) or development (`dev`) or preview (`preview`).
silent: boolean, optional False
- Whether to suppress output messages
kwargs: dict
- Additional optional parameters to pass as key value pairs to the iiif config file. These values will be filled in for placeholders of the form `[arg]`.
Expand source code Browse git
class IIIF:
    """Generator of IIIF manifests for the page scans of a corpus.

    Constructing an object reads the iiif configuration, the image sizes,
    the page rotations and the page sequence. After that, call `manifests()`
    to write the manifest files.

    If the configuration cannot be read, the member `error` is set to True and
    all other methods return without doing anything.
    """

    def __init__(
        self,
        teiVersion,
        app,
        pageInfoDir,
        outputDir=None,
        prod="dev",
        silent=False,
        **kwargs,
    ):
        """Class for generating IIIF manifests.

        Parameters
        ----------
        teiVersion: string
            Subdirectory within the static directory.
            The manifests are generated in this subdirectory, which corresponds to
            the version of the TEI source.
        app: object
            A loaded TF data source
        pageInfoDir: string
            Directory where the files with page information are, especially the
            page sequence file.
        outputDir: string, optional None
            If present, manifests and logo will be generated in this directory.
            Otherwise a standard location is chosen: `static` at
            the top-level of the repo and within that `prod` or `dev` or `preview`
        prod: string, optional dev
            Whether the manifests are for production (`prod`) or development (`dev`)
            or preview (`preview`). Any other value is treated as `dev`.
        silent: boolean, optional False
            Whether to suppress output messages
        kwargs: dict
            Additional optional parameters to pass as key value pairs to
            the iiif config file. These values will be filled in for place holders
            of the form `[`*arg*`]`.
        """
        # validate once and use the validated value everywhere below, so that
        # the locations, the output directory and the templates all agree
        prod = prod if prod in {"prod", "dev", "preview"} else "dev"

        self.teiVersion = teiVersion
        self.app = app
        self.pageInfoDir = pageInfoDir
        self.prod = prod
        self.silent = silent
        self.error = False
        self.kwargs = kwargs

        # no version means: no extra path component
        teiVersionRep = f"/{teiVersion}" if teiVersion else ""

        F = app.api.F
        L = app.api.L

        locations = getImageLocations(app, prod, silent)
        repoLocation = locations.repoLocation
        self.scanDir = locations.scanDir
        self.thumbDir = locations.thumbDir
        scanRefDir = locations.scanRefDir
        self.scanRefDir = scanRefDir
        self.coversDir = locations.coversDir
        doCovers = locations.doCovers
        self.doCovers = doCovers

        outputDir = (
            f"{repoLocation}/static{teiVersionRep}/{prod}"
            if outputDir is None
            else outputDir
        )
        self.outputDir = outputDir
        self.manifestDir = f"{outputDir}/manifests"
        self.pagesDir = f"{scanRefDir}/pages"
        self.logoInDir = f"{scanRefDir}/logo"
        self.logoDir = f"{outputDir}/logo"

        if doCovers:
            self.coversHtmlIn = f"{repoLocation}/programs/covers.html"
            self.coversHtmlOut = f"{outputDir}/covers.html"

        (ok, settings) = readCfg(
            repoLocation, "iiif", "IIIF", verbose=-1 if silent else 1, plain=True
        )

        if not ok:
            self.error = True
            return

        self.settings = settings
        manifestLevel = settings.get("manifestLevel", "folder")
        self.console(f"Manifestlevel = {manifestLevel}")
        self.manifestLevel = manifestLevel

        self.templates = parseIIIF(settings, prod, "templates", **kwargs)

        # at folder level: a list of folder names;
        # otherwise: a list of pairs (folder name, list of file names)
        folders = (
            [F.folder.v(f) for f in F.otype.s("folder")]
            if manifestLevel == "folder"
            else [
                (F.folder.v(fo), [F.file.v(fi) for fi in L.d(fo, otype="file")])
                for fo in F.otype.s("folder")
            ]
        )

        self.getSizes()
        self.getRotations()
        self.getPageSeq()
        pages = self.pages
        self.folders = folders

        self.console("Collections:")

        if manifestLevel == "folder":
            for folder in folders:
                n = len(pages["pages"][folder])
                self.console(f"{folder:>5} with {n:>4} pages")
        else:
            for folder, files in folders:
                n = len(pages["pages"][folder])
                m = sum(len(x) for x in pages["pages"][folder].values())
                self.console(f"{folder:>10} with {n:>4} files and {m:>4} pages")

    def console(self, msg, **kwargs):
        """Print something to the output.

        This works exactly as `tf.core.helpers.console`

        When the silent member of the object is True, the message will be suppressed.
        """
        if not self.silent:
            console(msg, **kwargs)

    def getRotations(self):
        """Read the rotations of the pages into the member `rotateInfo`.

        The information is read from the file `rotation_pages.tsv` in the
        `scanRefDir` directory: a tab-separated file whose first line is
        skipped as a header and whose remaining lines have the page in the
        first field and an integer rotation in the second field.

        If the file does not exist, `rotateInfo` is an empty dict and a message
        is printed, also when the object is silent.
        """
        if self.error:
            return

        scanRefDir = self.scanRefDir
        rotateFile = f"{scanRefDir}/rotation_pages.tsv"

        rotateInfo = {}
        self.rotateInfo = rotateInfo

        if not fileExists(rotateFile):
            console(f"Rotation file not found: {rotateFile}")
            return

        with fileOpen(rotateFile) as rh:
            # skip the header line; the default prevents a crash on an empty file
            next(rh, None)

            for line in rh:
                line = line.rstrip("\n")

                # tolerate blank lines, e.g. at the end of the file
                if not line:
                    continue

                fields = line.split("\t")
                rotateInfo[fields[0]] = int(fields[1])

    def getSizes(self):
        """Store the result of `getImageSizes()` in the member `sizeInfo`."""
        if self.error:
            return

        self.sizeInfo = getImageSizes(self.scanRefDir, self.doCovers, self.silent)

    def getPageSeq(self):
        """Collect the page sequence and store it in the member `pages`.

        The page information comes from `getPageInfo()`. If covers are enabled,
        the sorted names (without extension) of the entries in the covers
        directory are stored in the member `covers` and added to the page
        information under the key `covers`.
        """
        if self.error:
            return

        manifestLevel = self.manifestLevel
        doCovers = self.doCovers
        zoneBased = self.settings.get("zoneBased", False)

        if doCovers:
            coversDir = self.coversDir
            # NOTE(review): the first member of the result of dirContents is
            # presumably the collection of file names - confirm in tf.core.files
            covers = sorted(
                # compare by value: an identity test on strings never excludes
                # the file name
                stripExt(f)
                for f in dirContents(coversDir)[0]
                if f != DS_STORE
            )
            self.covers = covers

        pageInfoDir = self.pageInfoDir

        pages = getPageInfo(pageInfoDir, zoneBased, manifestLevel)

        if doCovers:
            pages["covers"] = covers

        self.pages = pages

    def genPages(self, kind, folder=None, file=None):
        """Generate one manifest: for the covers, for a folder, or for a file.

        Parameters
        ----------
        kind: string
            Either `covers` or `pages`: the key in the page information
            and in the size information to work with.
        folder: string, optional None
            The folder whose pages must be put in the manifest.
            For kind `covers` the value `covers` is used as folder name.
        file: string, optional None
            The file within the folder whose pages must be put in the manifest;
            only relevant if the manifest level is not `folder`.

        Returns
        -------
        tuple or None
            The number of pages encountered and the number of distinct items
            that ended up in the manifest. Pages with the same name, size and
            rotation yield one item.
            The manifest is only written if there is at least one item.
            If the object is in error, None is returned.
        """
        if self.error:
            return

        manifestLevel = self.manifestLevel
        zoneBased = self.settings.get("zoneBased", False)

        templates = self.templates
        sizeInfo = self.sizeInfo[kind]
        rotateInfo = None if kind == "covers" else self.rotateInfo
        things = self.pages[kind]
        theseThings = things if folder is None else things.get(folder, None)

        if manifestLevel == "folder":
            thesePages = theseThings or []
        else:
            # a folder that is missing from the page info has no pages
            thesePages = (
                theseThings if file is None else (theseThings or {}).get(file, [])
            ) or []

        if kind == "covers":
            folder = "covers"

        pageItem = templates.coverItem if kind == "covers" else templates.pageItem

        itemsSeen = set()
        items = []
        nPages = 0

        for p in thesePages:
            nPages += 1

            if zoneBased:
                (p, region) = p
            else:
                region = "full"

            w, h = sizeInfo.get(p, (0, 0))
            rot = 0 if rotateInfo is None else rotateInfo.get(p, 0)

            key = (p, w, h, rot)

            # count every page, but generate an item only once
            if key in itemsSeen:
                continue

            itemsSeen.add(key)

            item = {
                k: fillinIIIF(
                    v,
                    folder=folder,
                    file=file,
                    page=p,
                    region=region,
                    width=w,
                    height=h,
                    rot=rot,
                )
                for (k, v) in pageItem.items()
            }
            items.append(item)

        pageSequence = (
            templates.coverSequence if kind == "covers" else templates.pageSequence
        )
        manifestDir = self.manifestDir

        data = {
            k: fillinIIIF(v, folder=folder, file=file)
            for (k, v) in pageSequence.items()
        }
        data["items"] = items

        nItems = len(items)

        if nItems:
            writeJson(
                data,
                asFile=(
                    f"{manifestDir}/{folder}.json"
                    if manifestLevel == "folder"
                    else f"{manifestDir}/{folder}/{file}.json"
                ),
            )

        return (nPages, nItems)

    def manifests(self):
        """Generate all manifests and copy the logo directory.

        The manifest directory is initialized afresh. If covers are enabled,
        the covers html file is copied to the output directory with the
        place holder `«server»` replaced by the server of the current `prod`
        mode, and a manifest for the covers is generated.
        Then a manifest is generated per folder, or per file within a folder,
        depending on the manifest level; at file level, a folder directory that
        ends up without items is removed again.

        Finally the logo directory is copied to the output directory; if it does
        not exist an error message is printed, also when the object is silent.
        """
        if self.error:
            return

        folders = self.folders
        manifestDir = self.manifestDir
        logoInDir = self.logoInDir
        logoDir = self.logoDir
        doCovers = self.doCovers
        manifestLevel = self.manifestLevel

        prod = self.prod
        settings = self.settings
        server = settings["switches"][prod]["server"]

        initTree(manifestDir, fresh=True)

        if doCovers:
            coversHtmlIn = self.coversHtmlIn
            coversHtmlOut = self.coversHtmlOut

            with fileOpen(coversHtmlIn) as fh:
                coversHtml = fh.read()

            coversHtml = coversHtml.replace("«server»", server)

            with fileOpen(coversHtmlOut, "w") as fh:
                fh.write(coversHtml)

            self.genPages("covers")

        # totals: pages, items, manifests
        p = 0
        i = 0
        m = 0

        if manifestLevel == "folder":
            for folder in folders:
                (thisP, thisI) = self.genPages("pages", folder=folder)

                p += thisP
                i += thisI

                if thisI:
                    m += 1
        else:
            for folder, files in folders:
                folderDir = f"{manifestDir}/{folder}"
                initTree(folderDir, fresh=True, gentle=False)

                folderI = 0

                for file in files:
                    (thisP, thisI) = self.genPages("pages", folder=folder, file=file)

                    p += thisP
                    i += thisI

                    if thisI:
                        m += 1

                    folderI += thisI

                # do not leave empty folder directories behind
                if folderI == 0:
                    dirRemove(folderDir)

        if dirExists(logoInDir):
            dirCopy(logoInDir, logoDir)
        else:
            console(f"Directory with logos not found: {logoInDir}", error=True)

        self.console(
            f"{m} IIIF manifests with {i} items for {p} pages generated in {manifestDir}"
        )
Methods
def console(self, msg, **kwargs)
-
Print something to the output.
This works exactly as `tf.core.helpers.console`.
When the silent member of the object is True, the message will be suppressed.
def genPages(self, kind, folder=None, file=None)
def getPageSeq(self)
def getRotations(self)
def getSizes(self)
def manifests(self)