Module ti.info.iiif
Expand source code Browse git
import collections
from ..kit.files import (
writeJson,
fileOpen,
fileExists,
fileCopy,
initTree,
dirContents,
dirRemove,
dirMake,
dirNm,
readYaml,
stripExt,
abspath,
)
from ..kit.generic import AttrDict
from ..kit.helpers import console, readCfg
from .helpers import PAGES
DS_STORE = ".DS_Store"
FILE_NOT_FOUND = "filenotfound"
FILE_NOT_FOUND_SIZES = (480, 640)
def fillinIIIF(data, **kwargs):
tpd = type(data)
if tpd is str:
for k, v in kwargs.items():
pattern = "{" + k + "}"
if type(v) is int and data == pattern:
data = v
break
else:
data = data.replace(pattern, str(v))
return data
if tpd is list:
return [fillinIIIF(item, **kwargs) for item in data]
if tpd is dict:
return {k: fillinIIIF(v, **kwargs) for (k, v) in data.items()}
return data
def parseIIIF(settings, selector, **kwargs):
"""Parse the iiif yml file and deliver a filled in section.
The iiif.yml file contains constants which are used to define IIIF things
via templates.
The top-level section `templates` contains fragments from which manifests can be
constructed.
This function prepares the constants and then uses them to assemble the section
in the yaml file based on the parameter `selector`.
The constants are given as a list of dictionaries, where each value in such a
dictionary may use constants defined in previous dictionaries.
Parameters
----------
selector: string
Which top-level section we are going to grab out of the iiif.yml file.
This can be any section in the yaml file, except `constants`.
kwargs: dict
Additional optional parameters to pass as key value pairs to
the iiif config file. These values will be filled in for place holders
of the form `[`*arg*`]`.
Returns
-------
void or tuple
If the constants do not resolve, or non-existing constants or arguments
are being refererred to, None is returned.
Otherwise, a tuple is returned with two members:
* an dict consisting of the resolved constants
* an AttrDict with resolved key value pairs from that section.
"""
errors = []
def resolveConstants():
source = settings.get("constants", [])
constants = {}
for i, batch in enumerate(source):
for k, v in batch.items():
if type(v) is str:
for c, w in constants.items():
v = v.replace(f"«{c}»", str(w))
if "«" in v or "»" in v:
errors.append(
f"constant batch {i + 1}: value for {k} not resolved: {v}"
)
constants[k] = v
return constants
def substituteConstants(data, constants, kwargs):
tpd = type(data)
if tpd is str:
for k, v in constants.items():
pattern = f"«{k}»"
if type(v) is int and data == pattern:
data = v
break
else:
data = data.replace(pattern, str(v))
if type(data) is str:
for k, v in kwargs.items():
pattern = f"[[{k}]]"
if type(v) is int and data == pattern:
data = v
break
else:
data = data.replace(pattern, str(v))
if "«" in data or "»" in data or "[[" in data or "]]" in data:
errors.append(f"value not completely resolved: {data}")
return data
if tpd is list:
return [substituteConstants(item, constants, kwargs) for item in data]
if tpd is dict:
return {
k: substituteConstants(v, constants, kwargs) for (k, v) in data.items()
}
return data
constants = resolveConstants()
if len(errors):
for e in errors:
console(e)
return None
return (
constants,
AttrDict(
{
x: substituteConstants(xText, constants, kwargs)
for (x, xText) in settings.get(selector, {}).items()
}
),
)
class IIIF:
def __init__(
self,
infoDir,
scanInfoDir,
configPath,
verbose=0,
):
"""Class for generating IIIF manifests.
Parameters
----------
infoDir: string
Directory where the files with page information are, typically containing
the result of an inventory by `ti.info.tei`
scanInfoDir: string
Directory where the files with scan information are, typically containing
the sizes, colorspaces en rotations of the scans.
configPath: string
The configuration file that directs the shape of the manifests.
verbose: integer, optional -1
Produce no (-1), some (0) or many (1) progress and reporting messages
"""
self.infoDir = infoDir
self.scanInfoDir = scanInfoDir
self.configPath = configPath
self.verbose = verbose
self.error = False
(ok, settings) = readCfg(configPath, "iiif", verbose=verbose, plain=True)
if verbose != -1:
console(f"Source information taken from {infoDir}")
if not ok:
self.error = True
return
self.settings = settings
myDir = dirNm(abspath(__file__))
self.myDir = myDir
def manifests(self, manifestDir, verbose=None, **kwargs):
"""Generate manifests.
Parameters
----------
manifestDir: string
Manifests and logo will be generated in this directory.
verbose: integer, optional None
Produce no (-1), some (0) or many (1) progress and reporting messages
If `None`, the value will be taken from the corresponding object member.
kwargs: dict
Additional optional parameters to pass as key value pairs to
the iiif config file. These values will be filled in for place holders
of the form `[`*arg*`]`.
"""
if self.error:
return
if verbose is None:
verbose = self.verbose
self.manifestDir = manifestDir
infoDir = self.infoDir
settings = self.settings
fileInfoFile = f"{infoDir}/files.yml"
if not fileExists(fileInfoFile):
console(f"File with folder/file info not found: {fileInfoFile}", error=True)
self.error = True
return
files = readYaml(asFile=fileInfoFile, plain=True)
excludedFolders = {
k for k, v in settings.get("excludedFolders", {}).items() if v
}
if type(files) is not list:
console(f"The file info in {fileInfoFile} should be a list", error=True)
self.error = True
return
errors = []
for item in files:
if type(item) is not list or len(item) != 2:
errors.append(f"Folder {repr(item)} : not a pair")
continue
(folder, fls) = item
if type(fls) is not list:
errors.append(f"Folder {folder} : {repr(fls)} is not a list")
continue
for file in fls:
if type(file) is not str:
errors.append(f"Folder {folder}: {repr(file)} is not a string")
if len(errors):
for error in errors:
console(error, error=True)
self.errors = True
return
iFiles = [
(fold, [f.removesuffix(".xml") for f in fl])
for (fold, fl) in files
if fold not in excludedFolders
]
nFolders = len(iFiles)
nFiles = sum(len(x[1]) for x in iFiles)
self.files = iFiles
files = self.files
manifestLevel = settings.get("manifestLevel", "folder")
self.manifestLevel = manifestLevel
(self.constants, self.templates) = parseIIIF(settings, "templates", **kwargs)
if verbose > -1:
console("Parameters passed to manifest generation:")
for k, v in sorted(kwargs.items()):
console(f"\t{k:<10} = {v}")
if verbose == 1:
console("Values for the constants of the manifest generation:")
for k, v in sorted(self.constants.items()):
console(f"\t{k:<10} = {v}")
excludedFoldersStr = ", ".join(sorted(excludedFolders))
console(f"Manifestlevel = {manifestLevel}")
console(f"Excluded {manifestLevel} items: = {excludedFoldersStr}")
console(f"{nFolders} folders and {nFiles} files, not counting exclusions")
self.getSizes()
self.getRotations()
self.getPageSeq()
pages = self.pages
properPages = pages.get("pages", {})
mLevelFolders = manifestLevel == "folder"
if verbose > -1:
console("Folders:")
for item in files:
folder = item if mLevelFolders else item[0]
n = len(properPages[folder]) if folder in properPages else 0
m = (
None
if mLevelFolders
else (
sum(len(x) for x in properPages[folder].values())
if folder in properPages
else 0
)
)
nP = n if mLevelFolders else m
nF = m if mLevelFolders else n
pageRep = f"{nP:>4} pages"
fileRep = "" if nF is None else f"{nF:>4} files and "
if folder not in properPages:
console(
f"\t{folder:<10} with {fileRep}{pageRep} (not excluded in config)",
error=True,
)
self.error = True
continue
if verbose > -1:
console(f"\t{folder:<10} with {fileRep}{pageRep}")
manifestDir = self.manifestDir
manifestLevel = self.manifestLevel
infoDir = self.infoDir
settings = self.settings
initTree(manifestDir, fresh=True)
missingFiles = {}
self.missingFiles = missingFiles
p = 0
i = 0
m = 0
if manifestLevel == "folder":
for folder in files:
(thisP, thisI) = self.genPages("pages", folder=folder)
p += thisP
i += thisI
if thisI:
m += 1
else:
for folder, fls in files:
folderDir = f"{manifestDir}/{folder}"
initTree(folderDir, fresh=True, gentle=False)
folderI = 0
for file in fls:
(thisP, thisI) = self.genPages("pages", folder=folder, file=file)
p += thisP
i += thisI
if thisI:
m += 1
folderI += thisI
if folderI == 0:
dirRemove(folderDir)
if len(missingFiles):
console("Missing image files:", error=True)
with fileOpen(f"{infoDir}/facsMissing.tsv", "w") as fh:
fh.write("kind\tfile\tpage\tn\n")
nMissing = 0
for kind, fls in missingFiles.items():
console(f"\t{kind}:", error=True)
for file, pages in fls.items():
console(f"\t\t{file}:", error=True)
for page, n in pages.items():
console(f"\t\t\t{n:>3} x {page}", error=True)
nMissing += n
fh.write(f"{kind}\t{file}\t{page}\t{n}\n")
console(f"\ttotal occurrences of a missing file: {nMissing}")
if verbose > -1:
console(
f"{m} IIIF manifests with {i} items "
f"for {p} pages generated in {manifestDir}"
)
def getRotations(self):
if self.error:
return
verbose = self.verbose
scanInfoDir = self.scanInfoDir
prefix = "rotation_"
suffix = ".tsv"
rotateInfo = {}
self.rotateInfo = rotateInfo
n = 0
nPages = 0
for f in dirContents(scanInfoDir)[0]:
if not f.startswith(prefix) or not f.endswith(suffix):
continue
kind = f.removeprefix(prefix).removesuffix(suffix).split("_", 1)[-1]
if kind != PAGES:
continue
with fileOpen(f"{scanInfoDir}/{f}") as rh:
next(rh)
for line in rh:
fields = line.rstrip("\n").split("\t")
p = fields[0]
rot = int(fields[1])
rotateInfo.setdefault(kind, {})[p] = rot
n += 1
if rot != 0:
nPages += 1
if n == 0:
if verbose > -1:
console(f"No rotation files found in {scanInfoDir}")
return
if verbose > -1:
console(f"{nPages} pages have nonzero rotations")
def getSizes(self):
if self.error:
return
verbose = self.verbose
scanInfoDir = self.scanInfoDir
prefix = "sizes_"
suffix = ".tsv"
sizeInfo = {}
self.sizeInfo = sizeInfo
maxW, maxH = 0, 0
totW, totH = 0, 0
n = 0
ws, hs = [], []
for f in dirContents(scanInfoDir)[0]:
if not f.startswith(prefix) or not f.endswith(suffix):
continue
kind = f.removeprefix(prefix).removesuffix(suffix).split("_", 1)[-1]
if kind != PAGES:
continue
with fileOpen(f"{scanInfoDir}/{f}") as rh:
next(rh)
for line in rh:
fields = line.rstrip("\n").split("\t")
p = fields[0]
(w, h) = (int(x) for x in fields[1:3])
sizeInfo.setdefault(kind, {})[p] = (w, h)
ws.append(w)
hs.append(h)
n += 1
totW += w
totH += h
if w > maxW:
maxW = w
if h > maxH:
maxH = h
if n == 0:
console(f"No sizes files found in {scanInfoDir}", error=True)
return
avW = int(round(totW / n))
avH = int(round(totH / n))
devW = int(round(sum(abs(w - avW) for w in ws) / n))
devH = int(round(sum(abs(h - avH) for h in hs) / n))
if verbose > -1:
console(f"Maximum dimensions: W = {maxW:>4} H = {maxH:>4}")
console(f"Average dimensions: W = {avW:>4} H = {avH:>4}")
console(f"Average deviation: W = {devW:>4} H = {devH:>4}")
def getPageSeq(self):
if self.error:
return
manifestLevel = self.manifestLevel
zoneBased = self.settings.get("zoneBased", False)
verbose = self.verbose
infoDir = self.infoDir
facsFile = f"{infoDir}/facs.yml"
if not fileExists(facsFile):
console("No page-facsimile relating information found", error=True)
return
pagesProto = readYaml(asFile=facsFile, plain=True, preferTuples=False)
if verbose > -1:
console(f"Using facs file info file {facsFile}")
pages = {}
if zoneBased:
facsMappingFile = f"{infoDir}/facsMapping.yml"
if fileExists(facsMappingFile):
console(f"Using facs mapping file {facsMappingFile}")
facsMapping = readYaml(
asFile=facsMappingFile, plain=True, preferTuples=False
)
for path, ps in pagesProto.items():
pathComps = path.split("/")
folder = pathComps[0]
if manifestLevel == "file":
file = stripExt(pathComps[1])
mapping = facsMapping.get(path, {})
mappedPs = [mapping.get(p, p) for p in ps]
pagesDest = pages.setdefault(
folder, [] if manifestLevel == "folder" else {}
)
if manifestLevel == "folder":
pagesDest.extend(mappedPs)
else:
pagesDest.setdefault(file, []).extend(mappedPs)
else:
console(f"No facs mapping file {facsMappingFile}", error=True)
else:
for path, ps in pagesProto.items():
(folder, file) = path.split("/")
file = stripExt(file)
pagesDest = pages.setdefault(
folder, [] if manifestLevel == "folder" else {}
)
pages.setdefault(folder, []).extend(ps)
if manifestLevel == "folder":
pagesDest.extend(ps)
else:
pagesDest.setdefault(file, []).extend(ps)
if pages is None:
console("Could not assemble page sequence info", error=True)
else:
result = dict(pages=pages)
self.pages = result
def genPages(self, kind, folder=None, file=None):
if self.error:
return (0, 0)
constants = self.constants
settings = self.settings
scanInfoDir = self.scanInfoDir
missingFiles = self.missingFiles
manifestLevel = self.manifestLevel
zoneBased = settings.get("zoneBased", False)
templates = self.templates
sizeInfo = self.sizeInfo.get(kind, {})
rotateInfo = self.rotateInfo.get(kind, {})
ext = constants.get("ext", "jpg")
things = self.pages[kind]
theseThings = things if folder is None else things.get(folder, None)
if manifestLevel == "folder":
thesePages = theseThings or []
else:
thesePages = (
theseThings if file is None else (theseThings or {}).get(file, [])
)
pageItem = templates.pageItem
itemsSeen = set()
items = []
nPages = 0
for p in thesePages:
nPages += 1
if zoneBased:
if type(p) is str:
(p, region) = (p, "full")
elif len(p) == 0:
(p, region) = ("NA", "full")
elif len(p) == 1:
(p, region) = (p[0], "full")
else:
(p, region) = p[0:2]
else:
region = "full"
scanPresent = p in sizeInfo
if scanPresent:
w, h = sizeInfo.get(p, (0, 0))
rot = 0 if rotateInfo is None else rotateInfo.get(p, 0)
else:
missingFiles.setdefault(kind, {}).setdefault(
file, collections.Counter()
)[p] += 1
p = FILE_NOT_FOUND
w, h = FILE_NOT_FOUND_SIZES
rot = 0
key = (p, w, h, rot)
if key in itemsSeen:
continue
itemsSeen.add(key)
if not scanPresent:
myDir = self.myDir
fof = f"{FILE_NOT_FOUND}.{ext}"
fofInPath = f"{myDir}/fof/{fof}"
fofOutDir = f"{scanInfoDir}/{kind}"
fofOutPath = f"{fofOutDir}/{fof}"
if not fileExists(fofOutPath):
dirMake(fofOutDir)
fileCopy(fofInPath, fofOutPath)
item = {}
for k, v in pageItem.items():
v = fillinIIIF(
v,
folder=folder,
file=file,
page=p,
region=region,
width=w,
height=h,
rot=rot,
)
item[k] = v
items.append(item)
pageSequence = templates.pageSequence
manifestDir = self.manifestDir
data = {}
for k, v in pageSequence.items():
v = fillinIIIF(v, folder=folder, file=file)
data[k] = v
data["items"] = items
nItems = len(items)
if nItems:
writeJson(
data,
asFile=(
f"{manifestDir}/{folder}.json"
if manifestLevel == "folder"
else f"{manifestDir}/{folder}/{file}.json"
),
)
return (nPages, nItems)
Functions
def fillinIIIF(data, **kwargs)
-
Expand source code Browse git
def fillinIIIF(data, **kwargs): tpd = type(data) if tpd is str: for k, v in kwargs.items(): pattern = "{" + k + "}" if type(v) is int and data == pattern: data = v break else: data = data.replace(pattern, str(v)) return data if tpd is list: return [fillinIIIF(item, **kwargs) for item in data] if tpd is dict: return {k: fillinIIIF(v, **kwargs) for (k, v) in data.items()} return data
def parseIIIF(settings, selector, **kwargs)
-
Parse the iiif yml file and deliver a filled in section.
The iiif.yml file contains constants which are used to define IIIF things via templates.
The top-level section
templates
contains fragments from which manifests can be constructed.This function prepares the constants and then uses them to assemble the section in the yaml file based on the parameter
selector
.The constants are given as a list of dictionaries, where each value in such a dictionary may use constants defined in previous dictionaries.
Parameters
selector
:string
- Which top-level section we are going to grab out of the iiif.yml file.
This can be any section in the yaml file, except
constants
. kwargs
:dict
- Additional optional parameters to pass as key value pairs to
the iiif config file. These values will be filled in for place holders
of the form
[
arg]
.
Returns
void
ortuple
-
If the constants do not resolve, or non-existing constants or arguments are being refererred to, None is returned.
Otherwise, a tuple is returned with two members:
- an dict consisting of the resolved constants
- an AttrDict with resolved key value pairs from that section.
Expand source code Browse git
def parseIIIF(settings, selector, **kwargs): """Parse the iiif yml file and deliver a filled in section. The iiif.yml file contains constants which are used to define IIIF things via templates. The top-level section `templates` contains fragments from which manifests can be constructed. This function prepares the constants and then uses them to assemble the section in the yaml file based on the parameter `selector`. The constants are given as a list of dictionaries, where each value in such a dictionary may use constants defined in previous dictionaries. Parameters ---------- selector: string Which top-level section we are going to grab out of the iiif.yml file. This can be any section in the yaml file, except `constants`. kwargs: dict Additional optional parameters to pass as key value pairs to the iiif config file. These values will be filled in for place holders of the form `[`*arg*`]`. Returns ------- void or tuple If the constants do not resolve, or non-existing constants or arguments are being refererred to, None is returned. Otherwise, a tuple is returned with two members: * an dict consisting of the resolved constants * an AttrDict with resolved key value pairs from that section. """ errors = [] def resolveConstants(): source = settings.get("constants", []) constants = {} for i, batch in enumerate(source): for k, v in batch.items(): if type(v) is str: for c, w in constants.items(): v = v.replace(f"«{c}»", str(w)) if "«" in v or "»" in v: errors.append( f"constant batch {i + 1}: value for {k} not resolved: {v}" ) constants[k] = v return constants def substituteConstants(data, constants, kwargs): tpd = type(data) if tpd is str: for k, v in constants.items(): pattern = f"«{k}»" if type(v) is int and data == pattern: data = v break else: data = data.replace(pattern, str(v)) if type(data) is str: for k, v in kwargs.items(): pattern = f"[[{k}]]" if type(v) is int and data == pattern: data = v break else: data = data.replace(pattern, str(v)) if "«" in data or "»" in data or "[[" in data or "]]" in data: errors.append(f"value not completely resolved: {data}") return data if tpd is list: return [substituteConstants(item, constants, kwargs) for item in data] if tpd is dict: return { k: substituteConstants(v, constants, kwargs) for (k, v) in data.items() } return data constants = resolveConstants() if len(errors): for e in errors: console(e) return None return ( constants, AttrDict( { x: substituteConstants(xText, constants, kwargs) for (x, xText) in settings.get(selector, {}).items() } ), )
Classes
class IIIF (infoDir, scanInfoDir, configPath, verbose=0)
-
Class for generating IIIF manifests.
Parameters
infoDir
:string
- Directory where the files with page information are, typically containing
the result of an inventory by
ti.info.tei
scanInfoDir
:string
- Directory where the files with scan information are, typically containing the sizes, colorspaces en rotations of the scans.
configPath
:string
- The configuration file that directs the shape of the manifests.
verbose
:integer
, optional-1
- Produce no (-1), some (0) or many (1) progress and reporting messages
Expand source code Browse git
class IIIF: def __init__( self, infoDir, scanInfoDir, configPath, verbose=0, ): """Class for generating IIIF manifests. Parameters ---------- infoDir: string Directory where the files with page information are, typically containing the result of an inventory by `ti.info.tei` scanInfoDir: string Directory where the files with scan information are, typically containing the sizes, colorspaces en rotations of the scans. configPath: string The configuration file that directs the shape of the manifests. verbose: integer, optional -1 Produce no (-1), some (0) or many (1) progress and reporting messages """ self.infoDir = infoDir self.scanInfoDir = scanInfoDir self.configPath = configPath self.verbose = verbose self.error = False (ok, settings) = readCfg(configPath, "iiif", verbose=verbose, plain=True) if verbose != -1: console(f"Source information taken from {infoDir}") if not ok: self.error = True return self.settings = settings myDir = dirNm(abspath(__file__)) self.myDir = myDir def manifests(self, manifestDir, verbose=None, **kwargs): """Generate manifests. Parameters ---------- manifestDir: string Manifests and logo will be generated in this directory. verbose: integer, optional None Produce no (-1), some (0) or many (1) progress and reporting messages If `None`, the value will be taken from the corresponding object member. kwargs: dict Additional optional parameters to pass as key value pairs to the iiif config file. These values will be filled in for place holders of the form `[`*arg*`]`. """ if self.error: return if verbose is None: verbose = self.verbose self.manifestDir = manifestDir infoDir = self.infoDir settings = self.settings fileInfoFile = f"{infoDir}/files.yml" if not fileExists(fileInfoFile): console(f"File with folder/file info not found: {fileInfoFile}", error=True) self.error = True return files = readYaml(asFile=fileInfoFile, plain=True) excludedFolders = { k for k, v in settings.get("excludedFolders", {}).items() if v } if type(files) is not list: console(f"The file info in {fileInfoFile} should be a list", error=True) self.error = True return errors = [] for item in files: if type(item) is not list or len(item) != 2: errors.append(f"Folder {repr(item)} : not a pair") continue (folder, fls) = item if type(fls) is not list: errors.append(f"Folder {folder} : {repr(fls)} is not a list") continue for file in fls: if type(file) is not str: errors.append(f"Folder {folder}: {repr(file)} is not a string") if len(errors): for error in errors: console(error, error=True) self.errors = True return iFiles = [ (fold, [f.removesuffix(".xml") for f in fl]) for (fold, fl) in files if fold not in excludedFolders ] nFolders = len(iFiles) nFiles = sum(len(x[1]) for x in iFiles) self.files = iFiles files = self.files manifestLevel = settings.get("manifestLevel", "folder") self.manifestLevel = manifestLevel (self.constants, self.templates) = parseIIIF(settings, "templates", **kwargs) if verbose > -1: console("Parameters passed to manifest generation:") for k, v in sorted(kwargs.items()): console(f"\t{k:<10} = {v}") if verbose == 1: console("Values for the constants of the manifest generation:") for k, v in sorted(self.constants.items()): console(f"\t{k:<10} = {v}") excludedFoldersStr = ", ".join(sorted(excludedFolders)) console(f"Manifestlevel = {manifestLevel}") console(f"Excluded {manifestLevel} items: = {excludedFoldersStr}") console(f"{nFolders} folders and {nFiles} files, not counting exclusions") self.getSizes() self.getRotations() self.getPageSeq() pages = self.pages properPages = pages.get("pages", {}) mLevelFolders = manifestLevel == "folder" if verbose > -1: console("Folders:") for item in files: folder = item if mLevelFolders else item[0] n = len(properPages[folder]) if folder in properPages else 0 m = ( None if mLevelFolders else ( sum(len(x) for x in properPages[folder].values()) if folder in properPages else 0 ) ) nP = n if mLevelFolders else m nF = m if mLevelFolders else n pageRep = f"{nP:>4} pages" fileRep = "" if nF is None else f"{nF:>4} files and " if folder not in properPages: console( f"\t{folder:<10} with {fileRep}{pageRep} (not excluded in config)", error=True, ) self.error = True continue if verbose > -1: console(f"\t{folder:<10} with {fileRep}{pageRep}") manifestDir = self.manifestDir manifestLevel = self.manifestLevel infoDir = self.infoDir settings = self.settings initTree(manifestDir, fresh=True) missingFiles = {} self.missingFiles = missingFiles p = 0 i = 0 m = 0 if manifestLevel == "folder": for folder in files: (thisP, thisI) = self.genPages("pages", folder=folder) p += thisP i += thisI if thisI: m += 1 else: for folder, fls in files: folderDir = f"{manifestDir}/{folder}" initTree(folderDir, fresh=True, gentle=False) folderI = 0 for file in fls: (thisP, thisI) = self.genPages("pages", folder=folder, file=file) p += thisP i += thisI if thisI: m += 1 folderI += thisI if folderI == 0: dirRemove(folderDir) if len(missingFiles): console("Missing image files:", error=True) with fileOpen(f"{infoDir}/facsMissing.tsv", "w") as fh: fh.write("kind\tfile\tpage\tn\n") nMissing = 0 for kind, fls in missingFiles.items(): console(f"\t{kind}:", error=True) for file, pages in fls.items(): console(f"\t\t{file}:", error=True) for page, n in pages.items(): console(f"\t\t\t{n:>3} x {page}", error=True) nMissing += n fh.write(f"{kind}\t{file}\t{page}\t{n}\n") console(f"\ttotal occurrences of a missing file: {nMissing}") if verbose > -1: console( f"{m} IIIF manifests with {i} items " f"for {p} pages generated in {manifestDir}" ) def getRotations(self): if self.error: return verbose = self.verbose scanInfoDir = self.scanInfoDir prefix = "rotation_" suffix = ".tsv" rotateInfo = {} self.rotateInfo = rotateInfo n = 0 nPages = 0 for f in dirContents(scanInfoDir)[0]: if not f.startswith(prefix) or not f.endswith(suffix): continue kind = f.removeprefix(prefix).removesuffix(suffix).split("_", 1)[-1] if kind != PAGES: continue with fileOpen(f"{scanInfoDir}/{f}") as rh: next(rh) for line in rh: fields = line.rstrip("\n").split("\t") p = fields[0] rot = int(fields[1]) rotateInfo.setdefault(kind, {})[p] = rot n += 1 if rot != 0: nPages += 1 if n == 0: if verbose > -1: console(f"No rotation files found in {scanInfoDir}") return if verbose > -1: console(f"{nPages} pages have nonzero rotations") def getSizes(self): if self.error: return verbose = self.verbose scanInfoDir = self.scanInfoDir prefix = "sizes_" suffix = ".tsv" sizeInfo = {} self.sizeInfo = sizeInfo maxW, maxH = 0, 0 totW, totH = 0, 0 n = 0 ws, hs = [], [] for f in dirContents(scanInfoDir)[0]: if not f.startswith(prefix) or not f.endswith(suffix): continue kind = f.removeprefix(prefix).removesuffix(suffix).split("_", 1)[-1] if kind != PAGES: continue with fileOpen(f"{scanInfoDir}/{f}") as rh: next(rh) for line in rh: fields = line.rstrip("\n").split("\t") p = fields[0] (w, h) = (int(x) for x in fields[1:3]) sizeInfo.setdefault(kind, {})[p] = (w, h) ws.append(w) hs.append(h) n += 1 totW += w totH += h if w > maxW: maxW = w if h > maxH: maxH = h if n == 0: console(f"No sizes files found in {scanInfoDir}", error=True) return avW = int(round(totW / n)) avH = int(round(totH / n)) devW = int(round(sum(abs(w - avW) for w in ws) / n)) devH = int(round(sum(abs(h - avH) for h in hs) / n)) if verbose > -1: console(f"Maximum dimensions: W = {maxW:>4} H = {maxH:>4}") console(f"Average dimensions: W = {avW:>4} H = {avH:>4}") console(f"Average deviation: W = {devW:>4} H = {devH:>4}") def getPageSeq(self): if self.error: return manifestLevel = self.manifestLevel zoneBased = self.settings.get("zoneBased", False) verbose = self.verbose infoDir = self.infoDir facsFile = f"{infoDir}/facs.yml" if not fileExists(facsFile): console("No page-facsimile relating information found", error=True) return pagesProto = readYaml(asFile=facsFile, plain=True, preferTuples=False) if verbose > -1: console(f"Using facs file info file {facsFile}") pages = {} if zoneBased: facsMappingFile = f"{infoDir}/facsMapping.yml" if fileExists(facsMappingFile): console(f"Using facs mapping file {facsMappingFile}") facsMapping = readYaml( asFile=facsMappingFile, plain=True, preferTuples=False ) for path, ps in pagesProto.items(): pathComps = path.split("/") folder = pathComps[0] if manifestLevel == "file": file = stripExt(pathComps[1]) mapping = facsMapping.get(path, {}) mappedPs = [mapping.get(p, p) for p in ps] pagesDest = pages.setdefault( folder, [] if manifestLevel == "folder" else {} ) if manifestLevel == "folder": pagesDest.extend(mappedPs) else: pagesDest.setdefault(file, []).extend(mappedPs) else: console(f"No facs mapping file {facsMappingFile}", error=True) else: for path, ps in pagesProto.items(): (folder, file) = path.split("/") file = stripExt(file) pagesDest = pages.setdefault( folder, [] if manifestLevel == "folder" else {} ) pages.setdefault(folder, []).extend(ps) if manifestLevel == "folder": pagesDest.extend(ps) else: pagesDest.setdefault(file, []).extend(ps) if pages is None: console("Could not assemble page sequence info", error=True) else: result = dict(pages=pages) self.pages = result def genPages(self, kind, folder=None, file=None): if self.error: return (0, 0) constants = self.constants settings = self.settings scanInfoDir = self.scanInfoDir missingFiles = self.missingFiles manifestLevel = self.manifestLevel zoneBased = settings.get("zoneBased", False) templates = self.templates sizeInfo = self.sizeInfo.get(kind, {}) rotateInfo = self.rotateInfo.get(kind, {}) ext = constants.get("ext", "jpg") things = self.pages[kind] theseThings = things if folder is None else things.get(folder, None) if manifestLevel == "folder": thesePages = theseThings or [] else: thesePages = ( theseThings if file is None else (theseThings or {}).get(file, []) ) pageItem = templates.pageItem itemsSeen = set() items = [] nPages = 0 for p in thesePages: nPages += 1 if zoneBased: if type(p) is str: (p, region) = (p, "full") elif len(p) == 0: (p, region) = ("NA", "full") elif len(p) == 1: (p, region) = (p[0], "full") else: (p, region) = p[0:2] else: region = "full" scanPresent = p in sizeInfo if scanPresent: w, h = sizeInfo.get(p, (0, 0)) rot = 0 if rotateInfo is None else rotateInfo.get(p, 0) else: missingFiles.setdefault(kind, {}).setdefault( file, collections.Counter() )[p] += 1 p = FILE_NOT_FOUND w, h = FILE_NOT_FOUND_SIZES rot = 0 key = (p, w, h, rot) if key in itemsSeen: continue itemsSeen.add(key) if not scanPresent: myDir = self.myDir fof = f"{FILE_NOT_FOUND}.{ext}" fofInPath = f"{myDir}/fof/{fof}" fofOutDir = f"{scanInfoDir}/{kind}" fofOutPath = f"{fofOutDir}/{fof}" if not fileExists(fofOutPath): dirMake(fofOutDir) fileCopy(fofInPath, fofOutPath) item = {} for k, v in pageItem.items(): v = fillinIIIF( v, folder=folder, file=file, page=p, region=region, width=w, height=h, rot=rot, ) item[k] = v items.append(item) pageSequence = templates.pageSequence manifestDir = self.manifestDir data = {} for k, v in pageSequence.items(): v = fillinIIIF(v, folder=folder, file=file) data[k] = v data["items"] = items nItems = len(items) if nItems: writeJson( data, asFile=( f"{manifestDir}/{folder}.json" if manifestLevel == "folder" else f"{manifestDir}/{folder}/{file}.json" ), ) return (nPages, nItems)
Methods
def genPages(self, kind, folder=None, file=None)
-
Expand source code Browse git
def genPages(self, kind, folder=None, file=None): if self.error: return (0, 0) constants = self.constants settings = self.settings scanInfoDir = self.scanInfoDir missingFiles = self.missingFiles manifestLevel = self.manifestLevel zoneBased = settings.get("zoneBased", False) templates = self.templates sizeInfo = self.sizeInfo.get(kind, {}) rotateInfo = self.rotateInfo.get(kind, {}) ext = constants.get("ext", "jpg") things = self.pages[kind] theseThings = things if folder is None else things.get(folder, None) if manifestLevel == "folder": thesePages = theseThings or [] else: thesePages = ( theseThings if file is None else (theseThings or {}).get(file, []) ) pageItem = templates.pageItem itemsSeen = set() items = [] nPages = 0 for p in thesePages: nPages += 1 if zoneBased: if type(p) is str: (p, region) = (p, "full") elif len(p) == 0: (p, region) = ("NA", "full") elif len(p) == 1: (p, region) = (p[0], "full") else: (p, region) = p[0:2] else: region = "full" scanPresent = p in sizeInfo if scanPresent: w, h = sizeInfo.get(p, (0, 0)) rot = 0 if rotateInfo is None else rotateInfo.get(p, 0) else: missingFiles.setdefault(kind, {}).setdefault( file, collections.Counter() )[p] += 1 p = FILE_NOT_FOUND w, h = FILE_NOT_FOUND_SIZES rot = 0 key = (p, w, h, rot) if key in itemsSeen: continue itemsSeen.add(key) if not scanPresent: myDir = self.myDir fof = f"{FILE_NOT_FOUND}.{ext}" fofInPath = f"{myDir}/fof/{fof}" fofOutDir = f"{scanInfoDir}/{kind}" fofOutPath = f"{fofOutDir}/{fof}" if not fileExists(fofOutPath): dirMake(fofOutDir) fileCopy(fofInPath, fofOutPath) item = {} for k, v in pageItem.items(): v = fillinIIIF( v, folder=folder, file=file, page=p, region=region, width=w, height=h, rot=rot, ) item[k] = v items.append(item) pageSequence = templates.pageSequence manifestDir = self.manifestDir data = {} for k, v in pageSequence.items(): v = fillinIIIF(v, folder=folder, file=file) data[k] = v data["items"] = items nItems = len(items) if nItems: writeJson( data, asFile=( f"{manifestDir}/{folder}.json" if manifestLevel == "folder" else f"{manifestDir}/{folder}/{file}.json" ), ) return (nPages, nItems)
def getPageSeq(self)
-
Expand source code Browse git
def getPageSeq(self): if self.error: return manifestLevel = self.manifestLevel zoneBased = self.settings.get("zoneBased", False) verbose = self.verbose infoDir = self.infoDir facsFile = f"{infoDir}/facs.yml" if not fileExists(facsFile): console("No page-facsimile relating information found", error=True) return pagesProto = readYaml(asFile=facsFile, plain=True, preferTuples=False) if verbose > -1: console(f"Using facs file info file {facsFile}") pages = {} if zoneBased: facsMappingFile = f"{infoDir}/facsMapping.yml" if fileExists(facsMappingFile): console(f"Using facs mapping file {facsMappingFile}") facsMapping = readYaml( asFile=facsMappingFile, plain=True, preferTuples=False ) for path, ps in pagesProto.items(): pathComps = path.split("/") folder = pathComps[0] if manifestLevel == "file": file = stripExt(pathComps[1]) mapping = facsMapping.get(path, {}) mappedPs = [mapping.get(p, p) for p in ps] pagesDest = pages.setdefault( folder, [] if manifestLevel == "folder" else {} ) if manifestLevel == "folder": pagesDest.extend(mappedPs) else: pagesDest.setdefault(file, []).extend(mappedPs) else: console(f"No facs mapping file {facsMappingFile}", error=True) else: for path, ps in pagesProto.items(): (folder, file) = path.split("/") file = stripExt(file) pagesDest = pages.setdefault( folder, [] if manifestLevel == "folder" else {} ) pages.setdefault(folder, []).extend(ps) if manifestLevel == "folder": pagesDest.extend(ps) else: pagesDest.setdefault(file, []).extend(ps) if pages is None: console("Could not assemble page sequence info", error=True) else: result = dict(pages=pages) self.pages = result
def getRotations(self)
-
Expand source code Browse git
def getRotations(self): if self.error: return verbose = self.verbose scanInfoDir = self.scanInfoDir prefix = "rotation_" suffix = ".tsv" rotateInfo = {} self.rotateInfo = rotateInfo n = 0 nPages = 0 for f in dirContents(scanInfoDir)[0]: if not f.startswith(prefix) or not f.endswith(suffix): continue kind = f.removeprefix(prefix).removesuffix(suffix).split("_", 1)[-1] if kind != PAGES: continue with fileOpen(f"{scanInfoDir}/{f}") as rh: next(rh) for line in rh: fields = line.rstrip("\n").split("\t") p = fields[0] rot = int(fields[1]) rotateInfo.setdefault(kind, {})[p] = rot n += 1 if rot != 0: nPages += 1 if n == 0: if verbose > -1: console(f"No rotation files found in {scanInfoDir}") return if verbose > -1: console(f"{nPages} pages have nonzero rotations")
def getSizes(self)
-
Expand source code Browse git
def getSizes(self): if self.error: return verbose = self.verbose scanInfoDir = self.scanInfoDir prefix = "sizes_" suffix = ".tsv" sizeInfo = {} self.sizeInfo = sizeInfo maxW, maxH = 0, 0 totW, totH = 0, 0 n = 0 ws, hs = [], [] for f in dirContents(scanInfoDir)[0]: if not f.startswith(prefix) or not f.endswith(suffix): continue kind = f.removeprefix(prefix).removesuffix(suffix).split("_", 1)[-1] if kind != PAGES: continue with fileOpen(f"{scanInfoDir}/{f}") as rh: next(rh) for line in rh: fields = line.rstrip("\n").split("\t") p = fields[0] (w, h) = (int(x) for x in fields[1:3]) sizeInfo.setdefault(kind, {})[p] = (w, h) ws.append(w) hs.append(h) n += 1 totW += w totH += h if w > maxW: maxW = w if h > maxH: maxH = h if n == 0: console(f"No sizes files found in {scanInfoDir}", error=True) return avW = int(round(totW / n)) avH = int(round(totH / n)) devW = int(round(sum(abs(w - avW) for w in ws) / n)) devH = int(round(sum(abs(h - avH) for h in hs) / n)) if verbose > -1: console(f"Maximum dimensions: W = {maxW:>4} H = {maxH:>4}") console(f"Average dimensions: W = {avW:>4} H = {avH:>4}") console(f"Average deviation: W = {devW:>4} H = {devH:>4}")
def manifests(self, manifestDir, verbose=None, **kwargs)
-
Generate manifests.
Parameters
manifestDir
:string
- Manifests and logo will be generated in this directory.
verbose
:integer
, optionalNone
- Produce no (-1), some (0) or many (1) progress and reporting messages
If
None
, the value will be taken from the corresponding object member. kwargs
:dict
- Additional optional parameters to pass as key value pairs to
the iiif config file. These values will be filled in for place holders
of the form
[
arg]
.
Expand source code Browse git
def manifests(self, manifestDir, verbose=None, **kwargs): """Generate manifests. Parameters ---------- manifestDir: string Manifests and logo will be generated in this directory. verbose: integer, optional None Produce no (-1), some (0) or many (1) progress and reporting messages If `None`, the value will be taken from the corresponding object member. kwargs: dict Additional optional parameters to pass as key value pairs to the iiif config file. These values will be filled in for place holders of the form `[`*arg*`]`. """ if self.error: return if verbose is None: verbose = self.verbose self.manifestDir = manifestDir infoDir = self.infoDir settings = self.settings fileInfoFile = f"{infoDir}/files.yml" if not fileExists(fileInfoFile): console(f"File with folder/file info not found: {fileInfoFile}", error=True) self.error = True return files = readYaml(asFile=fileInfoFile, plain=True) excludedFolders = { k for k, v in settings.get("excludedFolders", {}).items() if v } if type(files) is not list: console(f"The file info in {fileInfoFile} should be a list", error=True) self.error = True return errors = [] for item in files: if type(item) is not list or len(item) != 2: errors.append(f"Folder {repr(item)} : not a pair") continue (folder, fls) = item if type(fls) is not list: errors.append(f"Folder {folder} : {repr(fls)} is not a list") continue for file in fls: if type(file) is not str: errors.append(f"Folder {folder}: {repr(file)} is not a string") if len(errors): for error in errors: console(error, error=True) self.errors = True return iFiles = [ (fold, [f.removesuffix(".xml") for f in fl]) for (fold, fl) in files if fold not in excludedFolders ] nFolders = len(iFiles) nFiles = sum(len(x[1]) for x in iFiles) self.files = iFiles files = self.files manifestLevel = settings.get("manifestLevel", "folder") self.manifestLevel = manifestLevel (self.constants, self.templates) = parseIIIF(settings, "templates", **kwargs) if verbose > -1: console("Parameters passed to manifest generation:") for k, v in sorted(kwargs.items()): console(f"\t{k:<10} = {v}") if verbose == 1: console("Values for the constants of the manifest generation:") for k, v in sorted(self.constants.items()): console(f"\t{k:<10} = {v}") excludedFoldersStr = ", ".join(sorted(excludedFolders)) console(f"Manifestlevel = {manifestLevel}") console(f"Excluded {manifestLevel} items: = {excludedFoldersStr}") console(f"{nFolders} folders and {nFiles} files, not counting exclusions") self.getSizes() self.getRotations() self.getPageSeq() pages = self.pages properPages = pages.get("pages", {}) mLevelFolders = manifestLevel == "folder" if verbose > -1: console("Folders:") for item in files: folder = item if mLevelFolders else item[0] n = len(properPages[folder]) if folder in properPages else 0 m = ( None if mLevelFolders else ( sum(len(x) for x in properPages[folder].values()) if folder in properPages else 0 ) ) nP = n if mLevelFolders else m nF = m if mLevelFolders else n pageRep = f"{nP:>4} pages" fileRep = "" if nF is None else f"{nF:>4} files and " if folder not in properPages: console( f"\t{folder:<10} with {fileRep}{pageRep} (not excluded in config)", error=True, ) self.error = True continue if verbose > -1: console(f"\t{folder:<10} with {fileRep}{pageRep}") manifestDir = self.manifestDir manifestLevel = self.manifestLevel infoDir = self.infoDir settings = self.settings initTree(manifestDir, fresh=True) missingFiles = {} self.missingFiles = missingFiles p = 0 i = 0 m = 0 if manifestLevel == "folder": for folder in files: (thisP, thisI) = self.genPages("pages", folder=folder) p += thisP i += thisI if thisI: m += 1 else: for folder, fls in files: folderDir = f"{manifestDir}/{folder}" initTree(folderDir, fresh=True, gentle=False) folderI = 0 for file in fls: (thisP, thisI) = self.genPages("pages", folder=folder, file=file) p += thisP i += thisI if thisI: m += 1 folderI += thisI if folderI == 0: dirRemove(folderDir) if len(missingFiles): console("Missing image files:", error=True) with fileOpen(f"{infoDir}/facsMissing.tsv", "w") as fh: fh.write("kind\tfile\tpage\tn\n") nMissing = 0 for kind, fls in missingFiles.items(): console(f"\t{kind}:", error=True) for file, pages in fls.items(): console(f"\t\t{file}:", error=True) for page, n in pages.items(): console(f"\t\t\t{n:>3} x {page}", error=True) nMissing += n fh.write(f"{kind}\t{file}\t{page}\t{n}\n") console(f"\ttotal occurrences of a missing file: {nMissing}") if verbose > -1: console( f"{m} IIIF manifests with {i} items " f"for {p} pages generated in {manifestDir}" )