Module tf.ner.sets
Annotation set management.
Annotation sets contain the annotations that the user generates by using the tool.
Expand source code Browse git
"""Annotation set management.
Annotation sets contain the annotations that the user generates by using
the tool.
"""
from ..core.generic import AttrDict
from ..core.helpers import console
from ..core.files import (
fileExists,
initTree,
dirExists,
dirContents,
dirMake,
dirCopy,
dirRemove,
dirMove,
)
from .data import Data
from .settings import ERROR, SET_ENT, SET_SHEET, SET_MAIN
class Sets(Data):
    def __init__(self, sets=None):
        """Methods to create, duplicate, rename and delete annotation sets.

        Annotation sets have names, given by the user.

        There is a special annotation set, whose name is the empty string,
        and whose content consists of the pre-existing entities, i.e. the
        entities that are present in the TF data as nodes and features.

        Users can not name sets with names that start with a dot.

        Annotation sets whose names start with a dot are generated by the system
        when a spreadsheet with entity triggers is processed.
        These sets are readonly, like the special annotation set, but they can
        be duplicated to ordinary sets. Those copies lose the relationship with
        the original spreadsheet.

        There is always one current annotation set, whose data is loaded into
        memory.

        Parameters
        ----------
        sets: object, optional None
            Entity sets to start with.
            If None, a fresh store of sets will be created by a parent class (Data).
        """
        Data.__init__(self, sets=sets)

        # Without a proper setup none of the set administration is initialized.
        if not self.properlySetup:
            return

        browse = self.browse

        self.setName = ""
        """The current annotation set."""

        # Derive the representation and the read-only flags of the current set.
        self.setInfo()

        self.setNames = set()
        """The set of names of annotation sets that are present on the file system."""

        self.readSets()

        # In browse mode the data of the current set is not loaded here.
        if not browse:
            self.loadSetData()

    def setInfo(self, setName=None):
        """Give information about a set.

        Parameters
        ----------
        setName: string, optional None
            The name of the set we want info about. If `None`, the info about the
            current set is computed and stored in the attributes `setNameRep`,
            `setIsRo`, `setIsSrc` and `setIsX` of this object.

        Returns
        -------
        tuple or None
            If `setName` is passed, the info consists of: a representation of the
            name of the set; whether the set is readonly; whether the set
            corresponds to the baked-in entities of the dataset; whether the set
            corresponds to a spreadsheet.

            If `setName` is `None`, nothing is returned.
        """
        entitySet = self.settings.entitySet

        forCurrent = setName is None

        if forCurrent:
            setName = self.setName

        # The empty name is the set of baked-in entities; names that start with
        # a dot are sheet sets. Both kinds are read-only.
        setIsSrc = setName == ""
        setIsX = setName.startswith(".")
        setIsRo = setIsSrc or setIsX

        if setIsSrc:
            setNameRep = f"{SET_ENT} {entitySet}"
        elif setIsX:
            # Strip the leading dot from the name of a sheet set.
            setNameRep = f"{SET_SHEET} {setName[1:]}"
        else:
            setNameRep = f"{SET_MAIN} {setName}"

        if not forCurrent:
            return (setNameRep, setIsRo, setIsSrc, setIsX)

        self.setNameRep = setNameRep
        self.setIsRo = setIsRo
        self.setIsSrc = setIsSrc
        self.setIsX = setIsX

    def readSets(self):
        """Read the list of current annotation sets (again).

        Use this when you change annotation sets outside the NER browser, e.g.
        by working with annotations in a Jupyter Notebook.
        """
        # The second member of the result of `dirContents` is taken as the
        # collection of set names (presumably the subdirectories of the
        # annotation directory; verify against `tf.core.files.dirContents`).
        self.setNames = set(dirContents(self.annoDir)[1])

    def getSetData(self):
        """Deliver the current set.

        If no data is stored yet under the name of the current set, an empty
        `AttrDict` is stored under that name and returned.

        Returns
        -------
        AttrDict
            A dictionary with the data of the set, with portions given by key.
            See `tf.ner.data.Data.fromSourceSet()` and `tf.ner.data.Data.processSet()`
        """
        return self.sets.setdefault(self.setName, AttrDict())

    def _reportSetSize(self):
        """Report the number of annotations in the current set on the console."""
        nEntities = len(self.sets[self.setName].entities)
        plural = "" if nEntities == 1 else "s"
        self.console(
            f"Annotation set {self.setNameRep} has {nEntities} annotation{plural}"
        )

    def setSet(self, newSetName):
        """Switch to a named annotation set.

        If the new set does not exist, it will be created.
        After the switch, the new set will be loaded into memory.

        Outside browse mode, the number of annotations in the resulting current
        set is reported on the console.

        Parameters
        ----------
        newSetName: string
            The name of the new annotation set to switch to.
        """
        if not self.properlySetup:
            return

        browse = self.browse

        if not browse:
            # Load the data of the set that is current before the switch.
            self.loadSetData()

        setNames = self.setNames
        setName = self.setName
        newSetDir = f"{self.annoDir}/{newSetName}"

        (_, _, newSetSrc, _) = self.setInfo(newSetName)

        # Every set except the special set `""` gets a directory on disk; create
        # it when the set is unknown or when its directory has gone missing.
        if (not newSetSrc) and (newSetName not in setNames or not dirExists(newSetDir)):
            initTree(newSetDir, fresh=False)
            setNames.add(newSetName)

        if newSetName != setName:
            self.setName = newSetName
            self.setInfo()
            self.loadSetData()

        if not browse:
            self._reportSetSize()

    def _addToSet(self, newEntities):
        """Replace the content of the current set by a bunch of entities.

        Only for sets that correspond to sheets; for all other sets nothing
        happens. The existing data of the set is cleared before the new entities
        are added.

        This is to create such a set, it is not meant to call this function
        manually in a Jupyter notebook.

        Parameters
        ----------
        newEntities: iterable
            The entities to add; they are passed unchanged to `addEntities()`.
        """
        if not self.properlySetup:
            return

        if not self.setIsX:
            return

        self._clearSetData()
        self.addEntities(newEntities, returns=False, _lowlevel=True)

    def resetSet(self):
        """Clear the current annotation set.

        Read-only sets (the special set `""` and the sets that correspond to
        sheets) cannot be reset: a message is shown and nothing happens.

        Outside browse mode, the number of annotations in the set after the
        reset is reported on the console.
        """
        if not self.properlySetup:
            return

        if self.setIsRo:
            # Name the set that is actually current: the entity set for the
            # special set, the representation of the sheet set otherwise.
            what = self.settings.entitySet if self.setIsSrc else self.setNameRep
            console(f"Resetting the {what} has no effect")
            return

        setDir = f"{self.annoDir}/{self.setName}"

        initTree(setDir, fresh=True, gentle=True)
        self.loadSetData()

        if not self.browse:
            self._reportSetSize()

    def setDup(self, dupSet):
        """Duplicates the current set to a set with a new name.

        !!! hint "The readonly sets can be duplicated"
            After duplication of a read-only set, the duplicate
            copy is modifiable.
            In this way you can make corrections to the set of pre-existing,
            tool-generated annotations.

        The current set changes to the result of the duplication.

        Parameters
        ----------
        dupSet: string
            The name of new set that is the result of the duplication.
            It must be the name of an ordinary set: not the empty string and
            not starting with a dot.

        Returns
        -------
        list
            A list of messages. Every message is a tuple of kind and content.
            The list is empty if the duplication succeeded.
        """
        if not self.properlySetup:
            return []

        messages = []

        # A duplicate must be modifiable, so its name may not be one of the
        # read-only names (consistent with the check in `setMove()`).
        (dupSetRep, dupSetRo, _, _) = self.setInfo(setName=dupSet)

        if dupSetRo:
            messages.append(
                (ERROR, f"""Cannot duplicate to {dupSetRep} because it is read-only""")
            )
            return messages

        setNames = self.setNames
        setsData = self.sets
        setName = self.setName
        annoDir = self.annoDir
        annoPath = f"{annoDir}/{dupSet}"

        if dupSet in setNames:
            messages.append((ERROR, f"""Set {dupSet} already exists"""))
            return messages

        if self.setIsSrc:
            # The special set has no directory of its own to copy: its entities
            # are written to a fresh data file instead.
            dataFile = f"{annoPath}/entities.tsv"

            if fileExists(dataFile):
                messages.append((ERROR, f"""Set {dupSet} already exists"""))
                return messages

            dirMake(annoPath)
            self.saveEntitiesAs(dataFile)
        elif not dirCopy(f"{annoDir}/{setName}", annoPath, noclobber=True):
            messages.append((ERROR, f"""Could not copy {setName} to {dupSet}"""))
            return messages

        setNames.add(dupSet)

        # NOTE(review): the duplicate refers to the very same in-memory data
        # object as the original, it is not a copy; confirm this is intended.
        # The guard prevents a KeyError when the original was never loaded.
        if setName in setsData:
            setsData[dupSet] = setsData[setName]

        self.setName = dupSet
        self.setInfo()

        return messages

    def setDel(self, delSet):
        """Remove a named set.

        If the removed set happens to be the current set, the current set changes
        to the special set named `""`.

        Parameters
        ----------
        delSet: string
            The name of the set to be removed.
            It is not allowed to remove read-only sets: the special set
            named `""` and the sets whose names start with a dot.

        Returns
        -------
        list
            A list of messages. Every message is a tuple of kind and content.
            The list is empty if the removal succeeded.
        """
        if not self.properlySetup:
            return []

        messages = []

        (delSetRep, delSetRo, _, _) = self.setInfo(setName=delSet)

        if delSetRo:
            messages.append(
                (ERROR, f"""Cannot remove set {delSetRep} because it is read-only""")
            )
            return messages

        annoPath = f"{self.annoDir}/{delSet}"

        dirRemove(annoPath)

        # The removal is verified by checking whether the directory is still there.
        if dirExists(annoPath):
            messages.append((ERROR, f"""Could not remove {delSetRep}"""))
            return messages

        self.setNames.discard(delSet)

        # The set may exist on disk without ever having been loaded into memory,
        # so its absence from the in-memory store is not an error.
        self.sets.pop(delSet, None)

        if self.setName == delSet:
            self.setName = ""
            self.setInfo()

        return messages

    def setMove(self, moveSet):
        """Renames a named set.

        The current set changes to the renamed set.
        It is not possible to rename read-only sets, such as the special set
        named `""`.
        It is also forbidden to rename another set to a read-only name: the
        empty string or a name that starts with a dot.

        Parameters
        ----------
        moveSet: string
            The new name of the current set.

        Returns
        -------
        list
            A list of messages. Every message is a tuple of kind and content.
            The list is empty if the renaming succeeded.
        """
        if not self.properlySetup:
            return []

        messages = []

        (moveSetRep, moveSetRo, _, _) = self.setInfo(setName=moveSet)

        if moveSetRo:
            messages.append((ERROR, f"""Cannot rename a set to {moveSetRep}"""))
            return messages

        setName = self.setName
        setNameRep = self.setNameRep

        if self.setIsRo:
            messages.append((ERROR, f"""Cannot rename set {setNameRep}"""))
            return messages

        setNames = self.setNames
        setsData = self.sets
        annoDir = self.annoDir
        annoPath = f"{annoDir}/{moveSet}"

        if dirExists(annoPath):
            messages.append((ERROR, f"""Set {moveSetRep} already exists"""))
            return messages

        if not dirMove(f"{annoDir}/{setName}", annoPath):
            messages.append(
                (ERROR, f"""Could not rename {setNameRep} to {moveSetRep}""")
            )
            return messages

        setNames.add(moveSet)
        setNames.discard(setName)

        # Move the in-memory data along with the directory, if there is any.
        if setName in setsData:
            setsData[moveSet] = setsData.pop(setName)

        self.setName = moveSet
        self.setInfo()

        return messages
Classes
class Sets (sets=None)
-
Methods to create, duplicate, rename and delete annotation sets.
Annotation sets have names, given by the user.
There is a special annotation set, whose name is the empty string, and whose content are the pre-existing entities, i.e. the entities that are present in the TF data as nodes and features.
Users can not name sets with names that start with a dot.
Annotation sets whose names start with a dot are generated by the system when a spreadsheet with entity triggers is processed. These sets are readonly, like the special annotation set, but they can be duplicated to ordinary sets. Those copies lose the relationship with the original spreadsheet.
There is always one current annotation set, whose data is loaded into memory.
Parameters
sets
:object
, optional None
- Entity sets to start with. If None, a fresh store of sets will be created by a parent class (Data).
Expand source code Browse git
class Sets(Data):
    def __init__(self, sets=None):
        """Methods to create, duplicate, rename and delete annotation sets.

        Annotation sets have names, given by the user.

        There is a special annotation set, whose name is the empty string,
        and whose content consists of the pre-existing entities, i.e. the
        entities that are present in the TF data as nodes and features.

        Users can not name sets with names that start with a dot.

        Annotation sets whose names start with a dot are generated by the system
        when a spreadsheet with entity triggers is processed.
        These sets are readonly, like the special annotation set, but they can
        be duplicated to ordinary sets. Those copies lose the relationship with
        the original spreadsheet.

        There is always one current annotation set, whose data is loaded into
        memory.

        Parameters
        ----------
        sets: object, optional None
            Entity sets to start with.
            If None, a fresh store of sets will be created by a parent class (Data).
        """
        Data.__init__(self, sets=sets)

        # Without a proper setup none of the set administration is initialized.
        if not self.properlySetup:
            return

        browse = self.browse

        self.setName = ""
        """The current annotation set."""

        # Derive the representation and the read-only flags of the current set.
        self.setInfo()

        self.setNames = set()
        """The set of names of annotation sets that are present on the file system."""

        self.readSets()

        # In browse mode the data of the current set is not loaded here.
        if not browse:
            self.loadSetData()

    def setInfo(self, setName=None):
        """Give information about a set.

        Parameters
        ----------
        setName: string, optional None
            The name of the set we want info about. If `None`, the info about the
            current set is computed and stored in the attributes `setNameRep`,
            `setIsRo`, `setIsSrc` and `setIsX` of this object.

        Returns
        -------
        tuple or None
            If `setName` is passed, the info consists of: a representation of the
            name of the set; whether the set is readonly; whether the set
            corresponds to the baked-in entities of the dataset; whether the set
            corresponds to a spreadsheet.

            If `setName` is `None`, nothing is returned.
        """
        entitySet = self.settings.entitySet

        forCurrent = setName is None

        if forCurrent:
            setName = self.setName

        # The empty name is the set of baked-in entities; names that start with
        # a dot are sheet sets. Both kinds are read-only.
        setIsSrc = setName == ""
        setIsX = setName.startswith(".")
        setIsRo = setIsSrc or setIsX

        if setIsSrc:
            setNameRep = f"{SET_ENT} {entitySet}"
        elif setIsX:
            # Strip the leading dot from the name of a sheet set.
            setNameRep = f"{SET_SHEET} {setName[1:]}"
        else:
            setNameRep = f"{SET_MAIN} {setName}"

        if not forCurrent:
            return (setNameRep, setIsRo, setIsSrc, setIsX)

        self.setNameRep = setNameRep
        self.setIsRo = setIsRo
        self.setIsSrc = setIsSrc
        self.setIsX = setIsX

    def readSets(self):
        """Read the list of current annotation sets (again).

        Use this when you change annotation sets outside the NER browser, e.g.
        by working with annotations in a Jupyter Notebook.
        """
        # The second member of the result of `dirContents` is taken as the
        # collection of set names (presumably the subdirectories of the
        # annotation directory; verify against `tf.core.files.dirContents`).
        self.setNames = set(dirContents(self.annoDir)[1])

    def getSetData(self):
        """Deliver the current set.

        If no data is stored yet under the name of the current set, an empty
        `AttrDict` is stored under that name and returned.

        Returns
        -------
        AttrDict
            A dictionary with the data of the set, with portions given by key.
            See `tf.ner.data.Data.fromSourceSet()` and `tf.ner.data.Data.processSet()`
        """
        return self.sets.setdefault(self.setName, AttrDict())

    def _reportSetSize(self):
        """Report the number of annotations in the current set on the console."""
        nEntities = len(self.sets[self.setName].entities)
        plural = "" if nEntities == 1 else "s"
        self.console(
            f"Annotation set {self.setNameRep} has {nEntities} annotation{plural}"
        )

    def setSet(self, newSetName):
        """Switch to a named annotation set.

        If the new set does not exist, it will be created.
        After the switch, the new set will be loaded into memory.

        Outside browse mode, the number of annotations in the resulting current
        set is reported on the console.

        Parameters
        ----------
        newSetName: string
            The name of the new annotation set to switch to.
        """
        if not self.properlySetup:
            return

        browse = self.browse

        if not browse:
            # Load the data of the set that is current before the switch.
            self.loadSetData()

        setNames = self.setNames
        setName = self.setName
        newSetDir = f"{self.annoDir}/{newSetName}"

        (_, _, newSetSrc, _) = self.setInfo(newSetName)

        # Every set except the special set `""` gets a directory on disk; create
        # it when the set is unknown or when its directory has gone missing.
        if (not newSetSrc) and (newSetName not in setNames or not dirExists(newSetDir)):
            initTree(newSetDir, fresh=False)
            setNames.add(newSetName)

        if newSetName != setName:
            self.setName = newSetName
            self.setInfo()
            self.loadSetData()

        if not browse:
            self._reportSetSize()

    def _addToSet(self, newEntities):
        """Replace the content of the current set by a bunch of entities.

        Only for sets that correspond to sheets; for all other sets nothing
        happens. The existing data of the set is cleared before the new entities
        are added.

        This is to create such a set, it is not meant to call this function
        manually in a Jupyter notebook.

        Parameters
        ----------
        newEntities: iterable
            The entities to add; they are passed unchanged to `addEntities()`.
        """
        if not self.properlySetup:
            return

        if not self.setIsX:
            return

        self._clearSetData()
        self.addEntities(newEntities, returns=False, _lowlevel=True)

    def resetSet(self):
        """Clear the current annotation set.

        Read-only sets (the special set `""` and the sets that correspond to
        sheets) cannot be reset: a message is shown and nothing happens.

        Outside browse mode, the number of annotations in the set after the
        reset is reported on the console.
        """
        if not self.properlySetup:
            return

        if self.setIsRo:
            # Name the set that is actually current: the entity set for the
            # special set, the representation of the sheet set otherwise.
            what = self.settings.entitySet if self.setIsSrc else self.setNameRep
            console(f"Resetting the {what} has no effect")
            return

        setDir = f"{self.annoDir}/{self.setName}"

        initTree(setDir, fresh=True, gentle=True)
        self.loadSetData()

        if not self.browse:
            self._reportSetSize()

    def setDup(self, dupSet):
        """Duplicates the current set to a set with a new name.

        !!! hint "The readonly sets can be duplicated"
            After duplication of a read-only set, the duplicate
            copy is modifiable.
            In this way you can make corrections to the set of pre-existing,
            tool-generated annotations.

        The current set changes to the result of the duplication.

        Parameters
        ----------
        dupSet: string
            The name of new set that is the result of the duplication.
            It must be the name of an ordinary set: not the empty string and
            not starting with a dot.

        Returns
        -------
        list
            A list of messages. Every message is a tuple of kind and content.
            The list is empty if the duplication succeeded.
        """
        if not self.properlySetup:
            return []

        messages = []

        # A duplicate must be modifiable, so its name may not be one of the
        # read-only names (consistent with the check in `setMove()`).
        (dupSetRep, dupSetRo, _, _) = self.setInfo(setName=dupSet)

        if dupSetRo:
            messages.append(
                (ERROR, f"""Cannot duplicate to {dupSetRep} because it is read-only""")
            )
            return messages

        setNames = self.setNames
        setsData = self.sets
        setName = self.setName
        annoDir = self.annoDir
        annoPath = f"{annoDir}/{dupSet}"

        if dupSet in setNames:
            messages.append((ERROR, f"""Set {dupSet} already exists"""))
            return messages

        if self.setIsSrc:
            # The special set has no directory of its own to copy: its entities
            # are written to a fresh data file instead.
            dataFile = f"{annoPath}/entities.tsv"

            if fileExists(dataFile):
                messages.append((ERROR, f"""Set {dupSet} already exists"""))
                return messages

            dirMake(annoPath)
            self.saveEntitiesAs(dataFile)
        elif not dirCopy(f"{annoDir}/{setName}", annoPath, noclobber=True):
            messages.append((ERROR, f"""Could not copy {setName} to {dupSet}"""))
            return messages

        setNames.add(dupSet)

        # NOTE(review): the duplicate refers to the very same in-memory data
        # object as the original, it is not a copy; confirm this is intended.
        # The guard prevents a KeyError when the original was never loaded.
        if setName in setsData:
            setsData[dupSet] = setsData[setName]

        self.setName = dupSet
        self.setInfo()

        return messages

    def setDel(self, delSet):
        """Remove a named set.

        If the removed set happens to be the current set, the current set changes
        to the special set named `""`.

        Parameters
        ----------
        delSet: string
            The name of the set to be removed.
            It is not allowed to remove read-only sets: the special set
            named `""` and the sets whose names start with a dot.

        Returns
        -------
        list
            A list of messages. Every message is a tuple of kind and content.
            The list is empty if the removal succeeded.
        """
        if not self.properlySetup:
            return []

        messages = []

        (delSetRep, delSetRo, _, _) = self.setInfo(setName=delSet)

        if delSetRo:
            messages.append(
                (ERROR, f"""Cannot remove set {delSetRep} because it is read-only""")
            )
            return messages

        annoPath = f"{self.annoDir}/{delSet}"

        dirRemove(annoPath)

        # The removal is verified by checking whether the directory is still there.
        if dirExists(annoPath):
            messages.append((ERROR, f"""Could not remove {delSetRep}"""))
            return messages

        self.setNames.discard(delSet)

        # The set may exist on disk without ever having been loaded into memory,
        # so its absence from the in-memory store is not an error.
        self.sets.pop(delSet, None)

        if self.setName == delSet:
            self.setName = ""
            self.setInfo()

        return messages

    def setMove(self, moveSet):
        """Renames a named set.

        The current set changes to the renamed set.
        It is not possible to rename read-only sets, such as the special set
        named `""`.
        It is also forbidden to rename another set to a read-only name: the
        empty string or a name that starts with a dot.

        Parameters
        ----------
        moveSet: string
            The new name of the current set.

        Returns
        -------
        list
            A list of messages. Every message is a tuple of kind and content.
            The list is empty if the renaming succeeded.
        """
        if not self.properlySetup:
            return []

        messages = []

        (moveSetRep, moveSetRo, _, _) = self.setInfo(setName=moveSet)

        if moveSetRo:
            messages.append((ERROR, f"""Cannot rename a set to {moveSetRep}"""))
            return messages

        setName = self.setName
        setNameRep = self.setNameRep

        if self.setIsRo:
            messages.append((ERROR, f"""Cannot rename set {setNameRep}"""))
            return messages

        setNames = self.setNames
        setsData = self.sets
        annoDir = self.annoDir
        annoPath = f"{annoDir}/{moveSet}"

        if dirExists(annoPath):
            messages.append((ERROR, f"""Set {moveSetRep} already exists"""))
            return messages

        if not dirMove(f"{annoDir}/{setName}", annoPath):
            messages.append(
                (ERROR, f"""Could not rename {setNameRep} to {moveSetRep}""")
            )
            return messages

        setNames.add(moveSet)
        setNames.discard(setName)

        # Move the in-memory data along with the directory, if there is any.
        if setName in setsData:
            setsData[moveSet] = setsData.pop(setName)

        self.setName = moveSet
        self.setInfo()

        return messages
Ancestors
Subclasses
Instance variables
var setName
-
The current annotation set.
var setNames
-
The set of names of annotation sets that are present on the file system.
Methods
def getSetData(self)
-
Deliver the current set.
Returns
AttrDict
- A dictionary with the data of the set, with portions given by key.
See Data.fromSourceSet() and Data.processSet()
def readSets(self)
-
Read the list of current annotation sets (again).
Use this when you change annotation sets outside the NER browser, e.g. by working with annotations in a Jupyter Notebook.
def resetSet(self)
-
Clear the current annotation set.
The special set "" cannot be reset, because it is read-only.
def setDel(self, delSet)
-
Remove a named set.
If the removed set happens to be the current set, the current set changes to the special set named "".
Parameters
delSet
:string
- The name of the set to be removed.
It is not allowed to remove the special set named "".
Returns
list
- A list of messages. Every message is a tuple of kind and content.
def setDup(self, dupSet)
-
Duplicates the current set to a set with a new name.
The readonly sets can be duplicated
After duplication of a read-only set, the duplicate copy is modifiable. In this way you can make corrections to the set of pre-existing, tool-generated annotations.
The current set changes to the result of the duplication.
Parameters
dupSet
:string
- The name of the new set that is the result of the duplication.
Returns
list
- A list of messages. Every message is a tuple of kind and content.
def setInfo(self, setName=None)
-
Give information about a set.
Parameters
setName
:string
, optional None
- The name of the set we want info about. If
None
, give info about the current set.
Returns
tuple
- The info consists of: a representation of the name of the set; whether the set is readonly; whether the set corresponds to the baked-in entities of the dataset; whether the set corresponds to a spreadsheet.
def setMove(self, moveSet)
-
Renames a named set.
The current set changes to the renamed set. It is not possible to rename the special set named "". It is also forbidden to rename another set to the special set.
Parameters
moveSet
:string
- The new name of the current set.
Returns
list
- A list of messages. Every message is a tuple of kind and content.
def setSet(self, newSetName)
-
Switch to a named annotation set.
If the new set does not exist, it will be created. After the switch, the new set will be loaded into memory.
Parameters
newSetName
:string
- The name of the new annotation set to switch to.
Inherited members
Data
:addEntities
addEntity
addEntityRich
bucketType
checkBuckets
checkFeature
console
consoleLine
delEntity
delEntityRich
featureDefault
filterContent
fromSourceSet
fvalFromNode
getAfter
getBucketNodes
getContext
getEid
getEntityNodes
getKind
getSeqFromNode
getSeqFromStr
getStr
getStrFromSeq
loadSetData
processSet
properlySetup
saveEntitiesAs
sectionHead
slotType
slotsFromNode
stringsFromTokens
textFromNode
textFromSlots
tokensFromNode