Utilities module for Label Studio SDK
This section contains utility operations that you can perform with the SDK. See the client, project or data manager modules for other operations you might want to perform.
source code Browse git
""" .. include::../docs/utils.md
"""
import logging
from lxml import etree
from collections import defaultdict
# Module-level logger; parse_config uses it to report label tags without a
# usable value and "toName" references that match no input tag.
logger = logging.getLogger(__name__)
# Tag names whose "alias" (or, failing that, "value") attribute parse_config
# collects as a label of the nearest enclosing output tag.
_LABEL_TAGS = {"Label", "Choice"}
# Tag names that parse_config never treats as output (control) tags, even when
# they carry both a "name" and a "toName" attribute.
_NOT_CONTROL_TAGS = {
"Filter",
}
def parse_config(config_string):
    """Parse a Label Studio labeling configuration into a structured dict.

    Useful for formatting results for predicted annotations and determining the
    type(s) of ML models that might be relevant to the labeling project.

    Parameters
    ----------
    config_string: str
        Label configuration XML as a string

    Returns
    -------
    dict
        Empty dict when `config_string` is falsy. Otherwise one entry per
        output (control) tag, i.e. a tag that has both `name` and `toName`
        attributes and is not listed in `_NOT_CONTROL_TAGS`, with the form:
    ```json
    {
        "<ControlTag>.name": {
            "type": "ControlTag",
            "to_name": ["<ObjectTag1>.name", "<ObjectTag2>.name"],
            "conditionals": {"type": "tag", "name": "<whenTagName value>"},
            "inputs": [
                {"type": "ObjectTag1", "value": "<ObjectTag1>.value"},
                {"type": "ObjectTag2", "value": "<ObjectTag2>.value"}
            ],
            "labels": ["Label1", "Label2", "Label3"], // taken from "alias" if it exists, else "value"
            "labels_attrs": {"Label1": {"value": "Label1"}} // every XML attribute of each label tag
        }
    }
    ```
    `conditionals` is present only for tags with `perRegion="true"` and a
    non-empty `whenTagName`, `whenLabelValue` or `whenChoiceValue` attribute;
    its `type` is `"tag"`, `"label"` or `"choice"` respectively. Input values
    are stored with any leading `$` characters removed.

    Raises
    ------
    Whatever `etree.fromstring` raises for input it cannot parse; no parsing
    error is caught here.
    """
    if not config_string:
        return {}

    # Checked in this order; the first attribute that is set wins.
    conditional_attrs = (
        ("whenTagName", "tag"),
        ("whenLabelValue", "label"),
        ("whenChoiceValue", "choice"),
    )

    def _is_input_tag(tag):
        # Object tags carry the task data: they have a name and a value.
        return bool(tag.attrib.get("name") and tag.attrib.get("value"))

    def _is_output_tag(tag):
        # Control tags point at an object tag through "toName".
        return bool(
            tag.attrib.get("name")
            and tag.attrib.get("toName")
            and tag.tag not in _NOT_CONTROL_TAGS
        )

    def _get_parent_output_tag_name(tag, outputs):
        # Find parental <Choices> tag for nested tags like
        # <Choices><View><View><Choice>...
        parent = tag.getparent()
        while parent is not None:
            name = parent.attrib.get("name")
            if name in outputs:
                return name
            parent = parent.getparent()
        return None

    xml_tree = etree.fromstring(config_string)

    inputs, outputs, labels = {}, {}, defaultdict(dict)
    for tag in xml_tree.iter():
        if _is_output_tag(tag):
            tag_info = {"type": tag.tag, "to_name": tag.attrib["toName"].split(",")}
            # Grab conditionals if any
            if tag.attrib.get("perRegion") == "true":
                for attr_name, conditional_type in conditional_attrs:
                    attr_value = tag.attrib.get(attr_name)
                    if attr_value:
                        tag_info["conditionals"] = {
                            "type": conditional_type,
                            "name": attr_value,
                        }
                        break
            outputs[tag.attrib["name"]] = tag_info
        elif _is_input_tag(tag):
            inputs[tag.attrib["name"]] = {
                "type": tag.tag,
                "value": tag.attrib["value"].lstrip("$"),
            }

        if tag.tag not in _LABEL_TAGS:
            continue

        # Document order guarantees the enclosing output tag was registered
        # before any of its nested label tags is visited.
        parent_name = _get_parent_output_tag_name(tag, outputs)
        if parent_name is None:
            continue

        actual_value = tag.attrib.get("alias") or tag.attrib.get("value")
        if not actual_value:
            logger.debug(
                'Inspecting tag %s... found no "value" or "alias" attributes.',
                etree.tostring(tag, encoding="unicode").strip()[:50],
            )
        else:
            labels[parent_name][actual_value] = dict(tag.attrib)

    for output_tag, tag_info in outputs.items():
        tag_info["inputs"] = []
        for input_tag_name in tag_info["to_name"]:
            if input_tag_name not in inputs:
                logger.warning(
                    "to_name=%s is specified for output tag name=%s, "
                    "but we can't find it among input tags",
                    input_tag_name,
                    output_tag,
                )
                continue
            tag_info["inputs"].append(inputs[input_tag_name])
        tag_info["labels"] = list(labels[output_tag])
        tag_info["labels_attrs"] = labels[output_tag]
    return outputs
def chunk(lst, n):
    """Yield successive n-sized chunks from lst.

    Each chunk is the slice ``lst[start:start + n]`` taken at offsets
    0, n, 2n, ... below ``len(lst)``, so the final chunk may hold fewer than
    ``n`` items and an empty ``lst`` yields nothing.
    """
    total = len(lst)
    yield from (lst[start : start + n] for start in range(0, total, n))
Functions
def chunk(lst, n)
-
Yield successive n-sized chunks from lst.
source code Browse git
def chunk(lst, n):
    """Yield successive n-sized chunks from lst.

    Each chunk is the slice ``lst[start:start + n]`` taken at offsets
    0, n, 2n, ... below ``len(lst)``, so the final chunk may hold fewer than
    ``n`` items and an empty ``lst`` yields nothing.
    """
    total = len(lst)
    yield from (lst[start : start + n] for start in range(0, total, n))
def parse_config(config_string)
-
Parse a given Label Studio labeling configuration and return a structured version of the configuration. Useful for formatting results for predicted annotations and determining the type(s) of ML models that might be relevant to the labeling project.
Parameters
config_string
:str
- Label configuration XML as a string
Returns
dict
- structured config with the form:
{ "<ControlTag>.name": { "type": "ControlTag", "to_name": ["<ObjectTag1>.name", "<ObjectTag2>.name"], "inputs": [ {"type": "ObjectTag1", "value": "<ObjectTag1>.value"}, {"type": "ObjectTag2", "value": "<ObjectTag2>.value"} ], "labels": ["Label1", "Label2", "Label3"] // taken from "alias" if it exists, else "value" } }
source code Browse git
def parse_config(config_string):
    """Parse a Label Studio labeling configuration into a structured dict.

    Useful for formatting results for predicted annotations and determining the
    type(s) of ML models that might be relevant to the labeling project.

    Parameters
    ----------
    config_string: str
        Label configuration XML as a string

    Returns
    -------
    dict
        Empty dict when `config_string` is falsy. Otherwise one entry per
        output (control) tag, i.e. a tag that has both `name` and `toName`
        attributes and is not listed in `_NOT_CONTROL_TAGS`, with the form:
    ```json
    {
        "<ControlTag>.name": {
            "type": "ControlTag",
            "to_name": ["<ObjectTag1>.name", "<ObjectTag2>.name"],
            "conditionals": {"type": "tag", "name": "<whenTagName value>"},
            "inputs": [
                {"type": "ObjectTag1", "value": "<ObjectTag1>.value"},
                {"type": "ObjectTag2", "value": "<ObjectTag2>.value"}
            ],
            "labels": ["Label1", "Label2", "Label3"], // taken from "alias" if it exists, else "value"
            "labels_attrs": {"Label1": {"value": "Label1"}} // every XML attribute of each label tag
        }
    }
    ```
    `conditionals` is present only for tags with `perRegion="true"` and a
    non-empty `whenTagName`, `whenLabelValue` or `whenChoiceValue` attribute;
    its `type` is `"tag"`, `"label"` or `"choice"` respectively. Input values
    are stored with any leading `$` characters removed.

    Raises
    ------
    Whatever `etree.fromstring` raises for input it cannot parse; no parsing
    error is caught here.
    """
    if not config_string:
        return {}

    # Checked in this order; the first attribute that is set wins.
    conditional_attrs = (
        ("whenTagName", "tag"),
        ("whenLabelValue", "label"),
        ("whenChoiceValue", "choice"),
    )

    def _is_input_tag(tag):
        # Object tags carry the task data: they have a name and a value.
        return bool(tag.attrib.get("name") and tag.attrib.get("value"))

    def _is_output_tag(tag):
        # Control tags point at an object tag through "toName".
        return bool(
            tag.attrib.get("name")
            and tag.attrib.get("toName")
            and tag.tag not in _NOT_CONTROL_TAGS
        )

    def _get_parent_output_tag_name(tag, outputs):
        # Find parental <Choices> tag for nested tags like
        # <Choices><View><View><Choice>...
        parent = tag.getparent()
        while parent is not None:
            name = parent.attrib.get("name")
            if name in outputs:
                return name
            parent = parent.getparent()
        return None

    xml_tree = etree.fromstring(config_string)

    inputs, outputs, labels = {}, {}, defaultdict(dict)
    for tag in xml_tree.iter():
        if _is_output_tag(tag):
            tag_info = {"type": tag.tag, "to_name": tag.attrib["toName"].split(",")}
            # Grab conditionals if any
            if tag.attrib.get("perRegion") == "true":
                for attr_name, conditional_type in conditional_attrs:
                    attr_value = tag.attrib.get(attr_name)
                    if attr_value:
                        tag_info["conditionals"] = {
                            "type": conditional_type,
                            "name": attr_value,
                        }
                        break
            outputs[tag.attrib["name"]] = tag_info
        elif _is_input_tag(tag):
            inputs[tag.attrib["name"]] = {
                "type": tag.tag,
                "value": tag.attrib["value"].lstrip("$"),
            }

        if tag.tag not in _LABEL_TAGS:
            continue

        # Document order guarantees the enclosing output tag was registered
        # before any of its nested label tags is visited.
        parent_name = _get_parent_output_tag_name(tag, outputs)
        if parent_name is None:
            continue

        actual_value = tag.attrib.get("alias") or tag.attrib.get("value")
        if not actual_value:
            logger.debug(
                'Inspecting tag %s... found no "value" or "alias" attributes.',
                etree.tostring(tag, encoding="unicode").strip()[:50],
            )
        else:
            labels[parent_name][actual_value] = dict(tag.attrib)

    for output_tag, tag_info in outputs.items():
        tag_info["inputs"] = []
        for input_tag_name in tag_info["to_name"]:
            if input_tag_name not in inputs:
                logger.warning(
                    "to_name=%s is specified for output tag name=%s, "
                    "but we can't find it among input tags",
                    input_tag_name,
                    output_tag,
                )
                continue
            tag_info["inputs"].append(inputs[input_tag_name])
        tag_info["labels"] = list(labels[output_tag])
        tag_info["labels_attrs"] = labels[output_tag]
    return outputs