"""
Manage data model schema files.
Functions to manage data model schema files and objects according to the requirements of the data reader tool.
"""
from __future__ import annotations
from pathlib import Path
from typing import Any, TypedDict, get_args
from cdm_reader_mapper.common.json_dict import collect_json_files, combine_dicts
from .. import properties
[docs]
class SectionDict(TypedDict, total=False):
    """
    Typed mapping describing one section of a report schema.

    Both keys are optional because the class is declared with
    ``total=False``.

    Attributes
    ----------
    header : dict, optional
        Settings that apply to the section as a whole.
    elements : dict, optional
        The elements (fields) that belong to the section, keyed by name.
    """

    # Section-level settings.
    header: dict[str, Any]
    # Element definitions of the section, keyed by element name.
    elements: dict[str, Any]
[docs]
class SchemaDict(TypedDict, total=False):
"""
Complete schema definition for a report.
Attributes
----------
header : SchemaHeaderDict, optional
Configuration for the report header.
sections : dict[str, SectionDict], optional
Mapping of section names to section schemas.
elements : dict, optional
Mapping of element names to their attributes.
name : list[Path], optional
List of Path objects representing schema files or sources.
imodel : str | None, optional
Name of the internal data model, if applicable.
"""
header: SchemaHeaderDict
sections: dict[str, SectionDict]
elements: dict[str, Any]
name: list[Path]
imodel: str | None
def _resolve_schema_files(
    *,
    imodel: str | None = None,
    ext_schema_path: str | Path | None = None,
    ext_schema_file: str | Path | None = None,
) -> list[Path]:
    """
    Work out which schema file(s) should be read.

    The inputs are checked in a fixed order of precedence:
    `ext_schema_file`, then `ext_schema_path`, then `imodel`. The first
    one that is truthy decides the result; the others are ignored.

    Parameters
    ----------
    imodel : str, optional
        Identifier of an internal data model. It is split on underscores;
        the first component must be one of the supported data models and
        all components are forwarded to ``collect_json_files``.
    ext_schema_path : str or Path, optional
        Directory holding an external schema. The schema file is expected
        inside it as ``<directory name>.json``.
    ext_schema_file : str or Path, optional
        Path of an external schema file.

    Returns
    -------
    list of Path-like
        A one-element list for the external options, or whatever
        ``collect_json_files`` returns for an internal model.

    Raises
    ------
    FileNotFoundError
        If the external schema file, or the file derived from the external
        schema directory, does not exist.
    ValueError
        If none of the three inputs is set, or the leading component of
        `imodel` is not a supported data model.
    """
    # An explicit file wins over every other option.
    if ext_schema_file:
        schema_file = Path(ext_schema_file)
        if schema_file.is_file():
            return [schema_file]
        raise FileNotFoundError(f"Can't find input schema file {ext_schema_file}")

    # A schema directory must contain a JSON file named after itself.
    if ext_schema_path:
        schema_dir = Path(ext_schema_path).resolve()
        schema_file = schema_dir / f"{schema_dir.name}.json"
        if schema_file.is_file():
            return [schema_file]
        raise FileNotFoundError(f"Can't find input schema path {ext_schema_path}")

    if not imodel:
        raise ValueError("One of 'imodel', 'ext_schema_path', or 'ext_schema_file' must be set")

    # Internal model: only the leading component is validated.
    name_parts = imodel.split("_")
    if name_parts[0] not in get_args(properties.SupportedDataModels):
        raise ValueError(f"Input data model {name_parts[0]} not supported")
    return collect_json_files(*name_parts, base=f"{properties._base}.schemas")
def _normalize_schema(schema: SchemaDict) -> SchemaDict:
    """
    Return a normalised copy of a schema with sections and a parsing order.

    A schema without sections gets its top-level elements wrapped in a
    single dummy section, and the top-level ``elements`` key is removed
    from the result. A header without a (truthy) ``parsing_order``
    receives a default one listing all section names in order. The input
    mapping itself is not modified.

    Parameters
    ----------
    schema : SchemaDict
        Raw schema coming from the file parser.

    Returns
    -------
    SchemaDict
        New dictionary holding the normalised schema.

    Raises
    ------
    KeyError
        If the schema has neither sections nor elements.
    """
    raw_header = schema.get("header", {})
    section_map = schema.get("sections")
    top_elements = schema.get("elements")

    # "header" and "sections" are placeholders here; both are overwritten
    # below but fix the key order of the result.
    normalized: SchemaDict = {
        "header": raw_header,
        "sections": schema.get("sections", {}),
        "elements": schema.get("elements", {}),
        "name": schema.get("name", []),
        "imodel": schema.get("imodel"),
    }

    if not section_map:
        if not top_elements:
            raise KeyError("Schema has no sections and no elements")
        # The dummy section inherits only the layout-related header keys.
        inherited_header: dict[str, Any] = {
            key: raw_header[key]
            for key in ("delimiter", "field_layout", "format")
            if key in raw_header
        }
        section_map = {
            properties.dummy_level: {
                "header": inherited_header,
                "elements": top_elements,
            }
        }
        # The elements now live inside the dummy section.
        del normalized["elements"]

    # Fall back to a single parsing step over all sections in order.
    parsing_order = raw_header.get("parsing_order") or [{"s": list(section_map)}]
    normalized["header"] = {**raw_header, "parsing_order": parsing_order}
    normalized["sections"] = section_map
    return normalized
[docs]
def read_schema(
    imodel: str | None = None,
    ext_schema_path: str | Path | None = None,
    ext_schema_file: str | Path | None = None,
) -> SchemaDict:
    """
    Read a data model schema and normalise it for the parser.

    The schema file(s) are located from the given inputs, combined into a
    single dictionary and then normalised so that the result always has
    sections and a parsing order.

    Parameters
    ----------
    imodel : str, optional
        Name of an internally available input data model,
        e.g. icoads_r300_d704.
    ext_schema_path : str or Path, optional
        Directory containing an external input data model schema file.
        The schema file must have the same name as the directory.
    ext_schema_file : str or Path, optional
        External input data model schema file.

    Returns
    -------
    SchemaDict
        Normalised data model schema. Its ``name`` entry holds the list
        of schema files that were read.

    Notes
    -----
    At least one of `imodel`, `ext_schema_path` or `ext_schema_file` must
    be set. If several are given, `ext_schema_file` takes precedence over
    `ext_schema_path`, which takes precedence over `imodel`.
    """
    sources: list[Any] = _resolve_schema_files(
        imodel=imodel,
        ext_schema_path=ext_schema_path,
        ext_schema_file=ext_schema_file,
    )
    combined = combine_dicts(sources, base=f"{properties._base}.schemas")

    # Keep only the known top-level keys that are actually present.
    selected: SchemaDict = {
        key: combined[key]
        for key in ("header", "sections", "elements", "imodel")
        if key in combined
    }
    # Record which files the schema was built from.
    selected["name"] = sources
    return _normalize_schema(selected)