-
Notifications
You must be signed in to change notification settings - Fork 1
Open
Labels
type:feature — New capability or enhancement
Description
Description:
@tg359 has made a method that can be used to convert a list of values into a hash in Python:
import hashlib
import json
from typing import Any, Union
from ladybug.epw import EPW
# OLD EXT COMFORT METHOD
def _create_hash(
    epw_file: Path,
    ground_material: Union[EnergyMaterial, EnergyMaterialVegetation],
    shade_material: Union[EnergyMaterial, EnergyMaterialVegetation],
) -> str:
    """Return a short, reproducible identifier for one simulation configuration.

    The identifier is derived from the weather file's name and the
    ``identifier`` attribute of both materials, so the same combination
    always produces the same string.

    Args:
        epw_file: Weather file path; only its ``name`` is used.
        ground_material: Ground material; only its ``identifier`` is used.
        shade_material: Shade material; only its ``identifier`` is used.

    Returns:
        A hexadecimal SHAKE-256 digest of the combined names.
    """
    signature = f"{epw_file.name}_{ground_material.identifier}_{shade_material.identifier}"
    encoded_signature = signature.encode("utf-8")
    digest = hashlib.shake_256(encoded_signature)
    # SHAKE digests take their output size in bytes: 10 bytes -> 20 hex characters.
    return digest.hexdigest(10)
# GENERIC METHOD + SUPPORTING METHODS
def deep_sort_dict(obj: Any, sort_lists: bool = True) -> Any:
    """Return ``obj`` with nested dictionaries (and optionally lists) in a stable order.

    Dictionaries are rebuilt with their items in ascending key order. Lists
    and tuples are always traversed, so dictionaries nested inside them are
    ordered too; the elements of a list or tuple are themselves reordered
    only when ``sort_lists`` is true. Element ordering uses each element's
    JSON serialisation as the sort key, which allows elements of different
    types to be compared with each other.

    Args:
        obj: The value to normalise. A dict, list or tuple is rebuilt as a
            new container; any other value is returned unchanged.
        sort_lists: When true, list and tuple elements are sorted. When
            false, their original order is kept but nested dictionaries
            inside them are still sorted.

    Returns:
        The normalised value: a new ``dict``, ``list`` or ``tuple`` for
        container inputs, otherwise ``obj`` itself.

    Raises:
        TypeError: If a dictionary contains keys that cannot be ordered
            against each other (for example a mix of ``int`` and ``str``).
    """
    if isinstance(obj, dict):
        # Keys are unique, so ordering by key alone is a total order and the
        # values never need to be comparable.
        return {
            key: deep_sort_dict(obj[key], sort_lists=sort_lists)
            for key in sorted(obj)
        }

    if isinstance(obj, (list, tuple)):
        # Always recurse: skipping this when sort_lists is false would leave
        # dictionaries nested inside sequences in their original key order.
        items = [deep_sort_dict(item, sort_lists=sort_lists) for item in obj]
        if sort_lists:
            # NOTE(review): AllPowerfulEncoder is not defined in the visible
            # snippet; it is presumably a json.JSONEncoder subclass provided
            # elsewhere in the module - confirm before merging.
            items.sort(
                key=lambda item: json.dumps(
                    item, sort_keys=True, cls=AllPowerfulEncoder
                )
            )
        return tuple(items) if isinstance(obj, tuple) else items

    return obj
def deterministic_hash(
    *values: Any,
    length: int = 16,
    algorithm: str = "md5",
    sort_lists: bool = True,
) -> int:
    """Create a deterministic integer hash from one or more values.

    This function produces consistent hash values across Python sessions,
    unlike the built-in hash() function which uses random salting.

    Each value is turned into a string, the strings are concatenated with
    no separator, and the UTF-8 encoding of the result is hashed. Because
    there is no separator, ``("ab", "c")`` and ``("a", "bc")`` produce the
    same hash; callers that need to distinguish such inputs should pass a
    single list instead.

    Args:
        *values (Any):
            One or more values to hash. ``dict`` and ``list`` values are
            canonicalised (recursively ordered, then serialised as JSON);
            every other value, including tuples, is converted with
            ``str()``.
        length (int, optional):
            Number of hexadecimal characters to use from the hash. Must be
            at least 1. If it exceeds the digest length of a fixed-size
            algorithm, the whole digest is used. Defaults to 16 (64 bits).
        algorithm (str, optional):
            Hash algorithm to use. Must be supported by hashlib.
            Common options: 'md5', 'sha1', 'sha256', 'sha512'.
            Variable-length algorithms ('shake_128', 'shake_256') are
            supported as well. Defaults to 'md5'.
        sort_lists (bool, optional):
            Whether list elements inside ``dict``/``list`` values are
            sorted before hashing, so that element order does not affect
            the result. Defaults to True.

    Returns:
        int:
            An integer hash value derived from the input values.

    Raises:
        ValueError:
            If the algorithm is not supported by hashlib, or if ``length``
            is smaller than 1.

    Example:
        >>> deterministic_hash("hello", "world", 123) == deterministic_hash("hello", "world", 123)
        True
        >>> deterministic_hash("test", length=8) < 16 ** 8
        True
    """
    # validate arguments up front so failures carry a clear message
    if algorithm not in hashlib.algorithms_available:
        raise ValueError(
            f"Hash algorithm '{algorithm}' not supported. "
            f"Available: {sorted(hashlib.algorithms_available)}"
        )
    if length < 1:
        # length == 0 would reach int("", 16); a negative length would slice
        # from the wrong end of the digest without any warning.
        raise ValueError(f"length must be at least 1, got {length}")

    # canonicalize each value
    canonicalized = []
    for value in values:
        if isinstance(value, (dict, list)):
            value = deep_sort_dict(value, sort_lists=sort_lists)
            # NOTE(review): AllPowerfulEncoder is not defined in the visible
            # snippet; presumably a json.JSONEncoder subclass - confirm.
            value = json.dumps(value, sort_keys=True, cls=AllPowerfulEncoder)
        canonicalized.append(str(value))
    concatenated = "".join(canonicalized)

    hasher = hashlib.new(algorithm)
    hasher.update(concatenated.encode("utf-8"))
    try:
        hex_digest = hasher.hexdigest()
    except TypeError:
        # Variable-length digests (shake_*) require an explicit size in
        # bytes; request just enough bytes to cover `length` hex characters.
        hex_digest = hasher.hexdigest((length + 1) // 2)
    return int(hex_digest[:length], 16)


# This would be most useful in toolkits like LadybugTools_Toolkit where ExternalComfort simulations require identifiers to be under 100 characters, but will be placed in this toolkit in case other tools require it.
Metadata
Metadata
Assignees
Labels
type:feature — New capability or enhancement