Fix high latency from thundering herd at 0 microseconds (#82233)

* Fix high latency at 0 microseconds

fixes #82231

* fix async_track_utc_time_change alignment

* use replace to preserve fold

* naming

* tweak

* make async_fire_time_changed aware of the thundering herd issue
This commit is contained in:
J. Nick Koston 2022-11-17 13:22:06 -06:00 committed by GitHub
parent 0c887eab87
commit d0efdd750f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 32 additions and 3 deletions

View file

@ -8,6 +8,7 @@ from dataclasses import dataclass
from datetime import datetime, timedelta
import functools as ft
import logging
from random import randint
import time
from typing import Any, Union, cast
@ -60,6 +61,9 @@ _ENTITIES_LISTENER = "entities"
_LOGGER = logging.getLogger(__name__)
RANDOM_MICROSECOND_MIN = 50000
RANDOM_MICROSECOND_MAX = 500000
_P = ParamSpec("_P")
@ -1506,13 +1510,17 @@ def async_track_utc_time_change(
matching_seconds = dt_util.parse_time_expression(second, 0, 59)
matching_minutes = dt_util.parse_time_expression(minute, 0, 59)
matching_hours = dt_util.parse_time_expression(hour, 0, 23)
# Avoid aligning all time trackers to the same instant within a second
# (e.g. microsecond 0) since it can create a thundering herd problem
# https://github.com/home-assistant/core/issues/82231
microsecond = randint(RANDOM_MICROSECOND_MIN, RANDOM_MICROSECOND_MAX)
def calculate_next(now: datetime) -> datetime:
"""Calculate and set the next time the trigger should fire."""
localized_now = dt_util.as_local(now) if local else now
return dt_util.find_next_time_expression_time(
localized_now, matching_seconds, matching_minutes, matching_hours
)
).replace(microsecond=microsecond)
time_listener: CALLBACK_TYPE | None = None

View file

@ -5,6 +5,7 @@ import asyncio
from collections.abc import Awaitable, Callable, Coroutine, Generator
from datetime import datetime, timedelta
import logging
from random import randint
from time import monotonic
from typing import Any, Generic, TypeVar
import urllib.error
@ -61,6 +62,12 @@ class DataUpdateCoordinator(Generic[_T]):
# when it was already checked during setup.
self.data: _T = None # type: ignore[assignment]
# Pick a random microsecond to stagger the refreshes
# and avoid a thundering herd.
self._microsecond = randint(
event.RANDOM_MICROSECOND_MIN, event.RANDOM_MICROSECOND_MAX
)
self._listeners: dict[CALLBACK_TYPE, tuple[CALLBACK_TYPE, object | None]] = {}
self._job = HassJob(self._handle_refresh_interval)
self._unsub_refresh: CALLBACK_TYPE | None = None
@ -138,11 +145,17 @@ class DataUpdateCoordinator(Generic[_T]):
# We _floor_ utcnow to create a schedule on a rounded second,
# minimizing the time between the point and the real activation.
# That way we obtain a constant update frequency,
# as long as the update process takes less than a second
# as long as the update process takes less than 500ms
#
# We do not align everything to happen at microsecond 0
# since it increases the risk of a thundering herd
# when multiple coordinators are scheduled to update at the same time.
#
# https://github.com/home-assistant/core/issues/82231
self._unsub_refresh = event.async_track_point_in_utc_time(
self.hass,
self._job,
utcnow().replace(microsecond=0) + self.update_interval,
utcnow().replace(microsecond=self._microsecond) + self.update_interval,
)
async def _handle_refresh_interval(self, _now: datetime) -> None:

View file

@ -388,6 +388,14 @@ def async_fire_time_changed(
utc_datetime = date_util.utcnow()
else:
utc_datetime = date_util.as_utc(datetime_)
if utc_datetime.microsecond < 500000:
# Allow up to 500000 microseconds to be added to the time
# to handle update_coordinator's and
# async_track_time_interval's
# staggering to avoid thundering herd.
utc_datetime = utc_datetime.replace(microsecond=500000)
timestamp = date_util.utc_to_timestamp(utc_datetime)
for task in list(hass.loop._scheduled):