Skip to content

Added second-based scheduler intervals. #450

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion taskiq/api/scheduler.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
from datetime import timedelta
from typing import Optional

from taskiq.cli.scheduler.run import run_scheduler_loop
from taskiq.scheduler.scheduler import TaskiqScheduler


async def run_scheduler_task(
scheduler: TaskiqScheduler,
run_startup: bool = False,
interval: Optional[timedelta] = None,
) -> None:
"""
Run scheduler task.
Expand All @@ -20,4 +24,4 @@ async def run_scheduler_task(
if run_startup:
await scheduler.startup()
while True:
await run_scheduler_loop(scheduler)
await run_scheduler_loop(scheduler, interval)
10 changes: 10 additions & 0 deletions taskiq/cli/scheduler/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class SchedulerArgs:
fs_discover: bool = False
tasks_pattern: Sequence[str] = ("**/tasks.py",)
skip_first_run: bool = False
update_interval: Optional[int] = None

@classmethod
def from_cli(cls, args: Optional[Sequence[str]] = None) -> "SchedulerArgs":
Expand Down Expand Up @@ -80,6 +81,15 @@ def from_cli(cls, args: Optional[Sequence[str]] = None) -> "SchedulerArgs":
"This option skips running tasks immediately after scheduler start."
),
)
parser.add_argument(
"--update-interval",
type=int,
default=None,
help=(
"Interval in seconds to check for new tasks. "
"If not specified, scheduler will run once a minute."
),
)

namespace = parser.parse_args(args)
# If there are any patterns specified, remove default.
Expand Down
91 changes: 67 additions & 24 deletions taskiq/cli/scheduler/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import sys
from datetime import datetime, timedelta
from logging import basicConfig, getLevelName, getLogger
from typing import Dict, List, Optional
from typing import Any, Dict, List, Optional, Set, Tuple

import pytz
from pycron import is_now
Expand Down Expand Up @@ -55,7 +55,7 @@ async def get_schedules(source: ScheduleSource) -> List[ScheduledTask]:

async def get_all_schedules(
scheduler: TaskiqScheduler,
) -> Dict[ScheduleSource, List[ScheduledTask]]:
) -> List[Tuple[ScheduleSource, List[ScheduledTask]]]:
"""
Task to update all schedules.

Expand All @@ -71,7 +71,7 @@ async def get_all_schedules(
schedules = await asyncio.gather(
*[get_schedules(source) for source in scheduler.sources],
)
return dict(zip(scheduler.sources, schedules))
return list(zip(scheduler.sources, schedules))


def get_task_delay(task: ScheduledTask) -> Optional[int]:
Expand All @@ -98,12 +98,10 @@ def get_task_delay(task: ScheduledTask) -> Optional[int]:
task_time = to_tz_aware(task.time)
if task_time <= now:
return 0
one_min_ahead = (now + timedelta(minutes=1)).replace(second=1, microsecond=0)
if task_time <= one_min_ahead:
delay = task_time - now
if delay.microseconds:
return int(delay.total_seconds()) + 1
return int(delay.total_seconds())
delay = task_time - now
if delay.microseconds:
return int(delay.total_seconds()) + 1
return int(delay.total_seconds())
return None


Expand Down Expand Up @@ -145,21 +143,41 @@ async def delayed_send(
await scheduler.on_ready(source, task)


async def run_scheduler_loop(scheduler: TaskiqScheduler) -> None:
async def run_scheduler_loop( # noqa: C901
scheduler: TaskiqScheduler,
interval: Optional[timedelta] = None,
) -> None:
"""
Runs scheduler loop.

This function imports taskiq scheduler
and runs tasks when needed.

:param scheduler: current scheduler.
:param interval: interval to check for schedule updates.
"""
loop = asyncio.get_event_loop()
running_schedules = set()
running_schedules: Dict[str, asyncio.Task[Any]] = {}
ran_cron_jobs: Set[str] = set()
current_minute = datetime.now(tz=pytz.UTC).minute
while True:
# We use this method to correctly sleep for one minute.
now = datetime.now(tz=pytz.UTC)
# If minute changed, we need to clear
# ran_cron_jobs set and update current minute.
if now.minute != current_minute:
current_minute = now.minute
ran_cron_jobs.clear()
# If interval is not None, we need to
# calculate next run time using it.
if interval is not None:
next_run = now + interval
# otherwise we need assume that
# we will run it at the start of the next minute.
# as crontab does.
else:
next_run = (now + timedelta(minutes=1)).replace(second=1, microsecond=0)
scheduled_tasks = await get_all_schedules(scheduler)
for source, task_list in scheduled_tasks.items():
for source, task_list in scheduled_tasks:
logger.debug("Got %d schedules from source %s.", len(task_list), source)
for task in task_list:
try:
Expand All @@ -172,16 +190,37 @@ async def run_scheduler_loop(scheduler: TaskiqScheduler) -> None:
task.schedule_id,
)
continue
if task_delay is not None:
send_task = loop.create_task(
delayed_send(scheduler, source, task, task_delay),
)
running_schedules.add(send_task)
send_task.add_done_callback(running_schedules.discard)
next_minute = datetime.now().replace(second=0, microsecond=0) + timedelta(
minutes=1,
)
delay = next_minute - datetime.now()
# If task delay is None, we don't need to run it.
if task_delay is None:
continue
# If task is delayed for more than next_run,
# we don't need to run it, because we will
# run it in the next iteration.
if now + timedelta(seconds=task_delay) >= next_run:
continue
# If task is already running, we don't need to run it again.
if task.schedule_id in running_schedules and task_delay < 1:
continue
# If task is cron job, we need to check if
# we already ran it this minute.
if task.cron is not None:
if task.schedule_id in ran_cron_jobs:
continue
ran_cron_jobs.add(task.schedule_id)
send_task = loop.create_task(
delayed_send(scheduler, source, task, task_delay),
# We need to set the name of the task
# to be able to discard its reference
# after it is done.
name=f"schedule_{task.schedule_id}",
)
running_schedules[task.schedule_id] = send_task
send_task.add_done_callback(
lambda task_future: running_schedules.pop(
task_future.get_name().removeprefix("schedule_"),
),
)
delay = next_run - datetime.now(tz=pytz.UTC)
logger.debug(
"Sleeping for %.2f seconds before getting schedules.",
delay.total_seconds(),
Expand Down Expand Up @@ -226,6 +265,10 @@ async def run_scheduler(args: SchedulerArgs) -> None:
for source in scheduler.sources:
await source.startup()

interval = None
if args.update_interval:
interval = timedelta(seconds=args.update_interval)

logger.info("Starting scheduler.")
await scheduler.startup()
logger.info("Startup completed.")
Expand All @@ -239,7 +282,7 @@ async def run_scheduler(args: SchedulerArgs) -> None:
await asyncio.sleep(delay.total_seconds())
logger.info("First run skipped. The scheduler is now running.")
try:
await run_scheduler_loop(scheduler)
await run_scheduler_loop(scheduler, interval)
except asyncio.CancelledError:
logger.warning("Shutting down scheduler.")
await scheduler.shutdown()
Expand Down
84 changes: 46 additions & 38 deletions taskiq/schedule_sources/label_based.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import uuid
from logging import getLogger
from typing import List
from typing import Dict, List

from taskiq.abc.broker import AsyncBroker
from taskiq.abc.schedule_source import ScheduleSource
Expand All @@ -13,20 +14,26 @@ class LabelScheduleSource(ScheduleSource):

def __init__(self, broker: AsyncBroker) -> None:
self.broker = broker
self.schedules: Dict[str, ScheduledTask] = {}

async def get_schedules(self) -> List["ScheduledTask"]:
async def startup(self) -> None:
"""
Collect schedules for all tasks.

this function checks labels for all
tasks available to the broker.
Startup the schedule source.

This function iterates over all tasks
available to the broker and collects
schedules from their labels.
If task has a schedule label,
it will be parsed and returned.
it will be parsed and added to the
scheduler list.

:return: list of schedules.
Every time schedule is added, the random
schedule id is generated. Please be aware that
they are different for every startup.

:return: None
"""
schedules = []
self.schedules.clear()
for task_name, task in self.broker.get_all_tasks().items():
if task.broker != self.broker:
# if task broker doesn't match self, something is probably wrong
Expand All @@ -40,20 +47,36 @@ async def get_schedules(self) -> List["ScheduledTask"]:
continue
labels = schedule.get("labels", {})
labels.update(task.labels)
schedules.append(
ScheduledTask(
task_name=task_name,
labels=labels,
args=schedule.get("args", []),
kwargs=schedule.get("kwargs", {}),
cron=schedule.get("cron"),
time=schedule.get("time"),
cron_offset=schedule.get("cron_offset"),
),
schedule_id = uuid.uuid4().hex

self.schedules[schedule_id] = ScheduledTask(
task_name=task_name,
labels=labels,
schedule_id=schedule_id,
args=schedule.get("args", []),
kwargs=schedule.get("kwargs", {}),
cron=schedule.get("cron"),
time=schedule.get("time"),
cron_offset=schedule.get("cron_offset"),
)
return schedules

def post_send(self, scheduled_task: ScheduledTask) -> None:
return await super().startup()

async def get_schedules(self) -> List["ScheduledTask"]:
"""
Collect schedules for all tasks.

this function checks labels for all
tasks available to the broker.

If task has a schedule label,
it will be parsed and returned.

:return: list of schedules.
"""
return list(self.schedules.values())

def post_send(self, task: "ScheduledTask") -> None:
"""
Remove `time` schedule from task's scheduler list.

Expand All @@ -62,22 +85,7 @@ def post_send(self, scheduled_task: ScheduledTask) -> None:

:param scheduled_task: task that just have sent
"""
if scheduled_task.cron or not scheduled_task.time:
if task.cron or not task.time:
return # it's scheduled task with cron label, do not remove this trigger.

for task_name, task in self.broker.get_all_tasks().items():
if task.broker != self.broker:
# if task broker doesn't match self, something is probably wrong
logger.warning(
f"Broker for {task_name} `{task.broker}` doesn't "
f"match scheduler's broker `{self.broker}`",
)
continue
if scheduled_task.task_name != task_name:
continue

schedule_list = task.labels.get("schedule", []).copy()
for idx, schedule in enumerate(schedule_list):
if schedule.get("time") == scheduled_task.time:
task.labels.get("schedule", []).pop(idx)
return
self.schedules.pop(task.schedule_id, None)
14 changes: 11 additions & 3 deletions tests/cli/scheduler/test_task_delays.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


def test_should_run_success() -> None:
hour = datetime.datetime.utcnow().hour
hour = datetime.datetime.now(datetime.timezone.utc).hour
delay = get_task_delay(
ScheduledTask(
task_name="",
Expand Down Expand Up @@ -97,18 +97,26 @@ def test_time_utc_with_local_zone() -> None:
assert delay is not None and delay >= 0


@freeze_time("2023-01-14 12:00:00")
def test_time_localtime_without_zone() -> None:
time = datetime.datetime.now(tz=pytz.FixedOffset(240)).replace(tzinfo=None)
time_to_run = time - datetime.timedelta(seconds=1)

delay = get_task_delay(
ScheduledTask(
task_name="",
labels={},
args=[],
kwargs={},
time=time - datetime.timedelta(seconds=1),
time=time_to_run,
),
)
assert delay is None

expected_delay = time_to_run.replace(tzinfo=pytz.UTC) - datetime.datetime.now(
pytz.UTC,
)

assert delay == int(expected_delay.total_seconds())


@freeze_time("2023-01-14 12:00:00")
Expand Down
16 changes: 8 additions & 8 deletions tests/cli/scheduler/test_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,10 @@ async def test_get_schedules_success() -> None:
schedules = await get_all_schedules(
TaskiqScheduler(InMemoryBroker(), sources),
)
assert schedules == {
sources[0]: schedules1,
sources[1]: schedules2,
}
assert schedules == [
(sources[0], schedules1),
(sources[1], schedules2),
]


@pytest.mark.anyio
Expand All @@ -81,7 +81,7 @@ async def test_get_schedules_error() -> None:
schedules = await get_all_schedules(
TaskiqScheduler(InMemoryBroker(), [source1, source2]),
)
assert schedules == {
source1: source1.schedules,
source2: [],
}
assert schedules == [
(source1, source1.schedules),
(source2, []),
]
Loading
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy