From 909e0439c2885f45210acf7c33de2f66b4859190 Mon Sep 17 00:00:00 2001 From: Victor Skvortsov Date: Mon, 26 Jan 2026 14:15:38 +0500 Subject: [PATCH] Set JobTerminationReason.INSTANCE_UNREACHABLE for unreachable on-demand instances --- .../server/background/tasks/process_running_jobs.py | 8 ++++++-- .../server/background/tasks/test_process_running_jobs.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/dstack/_internal/server/background/tasks/process_running_jobs.py b/src/dstack/_internal/server/background/tasks/process_running_jobs.py index 9de0fffcc..bcb35a089 100644 --- a/src/dstack/_internal/server/background/tasks/process_running_jobs.py +++ b/src/dstack/_internal/server/background/tasks/process_running_jobs.py @@ -370,8 +370,12 @@ async def _process_running_job(session: AsyncSession, job_model: JobModel): # No job_model.termination_reason set means ssh connection failed _set_disconnected_at_now(session, job_model) if _should_terminate_job_due_to_disconnect(job_model): - # TODO: Replace with JobTerminationReason.INSTANCE_UNREACHABLE for on-demand. - job_model.termination_reason = JobTerminationReason.INTERRUPTED_BY_NO_CAPACITY + if job_provisioning_data.instance_type.resources.spot: + job_model.termination_reason = ( + JobTerminationReason.INTERRUPTED_BY_NO_CAPACITY + ) + else: + job_model.termination_reason = JobTerminationReason.INSTANCE_UNREACHABLE job_model.termination_reason_message = "Instance is unreachable" switch_job_status(session, job_model, JobStatus.TERMINATING) else: diff --git a/src/tests/_internal/server/background/tasks/test_process_running_jobs.py b/src/tests/_internal/server/background/tasks/test_process_running_jobs.py index 601fbe1ee..9e318866c 100644 --- a/src/tests/_internal/server/background/tasks/test_process_running_jobs.py +++ b/src/tests/_internal/server/background/tasks/test_process_running_jobs.py @@ -532,7 +532,7 @@ async def test_pulling_shim_failed(self, test_db, session: AsyncSession): assert SSHTunnelMock.call_count == 3 await session.refresh(job) assert job.status == JobStatus.TERMINATING - assert job.termination_reason == JobTerminationReason.INTERRUPTED_BY_NO_CAPACITY + assert job.termination_reason == JobTerminationReason.INSTANCE_UNREACHABLE assert job.remove_at is None @pytest.mark.asyncio