code-philia · finitesimal · Jan 12, 2026 · Jan 12, 2026 · Jan 12, 2026 · Jan 12, 2026
diff --git a/halligan/halligan/stages/stage1.py b/halligan/halligan/stages/stage1.py
@@ -1,12 +1,65 @@
 import re
 import inspect
+import time
+import logging
+from typing import Callable
 
 import halligan.prompts as Prompts
 from halligan.agents import Agent
 from halligan.utils.layout import Frame
 from halligan.utils.constants import Stage
 from halligan.utils.logger import Trace
 
+logger = logging.getLogger(__name__)
+
+
+# Stage-specific exception types
+class StageError(Exception):
+    """Base class for every error type defined by this stage module."""
+
+class NetworkError(StageError):
+    """A failure that the network-error heuristic attributed to connectivity."""
+
+class ToolInvokeError(StageError):
+    """The agent returned no usable script, or running the script failed."""
+
+class ScriptSyntaxError(StageError):
+    """The agent-generated script is not syntactically valid Python."""
+
+class FatalStageError(StageError):
+    """Unrecoverable stage failure (nothing in the visible code raises it)."""
+
+
+def _is_network_error(exc: Exception) -> bool:
+    """Heuristically decide whether ``exc`` stems from a network problem.
+
+    Returns True for ``ConnectionError`` / ``TimeoutError`` instances, or
+    when the lower-cased exception message contains the word "network".
+    """
+    if isinstance(exc, (ConnectionError, TimeoutError)):
+        return True
+    message = str(exc).lower()
+    return "network" in message
+
+
+def _run_with_retry(func: Callable, retries: int = 3, backoff: float = 0.5, retry_on=None):
+    """Call ``func`` until it succeeds or the attempt budget is used up.
+
+    Args:
+        func: Zero-argument callable to invoke.
+        retries: Total number of attempts; values below 1 are treated as 1
+            so that ``func`` is always called at least once.
+        backoff: Base delay in seconds. After failed attempt ``k`` the
+            helper sleeps ``backoff * k`` before trying again.
+        retry_on: Exception class (or tuple of classes) worth retrying.
+            Defaults to ``(Exception,)``. An exception outside this set
+            ends the loop immediately but is still classified below.
+
+    Returns:
+        Whatever ``func`` returns on its first successful call.
+
+    Raises:
+        ScriptSyntaxError: Immediately, when ``func`` raises ``SyntaxError``
+            or ``ScriptSyntaxError``; re-running the same script cannot
+            change the outcome, so these are never retried.
+        NetworkError: When the last failure matches ``_is_network_error``.
+        ToolInvokeError: For any other final failure.
+    """
+    retry_on = retry_on or (Exception,)
+    attempts = max(1, retries)
+    last_exc = None
+    for attempt in range(1, attempts + 1):
+        try:
+            return func()
+        except (SyntaxError, ScriptSyntaxError) as e:
+            # Deterministic failure: surface it right away and keep its type
+            # instead of retrying and re-wrapping it as ToolInvokeError.
+            if isinstance(e, ScriptSyntaxError):
+                raise
+            raise ScriptSyntaxError(str(e)) from e
+        except Exception as e:
+            last_exc = e
+            label = "Network error" if _is_network_error(e) else "Error"
+            logger.warning("%s on attempt %d/%d: %s", label, attempt, attempts, e)
+
+            if attempt == attempts or not isinstance(e, retry_on):
+                break
+            time.sleep(backoff * attempt)
+
+    # Map the final failure onto the stage exception hierarchy.
+    if _is_network_error(last_exc):
+        raise NetworkError(str(last_exc)) from last_exc
+    raise ToolInvokeError(str(last_exc)) from last_exc
+
 
 stage = Stage.OBJECTIVE_IDENTIFICATION
 
@@ -55,14 +108,44 @@ def objective(description: str):
     )
     print(prompt)
 
-    # Request script from agent
+    # Request script from agent (with retries on network/tool errors)
     images = [frame.image for frame in frames]
     image_captions = [f"Frame {i}" for i in range(len(frames))]
-    response, _ = agent(prompt, images, image_captions)
-    script = get_script(response)
+
+    def request_and_extract():
+        """Query the agent once and return the script extracted from its reply.
+
+        Raises ToolInvokeError when no script could be extracted, which lets
+        the surrounding retry helper issue a fresh request.
+        """
+        reply = agent(prompt, images, image_captions)
+        # The agent may hand back (response, metadata); only the first item is used.
+        reply_text = reply[0] if isinstance(reply, (tuple, list)) else reply
+        extracted = get_script(reply_text)
+        if extracted:
+            return extracted
+        raise ToolInvokeError("Agent returned no python script block")
+
+    script = _run_with_retry(request_and_extract, retries=3, backoff=0.7)
     print(script)
 
-    # Execute response script
-    exec(script, tools, {})
+    # Execute response script (parse first to give clear syntax errors)
+    def exec_script():
+        """Check the script's syntax, then execute it against a copy of ``tools``.
+
+        Raises ScriptSyntaxError when the script does not parse and
+        ToolInvokeError when executing it raises; returns True otherwise.
+        """
+        import ast
+
+        # Parse up front so malformed scripts are reported as syntax errors.
+        try:
+            ast.parse(script)
+        except SyntaxError as parse_err:
+            raise ScriptSyntaxError(str(parse_err)) from parse_err
+
+        # Execute with a shallow copy of the tools mapping as globals.
+        try:
+            exec(script, dict(tools), {})
+        except Exception as run_err:
+            raise ToolInvokeError(str(run_err)) from run_err
+        return True
+
+    _run_with_retry(exec_script, retries=2, backoff=0.5)
     agent.reset()
     return task_objective
diff --git a/halligan/halligan/stages/stage2.py b/halligan/halligan/stages/stage2.py
@@ -1,6 +1,8 @@
 import re
 import ast
-from typing import List
+import time
+import logging
+from typing import List, Callable
 from textwrap import indent
 
 import halligan.prompts as Prompts
@@ -10,6 +12,47 @@
 from halligan.utils.layout import Frame, Element, get_observation
 from halligan.utils.logger import Trace
 
+logger = logging.getLogger(__name__)
+
+
+# Stage-specific exceptions (kept local for stage files)
+class StageError(Exception):
+    """Root of the exception types defined locally in this stage file."""
+
+class NetworkError(StageError):
+    """A failure that the network-error heuristic attributed to connectivity."""
+
+class ToolInvokeError(StageError):
+    """The agent returned no usable code, or running that code failed."""
+
+class ScriptSyntaxError(StageError):
+    """The agent-generated script is not syntactically valid Python."""
+
+def _is_network_error(exc: Exception) -> bool:
+    """Return True when ``exc`` looks like a network failure.
+
+    Matches ``ConnectionError`` / ``TimeoutError`` by type, and anything
+    else by the substring "network" in its lower-cased message.
+    """
+    network_types = (ConnectionError, TimeoutError)
+    if isinstance(exc, network_types):
+        return True
+    return "network" in str(exc).lower()
+
+
+def _run_with_retry(func: Callable, retries: int = 3, backoff: float = 0.5):
+    """Call ``func`` until it succeeds or the attempt budget is used up.
+
+    Args:
+        func: Zero-argument callable to invoke.
+        retries: Total number of attempts; values below 1 are treated as 1
+            so that ``func`` is always called at least once.
+        backoff: Base delay in seconds. After failed attempt ``k`` the
+            helper sleeps ``backoff * k`` before trying again.
+
+    Returns:
+        Whatever ``func`` returns on its first successful call.
+
+    Raises:
+        ScriptSyntaxError: Immediately, when ``func`` raises ``SyntaxError``
+            or ``ScriptSyntaxError``; re-running the same script cannot
+            change the outcome, so these are never retried.
+        NetworkError: When the last failure matches ``_is_network_error``.
+        ToolInvokeError: For any other final failure.
+    """
+    attempts = max(1, retries)
+    last_exc = None
+    for attempt in range(1, attempts + 1):
+        try:
+            return func()
+        except (SyntaxError, ScriptSyntaxError) as e:
+            # Deterministic failure: surface it right away and keep its type
+            # instead of retrying and re-wrapping it as ToolInvokeError.
+            if isinstance(e, ScriptSyntaxError):
+                raise
+            raise ScriptSyntaxError(str(e)) from e
+        except Exception as e:
+            last_exc = e
+            label = "Network error" if _is_network_error(e) else "Error"
+            logger.warning("%s attempt %d/%d: %s", label, attempt, attempts, e)
+            if attempt == attempts:
+                break
+            time.sleep(backoff * attempt)
+
+    # Map the final failure onto the stage exception hierarchy.
+    if _is_network_error(last_exc):
+        raise NetworkError(str(last_exc)) from last_exc
+    raise ToolInvokeError(str(last_exc)) from last_exc
+
 
 stage = Stage.STRUCTURE_ABSTRACTION
 
@@ -66,13 +109,39 @@ def get_script(response: str) -> list[str]:
     )
     print(prompt)
 
-    # Request script from agent
-    response, _ = agent(prompt, images, image_captions)
-    script = get_script(response)
+    # Request script from agent with retries
+    def request_script():
+        """Query the agent once and return the code extracted from its reply.
+
+        Raises ToolInvokeError when the reply yields no code.
+        """
+        reply = agent(prompt, images, image_captions)
+        # A tuple/list reply carries the response text in its first slot.
+        reply_text = reply[0] if isinstance(reply, (tuple, list)) else reply
+        extracted = get_script(reply_text)
+        if extracted:
+            return extracted
+        raise ToolInvokeError("Agent returned no python code block for structure_abstraction")
+
+    script = _run_with_retry(request_script, retries=3, backoff=0.6)
     print(script)
 
-    # Execute response script
-    env = {}
-    exec(script, toolkit.dependencies, env)
-    env["process"](frames)
+    # Execute response script safely
+    def exec_script():
+        """Validate, execute, and run the agent script's ``process(frames)``.
+
+        Raises ScriptSyntaxError when the script does not parse, and
+        ToolInvokeError when executing it raises or when it defines no
+        callable named ``process``. Returns True on success.
+        """
+        # Parse up front so malformed scripts are reported as syntax errors.
+        try:
+            ast.parse(script)
+        except SyntaxError as parse_err:
+            raise ScriptSyntaxError(str(parse_err)) from parse_err
+
+        namespace = {}
+        try:
+            exec(script, toolkit.dependencies, namespace)
+        except Exception as run_err:
+            raise ToolInvokeError(str(run_err)) from run_err
+
+        # A missing key yields None, which is not callable either.
+        process = namespace.get("process")
+        if not callable(process):
+            raise ToolInvokeError("Agent script did not define a callable 'process(frames)'")
+
+        process(frames)
+        return True
+
+    _run_with_retry(exec_script, retries=2, backoff=0.4)
     agent.reset()
diff --git a/halligan/halligan/stages/stage3.py b/halligan/halligan/stages/stage3.py
@@ -1,6 +1,9 @@
 import re
 import ast
+import time
+import logging
 from textwrap import indent
+from typing import Callable
 
 import halligan.prompts as Prompts
 import halligan.utils.examples as Examples
@@ -12,6 +15,48 @@
 from halligan.utils.vision_tools import vision_toolkits
 from halligan.utils.layout import Frame, get_observation
 
+logger = logging.getLogger(__name__)
+
+
+# local stage exceptions
+class StageError(Exception):
+    """Root of the exception types defined locally in this stage file."""
+
+class NetworkError(StageError):
+    """A failure that the network-error heuristic attributed to connectivity."""
+
+class ToolInvokeError(StageError):
+    """The agent returned no usable script, or no working script was produced."""
+
+class ScriptSyntaxError(StageError):
+    """The agent-generated script is not syntactically valid Python."""
+
+
+def _is_network_error(exc: Exception) -> bool:
+    """Tell whether ``exc`` should be treated as a network failure.
+
+    True for ``ConnectionError`` / ``TimeoutError`` instances; otherwise
+    True only if the lower-cased message mentions "network".
+    """
+    typed_match = isinstance(exc, (ConnectionError, TimeoutError))
+    if typed_match:
+        return True
+    return "network" in str(exc).lower()
+
+
+def _run_with_retry(func: Callable, retries: int = 3, backoff: float = 0.5):
+    """Call ``func`` until it succeeds or the attempt budget is used up.
+
+    Args:
+        func: Zero-argument callable to invoke.
+        retries: Total number of attempts; values below 1 are treated as 1
+            so that ``func`` is always called at least once.
+        backoff: Base delay in seconds. After failed attempt ``k`` the
+            helper sleeps ``backoff * k`` before trying again.
+
+    Returns:
+        Whatever ``func`` returns on its first successful call.
+
+    Raises:
+        ScriptSyntaxError: Immediately, when ``func`` raises ``SyntaxError``
+            or ``ScriptSyntaxError``; re-running the same script cannot
+            change the outcome, so these are never retried.
+        NetworkError: When the last failure matches ``_is_network_error``.
+        ToolInvokeError: For any other final failure.
+    """
+    attempts = max(1, retries)
+    last_exc = None
+    for attempt in range(1, attempts + 1):
+        try:
+            return func()
+        except (SyntaxError, ScriptSyntaxError) as e:
+            # Deterministic failure: surface it right away and keep its type
+            # instead of retrying and re-wrapping it as ToolInvokeError.
+            if isinstance(e, ScriptSyntaxError):
+                raise
+            raise ScriptSyntaxError(str(e)) from e
+        except Exception as e:
+            last_exc = e
+            label = "Network error" if _is_network_error(e) else "Error"
+            logger.warning("%s attempt %d/%d: %s", label, attempt, attempts, e)
+            if attempt == attempts:
+                break
+            time.sleep(backoff * attempt)
+
+    # Map the final failure onto the stage exception hierarchy.
+    if _is_network_error(last_exc):
+        raise NetworkError(str(last_exc)) from last_exc
+    raise ToolInvokeError(str(last_exc)) from last_exc
+
 
 stage = Stage.SOLUTION_COMPOSITION
 
@@ -78,26 +123,67 @@ def execute_script(script: str, dependencies: dict):
     )
     print(prompt)
 
-    # Request script from agent 
+    # Request script from agent (retries on network errors)
+    def request_script():
+        """Query the agent once and return the script extracted from its reply.
+
+        Raises ToolInvokeError when the reply yields no script.
+        """
+        reply = agent(prompt, images, image_captions)
+        # A tuple/list reply carries the response text in its first slot.
+        reply_text = reply[0] if isinstance(reply, (tuple, list)) else reply
+        extracted = get_script(reply_text)
+        if extracted:
+            return extracted
+        raise ToolInvokeError("Agent returned no python script block")
+
     try:
-        response, _ = agent(prompt, images, image_captions)
-        script = get_script(response)
+        script = _run_with_retry(request_script, retries=3, backoff=0.6)
         print(script)
-        execute_script(script, dependencies)
-
-    except Exception as e:
-        feedback = e
-
-        for _ in range(3):
-            try:
-                print(feedback)
-                response, _ = agent(f"Your code has errors, please fix it.\n{feedback}")
-                script = get_script(response)
-                print(script)
-                execute_script(script, dependencies)
-                break
 
-            except Exception as e:
-                feedback = e
+        # try executing; if execution fails due to code errors, ask agent to fix
+        try:
+            # validate syntax
+            ast.parse(script)
+        except SyntaxError as e:
+            raise ScriptSyntaxError(str(e)) from e
+
+        try:
+            execute_script(script, dependencies)
+        except Exception as e:
+            feedback = e
+            # ask agent to fix up to 3 times
+            for attempt in range(1, 4):
+                logger.info(f"Asking agent to fix code (attempt {attempt}/3): {feedback}")
+                def request_fix():
+                    """Send the current error feedback to the agent and return its reply text."""
+                    reply = agent(f"Your code has errors, please fix it.\n{feedback}", images, image_captions)
+                    # A tuple/list reply carries the response text in its first slot.
+                    return reply[0] if isinstance(reply, (tuple, list)) else reply
+
+                fixed_resp = _run_with_retry(request_fix, retries=2, backoff=0.5)
+                fixed_script = get_script(fixed_resp)
+                if not fixed_script:
+                    feedback = ToolInvokeError("Agent did not return a python script when asked to fix code")
+                    continue
+
+                try:
+                    ast.parse(fixed_script)
+                except SyntaxError as e:
+                    feedback = ScriptSyntaxError(str(e))
+                    continue
+
+                try:
+                    execute_script(fixed_script, dependencies)
+                    script = fixed_script
+                    break
+                except Exception as e:
+                    feedback = e
+                    continue
+            else:
+                # attempted fixes exhausted
+                raise ToolInvokeError(f"Agent failed to produce a working script: {feedback}") from feedback
 
-    agent.reset()
+    finally:
+        try:
+            agent.reset()
+        except Exception:
+            logger.debug("Failed to reset agent after solution_composition")