Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 88 additions & 5 deletions halligan/halligan/stages/stage1.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,65 @@
import re
import inspect
import time
import logging
from typing import Callable

import halligan.prompts as Prompts
from halligan.agents import Agent
from halligan.utils.layout import Frame
from halligan.utils.constants import Stage
from halligan.utils.logger import Trace

logger = logging.getLogger(__name__)


# Stage-specific exception types
class StageError(Exception):
    """Base class for the errors this stage module raises."""


class NetworkError(StageError):
    """Raised when a failure is classified as network-related."""


class ToolInvokeError(StageError):
    """Raised when the agent request or the agent's script fails to run."""


class ScriptSyntaxError(StageError):
    """Raised when the agent's script is not syntactically valid Python."""


class FatalStageError(StageError):
    """Stage error type for failures that should not be recovered from.

    NOTE(review): nothing visible in this file raises it yet; confirm the
    intended use with the author.
    """


def _is_network_error(exc: Exception) -> bool:
# heuristic: ConnectionError, TimeoutError or message
return isinstance(exc, (ConnectionError, TimeoutError)) or "network" in str(exc).lower()


def _run_with_retry(func: Callable, retries: int = 3, backoff: float = 0.5, retry_on=None):
    """Call *func* until it succeeds or the attempts are used up.

    Args:
        func: Zero-argument callable to run.
        retries: Total number of attempts. Values below 1 are treated as 1 so
            that *func* is always called at least once.
        backoff: Base delay in seconds; the sleep before the next attempt is
            ``backoff * attempt``.
        retry_on: Exception class or tuple of classes worth retrying.
            Defaults to ``(Exception,)``. An exception that does not match
            stops the loop immediately.

    Returns:
        Whatever *func* returns on its first successful call.

    Raises:
        ScriptSyntaxError: *func* raised ``SyntaxError`` or
            ``ScriptSyntaxError``. These are never retried, since running the
            same script again cannot fix its syntax.
        FatalStageError: *func* raised it; propagated without retrying.
        NetworkError: the last failure is classified as network-related.
        ToolInvokeError: any other failure. A ``StageError`` subclass raised
            by *func* is re-raised unchanged rather than re-wrapped.
    """
    retry_on = retry_on or (Exception,)
    # Failures that repeating the same call cannot fix.
    # NOTE(review): FatalStageError is treated as non-retryable based on its
    # name; confirm with the author.
    no_retry = (SyntaxError, ScriptSyntaxError, FatalStageError)
    attempts = max(1, retries)
    last_exc = None
    for attempt in range(1, attempts + 1):
        try:
            return func()
        except Exception as e:
            last_exc = e
            label = "Network error" if _is_network_error(e) else "Error"
            logger.warning("%s on attempt %d/%d: %s", label, attempt, attempts, e)

            if isinstance(e, no_retry) or not isinstance(e, retry_on):
                break
            if attempt == attempts:
                break
            time.sleep(backoff * attempt)

    # Translate the final failure into the stage's exception types.
    if isinstance(last_exc, SyntaxError):
        raise ScriptSyntaxError(str(last_exc)) from last_exc
    if isinstance(last_exc, (ScriptSyntaxError, FatalStageError)):
        raise last_exc
    if _is_network_error(last_exc):
        raise NetworkError(str(last_exc)) from last_exc
    if isinstance(last_exc, StageError):
        # Already a stage error: keep its concrete type and traceback.
        raise last_exc
    raise ToolInvokeError(str(last_exc)) from last_exc


stage = Stage.OBJECTIVE_IDENTIFICATION

Expand Down Expand Up @@ -55,14 +108,44 @@ def objective(description: str):
)
print(prompt)

# Request script from agent
# Request script from agent (with retries on network/tool errors)
images = [frame.image for frame in frames]
image_captions = [f"Frame {i}" for i in range(len(frames))]
response, _ = agent(prompt, images, image_captions)
script = get_script(response)

def request_and_extract():
    """Query the agent once and return the script extracted from its reply.

    Raises:
        ToolInvokeError: ``get_script`` produced nothing usable. Raising
            here lets the retry wrapper that calls this closure ask again.
    """
    reply = agent(prompt, images, image_captions)
    # The agent may return (response, metadata); only the response is needed.
    reply_text = reply[0] if isinstance(reply, (tuple, list)) else reply
    extracted = get_script(reply_text)
    if extracted:
        return extracted
    raise ToolInvokeError("Agent returned no python script block")

script = _run_with_retry(request_and_extract, retries=3, backoff=0.7)
print(script)

# Execute response script
exec(script, tools, {})
# Execute response script (parse first to give clear syntax errors)
def exec_script():
    """Check the script's syntax, then execute it against a copy of ``tools``.

    Returns:
        True when the script ran without raising.

    Raises:
        ScriptSyntaxError: ``ast.parse`` rejected the script.
        ToolInvokeError: anything raised while the script was executing.
    """
    # Imported locally, as the original block does.
    import ast

    # Parse first so syntax problems surface as their own error type.
    try:
        ast.parse(script)
    except SyntaxError as syntax_err:
        raise ScriptSyntaxError(str(syntax_err)) from syntax_err

    try:
        # exec gets a shallow copy of ``tools`` as globals, not ``tools`` itself.
        exec(script, dict(tools), {})
    except Exception as run_err:
        raise ToolInvokeError(str(run_err)) from run_err
    return True

_run_with_retry(exec_script, retries=2, backoff=0.5)
agent.reset()
return task_objective
85 changes: 77 additions & 8 deletions halligan/halligan/stages/stage2.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import re
import ast
from typing import List
import time
import logging
from typing import List, Callable
from textwrap import indent

import halligan.prompts as Prompts
Expand All @@ -10,6 +12,47 @@
from halligan.utils.layout import Frame, Element, get_observation
from halligan.utils.logger import Trace

logger = logging.getLogger(__name__)


# Stage-specific exceptions (kept local for stage files)
class StageError(Exception):
    """Base class for the errors this stage module raises."""


class NetworkError(StageError):
    """Raised when a failure is classified as network-related."""


class ToolInvokeError(StageError):
    """Raised when the agent request or the agent's script fails to run."""


class ScriptSyntaxError(StageError):
    """Raised when the agent's script is not syntactically valid Python."""

def _is_network_error(exc: Exception) -> bool:
return isinstance(exc, (ConnectionError, TimeoutError)) or "network" in str(exc).lower()


def _run_with_retry(func: Callable, retries: int = 3, backoff: float = 0.5):
    """Call *func* until it succeeds or the attempts are used up.

    Args:
        func: Zero-argument callable to run.
        retries: Total number of attempts. Values below 1 are treated as 1 so
            that *func* is always called at least once.
        backoff: Base delay in seconds; the sleep before the next attempt is
            ``backoff * attempt``.

    Returns:
        Whatever *func* returns on its first successful call.

    Raises:
        ScriptSyntaxError: *func* raised ``SyntaxError`` or
            ``ScriptSyntaxError``. These are never retried, since running the
            same script again cannot fix its syntax.
        NetworkError: the last failure is classified as network-related.
        ToolInvokeError: any other failure. A ``StageError`` subclass raised
            by *func* is re-raised unchanged rather than re-wrapped.
    """
    # Failures that repeating the same call cannot fix.
    no_retry = (SyntaxError, ScriptSyntaxError)
    attempts = max(1, retries)
    last_exc = None
    for attempt in range(1, attempts + 1):
        try:
            return func()
        except Exception as e:
            last_exc = e
            label = "Network error" if _is_network_error(e) else "Error"
            logger.warning("%s attempt %d/%d: %s", label, attempt, attempts, e)
            if isinstance(e, no_retry) or attempt == attempts:
                break
            time.sleep(backoff * attempt)

    # Translate the final failure into the stage's exception types.
    if isinstance(last_exc, SyntaxError):
        raise ScriptSyntaxError(str(last_exc)) from last_exc
    if isinstance(last_exc, ScriptSyntaxError):
        raise last_exc
    if _is_network_error(last_exc):
        raise NetworkError(str(last_exc)) from last_exc
    if isinstance(last_exc, StageError):
        # Already a stage error: keep its concrete type and traceback.
        raise last_exc
    raise ToolInvokeError(str(last_exc)) from last_exc


stage = Stage.STRUCTURE_ABSTRACTION

Expand Down Expand Up @@ -66,13 +109,39 @@ def get_script(response: str) -> list[str]:
)
print(prompt)

# Request script from agent
response, _ = agent(prompt, images, image_captions)
script = get_script(response)
# Request script from agent with retries
def request_script():
    """Query the agent once and return the code extracted from its reply.

    Raises:
        ToolInvokeError: ``get_script`` produced nothing usable. Raising
            here lets the retry wrapper that calls this closure ask again.
    """
    reply = agent(prompt, images, image_captions)
    # A tuple/list reply carries the response text in its first slot.
    reply_text = reply[0] if isinstance(reply, (tuple, list)) else reply
    extracted = get_script(reply_text)
    if extracted:
        return extracted
    raise ToolInvokeError("Agent returned no python code block for structure_abstraction")

script = _run_with_retry(request_script, retries=3, backoff=0.6)
print(script)

# Execute response script
env = {}
exec(script, toolkit.dependencies, env)
env["process"](frames)
# Execute response script safely
def exec_script():
    """Check the script's syntax, execute it, then call its ``process(frames)``.

    Returns:
        True when the script and its ``process`` function ran without raising.

    Raises:
        ScriptSyntaxError: ``ast.parse`` rejected the script.
        ToolInvokeError: executing the script raised, or it did not define a
            callable named ``process``.
    """
    try:
        ast.parse(script)
    except SyntaxError as syntax_err:
        raise ScriptSyntaxError(str(syntax_err)) from syntax_err

    # Names the script defines at top level are collected here.
    namespace = {}
    try:
        exec(script, toolkit.dependencies, namespace)
    except Exception as run_err:
        raise ToolInvokeError(str(run_err)) from run_err

    process_fn = namespace.get("process")
    if not callable(process_fn):
        raise ToolInvokeError("Agent script did not define a callable 'process(frames)'")

    # Errors raised by process() itself are not wrapped here; they propagate.
    process_fn(frames)
    return True

_run_with_retry(exec_script, retries=2, backoff=0.4)
agent.reset()
124 changes: 105 additions & 19 deletions halligan/halligan/stages/stage3.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import re
import ast
import time
import logging
from textwrap import indent
from typing import Callable

import halligan.prompts as Prompts
import halligan.utils.examples as Examples
Expand All @@ -12,6 +15,48 @@
from halligan.utils.vision_tools import vision_toolkits
from halligan.utils.layout import Frame, get_observation

logger = logging.getLogger(__name__)


# local stage exceptions
class StageError(Exception):
    """Base class for the errors this stage module raises."""


class NetworkError(StageError):
    """Raised when a failure is classified as network-related."""


class ToolInvokeError(StageError):
    """Raised when the agent request or the agent's script fails to run."""


class ScriptSyntaxError(StageError):
    """Raised when the agent's script is not syntactically valid Python."""


def _is_network_error(exc: Exception) -> bool:
return isinstance(exc, (ConnectionError, TimeoutError)) or "network" in str(exc).lower()


def _run_with_retry(func: Callable, retries: int = 3, backoff: float = 0.5):
    """Call *func* until it succeeds or the attempts are used up.

    Args:
        func: Zero-argument callable to run.
        retries: Total number of attempts. Values below 1 are treated as 1 so
            that *func* is always called at least once.
        backoff: Base delay in seconds; the sleep before the next attempt is
            ``backoff * attempt``.

    Returns:
        Whatever *func* returns on its first successful call.

    Raises:
        ScriptSyntaxError: *func* raised ``SyntaxError`` or
            ``ScriptSyntaxError``. These are never retried, since running the
            same script again cannot fix its syntax.
        NetworkError: the last failure is classified as network-related.
        ToolInvokeError: any other failure. A ``StageError`` subclass raised
            by *func* is re-raised unchanged rather than re-wrapped.
    """
    # Failures that repeating the same call cannot fix.
    no_retry = (SyntaxError, ScriptSyntaxError)
    attempts = max(1, retries)
    last_exc = None
    for attempt in range(1, attempts + 1):
        try:
            return func()
        except Exception as e:
            last_exc = e
            label = "Network error" if _is_network_error(e) else "Error"
            logger.warning("%s attempt %d/%d: %s", label, attempt, attempts, e)
            if isinstance(e, no_retry) or attempt == attempts:
                break
            time.sleep(backoff * attempt)

    # Translate the final failure into the stage's exception types.
    if isinstance(last_exc, SyntaxError):
        raise ScriptSyntaxError(str(last_exc)) from last_exc
    if isinstance(last_exc, ScriptSyntaxError):
        raise last_exc
    if _is_network_error(last_exc):
        raise NetworkError(str(last_exc)) from last_exc
    if isinstance(last_exc, StageError):
        # Already a stage error: keep its concrete type and traceback.
        raise last_exc
    raise ToolInvokeError(str(last_exc)) from last_exc


stage = Stage.SOLUTION_COMPOSITION

Expand Down Expand Up @@ -78,26 +123,67 @@ def execute_script(script: str, dependencies: dict):
)
print(prompt)

# Request script from agent
# Request script from agent (retries on network errors)
def request_script():
    """Query the agent once and return the script extracted from its reply.

    Raises:
        ToolInvokeError: ``get_script`` produced nothing usable. Raising
            here lets the retry wrapper that calls this closure ask again.
    """
    reply = agent(prompt, images, image_captions)
    # A tuple/list reply carries the response text in its first slot.
    reply_text = reply[0] if isinstance(reply, (tuple, list)) else reply
    extracted = get_script(reply_text)
    if extracted:
        return extracted
    raise ToolInvokeError("Agent returned no python script block")

try:
response, _ = agent(prompt, images, image_captions)
script = get_script(response)
script = _run_with_retry(request_script, retries=3, backoff=0.6)
print(script)
execute_script(script, dependencies)

except Exception as e:
feedback = e

for _ in range(3):
try:
print(feedback)
response, _ = agent(f"Your code has errors, please fix it.\n{feedback}")
script = get_script(response)
print(script)
execute_script(script, dependencies)
break

except Exception as e:
feedback = e
# try executing; if execution fails due to code errors, ask agent to fix
try:
# validate syntax
ast.parse(script)
except SyntaxError as e:
raise ScriptSyntaxError(str(e)) from e

try:
execute_script(script, dependencies)
except Exception as e:
feedback = e
# ask agent to fix up to 3 times
for attempt in range(1, 4):
logger.info(f"Asking agent to fix code (attempt {attempt}/3): {feedback}")
def request_fix():
    """Ask the agent to repair its script, passing along the latest error."""
    reply = agent(f"Your code has errors, please fix it.\n{feedback}", images, image_captions)
    # A tuple/list reply carries the response text in its first slot.
    return reply[0] if isinstance(reply, (tuple, list)) else reply

fixed_resp = _run_with_retry(request_fix, retries=2, backoff=0.5)
fixed_script = get_script(fixed_resp)
if not fixed_script:
feedback = ToolInvokeError("Agent did not return a python script when asked to fix code")
continue

try:
ast.parse(fixed_script)
except SyntaxError as e:
feedback = ScriptSyntaxError(str(e))
continue

try:
execute_script(fixed_script, dependencies)
script = fixed_script
break
except Exception as e:
feedback = e
continue
else:
# attempted fixes exhausted
raise ToolInvokeError(f"Agent failed to produce a working script: {feedback}") from feedback

agent.reset()
finally:
try:
agent.reset()
except Exception:
logger.debug("Failed to reset agent after solution_composition")
Loading