Overview

The Aden SDK provides real-time cost control through the control server. You can set budgets, throttle requests, degrade to cheaper models, and block requests when limits are exceeded.

Control Actions

Action      Effect                        Use Case
allow       Request proceeds normally     Within budget
block       Request rejected with error   Budget exhausted
throttle    Request delayed               Rate limiting
degrade     Switch to cheaper model       Approaching budget
alert       Proceed with notification     Warning threshold

Setup

Connect to the control server:
import os
from aden import instrument, MeterOptions

instrument(MeterOptions(
    api_key=os.environ["ADEN_API_KEY"],
    server_url=os.environ.get("ADEN_API_URL"),

    # Track usage per user for individual budgets
    get_context_id=lambda: get_current_user_id(),

    # Handle alerts
    on_alert=lambda alert: print(f"[{alert.level}] {alert.message}"),

    # What to do if control server is unreachable
    fail_open=True,  # Allow requests (default)
))

Budget Configuration

Set budgets via the control server API:

Per-User Budget

curl -X POST https://kube.acho.io/v1/control/policy/budgets \
  -H "Authorization: Bearer $ADEN_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "context_id": "user_123",
    "limit_usd": 10.00,
    "period": "monthly",
    "action_on_exceed": "block"
  }'

Global Budget

curl -X POST https://kube.acho.io/v1/control/policy/budgets \
  -H "Authorization: Bearer $ADEN_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "limit_usd": 1000.00,
    "period": "monthly",
    "action_on_exceed": "alert"
  }'
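
Budgets can also be created from Python. A minimal sketch using the requests library against the endpoint shown above; the set_budget helper and its defaults are illustrative, mirroring the curl payloads, and are not part of the SDK:
import os
import requests

def set_budget(limit_usd: float, period: str = "monthly",
               action_on_exceed: str = "block", context_id: str | None = None):
    """Create a budget policy; omit context_id for a global budget."""
    payload = {
        "limit_usd": limit_usd,
        "period": period,
        "action_on_exceed": action_on_exceed,
    }
    if context_id is not None:
        payload["context_id"] = context_id

    resp = requests.post(
        "https://kube.acho.io/v1/control/policy/budgets",
        headers={"Authorization": f"Bearer {os.environ['ADEN_API_KEY']}"},
        json=payload,
        timeout=10,
    )
    resp.raise_for_status()

# Per-user monthly cap that blocks, plus a global alert-only cap
set_budget(10.00, context_id="user_123")
set_budget(1000.00, action_on_exceed="alert")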

Model Degradation

Automatically switch to cheaper models when spend approaches the budget limit:
curl -X POST https://kube.acho.io/v1/control/policy/degradations \
  -H "Authorization: Bearer $ADEN_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{
    "from_model": "gpt-4o",
    "to_model": "gpt-4o-mini",
    "trigger": "budget_threshold",
    "threshold_percent": 80,
    "context_id": "user_123"
  }'

Local Cost Control

For testing or serverless environments, implement local control with before_request:
from aden import instrument, MeterOptions, BeforeRequestResult

# Track costs locally
user_costs: dict[str, float] = {}

def before_request(request) -> BeforeRequestResult:
    user_id = get_current_user_id()
    current_cost = user_costs.get(user_id, 0)

    # Block if over budget
    if current_cost >= 10.0:
        return BeforeRequestResult.cancel("Monthly budget exceeded")

    # Degrade expensive models when approaching budget
    if current_cost >= 8.0 and request.model == "gpt-4o":
        return BeforeRequestResult.degrade(
            to_model="gpt-4o-mini",
            reason="Approaching budget limit",
        )

    # Alert once when halfway through budget (check this before throttling
    # so the one-time alert is not skipped)
    if current_cost >= 5.0 and not has_alerted_user(user_id):
        return BeforeRequestResult.alert(
            message="50% of monthly budget used",
            level="warning",
        )

    # Throttle during high-cost periods
    if current_cost >= 5.0:
        return BeforeRequestResult.throttle(delay_ms=1000)

    return BeforeRequestResult.proceed()
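

# A minimal emitter sketch that keeps user_costs current. It assumes the
# metric event exposes `model` and `usage` with input_tokens/output_tokens
# (as in the complete example below); prices are illustrative USD per 1K tokens.
PRICES = {
    "gpt-4o": {"input": 0.005, "output": 0.015},
    "gpt-4o-mini": {"input": 0.00015, "output": 0.0006},
}

def my_emitter(event) -> None:
    if not event.usage:
        return
    prices = PRICES.get(event.model, {"input": 0, "output": 0})
    cost = (
        event.usage.input_tokens * prices["input"]
        + event.usage.output_tokens * prices["output"]
    ) / 1000
    user_id = get_current_user_id()
    user_costs[user_id] = user_costs.get(user_id, 0.0) + cost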


instrument(MeterOptions(
    emit_metric=my_emitter,
    before_request=before_request,
))

BeforeRequestResult Types

from aden import BeforeRequestResult

# Allow the request
BeforeRequestResult.proceed()

# Delay the request
BeforeRequestResult.throttle(delay_ms=1000)

# Block the request
BeforeRequestResult.cancel(reason="Budget exceeded")

# Use a cheaper model
BeforeRequestResult.degrade(
    to_model="gpt-4o-mini",
    reason="Cost optimization",
)

# Send alert and proceed
BeforeRequestResult.alert(
    message="High usage detected",
    level="warning",  # "info", "warning", "critical"
)

Handling Blocked Requests

When a request is blocked, a RequestCancelledError is raised:
from aden import RequestCancelledError

try:
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Hello"}],
    )
except RequestCancelledError as e:
    print(f"Request blocked: {e.reason}")
    # Show user-friendly message

Handling Degradation

When a model is degraded, the request proceeds with the cheaper model:
response = client.chat.completions.create(
    model="gpt-4o",  # Requested model
    messages=[{"role": "user", "content": "Hello"}],
)

# Check actual model used (may differ if degraded)
print(response.model)  # Might be "gpt-4o-mini"

Alert Handling

Configure alert callbacks:
def handle_alert(alert):
    if alert.level == "critical":
        pagerduty.trigger(alert.message)
    elif alert.level == "warning":
        slack.post_message("#llm-alerts", alert.message)
    else:
        print(f"[Info] {alert.message}")

instrument(MeterOptions(
    emit_metric=my_emitter,
    on_alert=handle_alert,
))

Complete Local Policy Engine

A full example implementing local cost control:
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from aden import instrument, MeterOptions, BeforeRequestResult, MetricEvent


@dataclass
class UserBudget:
    limit_usd: float
    spent_usd: float = 0.0
    period_start: datetime = field(default_factory=datetime.now)


class LocalPolicyEngine:
    def __init__(self):
        self.budgets: dict[str, UserBudget] = {}
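        # USD per 1K tokens (illustrative; update to match current provider pricing)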
        self.model_costs = {
            "gpt-4o": {"input": 0.005, "output": 0.015},
            "gpt-4o-mini": {"input": 0.00015, "output": 0.0006},
            "claude-3-5-sonnet-latest": {"input": 0.003, "output": 0.015},
        }

    def set_budget(self, user_id: str, limit_usd: float):
        self.budgets[user_id] = UserBudget(limit_usd=limit_usd)

    def before_request(self, request) -> BeforeRequestResult:
        user_id = get_current_user_id()
        budget = self.budgets.get(user_id)

        if not budget:
            return BeforeRequestResult.proceed()

        # Reset monthly budget
        if datetime.now() - budget.period_start > timedelta(days=30):
            budget.spent_usd = 0
            budget.period_start = datetime.now()

        usage_percent = (budget.spent_usd / budget.limit_usd) * 100

        # Block at 100%
        if usage_percent >= 100:
            return BeforeRequestResult.cancel("Budget exceeded")

        # Degrade at 80%
        if usage_percent >= 80 and request.model in ["gpt-4o", "claude-3-5-sonnet-latest"]:
            return BeforeRequestResult.degrade(
                to_model="gpt-4o-mini",
                reason=f"Budget at {usage_percent:.0f}%",
            )

        # Throttle at 50%
        if usage_percent >= 50:
            return BeforeRequestResult.throttle(delay_ms=500)

        return BeforeRequestResult.proceed()

    def track_usage(self, event: MetricEvent):
        user_id = get_current_user_id()
        budget = self.budgets.get(user_id)

        if budget and event.usage:
            costs = self.model_costs.get(event.model, {"input": 0, "output": 0})
            cost = (
                event.usage.input_tokens * costs["input"] +
                event.usage.output_tokens * costs["output"]
            ) / 1000
            budget.spent_usd += cost


# Usage
engine = LocalPolicyEngine()
engine.set_budget("user_123", limit_usd=10.00)

instrument(MeterOptions(
    emit_metric=engine.track_usage,
    before_request=engine.before_request,
    get_context_id=get_current_user_id,
))

Next Steps