Foundry - Anomalous token / cost spike per agent

Foundry Token Cost Spike

Query

// Foundry token / cost spike detection, evaluated per agent.
// Hourly token totals inside the recent window are compared with each agent's
// own hourly totals from the rest of the lookback period.
let lookback = 7d;
let recentWindow = 1h;
// First hour bucket treated as "recent"; every earlier bucket feeds the baseline.
let recentStart = bin(now(), 1h) - recentWindow;
// One row per (Agent, Hour): tokens consumed, number of calls, one sample model name.
let hourlyUsage =
    AppDependencies
    | where TimeGenerated > ago(lookback)
    | where isnotempty(Properties["gen_ai.agent.name"])
    | project
        TimeGenerated,
        Agent      = tostring(Properties["gen_ai.agent.name"]),
        Model      = tostring(Properties["gen_ai.request.model"]),
        // A missing input or output token count contributes 0 to the call total.
        CallTokens = coalesce(tolong(Properties["gen_ai.usage.input_tokens"]), 0)
                   + coalesce(tolong(Properties["gen_ai.usage.output_tokens"]), 0)
    | summarize
        HourTokens = sum(CallTokens),
        HourRuns   = count(),
        AnyModel   = take_any(Model)
        by Agent, Hour = bin(TimeGenerated, 1h);
// Per-agent median and 95th percentile of hourly tokens, excluding the recent window.
let agentBaseline =
    hourlyUsage
    | where Hour < recentStart
    | summarize
        MedianHourTokens = percentile(HourTokens, 50),
        P95HourTokens    = percentile(HourTokens, 95)
        by Agent;
hourlyUsage
| where Hour >= recentStart
// Volume floor: only hours above 50,000 tokens can be reported.
| where HourTokens > 50000
| join kind=leftouter agentBaseline on Agent
// Agents without any baseline row get 0.0 for both statistics.
| extend
    MedianHourTokens = coalesce(todouble(MedianHourTokens), 0.0),
    P95HourTokens    = coalesce(todouble(P95HourTokens), 0.0)
// With a non-positive median the ratio falls back to the raw hourly token count.
| extend SpikeRatio = iff(MedianHourTokens <= 0, todouble(HourTokens), todouble(HourTokens) / MedianHourTokens)
| where SpikeRatio >= 3.0 or HourTokens > P95HourTokens * 2
| project
    Hour,
    // Defensive fallback; the agent-name filter above should already exclude empty names.
    AccountName = iff(isempty(Agent), "unknown-agent", Agent),
    Agent,
    Model = AnyModel,
    HourRuns, HourTokens, MedianHourTokens, P95HourTokens, SpikeRatio
| order by SpikeRatio desc

Explanation

This query detects unusual spikes in token usage by agents in a system called Foundry. It totals the tokens each agent consumed per hour in the most recent window (the last completed hour plus the current, partial hour) and compares them with that agent's typical hourly usage over the past seven days. An agent is flagged when its hourly token usage is at least three times its median hourly usage, or more than twice its 95th-percentile hourly usage, from the past week. This helps identify cases of token abuse, infinite loops in agent operations, or cost-related attacks. The query filters out low-traffic agents by requiring more than 50,000 tokens in the hour, to avoid noise; an agent with no baseline history is flagged as soon as it exceeds that threshold. It uses data from Application Insights (the AppDependencies table) and generates alerts if any anomalies are detected. The results are sorted by the severity of the spike (highest spike ratio first), and incidents are created for further investigation.

Details

David Alonso

Released: June 8, 2026

Tables

AppDependencies

Keywords

Foundry, Agent, Token, Consumption, AppDependencies, Properties, Model, Tokens, Hour, AccountName, CloudApplication, AI, OWASP, LLM

Operators

let, ago, isnotempty, tostring, tolong, coalesce, summarize, sum, count, take_any, bin, now, percentile, join, kind=leftouter, todouble, iff, isempty, project, order by

Severity

Medium

Tactics

Impact

MITRE Techniques

T1496 T1499

Frequency: PT1H

Period: P7D

Actions

GitHub

KQL Search