Foundry - Session context contamination across spans

Foundry Session Context Contamination

Query

// Lower-case phrases that suggest an attempt to override or re-program the agent.
// Consumed with has_any, so each entry is matched as a whole phrase.
let injectionMarkers = dynamic([
    "ignore previous instructions",
    "disregard previous",
    "you are now",
    "developer mode",
    "do anything now",
    "reveal your system prompt",
    "bypass your rules",
    "forget the previous",
    "new instructions",
    "from now on you",
    "store this and remember",
    "next time the user asks"
]);
// Tool names / tool types treated as sensitive actions by this rule.
// The grouping below is by apparent purpose only; the list is used as one flat set.
let sensitiveTools = dynamic([
    // code and command execution
    "code_interpreter", "python", "shell", "bash", "powershell", "exec", "run_code",
    // outbound requests and messaging
    "http_request", "fetch", "invoke_url", "send_email", "send_message",
    // databases and files
    "execute_sql", "query_database", "file_write", "write_file", "upload",
    // resource changes
    "create_resource", "delete_resource", "azure_write", "deploy"
]);
// Allow-list of trusted tool sources, read from the FoundryTrustedToolSources watchlist.
// The detection compares these values against host names only (the part of a URL
// that matches [a-z0-9.-]+ after "http(s)://"), so every watchlist entry is reduced
// to the same shape here: surrounding whitespace is trimmed, the value is lower-cased,
// an optional scheme is skipped, and anything after the host (port, path, query) is
// dropped. Entries that are already bare host names pass through unchanged; entries
// written as full URIs now match instead of silently never matching.
// column_ifexists keeps the query valid if the watchlist lacks a SourceUri column;
// in that case the allow-list is simply empty.
let trusted =
    _GetWatchlist('FoundryTrustedToolSources')
    | extend RawSource = tolower(trim(@"\s+", tostring(column_ifexists('SourceUri', ''))))
    | project SourceUri = extract(@"^(?:https?://)?([a-z0-9.\-]+)", 1, RawSource)
    | where isnotempty(SourceUri);
// One row per AppDependencies span from the last two hours that carries a
// gen_ai.conversation.id, with the GenAI attributes this rule needs pulled out
// of the Properties bag.
let conv =
    AppDependencies
    | where TimeGenerated > ago(2h)
    | where isnotempty(Properties["gen_ai.conversation.id"])
    // Conversation key plus the agent / model names (original casing kept).
    | extend
        ConvId = tostring(Properties["gen_ai.conversation.id"]),
        Agent  = tostring(Properties["gen_ai.agent.name"]),
        Model  = tostring(Properties["gen_ai.request.model"])
    // Text fields are lower-cased once here so later matching works on one casing.
    | extend
        Input      = tolower(tostring(Properties["gen_ai.input.messages"])),
        ToolName   = tolower(tostring(Properties["gen_ai.tool.name"])),
        ToolType   = tolower(tostring(Properties["gen_ai.tool.type"])),
        ToolArgs   = tolower(tostring(Properties["gen_ai.tool.call.arguments"])),
        ToolResult = tolower(tostring(Properties["gen_ai.tool.call.result"]))
    // Host name of the first http(s) URL found in the tool arguments or, failing
    // that, in the tool result; empty when neither contains a URL.
    | extend SourceUri = tolower(extract(@"https?://([A-Za-z0-9.\-]+)", 1, strcat(ToolArgs, " ", ToolResult)))
    // A span is sensitive when either its tool name or its tool type hits the list.
    | extend IsSensitive = (ToolName has_any (sensitiveTools)) or (ToolType has_any (sensitiveTools));
// Per-conversation envelope: number of spans, first and last span timestamps,
// and one sample agent / model name. Conversations with fewer than five spans
// are discarded.
let convBounds =
    conv
    | summarize
        Spans    = count(),
        Start    = min(TimeGenerated),
        End      = max(TimeGenerated),
        AnyAgent = take_any(Agent),
        AnyModel = take_any(Model)
      by ConvId
    | where Spans > 4;
// Every span of a retained conversation, tagged with the quarter (0..3) of the
// conversation's time range in which it falls.
let withQuartile =
    conv
    | join kind=inner convBounds on ConvId
    // RangeMs is floored at 1 ms so the division below never has a zero divisor
    // when all spans of a conversation share one timestamp.
    | extend
        RangeMs = max_of(datetime_diff('millisecond', End, Start), 1),
        OffMs   = datetime_diff('millisecond', TimeGenerated, Start)
    // Integer position in quarters; the very last span evaluates to 4 and is
    // capped to 3 so it lands in the final quarter.
    | extend Quartile = min_of(toint(OffMs * 4 / RangeMs), 3);
// Early-conversation spans (first quarter) that reference a host absent from
// the trusted list: count per conversation plus up to 16 distinct hosts.
let earlyUrl =
    withQuartile
    | where Quartile == 0
    | where isnotempty(SourceUri)
    // leftanti keeps only the rows whose SourceUri has no match in `trusted`.
    | join kind=leftanti trusted on SourceUri
    | summarize
        UrlHits          = count(),
        UntrustedSources = make_set(SourceUri, 16)
      by ConvId;
// Number of first-quarter spans per conversation whose input messages or tool
// result contain one of the injection marker phrases. Conversations with no hit
// still yield a row with InjHits = 0.
let earlyInj =
    withQuartile
    | where Quartile == 0
    | summarize InjHits = countif(
          (Input has_any (injectionMarkers)) or (ToolResult has_any (injectionMarkers))
      ) by ConvId;
// Sensitive tool calls made in the last quarter of each conversation:
// how many, which tools (up to 8 distinct names), when the first and last one
// happened, and the conversation's sample agent / model name.
let lateActs =
    withQuartile
    | where IsSensitive
    | where Quartile >= 3
    | summarize
        SensitiveCalls = count(),
        SensitiveTools = make_set(ToolName, 8),
        LastSeen       = max(TimeGenerated),
        FirstSensitive = min(TimeGenerated),
        AnyAgent       = take_any(AnyAgent),
        AnyModel       = take_any(AnyModel)
      by ConvId;
// Final result: conversations with at least one early contamination signal
// (untrusted host or injection phrase) AND at least one late sensitive tool call.
earlyUrl
| join kind=fullouter earlyInj on ConvId
// After a full outer join the key may sit on either side; pick whichever is set.
// Missing counts from the unmatched side are treated as zero.
| extend
    ConvId        = coalesce(ConvId, ConvId1),
    Contamination = coalesce(UrlHits, 0) + coalesce(InjHits, 0)
| where Contamination >= 1
| join kind=inner lateActs on ConvId
| where SensitiveCalls >= 1
| project
    LastSeen,
    // Fall back to a fixed placeholder when no agent name was recorded.
    AccountName = iff(isnotempty(AnyAgent), AnyAgent, "unknown-agent"),
    Agent = AnyAgent,
    Model = AnyModel,
    ConvId,
    Contamination,
    UntrustedSources,
    SensitiveCalls,
    SensitiveTools,
    FirstSensitive
| sort by Contamination desc, SensitiveCalls desc

Explanation

This query is designed to detect a specific type of security threat called "session context contamination" in conversations involving AI agents. Here's a simplified breakdown of what the query does:

Objective: It aims to identify situations where an attacker influences an AI agent's decision-making process by introducing misleading or harmful data early in a conversation. This can lead to the AI taking sensitive actions later on.
Conversation Analysis: The query examines conversations that have at least five interactions (spans) and divides them into four time segments (quartiles).
Early Contamination Detection:
- In the first quartile (early part of the conversation), it checks for:
  - Untrusted URLs: Host names of http(s) URLs found in tool-call arguments or tool results that are not listed in the FoundryTrustedToolSources watchlist.
  - Injection Markers: Specific phrases that suggest an attempt to manipulate the AI's behavior.
Sensitive Actions Detection:
- In the last quartile (late part of the conversation), it looks for sensitive actions taken by the AI, such as executing code, sending emails, or modifying files.
Alert Generation: An alert is triggered only if both conditions are met:
- Early contamination is detected.
- Sensitive actions are performed later in the conversation.
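Data Sources: The query reads Application Insights dependency telemetry from the AppDependencies table, using the gen_ai.* attributes (conversation ID, agent, model, input messages, and tool call details) stored in each record's Properties field.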
Severity and Frequency: The rule is set to a medium severity level, runs every hour, and looks back over the past two hours.
Output: If a match is found, it generates an alert with details like the last seen time, agent name, model used, contamination sources, and sensitive actions taken.
Incident Management: The query is configured to create incidents for detected threats, with settings for grouping related alerts and managing incident lifecycles.

Overall, this query helps in identifying and responding to potential security threats where an AI system's decision-making is compromised by early-stage manipulation.

Details

David Alonso

Released: June 8, 2026

Tables

AppDependencies

Keywords

ApplicationInsights, AppDependencies, Agent, Model, Input, ToolName, ToolType, ToolArgs, ToolResult, SourceUri, ConvId, SensitiveTools, InjectionMarkers, FoundryTrustedToolSources, TimeGenerated, AccountName

Operators

let, dynamic, tolower, tostring, column_ifexists, isnotempty, project, where, ago, extend, extract, strcat, has_any, summarize, count, min, max, take_any, join, kind, datetime_diff, max_of, min_of, toint, countif, make_set, coalesce, iff, isempty, order by

Severity

Medium

Tactics

InitialAccess, Execution, DefenseEvasion

MITRE Techniques

T1566 T1059

Frequency: PT1H

Period: PT2H

Actions

GitHub

KQL Search