Multiple Domain Entity Email Url Info

Query

// This query assumes a feed of threat indicators is ingested/synchronized periodically, and each synchronization ingests new indicators and only old indicators that have been modified.
// Active threat indicators in Sentinel are re-emitted as ThreatIntelligenceIndicator events every ~12 days.
// Width of the "current events" window, and the age threshold used to decide that an indicator is new.
// Presumably equal to the schedule of the analytics rule - confirm against the rule settings.
let query_frequency = 1h;
// Lookback applied to ThreatIntelligenceIndicator when collecting indicators.
let query_period = 14d;
// Every event window is shifted this far into the past (the windows end at ago(query_wait) or earlier).
let query_wait = 1h;
// How far back past EmailUrlInfo events are re-checked against newly received indicators.
let table_query_lookback = 14d;
// Watchlist rows tagged "[DestinationDomain]" in Notes: pairs of (IndicatorId, BenignProperty)
// that are removed from the indicator set by an anti-join further down.
let _TIBenignProperty = _GetWatchlist('ID-TIBenignProperty')
    | where Notes has_any ("[DestinationDomain]")
    | project IndicatorId, BenignProperty;
// Scalar dynamic array with the Auxiliar values of the "ThreatIndicatorSource" watchlist rows.
// It is used as the term list of has_any filters over indicator text columns.
let _TIExcludedSources = toscalar(_GetWatchlist('Activity-ExpectedSignificantActivity')
    | where Activity == "ThreatIndicatorSource"
    | summarize make_list(Auxiliar));
// "MaliciousURLSentEmail" watchlist rows, with the generic watchlist columns renamed to
// the email column names they stand for (sender, recipient, subject).
let _ExpectedEmails = _GetWatchlist('Activity-ExpectedSignificantActivity')
    | where Activity == "MaliciousURLSentEmail"
    | project SenderFromAddress = SourceAddress, RecipientEmailAddress = DestinationAddress, Subject = Auxiliar;
// Matches domain threat indicators against the URL domains seen in EmailUrlInfo events and
// enriches every match with the corresponding EmailEvents record.
// Parameters:
//   table_start, table_end - ingestion_time() window of the EmailUrlInfo events to inspect.
//   only_new_ti            - when true, indicators first seen before ti_start are discarded.
//   ti_start               - cut-off compared with the earliest TimeGenerated of each indicator
//                            within query_period; defaults to datetime(null).
// Returns one row per matched (indicator domain, email message) pair with the indicator columns,
// the EmailUrlInfo columns and, when an EmailEvents record exists, its columns.
let _TITableMatch = (table_start: datetime, table_end: datetime, only_new_ti: boolean, ti_start: datetime = datetime(null)) {
    // Scheduled Analytics rules have a query period limit of 14d
    let _Indicators =// materialize(
        ThreatIntelligenceIndicator
        | where TimeGenerated > ago(query_period)
        // Take the earliest TimeGenerated and the latest column info
        | summarize hint.strategy=shuffle
            minTimeGenerated = min(TimeGenerated),
            arg_max(TimeGenerated, Active, Description, ActivityGroupNames, IndicatorId, ThreatType, DomainName, Url, ExpirationDateTime, ConfidenceScore, AdditionalInformation, ExternalIndicatorId)
            by IndicatorId
        // Remove inactive or expired indicators
        | where not(not(Active) or ExpirationDateTime < now())
        // Pick indicators that contain the desired entity type
        | where isnotempty(DomainName)
        // Lowercased so that it compares equal to the lowercased event domains built below
        | extend Domain = tolower(DomainName)
        // Remove indicators from specific sources
        | where not(AdditionalInformation has_any (_TIExcludedSources) or Description has_any (_TIExcludedSources))
        // Remove excluded indicators with benign properties
        | join kind=leftanti _TIBenignProperty on IndicatorId, $left.Domain == $right.BenignProperty
        // Deduplicate indicators by Domain column, equivalent to using join kind=innerunique afterwards
        | summarize hint.strategy=shuffle
            minTimeGenerated = min(minTimeGenerated),
            take_any(*)
            by Domain
        // If we want only new indicators, remove indicators received previously
        | where not(only_new_ti and minTimeGenerated < ti_start)
    //)
    ;
    // Disabled prefilter: it would restrict events to the top-level domains present in the indicators.
    //let _IndicatorsLength = toscalar(_Indicators | summarize count());
    //let _IndicatorsPrefilter = toscalar(
    //    _Indicators
    //    | extend AuxiliarField = tostring(split(Domain, ".")[-1])
    //    | summarize make_set_if(AuxiliarField, isnotempty(AuxiliarField))
    //);
    //let _IndicatorsPrefilterLength = array_length(_IndicatorsPrefilter);
    let _TableEvents =
        EmailUrlInfo
        | where ingestion_time() between (table_start .. table_end)
        // Filter events that may contain indicators
        | where isnotempty(UrlDomain)
        //| where not(_IndicatorsPrefilterLength < 10000 and not(UrlDomain has_any (_IndicatorsPrefilter))) // valid TLD ~1500 , "has_any" limit 10000
        // Keep a single event per lowercased domain and email message
        | summarize hint.strategy=shuffle take_any(*) by OriginalDomain = tolower(UrlDomain), NetworkMessageId
        //| where not(_IndicatorsPrefilterLength < 10000 and not(tostring(split(OriginalDomain, ".")[-1]) in (_IndicatorsPrefilter)))
        // Generate one row per domain suffix of at least two labels, e.g. "a.b.c" yields "a.b.c" and "b.c",
        // so that an indicator for a parent domain also matches its subdomains
        | extend SplitLevelDomains = split(OriginalDomain, ".")
        | mv-expand Level = range(0, array_length(SplitLevelDomains) - 2) to typeof(int)
        | extend Domain = strcat_array(array_slice(SplitLevelDomains, Level, -1), ".")
        //| where not(_IndicatorsLength < 1000000 and not(Domain in (toscalar(_Indicators | summarize make_list(Domain))))) // "in" limit 1.000.000
        // Copy/rename the columns whose names collide with indicator columns, so they survive the join unambiguously
        | extend EmailUrlInfo_Url = Url
        | project-rename EmailUrlInfo_TimeGenerated = TimeGenerated
    ;
    _Indicators
    | join kind=inner hint.strategy=shuffle _TableEvents on Domain
    // Take only a single event by key columns
    //| summarize hint.strategy=shuffle take_any(*) by Domain, NetworkMessageId
    // In this projection Url is the indicator's column; the URL seen in the email is EmailUrlInfo_Url
    | project
        EmailUrlInfo_TimeGenerated,
        Description, ActivityGroupNames, IndicatorId, ThreatType, DomainName, Url, ExpirationDateTime, ConfidenceScore, AdditionalInformation,
        UrlLocation, EmailUrlInfo_Url, UrlDomain, NetworkMessageId, ReportId
    // Enrich with the email metadata; leftouter keeps matches that have no EmailEvents record
    | join kind=leftouter hint.strategy=shuffle (
        EmailEvents
        // The upper bound is now() instead of table_end, so email records ingested after the URL events are still considered
        | where ingestion_time() between(table_start .. now())
        | project
            SenderFromAddress,
            SenderFromDomain,
            SenderMailFromAddress,
            SenderMailFromDomain,
            SenderDisplayName,
            SenderIPv4,
            SenderIPv6,
            AuthenticationDetails,
            RecipientEmailAddress,
            EmailDirection,
            Subject,
            EmailLanguage,
            UrlCount,
            AttachmentCount,
            AdditionalFields,
            OrgLevelPolicy,
            OrgLevelAction,
            UserLevelPolicy,
            UserLevelAction,
            EmailActionPolicy,
            EmailAction,
            DeliveryAction,
            DeliveryLocation,
            ThreatTypes,
            ConfidenceLevel,
            DetectionMethods,
            Connectors,
            NetworkMessageId,
            EmailEvents_ReportId = ReportId
        )
        on NetworkMessageId
    // Drop the duplicate key column added by the join
    | project-away NetworkMessageId1
};
// Two passes over the data:
//   1. events ingested in the latest window, against every available indicator;
//   2. older events, only against indicators first seen within the last query_frequency.
union// isfuzzy=true
    // Match      current table events                                all indicators available
    _TITableMatch(ago(query_frequency + query_wait), ago(query_wait), false),
    // Match      past table events                                                          new indicators since last query execution
    _TITableMatch(ago(table_query_lookback + query_wait), ago(query_frequency + query_wait), true, ago(query_frequency))
// Keep the most recent event for each indicator and email message
| summarize arg_max(EmailUrlInfo_TimeGenerated, *) by IndicatorId, NetworkMessageId
| extend
    timestamp = EmailUrlInfo_TimeGenerated,
    // Url is the indicator's own column and indicators are selected by DomainName, so it may be empty;
    // fall back to the URL observed in the email so that the URL entity is populated
    URLCustomEntity = iff(isnotempty(Url), Url, EmailUrlInfo_Url)
// Discard emails whose sender is listed in the expected-emails watchlist (only the sender address is compared)
| join kind=leftanti _ExpectedEmails on SenderFromAddress

Explanation

This KQL query is designed to identify and analyze potential threats by matching threat intelligence indicators with email URL data. Here's a simplified breakdown of what the query does:

Setup and Configuration:
- The query sets up several parameters and helper functions to manage the time frames and data sources it will use.
- It defines a frequency for running the query (query_frequency), a period to look back (query_period), and a wait time (query_wait).
Data Preparation:
- It retrieves and processes threat indicators from a threat intelligence feed, filtering out inactive or expired indicators and those from excluded sources.
- It also filters out indicators with benign properties using a watchlist.
Email URL Data Matching:
- The query fetches email URL data from the EmailUrlInfo table, focusing on URLs that might match the threat indicators.
- It processes this data to extract domain information and prepares it for matching against the threat indicators.
Matching Logic:
- It matches the processed threat indicators with the email URL data to find potential threats.
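- The query performs this matching in two passes: email URL events from the current window are matched against all available indicators, and past email URL events are matched only against indicators that are new since the last query execution.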
Result Compilation:
- It compiles the results, ensuring that each threat indicator and email URL match is unique.
- The query enriches the results with additional email event data from the EmailEvents table.
Exclusion of Expected Emails:
- Finally, it excludes any matches where the sender's email address is in a list of expected emails, using another watchlist.

In summary, this query is designed to detect potential threats by correlating threat intelligence indicators with email URL data, while excluding known benign or expected activities.

Details

Jose Sebastián Canós

Released: December 13, 2023

Tables

ThreatIntelligenceIndicator, EmailUrlInfo, EmailEvents

Keywords

ThreatIntelligenceIndicator, EmailUrlInfo, EmailEvents

Operators

let, has_any, project, toscalar, summarize, make_list, where, isnotempty, extend, tolower, join, kind, leftanti, on, project-rename, split, mv-expand, range, array_length, strcat_array, array_slice, project-away, union, arg_max, between, ingestion_time, leftouter, hint.strategy, shuffle, datetime, ago, now, materialize

Actions

GitHub

KQL Search