Multiple Domain Entity Email Events

Query

// This query assumes a feed of threat indicators is ingested/synchronized periodically, and that each synchronization ingests new indicators plus only those old indicators that have been modified.
// Active threat indicators in Sentinel are re-emitted as ThreatIntelligenceIndicator events every ~12 days.
// Interval between rule executions; also the width of the "current events" window and the cutoff for "new" indicators.
let query_frequency = 1h;
// How far back ThreatIntelligenceIndicator events are read when building the indicator set.
let query_period = 14d;
// Delay subtracted from both ends of the EmailEvents windows (0h = no delay).
let query_wait = 0h;
// How far back past EmailEvents are re-checked against newly received indicators.
let table_query_lookback = 14d;
// Exclusion list of (IndicatorId, BenignProperty) pairs read from the 'ID-TIBenignProperty' watchlist.
// It is anti-joined against the indicators further down to drop indicators whose domain is listed as benign.
let _TIBenignProperty =
    _GetWatchlist('ID-TIBenignProperty')
    // Keep only watchlist entries whose Notes carry one of the domain tags
    | where Notes has_any ("[SourceDomain]", "[DestinationDomain]")
    | project IndicatorId, BenignProperty
;
// Dynamic array of indicator source names to ignore, built from the Auxiliar column of the
// 'Activity-ExpectedSignificantActivity' watchlist rows whose Activity is "ThreatIndicatorSource".
// It is compared (has_any) against the indicators' AdditionalInformation and Description columns further down.
let _TIExcludedSources = toscalar(
    _GetWatchlist('Activity-ExpectedSignificantActivity')
    | where Activity == "ThreatIndicatorSource"
    | summarize make_list(Auxiliar)
    );
// Matches EmailEvents ingested within a time window against active, non-expired domain threat indicators.
// Parameters:
//   table_start, table_end - bounds of the EmailEvents ingestion_time() window
//   only_new_ti            - when true, indicators whose earliest TimeGenerated is before ti_start are discarded
//   ti_start               - cutoff used together with only_new_ti (defaults to a null datetime)
// Returns the inner join of indicators and events on Domain, projected to indicator and email columns.
let _TITableMatch = (table_start: datetime, table_end: datetime, only_new_ti: boolean, ti_start: datetime = datetime(null)) {
    // Scheduled Analytics rules have a query period limit of 14d
    let _Indicators =// materialize(
        ThreatIntelligenceIndicator
        | where TimeGenerated > ago(query_period)
        // Take the earliest TimeGenerated and the latest column info
        // NOTE(review): IndicatorId is both the group key and part of the arg_max column list; presumably this only yields a redundant extra column - confirm
        | summarize hint.strategy=shuffle
            minTimeGenerated = min(TimeGenerated),
            arg_max(TimeGenerated, Active, Description, ActivityGroupNames, IndicatorId, ThreatType, DomainName, Url, ExpirationDateTime, ConfidenceScore, SourceSystem, Tags, AdditionalInformation, ExternalIndicatorId)
            by IndicatorId
        // Remove inactive or expired indicators
        | where not(not(Active) or ExpirationDateTime < now())
        // Pick indicators that contain the desired entity type
        | where isnotempty(DomainName)
        // Lowercase the domain so it can be joined with the lowercased event domains
        | extend Domain = tolower(DomainName)
        // Remove indicators from specific sources
        | where not(AdditionalInformation has_any (_TIExcludedSources) or Description has_any (_TIExcludedSources))
        // Remove excluded indicators with benign properties
        | join kind=leftanti _TIBenignProperty on IndicatorId, $left.Domain == $right.BenignProperty
        // Deduplicate indicators by Domain column, equivalent to using join kind=innerunique afterwards
        | summarize hint.strategy=shuffle
            minTimeGenerated = min(minTimeGenerated),
            take_any(*)
            by Domain
        // If we want only new indicators, remove indicators received previously
        | where not(only_new_ti and minTimeGenerated < ti_start)
    //)
    ;
    // Disabled prefilter experiment: precompute the indicator count and the set of indicator TLDs
    //let _IndicatorsLength = toscalar(_Indicators | summarize count());
    //let _IndicatorsPrefilter = toscalar(
    //    _Indicators
    //    | extend AuxiliarField = tostring(split(Domain, ".")[-1])
    //    | summarize make_set_if(AuxiliarField, isnotempty(AuxiliarField))
    //);
    //let _IndicatorsPrefilterLength = array_length(_IndicatorsPrefilter);
    let _TableEvents =
        EmailEvents
        // The window is applied to ingestion time, not to TimeGenerated
        | where ingestion_time() between (table_start .. table_end)
        // Filter events that may contain indicators
        | where not(EmailDirection in ("Intra-org"))
        // One row per candidate domain: SenderMailFromDomain, SenderFromDomain (only when it differs), and the part of RecipientEmailAddress after the last "@"
        | mv-expand Domain = pack_array(SenderMailFromDomain, iff(SenderFromDomain != SenderMailFromDomain, SenderFromDomain, ""), split(RecipientEmailAddress, "@")[-1]) to typeof(string)
        | where isnotempty(Domain)
        //| where not(_IndicatorsPrefilterLength < 10000 and not(Domain has_any (_IndicatorsPrefilter))) // valid TLD ~1500 , "has_any" limit 10000
        // Keep a single arbitrary event per distinct lowercased domain
        | summarize hint.strategy=shuffle take_any(*) by OriginalDomain = tolower(Domain)
        //| where not(_IndicatorsPrefilterLength < 10000 and not(tostring(split(OriginalDomain, ".")[-1]) in (_IndicatorsPrefilter)))
        // Expand each domain into itself and its parent domains, stopping at two labels,
        // e.g. a.b.example.com -> a.b.example.com, b.example.com, example.com
        | extend SplitLevelDomains = split(OriginalDomain, ".")
        | mv-expand Level = range(0, array_length(SplitLevelDomains) - 2) to typeof(int)
        | extend Domain = strcat_array(array_slice(SplitLevelDomains, Level, -1), ".")
        //| where not(_IndicatorsLength < 1000000 and not(Domain in (toscalar(_Indicators | summarize make_list(Domain))))) // "in" limit 1.000.000
        // Rename so the event time stays distinguishable from the indicator's TimeGenerated after the join
        | project-rename EmailEvents_TimeGenerated = TimeGenerated
    ;
    _Indicators
    | join kind=inner hint.strategy=shuffle _TableEvents on Domain
    // Take only a single event by key columns
    //| summarize hint.strategy=shuffle take_any(*) by Domain, NetworkMessageId
    // Output: event time, then indicator columns, then email event columns
    | project
        EmailEvents_TimeGenerated,
        Description, ActivityGroupNames, IndicatorId, ThreatType, DomainName, Url, ExpirationDateTime, ConfidenceScore, SourceSystem, Tags, AdditionalInformation,
        SenderFromAddress, SenderFromDomain, SenderMailFromAddress, SenderMailFromDomain, SenderDisplayName, SenderIPv4, SenderIPv6, AuthenticationDetails, RecipientEmailAddress, EmailDirection, Subject, EmailLanguage, UrlCount, AttachmentCount, AdditionalFields, OrgLevelPolicy, OrgLevelAction, UserLevelPolicy, UserLevelAction, EmailActionPolicy, EmailAction, DeliveryAction, DeliveryLocation, ThreatTypes, ConfidenceLevel, DetectionMethods, Connectors, NetworkMessageId, ReportId
};
// Run the match twice and merge the results: recent events against every indicator,
// and older events against only the indicators first received since the previous execution.
union// isfuzzy=true
    // Match      current table events                                all indicators available
    _TITableMatch(ago(query_frequency + query_wait), ago(query_wait), false),
    // Match      past table events                                                          new indicators since last query execution
    _TITableMatch(ago(table_query_lookback + query_wait), ago(query_frequency + query_wait), true, ago(query_frequency))
// Keep one row per indicator and message: the one with the latest event time
| summarize arg_max(EmailEvents_TimeGenerated, *) by IndicatorId, NetworkMessageId
// Add helper columns (presumably consumed by the Sentinel rule's entity mapping - confirm)
| extend
    timestamp = EmailEvents_TimeGenerated,
    URLCustomEntity = Url

Explanation

This KQL query is designed to detect and analyze email events that match threat indicators from a threat intelligence feed. Here's a simplified breakdown of what the query does:

Setup and Configuration:
- The query is set to run every hour (query_frequency = 1h) and looks back over a 14-day period (query_period = 14d).
- It uses watchlists to filter out benign properties and exclude certain sources from the threat indicators.
Threat Indicator Processing:
- It retrieves threat indicators from the ThreatIntelligenceIndicator table, focusing on active and non-expired indicators that contain domain information.
- It filters out indicators from specific sources and those with benign properties.
- When requested (only_new_ti = true), it keeps only indicators whose earliest record is not older than a given start time, i.e. indicators not seen in previous runs.
Email Event Processing:
- It retrieves email events from the EmailEvents table within the specified time frame.
- It filters out intra-organizational emails and extracts domains from sender and recipient email addresses.
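- It lowercases and deduplicates these domains, then expands each one into itself and its parent domains (for example, a.b.example.com also yields b.example.com and example.com), so an indicator on a parent domain also matches its subdomains.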
Matching and Output:
- The query performs an inner join between the processed threat indicators and email events based on domain matches.
- It selects relevant fields from both threat indicators and email events for further analysis.
- It combines results from current and past email events with new threat indicators to ensure comprehensive coverage.
- The final output includes the most recent email event for each unique indicator and network message ID, with additional fields for further investigation.

Overall, this query is designed to identify potential threats in email communications by matching them against a set of threat indicators, while excluding known benign or irrelevant data.

Details

Jose Sebastián Canós

Released: December 13, 2023

Tables

ThreatIntelligenceIndicator, EmailEvents

Keywords

ThreatIntelligenceIndicator, EmailEvents

Operators

let, has_any, project, toscalar, where, summarize, make_list, materialize, min, arg_max, isnotempty, tolower, join, kind, leftanti, extend, not, between, mv-expand, iff, split, pack_array, project-rename, range, array_length, strcat_array, array_slice, union, isfuzzy, by

Actions

GitHub

KQL Search