module CVE_2021_44228;

# Refer to the following for a description of the methods used in script.
# Headers: https://corelight.com/blog/simplifying-detection-of-log4shell
# LDAP: https://corelight.com/blog/detecting-the-log4j-exploit-via-zeek-and-ldap-traffic

@load-sigs ./ldap_java.sig

export {
	## Notice types declared by this module.
	redef enum Notice::Type += {
		LOG4J_ATTEMPT_HEADER,
		LOG4J_LDAP_JAVA,
		LOG4J_SUCCESS
	};

	## When T, header matches are also written to this module's log stream.
	option log = T;

	## redef'd when running tests with btest. Leave as `F`.
	option run_tests = F;

	## Payload callback hosts to ignore.
	## Can be domains or addrs, so just have it be a string.
	option ignorable_target_hosts: set[string] = {};

	## Ignore hosts known to be benign & scanning for this behavior.
	option ignorable_orig_hosts: set[subnet] = {
		10.96.64.0/23,
		10.96.66.0/23,
		10.96.68.0/23,
		10.5.114.0/23,
		10.189.56.0/23,
		10.189.58.0/23,
		10.189.60.0/23,
		10.205.62.224/28,
		10.205.63.224/28,
		10.203.63.32/28,
		10.203.63.48/28,
		10.187.13.32/28,
		10.187.13.48/28,
		10.96.64.0/22,
		10.189.56.0/22
	};

	## Ignore resp hosts. `ignorable_orig_hosts` is probably what you want. This
	## would be for (1) ignoring internal honeypots that you know will look
	## "exploitable" or (2) a known "malicious" server attempting to exploit
	## vulnerable Java clients.
	option ignorable_resp_hosts: set[addr] = {};

	## Try to normalize payloads to improve chance of successfully retrieving the
	## payload information.
	option try_normalize = T;

	## Log stream identifier for this module.
	redef enum Log::ID += { LOG };

	## Path (file name stem) of the log stream; may be redef'd.
	const log_path = "log4j" &redef;

	## Policy hook attached to the log stream when it is created.
	global log_policy: Log::PolicyHook;
}

# Tag added to the HTTP record of a request/response with a matching header.
redef enum HTTP::Tags += { LOG4J_RCE };

# One log entry per matching header component (name or value).
type Info: record {
	ts:            time   &log;
	uid:           string &log;
	# URI of the HTTP request the header belongs to (empty if unknown).
	http_uri:      string &log;
	# Pieces of the payload as extracted by parse_payload().
	uri:           string &log;
	stem:          string &log;
	target_host:   string &log;
	target_port:   string &log;
	# HTTP method of the request (empty if unknown).
	method:        string &log;
	is_orig:       bool   &log;
	# Raw header name and value as seen on the wire.
	name:          string &log;
	value:         string &log;
	# Which of the two components matched the exploit pattern.
	matched_name:  bool   &log;
	matched_value: bool   &log;
};

# Result of parse_payload(); fields that cannot be extracted are set to "-".
type PayloadParts: record {
	uri:   string;
	stem:  string;
	host:  string;
	port_: string;
};

# Very general, FPs expected but we're casting a wide net intentionally.
# Approach:
#   Match ${
#   unless it's ${@ (php junk)
#   and only if it has a : in the middle
#   and an ending brace.
# See test cases in zeek_init() for what we consider to be a TP/FP.
global exploit_pattern: pattern = /\$\{[^@][^}]+:[^}]+\}/;

# Stack used for `normalize`. Shouldn't be used outside of that function.
global stack: vector of string;

# Returns the top element of the stack without removing it, or "" if the
# stack is empty.
function peek(): string
	{
	if ( |stack| > 0 )
		return stack[|stack| - 1];

	return "";
	}

# Removes and returns the top element of the stack, or returns "" if the
# stack is empty.
function pop(): string
	{
	local depth = |stack|;

	if ( depth == 0 )
		return "";

	local top = stack[depth - 1];
	# Keep everything below the top element.
	stack = stack[0:depth - 1];
	return top;
	}

# Appends `x` to the top of the stack.
function push(x: string)
	{
	stack += x;
	}

# Drops every element from the stack.
function clear_stack()
	{
	stack = vector();
	}

# Attempts to normalize log4j payload to remove most common obfuscations. There
# are effectively an infinite number of ways to do this, so don't expect it to
# cover everything. See tests in `zeek_init()` to understand what it handles.
#
# Algorithm works as follows:
#
# "$" and "{" are pushed onto the stack when encountered.
# Set a flag to show we have seen the first "$" "{" set.
# If we are on our second+ set of "$" "{", start ignoring characters
# If we see a ":" while ignoring, we have passed the function portion and should stop ignoring.
# When we hit a "}", pop the previous "{" and "$" off the stack. If the stack is
# now empty, this was the first instance (i.e., `${jndi...`) and it should be
# preserved, otherwise, remove it.
function normalize(payload: string): string
	{
	# Replace default substitution string with normal formatting string, i.e., ${::-j} -> ${:j}
	payload = gsub(payload, /::\-/, ":");

	# Byte offsets (into the gsub'd payload) that must not be copied to the output.
	local drop: set[count];
	# T while we are inside the "function" part of a nested ${...} lookup.
	local skipping = F;
	# T once the first "{" has been encountered.
	local outer_seen = F;
	local n = |payload|;
	local pos = 0;

	while ( pos < n )
		{
		local ch = payload[pos];

		if ( ch == "$" )
			push(ch);
		else if ( ch == "{" )
			{
			if ( peek() == "$" )
				push(ch);

			if ( outer_seen )
				{
				# Nested lookup: drop the character before the brace (the
				# "$") and start skipping; the brace itself is dropped by
				# the `skipping` check below.
				add drop[pos - 1];
				skipping = T;
				}
			else
				outer_seen = T;
			}
		else if ( ch == ":" )
			{
			if ( skipping )
				{
				# End of the function portion: drop the ":" and resume copying.
				add drop[pos];
				skipping = F;
				}
			}
		else if ( ch == "}" )
			{
			local brace = pop();
			local sigil = pop();

			# We only want to remove internal closing braces; the outermost
			# one leaves the stack empty and is preserved.
			if ( sigil == "$" && brace == "{" && |stack| > 0 )
				add drop[pos];
			}

		if ( skipping )
			add drop[pos];

		++pos;
		}

	# Rebuild the payload from the characters that survived.
	local kept: vector of string = vector();
	pos = 0;
	while ( pos < n )
		{
		if ( pos !in drop )
			kept += payload[pos];

		++pos;
		}

	# Leave the shared stack empty for the next call.
	clear_stack();
	return join_string_vec(kept, "");
	}

# Splits `s` once on pattern `p` and returns piece number `idx`.
# If split doesn't return the expected number of indices, return `missing`
# (default "-").
function safe_split1_w_default(s: string, p: pattern, idx: count, missing: string &default="-"): string
	{
	local pieces = split_string1(s, p);
	return |pieces| > idx ? pieces[idx] : missing;
	}

# Assumes `name` or `value` string passed as `s` has the structure:
# ${jndi:ldap://payload_host:payload_port/path} for the payload. Many examples
# of more complicated obfuscation exist. If the structure is different, fill
# missing fields with "-" so other structures in the wild can be explored in the
# logs.
# For example, Binary Edge are using the following type of obfuscation:
# ...value='${jndi:${lower:l}${lower:d}a${lower:p}://world443.log4j.bin${upper:a}ryedge.io:80/callback}'
#
# Splits a header string `s` that matched `exploit_pattern` into the parts of
# the callback URL. When `try_normalize` is set the string is first passed
# through normalize(). Everything after the last "//" and before the first "}"
# becomes `uri`; `stem` is `uri` up to the first "/"; `host` and `port_` are the
# stem split on the first ":". A missing port is reported as "-".
function parse_payload(s: string): PayloadParts
	{
	if ( try_normalize )
		s = normalize(s);

	local tmp = split_string(s, /\/\//);
	local last: string = "-";

	if ( |tmp| > 0 )
		last = tmp[(|tmp| - 1)];

	local payload_uri = safe_split1_w_default(last, /\}/, 0);
	local payload_stem = safe_split1_w_default(payload_uri, /\//, 0);
	local payload_host = safe_split1_w_default(payload_stem, /\:/, 0);
	local payload_port = safe_split1_w_default(payload_stem, /\:/, 1);

	return PayloadParts($uri=payload_uri, $stem=payload_stem,
	                    $host=payload_host, $port_=payload_port);
	}

# Handles one matching header component. `candidate` is the component that
# matched (either `name` or `value`); all other arguments are passed through
# unchanged into the notice and the log entry. Does nothing when the payload's
# host is listed in `ignorable_target_hosts`.
function report_header_match(c: connection, is_orig: bool, name: string, value: string,
                             matched_name: bool, matched_value: bool,
                             http_uri: string, http_method: string, candidate: string)
	{
	local payload = parse_payload(candidate);

	if ( payload$host in ignorable_target_hosts )
		return;

	local info = Info($ts=network_time(),
	                  $uid=c$uid,
	                  $http_uri=http_uri,
	                  $uri=payload$uri,
	                  $stem=payload$stem,
	                  $target_host=payload$host,
	                  $target_port=payload$port_,
	                  $method=http_method,
	                  $is_orig=is_orig,
	                  $name=name,
	                  $value=value,
	                  $matched_name=matched_name,
	                  $matched_value=matched_value);

	NOTICE([$note=LOG4J_ATTEMPT_HEADER,
	        $conn=c,
	        $identifier=cat(c$id$orig_h,c$id$resp_h,c$id$resp_p,cat(name,value)),
	        # $suppress_for=3600sec,
	        $msg="Possible Log4j exploit CVE-2021-44228 exploit in header. Refer to sub field for sample of payload, original_URI and list of server headers",
	        $sub=fmt("uri='%s', payload_uri=%s, payload_stem=%s, payload_host=%s, payload_port=%s, method=%s, is_orig=%s, header name='%s', header value='%s' ",
	                 http_uri, payload$uri, payload$stem, payload$host, payload$port_,
	                 http_method, is_orig, name, value)]);

	if ( log )
		Log::write(LOG, info);
	}

# Inspects every HTTP header (both directions) for a Log4Shell lookup string in
# either the header name or the header value. On a match the HTTP record is
# tagged with LOG4J_RCE and each matching component is reported separately.
event http_header(c: connection, is_orig: bool, name: string, value: string)
	{
	if ( c$id$orig_h in ignorable_orig_hosts )
		return;

	if ( c$id$resp_h in ignorable_resp_hosts )
		return;

	# Focus is mainly on client headers, but not filtering right now to explore interesting cases in the wild
	# if (!is_orig)
	#     return;

	# Focus is mainly on value of header, but adding 'name' to explore what is being used in the wild
	local matched_name = exploit_pattern in name;
	local matched_value = exploit_pattern in value;

	if ( !matched_name && !matched_value )
		return;

	# Ignore matches that contain binary goop. This was a large contributor to
	# false positives.
	if ( matched_name && !is_ascii(name) )
		return;

	if ( matched_value && !is_ascii(value) )
		return;

	# Everything below reads or writes c$http; bail out quietly rather than
	# raising a runtime error if no HTTP state is attached to the connection.
	if ( !c?$http )
		return;

	# Handle potentially missing fields
	local http_uri: string = "";
	local http_method: string = "";

	if ( c$http?$uri )
		http_uri = c$http$uri;

	if ( c$http?$method )
		http_method = c$http$method;

	add c$http$tags[LOG4J_RCE];

	# TODO: add to a clusterized set for watching of subsequent traffic (LOG4J_SUCCESS notice).

	# Name and value are evaluated independently: an ignorable target host in
	# one of them must not suppress reporting of the other.
	if ( matched_name )
		report_header_match(c, is_orig, name, value, matched_name, matched_value,
		                    http_uri, http_method, name);

	if ( matched_value )
		report_header_match(c, is_orig, name, value, matched_name, matched_value,
		                    http_uri, http_method, value);
	}

# Raises LOG4J_LDAP_JAVA when one of the two signatures named below fires; the
# matched data is carried in the notice's sub field. All other signature
# matches are ignored.
event signature_match(state: signature_state, msg: string, data: string)
	{
	if ( msg != "log4j_javaclassname_udp" && msg != "log4j_javaclassname_tcp" )
		return;

	NOTICE([$note=LOG4J_LDAP_JAVA,
	        $conn=state$conn,
	        $identifier=cat(state$conn$id$orig_h,state$conn$id$resp_h,state$conn$id$resp_p),
	        # $suppress_for=3600sec,
	        $msg="Possible Log4j exploit CVE-2021-44228 exploit, JAVA over LDAP. Refer to sub field for sample of payload.",
	        $sub=data]);
	}

# Creates this module's log stream.
event zeek_init() &priority=5
	{
	Log::create_stream(CVE_2021_44228::LOG,
	                   [$columns=Info, $path=log_path, $policy=log_policy]);
	}