commit e92000e5b0ddbd4c247757fe56ab20bb1811f35f Author: Patrick Kelley Date: Wed May 7 14:10:34 2025 -0400 Initial diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b92c07a --- /dev/null +++ b/LICENSE @@ -0,0 +1,29 @@ +BSD 3-Clause License + +Copyright (c) 2017, Stephen Hosom +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..65b651a --- /dev/null +++ b/README.md @@ -0,0 +1,80 @@ +# Module for File Extraction + +This is a Zeek package that provides convenient extraction of files. 
+ +As a secondary goal, this script performs additional commonly requested file extraction and logging tasks, such as naming extracted files after their calculated file checksum or naming the file with its common file extension. + +## Installing with zkg (preferred) + +This package can be installed through the [Zeek package manager](https://docs.zeek.org/projects/package-manager/en/stable/quickstart.html) by running the following commands: + +```sh +zkg install zeek/hosom/file-extraction + +# you must separately load the package for it to actually do anything +zkg load zeek/hosom/file-extraction +``` + +## Installing manually + +While not preferred, this package can also be installed manually. To do this, follow the steps below: + +``` +cd <prefix>/share/zeek/site + +git clone https://github.com/hosom/file-extraction file-extraction + +echo "@load file-extraction" >> local.zeek +``` + +## Configuration + +The package installs with the **extract-common-exploit-types.zeek** policy; however, additional functionality may be desired. + +Configuration must **always be done within the config.zeek** file. Failure to isolate configuration to **config.zeek** will result in your configuration being overwritten when the package is upgraded. + +### Advanced Configuration + +For advanced configuration of file extraction, the best option available is to hook the FileExtraction::extract hook. For examples of this, look at the scripts in the plugins directory. + +## Plugins + +### extract-all-files.zeek + +Attaches the extract files analyzer to every file that has a mime_type detected. + +### extract-java.zeek + +Attaches the extract files analyzer to every JNLP and Java Archive file detected. + +### extract-pe.zeek + +Attaches the extract files analyzer to every PE file detected. + +### extract-ms-office.zeek + +Attaches the extract files analyzer to every MS Office file detected. + +### extract-pdf.zeek + +Attaches the extract files analyzer to every PDF file detected. 
+ +### extract-common-exploit-types.zeek + +Loads the following plugins: +- extract-java.zeek +- extract-pe.zeek +- extract-ms-office.zeek +- extract-pdf.zeek + +### store-files-by-md5.zeek + +Uses file_state_remove to rename extracted files based on the md5 checksum whenever it is available. + +### store-files-by-sha1.zeek + +Uses file_state_remove to rename extracted files based on the sha1 checksum whenever it is available. + +### store-files-by-sha256.zeek + +Uses file_state_remove to rename extracted files based on the sha256 checksum whenever it is available. diff --git a/scripts/__load__.zeek b/scripts/__load__.zeek new file mode 100644 index 0000000..d408350 --- /dev/null +++ b/scripts/__load__.zeek @@ -0,0 +1,2 @@ +@load ./main +@load ./config \ No newline at end of file diff --git a/scripts/config.zeek b/scripts/config.zeek new file mode 100644 index 0000000..c6af4d3 --- /dev/null +++ b/scripts/config.zeek @@ -0,0 +1,12 @@ +# All configuration must occur within this file. +# All other files may be overwritten during upgrade +module FileExtraction; + +# Configure where extracted files will be stored +redef path = ""; + +# Configure 'plugins' that can be loaded +# these are shortcut modules to specify common +# file extraction policies. 
Example: +# @load ./plugins/extract-pe.zeek +@load ./plugins/extract-common-exploit-types \ No newline at end of file
diff --git a/scripts/file-extensions.zeek b/scripts/file-extensions.zeek new file mode 100644 index 0000000..4e95ccc --- /dev/null +++ b/scripts/file-extensions.zeek @@ -0,0 +1,92 @@ +module FileExtraction; + +export { + ## Map a detected mime_type to the extension used when naming extracted files; an empty string leaves the extension empty + const mime_to_ext: table[string] of string = { + ["application/x-dosexec"] = "exe", + ["application/msword"] = "doc", + ["application/x-dmg"] = "dmg", + ["application/x-gzip"] = "gz", + ["application/x-rar"] = "rar", + ["application/x-tar"] = "tar", + ["application/x-xar"] = "pkg", + ["application/x-rpm"] = "rpm", + ["application/x-stuffit"] = "sit", + ["application/x-archive"] = "", + ["application/x-arc"] = "arc", + ["application/x-eet"] = "eet", + ["application/x-zoo"] = "zoo", + ["application/x-lz4"] = "lz4", + ["application/x-lrzip"] = "lrz", + ["application/x-lzh"] = "lzh", + ["application/warc"] = "warc", + ["application/x-7z-compressed"] ="7z", + ["application/x-xz"] = "xz", + ["application/x-lha"] = "lha", + ["application/x-arj"] = "arj", + ["application/x-cpio"] = "cpio", + ["application/x-compress"] = "", + ["application/x-lzma"] = "", + ["application/zip"] = "zip", + ["application/vnd.ms-cab-compressed"] = "cab", + ["application/pdf"] = "pdf", + ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"] = "docx", + ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"] = "xlsx", + ["application/vnd.openxmlformats-officedocument.presentationml.presentation"] ="pptx", + ["application/font-woff"] = "woff", + ["application/x-font-ttf"] = "ttf", + ["application/vnd.ms-fontobject"] = "eot", + ["application/x-font-sfn"] = "", + ["application/vnd.ms-opentype"] = "otf", + ["application/x-mif"] = "mif", + ["application/vnd.font-fontforge-sfd"] = "sfd", + ["audio/mpeg"] = "mp3", + ["audio/m4a"] = "m4a", + ["image/tiff"] = "tiff", + ["image/gif"] = "gif", + ["image/jpeg"] = "jpg", + ["image/x-ms-bmp"] = "bmp", + ["image/x-icon"] = "ico", + ["image/x-cursor"] = "cur", + ["image/vnd.adobe.photoshop"] = "psd", + ["image/png"] = "png", + ["text/html"] = "html", + ["text/plain"] = "txt", + ["text/json"] = "json", + ["text/rtf"] = "rtf", + ["application/xml"] = "xml", + ["text/rss"] = "rss", + ["application/java-archive"] = "jar", + ["application/x-java-applet"] = "jar", + ["application/x-shockwave-flash"] = "swf", + ["application/pkcs7-signature"] = "p7", + ["application/x-pem"] = "pem", + ["application/x-java-jnlp-file"] = "jnlp", + ["application/vnd.tcpdump.pcap"] = "pcap", + ["text/x-shellscript"] = "sh", + ["text/x-perl"] = "pl", + ["text/x-ruby"] = "rb", + ["text/x-python"] = "py", + ["text/x-awk"] = "awk", + ["text/x-lua"] ="lua", + ["application/javascript"] = "js", + ["text/x-php"] = "php", + ["application/x-executable"] = "", + ["application/x-coredump"] = "core", + ["video/x-flv"] = "flv", + ["video/x-fli"] = "fli", + ["video/x-flc"] = "flc", + ["video/mj2"] = "mj2", + ["video/x-mng"] = "mng", + ["video/x-jng"] = "jng", + ["video/mpeg"] = "mpg", + ["video/mpv"] = "mpv", + ["video/h264"] = "264", + ["video/webm"] = "webm", + ["video/matroska"] = "mkv", + ["video/x-sgi-movie"] = "sgi", + ["video/quicktime"] = "qt", + ["video/mp4"] = "mp4", + ["video/3gpp"] = "3gp", + }; +}
diff --git a/scripts/main.zeek b/scripts/main.zeek new file mode 100644 index 0000000..6166a5b --- /dev/null +++ b/scripts/main.zeek @@ -0,0 +1,39 @@ +@load ./file-extensions + +module FileExtraction; + +export { + ## Path prefix for extracted files; it is concatenated verbatim with the generated file name, so a directory path needs its trailing "/" + const path: string = "" &redef; + ## Hook to include files in extraction: a handler selects a file by executing break + global extract: hook(f: fa_file, meta: fa_metadata); + ## Hook to exclude files from extraction: a handler vetoes a file by executing break + global ignore: hook(f: fa_file, meta: fa_metadata); +} + +# Attach the extract analyzer to each sniffed file that an extract handler selected +# and that no ignore handler vetoed. The file is named <path><source>-<id>.<extension>. +event file_sniff(f: fa_file, meta: fa_metadata) + { + if ( meta?$mime_type && !hook FileExtraction::extract(f, meta) ) + { + if ( !hook FileExtraction::ignore(f, meta) ) + return; + + # Prefer the extension from mime_to_ext; otherwise fall back to the MIME subtype. + local fext = ""; + if ( meta$mime_type in mime_to_ext ) + fext = mime_to_ext[meta$mime_type]; + else + { + # Guard the index: a mime_type without "/" splits into a single element. + local parts = split_string(meta$mime_type, /\//); + if ( |parts| > 1 ) + fext = parts[1]; + } + + local fname = fmt("%s%s-%s.%s", path, f$source, f$id, fext); + Files::add_analyzer(f, Files::ANALYZER_EXTRACT, + [$extract_filename=fname]); + } + }
diff --git a/scripts/plugins/extract-all-files.zeek b/scripts/plugins/extract-all-files.zeek new file mode 100644 index 0000000..576d18e --- /dev/null +++ b/scripts/plugins/extract-all-files.zeek @@ -0,0 +1,8 @@ +@load ../__load__ + +module FileExtraction; +# Select every file: this handler breaks unconditionally, so the extract hook is always short-circuited. +hook FileExtraction::extract(f: fa_file, meta: fa_metadata) &priority=10 + { + break; + }
diff --git a/scripts/plugins/extract-archive.zeek b/scripts/plugins/extract-archive.zeek new file mode 100644 index 0000000..07f0b04 --- /dev/null +++ b/scripts/plugins/extract-archive.zeek @@ -0,0 +1,24 @@ +@load ../__load__ + +module FileExtraction; +# MIME types this plugin selects for extraction (archive and compression formats). +const archive_types: set[string] = { "application/x-rar-compressed", + "application/x-bzip2", + "application/gzip", + "application/x-lzma", + "application/x-lzip", + "application/x-xz", + "application/x-lzop", + "application/x-compress", + "application/x-7z-compressed", + "application/x-ace-compressed", + "application/vnd.ms-cab-compressed", + "application/x-gtar", + "application/zip", + }; +# Select files whose mime_type is in archive_types by breaking out of the extract hook. +hook FileExtraction::extract(f: fa_file, meta: fa_metadata) &priority=5 + { + if ( meta$mime_type in archive_types ) + break; + }
diff --git a/scripts/plugins/extract-common-exploit-types.zeek b/scripts/plugins/extract-common-exploit-types.zeek new file mode 100644 index 0000000..e09dc4f --- /dev/null +++ b/scripts/plugins/extract-common-exploit-types.zeek @@ -0,0 +1,6 @@ +@load ../__load__ + +@load ./extract-java +@load ./extract-pe +@load ./extract-ms-office +@load ./extract-pdf
diff --git a/scripts/plugins/extract-java.zeek b/scripts/plugins/extract-java.zeek new file mode 100644 index 0000000..e1c7438 --- /dev/null +++ b/scripts/plugins/extract-java.zeek @@ -0,0 +1,15 @@ +@load ../__load__ + 
+module FileExtraction; +# MIME types this plugin selects: Java archives, Java applets and JNLP files. +const java_types: set[string] = { + "application/java-archive", + "application/x-java-applet", + "application/x-java-jnlp-file" + }; +# Select files whose mime_type is in java_types by breaking out of the extract hook. NOTE(review): meta$mime_type is read without a ?$ check; the callers visible in this package test meta?$mime_type first. +hook FileExtraction::extract(f: fa_file, meta: fa_metadata) &priority=5 + { + if ( meta$mime_type in java_types ) + break; + } diff --git a/scripts/plugins/extract-ms-office.zeek b/scripts/plugins/extract-ms-office.zeek new file mode 100644 index 0000000..d7e915d --- /dev/null +++ b/scripts/plugins/extract-ms-office.zeek @@ -0,0 +1,15 @@ +@load ../__load__ + +module FileExtraction; +# MIME types this plugin selects: legacy Word documents plus the OOXML word-processing, spreadsheet and presentation types. +const office_types: set[string] = { "application/msword", + "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", + "application/vnd.openxmlformats-officedocument.presentationml.presentation", + }; +# Select files whose mime_type is in office_types by breaking out of the extract hook. NOTE(review): meta$mime_type is read without a ?$ check; the callers visible in this package test meta?$mime_type first. +hook FileExtraction::extract(f: fa_file, meta: fa_metadata) &priority=5 + { + if ( meta$mime_type in office_types ) + break; + } diff --git a/scripts/plugins/extract-pdf.zeek b/scripts/plugins/extract-pdf.zeek new file mode 100644 index 0000000..4477e4a --- /dev/null +++ b/scripts/plugins/extract-pdf.zeek @@ -0,0 +1,11 @@ +@load ../__load__ + +module FileExtraction; +# MIME types this plugin selects: PDF documents only. +const pdf_types: set[string] = { "application/pdf" }; +# Select files whose mime_type is in pdf_types by breaking out of the extract hook. NOTE(review): meta$mime_type is read without a ?$ check; the callers visible in this package test meta?$mime_type first. +hook FileExtraction::extract(f: fa_file, meta: fa_metadata) &priority=5 + { + if ( meta$mime_type in pdf_types ) + break; + } diff --git a/scripts/plugins/extract-pe.zeek b/scripts/plugins/extract-pe.zeek new file mode 100644 index 0000000..0319629 --- /dev/null +++ b/scripts/plugins/extract-pe.zeek @@ -0,0 +1,9 @@ +@load ../__load__ + +module FileExtraction; +# Select files whose mime_type is "application/x-dosexec" by breaking out of the extract hook. NOTE(review): meta$mime_type is read without a ?$ check; the callers visible in this package test meta?$mime_type first. +hook FileExtraction::extract(f: fa_file, meta: fa_metadata) &priority=5 + { + if ( meta$mime_type == "application/x-dosexec" ) + break; + } diff --git a/scripts/plugins/store-files-by-md5.zeek b/scripts/plugins/store-files-by-md5.zeek new file mode 100644 index 0000000..8181d11 --- /dev/null +++ b/scripts/plugins/store-files-by-md5.zeek @@ -0,0 +1,19 @@ 
+@load ../__load__ +@load policy/frameworks/files/hash-all-files +# Rename each extracted file to <path><source>-<md5>.<extension> when the file's state is removed. +event file_state_remove(f: fa_file) + { + # Nothing to do unless the file was extracted, has an MD5 digest, and a storage path is configured. + if ( !f$info?$extracted || !f$info?$md5 || FileExtraction::path == "" ) + return; + + local orig = f$info$extracted; + # Keep the current extension: the text after the last "." in the extracted name. + local split_orig = split_string(f$info$extracted, /\./); + local extension = split_orig[|split_orig|-1]; + + local dest = fmt("%s%s-%s.%s", FileExtraction::path, f$source, f$info$md5, extension); + + if(rename(orig, dest)) + f$info$extracted = dest; + }
diff --git a/scripts/plugins/store-files-by-sha1.zeek b/scripts/plugins/store-files-by-sha1.zeek new file mode 100644 index 0000000..8a1aca5 --- /dev/null +++ b/scripts/plugins/store-files-by-sha1.zeek @@ -0,0 +1,23 @@ +@load ../__load__ +@load policy/frameworks/files/hash-all-files +# Rename each extracted file to <path><source>-<sha1>.<extension> when the file's state is removed. +event file_state_remove(f: fa_file) + { + # Nothing to do unless the file was extracted, has a SHA1 digest, and a storage path is configured. + if ( !f$info?$extracted || !f$info?$sha1 || FileExtraction::path == "" ) + return; + + local orig = f$info$extracted; + # Keep the current extension: the text after the last "." in the extracted name. + local split_orig = split_string(f$info$extracted, /\./); + local extension = split_orig[|split_orig|-1]; + + local dest = fmt("%s%s-%s.%s", FileExtraction::path, f$source, f$info$sha1, extension); + + # Use the rename() built-in, as store-files-by-md5.zeek does, rather than an + # asynchronous shell "mv": no dependency on the Exec module, no unquoted paths handed + # to a shell, and f$info$extracted only changes when the rename actually succeeded. + if ( rename(orig, dest) ) + f$info$extracted = dest; + + }
diff --git a/scripts/plugins/store-files-by-sha256.zeek b/scripts/plugins/store-files-by-sha256.zeek new file mode 100644 index 0000000..e32383c --- /dev/null +++ b/scripts/plugins/store-files-by-sha256.zeek @@ -0,0 +1,38 @@ +@load ../__load__ +@load policy/frameworks/files/hash-all-files +# Attach the SHA256 analyzer to every file that the extract/ignore hooks select for extraction. +event file_sniff(f: fa_file, meta: fa_metadata) + { + # Same selection test as main.zeek: an extract handler must break and no ignore handler may break. + if ( meta?$mime_type && !hook FileExtraction::extract(f, meta) ) + { + + if ( !hook FileExtraction::ignore(f, meta) ) + return; + + Files::add_analyzer(f, Files::ANALYZER_SHA256); + + } + + } +# Rename each extracted file to <path><source>-<sha256>.<extension> when the file's state is removed. +event file_state_remove(f: fa_file) + { + # Nothing to do unless the file was extracted, has a SHA256 digest, and a storage path is configured. + if ( !f$info?$extracted || !f$info?$sha256 || FileExtraction::path == "" ) + return; + + local orig = f$info$extracted; + # Keep the current extension: the text after the last "." in the extracted name. + local split_orig = split_string(f$info$extracted, /\./); + local extension = split_orig[|split_orig|-1]; + + local dest = fmt("%s%s-%s.%s", FileExtraction::path, f$source, f$info$sha256, extension); + + # Use the rename() built-in, as store-files-by-md5.zeek does, rather than an + # asynchronous shell "mv": no dependency on the Exec module, no unquoted paths handed + # to a shell, and f$info$extracted only changes when the rename actually succeeded. + if ( rename(orig, dest) ) + f$info$extracted = dest; + + }
diff --git a/zkg.meta b/zkg.meta new file mode 100644 index 0000000..0fab3a7 --- /dev/null +++ b/zkg.meta @@ -0,0 +1,8 @@ +[package] +description = Extract files from network traffic with Zeek. +tags = files, file extraction, file analysis +version = 1.0.0 +script_dir = scripts +config_files = scripts/config.zeek +depends = + zeek >=3.0.0 \ No newline at end of file