""" A module defining the main Zeek Package Manager interface which supplies methods to interact with and operate on Zeek packages. """ import configparser import copy import filecmp import json import os import pathlib import re import shutil import subprocess import sys import tarfile from collections import deque from urllib.parse import urlparse import git import semantic_version as semver from . import ( LOG, __version__, ) from ._util import ( configparser_section_dict, copy_over_path, delete_path, find_program, get_zeek_info, get_zeek_version, git_checkout, git_clone, git_default_branch, git_pull, git_version_tags, is_sha1, make_dir, make_symlink, normalize_version_tag, read_zeek_config_line, safe_tarfile_extractall, std_encoding, ) from .package import ( BUILTIN_SCHEME, BUILTIN_SOURCE, LEGACY_METADATA_FILENAME, LEGACY_PLUGIN_MAGIC_FILE, LEGACY_PLUGIN_MAGIC_FILE_DISABLED, METADATA_FILENAME, PLUGIN_MAGIC_FILE, PLUGIN_MAGIC_FILE_DISABLED, TRACKING_METHOD_BRANCH, TRACKING_METHOD_COMMIT, TRACKING_METHOD_VERSION, InstalledPackage, Package, PackageInfo, PackageStatus, PackageVersion, aliases, canonical_url, make_builtin_package, name_from_path, ) from .package import ( is_valid_name as is_valid_package_name, ) from .source import AGGREGATE_DATA_FILE, Source from .uservar import ( UserVar, ) class Stage: def __init__(self, manager, state_dir=None): self.manager = manager if state_dir: self.state_dir = state_dir self.clone_dir = os.path.join(self.state_dir, "clones") self.script_dir = os.path.join(self.state_dir, "scripts", "packages") self.plugin_dir = os.path.join(self.state_dir, "plugins", "packages") self.bin_dir = os.path.join(self.state_dir, "bin") else: # Stages not given a test directory are essentially a shortcut to # standard functionality; this doesn't require all directories: self.state_dir = None self.clone_dir = manager.package_clonedir self.script_dir = manager.script_dir self.plugin_dir = manager.plugin_dir self.bin_dir = manager.bin_dir def populate(self): # If we're staging to a temporary location, blow anything existing there # away first. if self.state_dir: delete_path(self.state_dir) make_dir(self.clone_dir) make_dir(self.script_dir) make_dir(self.plugin_dir) make_dir(self.bin_dir) # To preserve %(package_base)s functionality in build/test commands # during staging in testing folders, we need to provide one location # that combines the existing installed packages, plus any under test, # with the latter overriding any already installed ones. We symlink the # real install folders into the staging one. The subsequent cloning of # the packages under test will remove those links as needed. 
        if self.state_dir:
            with os.scandir(self.manager.package_clonedir) as it:
                for entry in it:
                    if not entry.is_dir():
                        continue

                    make_symlink(entry.path, os.path.join(self.clone_dir, entry.name))

    def get_subprocess_env(self):
        zeekpath = os.environ.get("ZEEKPATH")
        pluginpath = os.environ.get("ZEEK_PLUGIN_PATH")

        if not (zeekpath and pluginpath):
            zeek_config = find_program("zeek-config")

            if zeek_config:
                cmd = subprocess.Popen(
                    [zeek_config, "--zeekpath", "--plugin_dir"],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT,
                    bufsize=1,
                    universal_newlines=True,
                )

                line1 = read_zeek_config_line(cmd.stdout)
                line2 = read_zeek_config_line(cmd.stdout)

                if not zeekpath:
                    zeekpath = line1

                if not pluginpath:
                    pluginpath = line2
            else:
                return None, 'no "zeek-config" found in PATH'

        zeekpath = os.path.dirname(self.script_dir) + os.pathsep + zeekpath
        pluginpath = os.path.dirname(self.plugin_dir) + os.pathsep + pluginpath

        env = os.environ.copy()
        env["PATH"] = self.bin_dir + os.pathsep + os.environ.get("PATH", "")
        env["ZEEKPATH"] = zeekpath
        env["ZEEK_PLUGIN_PATH"] = pluginpath

        return env, ""


class Manager:
    """A package manager object performs various operations on packages.

    It uses a state directory and a manifest file within it to keep track of
    package sources, installed packages, and their statuses.

    Attributes:
        sources (dict of str -> :class:`.source.Source`): dictionary of
            package sources keyed by the name given to :meth:`add_source()`

        installed_pkgs (dict of str -> :class:`.package.InstalledPackage`):
            a dictionary of installed packages keyed on package names (the
            last component of the package's git URL)

        zeek_dist (str): path to the Zeek source code distribution. This is
            needed for packages that contain Zeek plugins that need to be
            built from source code.

        state_dir (str): the directory where the package manager will
            maintain a manifest file, package/source git clones, and other
            persistent state the manager needs in order to operate

        user_vars (dict of str -> str): dictionary of key-value pairs where
            the value will be substituted into package build commands in
            place of the key.

        backup_dir (str): a directory where the package manager will store
            backup files (e.g. locally modified package config files)

        log_dir (str): a directory where the package manager will store
            misc. log files (e.g. package build logs)

        scratch_dir (str): a directory where the package manager performs
            miscellaneous/temporary file operations

        script_dir (str): the directory where the package manager will copy
            each installed package's `script_dir` (as given by its
            :file:`zkg.meta` or :file:`bro-pkg.meta`). Each package gets a
            subdirectory within `script_dir` associated with its name.

        plugin_dir (str): the directory where the package manager will copy
            each installed package's `plugin_dir` (as given by its
            :file:`zkg.meta` or :file:`bro-pkg.meta`). Each package gets a
            subdirectory within `plugin_dir` associated with its name.

        bin_dir (str): the directory into which the package manager links
            executables provided by an installed package through
            `executables` (as given by its :file:`zkg.meta` or
            :file:`bro-pkg.meta`)

        source_clonedir (str): the directory where the package manager will
            clone package sources. Each source gets a subdirectory
            associated with its name.

        package_clonedir (str): the directory where the package manager will
            clone installed packages. Each package gets a subdirectory
            associated with its name.

        package_testdir (str): the directory where the package manager will
            run tests. Each package gets a subdirectory associated with its
            name.
manifest (str): the path to the package manager's manifest file. This file maintains a list of installed packages and their status. autoload_script (str): path to a Zeek script named :file:`packages.zeek` that the package manager maintains. It is a list of ``@load`` for each installed package that is marked as loaded (see :meth:`load()`). autoload_package (str): path to a Zeek :file:`__load__.zeek` script which is just a symlink to `autoload_script`. It's always located in a directory named :file:`packages`, so as long as :envvar:`ZEEKPATH` is configured correctly, ``@load packages`` will load all installed packages that have been marked as loaded. """ def __init__( self, state_dir, script_dir, plugin_dir, zeek_dist="", user_vars=None, bin_dir="", ): """Creates a package manager instance. Args: state_dir (str): value to set the `state_dir` attribute to script_dir (str): value to set the `script_dir` attribute to plugin_dir (str): value to set the `plugin_dir` attribute to zeek_dist (str): value to set the `zeek_dist` attribute to user_vars (dict of str -> str): key-value pair substitutions for use in package build commands. bin_dir (str): value to set the `bin_dir` attribute to. If empty/nil value, defaults to setting `bin_dir` attribute to `/bin`. Raises: OSError: when a package manager state directory can't be created IOError: when a package manager state file can't be created """ LOG.debug("init Manager version %s", __version__) self.sources = {} self.installed_pkgs = {} self._builtin_packages = None # Cached Zeek built-in packages. self._builtin_packages_discovered = False # Flag if discovery even worked. self.zeek_dist = zeek_dist self.state_dir = state_dir self.user_vars = {} if user_vars is None else user_vars self.backup_dir = os.path.join(self.state_dir, "backups") self.log_dir = os.path.join(self.state_dir, "logs") self.scratch_dir = os.path.join(self.state_dir, "scratch") self._script_dir = script_dir self.script_dir = os.path.join(script_dir, "packages") self._plugin_dir = plugin_dir self.plugin_dir = os.path.join(plugin_dir, "packages") self.bin_dir = bin_dir or os.path.join(self.state_dir, "bin") self.source_clonedir = os.path.join(self.state_dir, "clones", "source") self.package_clonedir = os.path.join(self.state_dir, "clones", "package") self.package_testdir = os.path.join(self.state_dir, "testing") self.manifest = os.path.join(self.state_dir, "manifest.json") self.autoload_script = os.path.join(self.script_dir, "packages.zeek") self.autoload_package = os.path.join(self.script_dir, "__load__.zeek") make_dir(self.state_dir) make_dir(self.log_dir) make_dir(self.scratch_dir) make_dir(self.source_clonedir) make_dir(self.package_clonedir) make_dir(self.script_dir) make_dir(self.plugin_dir) make_dir(self.bin_dir) _create_readme(os.path.join(self.script_dir, "README")) _create_readme(os.path.join(self.plugin_dir, "README")) if not os.path.exists(self.manifest): self._write_manifest() prev_script_dir, prev_plugin_dir, prev_bin_dir = self._read_manifest() # Place all Zeek built-in packages into installed packages. 
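        # For orientation, a hypothetical (shape-only) excerpt of what
        # `zeek --build-info` reports and discover_builtin_packages() parses:
        #
        #   {"zkg": {"provides": [
        #       {"name": "packages/zeek-af_packet-plugin",
        #        "version": "3.2.0",
        #        "commit": "abc123"}]}}
        #
        # Each such entry is turned into an InstalledPackage below so that
        # dependency resolution can treat built-ins like installed packages.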
for info in self.discover_builtin_packages(): self.installed_pkgs[info.package.name] = InstalledPackage( package=info.package, status=info.status, ) refresh_bin_dir = False # whether we need to updates link in bin_dir relocating_bin_dir = False # whether bin_dir has relocated need_manifest_update = False if os.path.realpath(prev_script_dir) != os.path.realpath(self.script_dir): LOG.info("relocating script_dir %s -> %s", prev_script_dir, self.script_dir) if os.path.exists(prev_script_dir): delete_path(self.script_dir) shutil.move(prev_script_dir, self.script_dir) prev_zeekpath = os.path.dirname(prev_script_dir) for pkg_name in self.installed_pkgs: old_link = os.path.join(prev_zeekpath, pkg_name) new_link = os.path.join(self.zeekpath(), pkg_name) if os.path.lexists(old_link): LOG.info("moving package link %s -> %s", old_link, new_link) shutil.move(old_link, new_link) else: LOG.info("skip moving package link %s -> %s", old_link, new_link) need_manifest_update = True refresh_bin_dir = True if os.path.realpath(prev_plugin_dir) != os.path.realpath(self.plugin_dir): LOG.info("relocating plugin_dir %s -> %s", prev_plugin_dir, self.plugin_dir) if os.path.exists(prev_plugin_dir): delete_path(self.plugin_dir) shutil.move(prev_plugin_dir, self.plugin_dir) need_manifest_update = True refresh_bin_dir = True if prev_bin_dir and os.path.realpath(prev_bin_dir) != os.path.realpath( self.bin_dir, ): LOG.info("relocating bin_dir %s -> %s", prev_bin_dir, self.bin_dir) need_manifest_update = True refresh_bin_dir = True relocating_bin_dir = True if refresh_bin_dir: self._refresh_bin_dir(self.bin_dir) if relocating_bin_dir: self._clear_bin_dir(prev_bin_dir) try: # We try to remove the old bin_dir. That may not succeed in case # it wasn't actually managed by us, but that's ok. os.rmdir(prev_bin_dir) except OSError: pass if need_manifest_update: self._write_manifest() self._write_autoloader() make_symlink("packages.zeek", self.autoload_package) def _write_autoloader(self): """Write the :file:`packages.zeek` loader script. Raises: IOError: if :file:`packages.zeek` loader script cannot be written """ with open(self.autoload_script, "w") as f: content = ( "# WARNING: This file is managed by zkg.\n" "# Do not make direct modifications here.\n" ) for ipkg in self.loaded_packages(): if self.has_scripts(ipkg): content += f"@load ./{ipkg.package.name}\n" f.write(content) def _write_plugin_magic(self, ipkg): """Enables/disables any Zeek plugin included with a package. Zeek's plugin code scans its plugin directories for __zeek_plugin__ magic files, which indicate presence of a plugin directory. When this file does not exist, Zeek does not recognize a plugin. When we're loading a package, this function renames an existing __zeek_plugin__.disabled file to __zeek_plugin__, and vice versa when we're unloading a package. When the package doesn't include a plugin, or when the plugin directory already contains a correctly named magic file, this function does nothing. Until Zeek 6.1, the magic file was named __bro_plugin__. zkg implements a fallback for recognizing the older name so that newer zkg versions continue to work with older Zeek versions for some time longer. 
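
        As an illustration (hypothetical package name): loading a package
        "foo" that ships a plugin renames::

            <plugin_dir>/foo/__zeek_plugin__.disabled
                -> <plugin_dir>/foo/__zeek_plugin__

        and unloading the package performs the reverse rename.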
""" package_dir = pathlib.Path(self.plugin_dir) / ipkg.package.name magic_paths_enabled = [ package_dir / PLUGIN_MAGIC_FILE, package_dir / LEGACY_PLUGIN_MAGIC_FILE, ] magic_paths_disabled = [ package_dir / PLUGIN_MAGIC_FILE_DISABLED, package_dir / LEGACY_PLUGIN_MAGIC_FILE_DISABLED, ] for path_enabled, path_disabled in zip( magic_paths_enabled, magic_paths_disabled, ): if ipkg.status.is_loaded: if path_disabled.exists(): try: path_disabled.rename(path_enabled) except OSError as exception: LOG.error( "could not enable plugin: %s %s", type(exception).__name__, exception, ) else: if path_enabled.exists(): try: path_enabled.rename(path_disabled) except OSError as exception: LOG.error( "could not disable plugin: %s %s", type(exception).__name__, exception, ) def _read_manifest(self): """Read the manifest file containing the list of installed packages. Returns: tuple: (previous script_dir, previous plugin_dir) Raises: IOError: when the manifest file can't be read """ with open(self.manifest) as f: data = json.load(f) version = data["manifest_version"] pkg_list = data["installed_packages"] self.installed_pkgs = {} for dicts in pkg_list: pkg_dict = dicts["package_dict"] status_dict = dicts["status_dict"] pkg_name = pkg_dict["name"] if version == 0 and "index_data" in pkg_dict: del pkg_dict["index_data"] pkg_dict["canonical"] = True pkg = Package(**pkg_dict) status = PackageStatus(**status_dict) self.installed_pkgs[pkg_name] = InstalledPackage(pkg, status) return data["script_dir"], data["plugin_dir"], data.get("bin_dir", None) def _write_manifest(self): """Writes the manifest file containing the list of installed packages. Raises: IOError: when the manifest file can't be written """ pkg_list = [] for _, installed_pkg in self.installed_pkgs.items(): if installed_pkg.is_builtin(): continue pkg_list.append( { "package_dict": installed_pkg.package.__dict__, "status_dict": installed_pkg.status.__dict__, }, ) data = { "manifest_version": 1, "script_dir": self.script_dir, "plugin_dir": self.plugin_dir, "bin_dir": self.bin_dir, "installed_packages": pkg_list, } with open(self.manifest, "w") as f: json.dump(data, f, indent=2, sort_keys=True) def zeekpath(self): """Return the path where installed package scripts are located. This path can be added to :envvar:`ZEEKPATH` for interoperability with Zeek. """ return os.path.dirname(self.script_dir) def zeek_plugin_path(self): """Return the path where installed package plugins are located. This path can be added to :envvar:`ZEEK_PLUGIN_PATH` for interoperability with Zeek. """ return os.path.dirname(self.plugin_dir) def add_source(self, name, git_url): """Add a git repository that acts as a source of packages. Args: name (str): a short name that will be used to reference the package source. git_url (str): the git URL of the package source Returns: str: empty string if the source is successfully added, else the reason why it failed. """ if name == BUILTIN_SOURCE: return f"{name} is a reserved source name" if name in self.sources: existing_source = self.sources[name] if existing_source.git_url == git_url: LOG.debug('duplicate source "%s"', name) return True return ( f"source already exists with different URL: {existing_source.git_url}" ) clone_path = os.path.join(self.source_clonedir, name) # Support @ in the path to denote the "version" to checkout version = None # Prepend 'ssh://' and replace the first ':' with '/' if git_url # looks like a scp-like URL, e.g. git@github.com:user/repo.git. 
# urlparse will otherwise parse everything into path and the @ # is confusing the versioning logic. Note that per the git-clone # docs git recognizes scp-style URLs only when there are no slashes # before the first colon. colonidx, slashidx = git_url.find(":"), git_url.find("/") if ( "://" not in git_url and colonidx > 0 and (slashidx == -1 or slashidx > colonidx) ): parse_result = urlparse("ssh://" + git_url.replace(":", "/", 1)) else: parse_result = urlparse(git_url) if parse_result.path and "@" in parse_result.path: git_url, version = git_url.rsplit("@", 1) try: source = Source( name=name, clone_path=clone_path, git_url=git_url, version=version, ) except git.GitCommandError as error: LOG.warning("failed to clone git repo: %s", error) return "failed to clone git repo" else: self.sources[name] = source return "" def source_packages(self): """Return a list of :class:`.package.Package` within all sources.""" rval = [] for _, source in self.sources.items(): rval += source.packages() return rval def discover_builtin_packages(self): """ Discover packages included in Zeek for dependency resolution. This is using Zeek's ``--build-info`` flag and specifically the ``zkg.provides`` entry it contains. Requires Zeek 6.0 and later. Returns: list of :class:`.package.BuiltinPackage`: List of built-in packages. """ if self._builtin_packages is not None: return self._builtin_packages self._builtin_packages = [] try: zeek_executable = get_zeek_info().zeek except LookupError as e: LOG.warning("unable to discover builtin-packages: %s", str(e)) return self._builtin_packages try: build_info_str = subprocess.check_output( [zeek_executable, "--build-info"], stderr=subprocess.DEVNULL, timeout=10, ) build_info = json.loads(build_info_str) except subprocess.CalledProcessError: # Not a warning() due to being a bit noisy. LOG.info("unable to discover built-in packages - requires Zeek 6.0") return self._builtin_packages except json.JSONDecodeError as e: LOG.error("unable to parse Zeek's build info output: %s", str(e)) return self._builtin_packages if "zkg" not in build_info or "provides" not in build_info["zkg"]: LOG.warning("missing zkg.provides entry in zeek --build-info output") return self._builtin_packages self._builtin_packages_discovered = True for p in build_info["zkg"]["provides"]: name, version = p.get("name"), p.get("version") commit = p.get("commit") if not name or not version: LOG.warning("zkg.provides entry missing name or version: %s", repr(p)) continue orig_version = version # The "version" field may not be semantic version compatible. # For example, 1.4.2-68 is parsed as prerelease 68 of 1.4.2, but # from update-changes/git describe, it's 68 commits after 1.4.2. # Deal with that by stripping -68, but leave -rc1 or -dev alone. m = re.match(r"([0-9]+\.[0-9]+\.[0-9]+)-[0-9]+", version) if m: version = m.group(1) LOG.debug( "found built-in package %s with version %s (%s)", name, version, orig_version, ) self._builtin_packages.append( make_builtin_package( name=name, current_version=version, current_hash=commit, ), ) return self._builtin_packages def find_builtin_package(self, pkg_path): """ Find a builtin plugin that matches ``pkg_path``. Args: pkg_path (str): the full git URL of a package or the shortened path/name that refers to it within a package source. Returns: PackageInfo: PackageInfo instance representing a builtin package matching ``pkg_path``. 
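
        A minimal usage sketch (assuming a hypothetical built-in package
        named "zeek-af_packet-plugin")::

            info = manager.find_builtin_package("zeek-af_packet-plugin")
            if info:
                print(info.package.name, info.best_version())

        Returns None when no built-in package matches ``pkg_path``.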
""" pkg_name = name_from_path(pkg_path) for info in self.discover_builtin_packages(): if info.package.matches_path(pkg_name): return info return None def installed_packages(self): """Return list of :class:`.package.InstalledPackage`.""" return [ipkg for _, ipkg in sorted(self.installed_pkgs.items())] def installed_package_dependencies(self): """Return dict of 'package' -> dict of 'dependency' -> 'version'. Package-name / dependency-name / and version-requirement values are all strings. """ return { name: ipkg.package.dependencies() for name, ipkg in self.installed_pkgs.items() } def loaded_packages(self): """Return list of loaded :class:`.package.InstalledPackage`.""" rval = [] for _, ipkg in sorted(self.installed_pkgs.items()): if ipkg.status.is_loaded: rval.append(ipkg) return rval def package_build_log(self, pkg_path): """Return the path to the package manager's build log for a package. Args: pkg_path (str): the full git URL of a package or the shortened path/name that refers to it within a package source. E.g. for a package source called "zeek" with package named "foo" in :file:`alice/zkg.index`, the following inputs may refer to the package: "foo", "alice/foo", or "zeek/alice/foo". """ name = name_from_path(pkg_path) return os.path.join(self.log_dir, f"{name}-build.log") def match_source_packages(self, pkg_path): """Return a list of :class:`.package.Package` that match a given path. Args: pkg_path (str): the full git URL of a package or the shortened path/name that refers to it within a package source. E.g. for a package source called "zeek" with package named "foo" in :file:`alice/zkg.index`, the following inputs may refer to the package: "foo", "alice/foo", or "zeek/alice/foo". """ rval = [] canon_url = canonical_url(pkg_path) for pkg in self.source_packages(): if pkg.matches_path(canon_url): rval.append(pkg) return rval def find_installed_package(self, pkg_path): """Return an :class:`.package.InstalledPackage` if one matches the name. Args: pkg_path (str): the full git URL of a package or the shortened path/name that refers to it within a package source. E.g. for a package source called "zeek" with package named "foo" in :file:`alice/zkg.index`, the following inputs may refer to the package: "foo", "alice/foo", or "zeek/alice/foo". """ pkg_name = name_from_path(pkg_path) return self.installed_pkgs.get(pkg_name) def get_installed_package_dependencies(self, pkg_path): """Return a set of tuples of dependent package names and their version number if pkg_path is an installed package. Args: pkg_path (str): the full git URL of a package or the shortened path/name that refers to it within a package source. E.g. for a package source called "zeek" with package named "foo" in :file:`alice/zkg.index`, the following inputs may refer to the package: "foo", "alice/foo", or "zeek/alice/foo". """ ipkg = self.find_installed_package(pkg_path) if ipkg: return ipkg.package.dependencies() return None def has_scripts(self, installed_pkg): """Return whether a :class:`.package.InstalledPackage` installed scripts. Args: installed_pkg(:class:`.package.InstalledPackage`): the installed package to check for whether it has installed any Zeek scripts. Returns: bool: True if the package has installed Zeek scripts. """ return os.path.exists(os.path.join(self.script_dir, installed_pkg.package.name)) def has_plugin(self, installed_pkg): """Return whether a :class:`.package.InstalledPackage` installed a plugin. 
Args: installed_pkg(:class:`.package.InstalledPackage`): the installed package to check for whether it has installed a Zeek plugin. Returns: bool: True if the package has installed a Zeek plugin. """ return os.path.exists(os.path.join(self.plugin_dir, installed_pkg.package.name)) def save_temporary_config_files(self, installed_pkg): """Return a list of temporary package config file backups. Args: installed_pkg(:class:`.package.InstalledPackage`): the installed package to save temporary config file backups for. Returns: list of (str, str): tuples that describe the config files backups. The first element is the config file as specified in the package metadata (a file path relative to the package's root directory). The second element is an absolute file system path to where that config file has been copied. It should be considered temporary, so make use of it before doing any further operations on packages. """ import re metadata = installed_pkg.package.metadata config_files = re.split(r",\s*", metadata.get("config_files", "")) if not config_files: return [] pkg_name = installed_pkg.package.name clone_dir = os.path.join(self.package_clonedir, pkg_name) rval = [] for config_file in config_files: config_file_path = os.path.join(clone_dir, config_file) if not os.path.isfile(config_file_path): LOG.info( "package '%s' claims config file at '%s', but it does not exist", pkg_name, config_file, ) continue backup_file = os.path.join(self.scratch_dir, "tmpcfg", config_file) make_dir(os.path.dirname(backup_file)) shutil.copy2(config_file_path, backup_file) rval.append((config_file, backup_file)) return rval def modified_config_files(self, installed_pkg): """Return a list of package config files that the user has modified. Args: installed_pkg(:class:`.package.InstalledPackage`): the installed package to check for whether it has installed any Zeek scripts. Returns: list of (str, str): tuples that describe the modified config files. The first element is the config file as specified in the package metadata (a file path relative to the package's root directory). The second element is an absolute file system path to where that config file is currently installed. 
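
        For example (hypothetical package and paths), a package "foo" with
        metadata ``script_dir = scripts`` and ``config_files =
        scripts/config.zeek`` whose installed copy differs from the cloned
        original would yield roughly::

            [("scripts/config.zeek", "<script_dir>/foo/config.zeek")]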
""" import re metadata = installed_pkg.package.metadata config_files = re.split(r",\s*", metadata.get("config_files", "")) if not config_files: return [] pkg_name = installed_pkg.package.name script_install_dir = os.path.join(self.script_dir, pkg_name) plugin_install_dir = os.path.join(self.plugin_dir, pkg_name) clone_dir = os.path.join(self.package_clonedir, pkg_name) script_dir = metadata.get("script_dir", "") plugin_dir = metadata.get("plugin_dir", "build") rval = [] for config_file in config_files: their_config_file_path = os.path.join(clone_dir, config_file) if not os.path.isfile(their_config_file_path): LOG.info( "package '%s' claims config file at '%s', but it does not exist", pkg_name, config_file, ) continue if config_file.startswith(plugin_dir): our_config_file_path = os.path.join( plugin_install_dir, config_file[len(plugin_dir) :], ) if not os.path.isfile(our_config_file_path): LOG.info( "package '%s' config file '%s' not found in plugin_dir: %s", pkg_name, config_file, our_config_file_path, ) continue elif config_file.startswith(script_dir): our_config_file_path = os.path.join( script_install_dir, config_file[len(script_dir) :], ) if not os.path.isfile(our_config_file_path): LOG.info( "package '%s' config file '%s' not found in script_dir: %s", pkg_name, config_file, our_config_file_path, ) continue else: # Their config file is outside script/plugin install dirs, # so no way user has it even installed, much less modified. LOG.warning( "package '%s' config file '%s' not within" " plugin_dir or script_dir", pkg_name, config_file, ) continue if not filecmp.cmp(our_config_file_path, their_config_file_path): rval.append((config_file, our_config_file_path)) return rval def backup_modified_files(self, backup_subdir, modified_files): """Creates backups of modified config files Args: modified_files(list of (str, str)): the return value of :meth:`modified_config_files()`. backup_subdir(str): the subdir of `backup_dir` in which Returns: list of str: paths indicating the backup locations. The order of the returned list corresponds directly to the order of `modified_files`. """ import time rval = [] for modified_file in modified_files: config_file = modified_file[0] config_file_dir = os.path.dirname(config_file) install_path = modified_file[1] filename = os.path.basename(install_path) backup_dir = os.path.join(self.backup_dir, backup_subdir, config_file_dir) timestamp = time.strftime(".%Y-%m-%d-%H:%M:%S") backup_path = os.path.join(backup_dir, filename + timestamp) make_dir(backup_dir) shutil.copy2(install_path, backup_path) rval.append(backup_path) return rval class SourceAggregationResults: """The return value of a call to :meth:`.Manager.aggregate_source()`. Attributes: refresh_error (str): an empty string if no overall error occurred in the "refresh" operation, else a description of what wrong package_issues (list of (str, str)): a list of reasons for failing to collect metadata per packages/repository. The first tuple element gives the repository URL in which the problem occurred and the second tuple element describes the failure. """ def __init__(self, refresh_error="", package_issues=None): self.refresh_error = refresh_error self.package_issues = package_issues if package_issues else [] def aggregate_source(self, name, push=False): """Pull latest git info from a package source and aggregate metadata. This is like calling :meth:`refresh_source()` with the *aggregate* arguments set to True. 
This makes the latest pre-aggregated package metadata available or performs the aggregation locally in order to push it to the actual package source. Locally aggregated data also takes precedence over the source's pre-aggregated data, so it can be useful in the case the operator of the source does not update their pre-aggregated data at a frequent enough interval. Args: name(str): the name of the package source. E.g. the same name used as a key to :meth:`add_source()`. push (bool): whether to push local changes to the aggregated metadata to the remote package source. Returns: :class:`.Manager.SourceAggregationResults`: the results of the refresh/aggregation. """ return self._refresh_source(name, True, push) def refresh_source(self, name, aggregate=False, push=False): """Pull latest git information from a package source. This makes the latest pre-aggregated package metadata available or performs the aggregation locally in order to push it to the actual package source. Locally aggregated data also takes precedence over the source's pre-aggregated data, so it can be useful in the case the operator of the source does not update their pre-aggregated data at a frequent enough interval. Args: name(str): the name of the package source. E.g. the same name used as a key to :meth:`add_source()`. aggregate (bool): whether to perform a local metadata aggregation by crawling all packages listed in the source's index files. push (bool): whether to push local changes to the aggregated metadata to the remote package source. If the `aggregate` flag is set, the data will be pushed after the aggregation is finished. Returns: str: an empty string if no errors occurred, else a description of what went wrong. """ res = self._refresh_source(name, aggregate, push) return res.refresh_error def _refresh_source(self, name, aggregate=False, push=False): """Used by :meth:`refresh_source()` and :meth:`aggregate_source()`.""" if name not in self.sources: return self.SourceAggregationResults("source name does not exist") source = self.sources[name] LOG.debug('refresh "%s": pulling %s', name, source.git_url) aggregate_file = os.path.join(source.clone.working_dir, AGGREGATE_DATA_FILE) agg_file_ours = os.path.join(self.scratch_dir, AGGREGATE_DATA_FILE) agg_file_their_orig = os.path.join( self.scratch_dir, AGGREGATE_DATA_FILE + ".orig", ) delete_path(agg_file_ours) delete_path(agg_file_their_orig) if os.path.isfile(aggregate_file): shutil.copy2(aggregate_file, agg_file_ours) source.clone.git.reset(hard=True) source.clone.git.clean("-f", "-x", "-d") if os.path.isfile(aggregate_file): shutil.copy2(aggregate_file, agg_file_their_orig) try: source.clone.git.fetch("--recurse-submodules=yes") git_pull(source.clone) except git.GitCommandError as error: LOG.error("failed to pull source %s: %s", name, error) return self.SourceAggregationResults( f"failed to pull from remote source: {error}", ) if os.path.isfile(agg_file_ours): if os.path.isfile(aggregate_file): # There's a tracked version of the file after pull. if os.path.isfile(agg_file_their_orig): # We had local modifications to the file. if filecmp.cmp(aggregate_file, agg_file_their_orig): # Their file hasn't changed, use ours. shutil.copy2(agg_file_ours, aggregate_file) LOG.debug( "aggegrate file in source unchanged, restore local one", ) else: # Their file changed, use theirs. LOG.debug("aggegrate file in source changed, discard local one") else: # File was untracked before pull and tracked after, # use their version. 
LOG.debug("new aggegrate file in source, discard local one") else: # They don't have the file after pulling, so restore ours. shutil.copy2(agg_file_ours, aggregate_file) LOG.debug("no aggegrate file in source, restore local one") aggregation_issues = [] if aggregate: parser = configparser.ConfigParser(interpolation=None) prev_parser = configparser.ConfigParser(interpolation=None) prev_packages = set() if os.path.isfile(aggregate_file): prev_parser.read(aggregate_file) prev_packages = set(prev_parser.sections()) agg_adds = [] agg_mods = [] agg_dels = [] for index_file in source.package_index_files(): urls = [] with open(index_file) as f: urls = [line.rstrip("\n") for line in f] for url in urls: pkg_name = name_from_path(url) clonepath = os.path.join(self.scratch_dir, pkg_name) delete_path(clonepath) try: clone = git_clone(url, clonepath, shallow=True) except git.GitCommandError as error: LOG.warn( "failed to clone %s, skipping aggregation: %s", url, error, ) aggregation_issues.append((url, repr(error))) continue version_tags = git_version_tags(clone) if len(version_tags): version = version_tags[-1] else: version = git_default_branch(clone) try: git_checkout(clone, version) except git.GitCommandError as error: LOG.warn( 'failed to checkout branch/version "%s" of %s, ' "skipping aggregation: %s", version, url, error, ) msg = ( f'failed to checkout branch/version "{version}": {error!r}' ) aggregation_issues.append((url, msg)) continue metadata_file = _pick_metadata_file(clone.working_dir) metadata_parser = configparser.ConfigParser(interpolation=None) invalid_reason = _parse_package_metadata( metadata_parser, metadata_file, ) if invalid_reason: LOG.warn( "skipping aggregation of %s: bad metadata: %s", url, invalid_reason, ) aggregation_issues.append((url, invalid_reason)) continue metadata = _get_package_metadata(metadata_parser) index_dir = os.path.dirname(index_file)[ len(self.source_clonedir) + len(name) + 2 : ] qualified_name = os.path.join(index_dir, pkg_name) parser.add_section(qualified_name) for key, value in sorted(metadata.items()): parser.set(qualified_name, key, value) parser.set(qualified_name, "url", url) parser.set(qualified_name, "version", version) if qualified_name not in prev_packages: agg_adds.append(qualified_name) else: prev_meta = configparser_section_dict( prev_parser, qualified_name, ) new_meta = configparser_section_dict(parser, qualified_name) if prev_meta != new_meta: agg_mods.append(qualified_name) with open(aggregate_file, "w") as f: parser.write(f) agg_dels = list(prev_packages.difference(set(parser.sections()))) adds_str = " (" + ", ".join(sorted(agg_adds)) + ")" if agg_adds else "" mods_str = " (" + ", ".join(sorted(agg_mods)) + ")" if agg_mods else "" dels_str = " (" + ", ".join(sorted(agg_dels)) + ")" if agg_dels else "" LOG.debug( "metadata refresh: %d additions%s, %d changes%s, %d removals%s", len(agg_adds), adds_str, len(agg_mods), mods_str, len(agg_dels), dels_str, ) if push: if os.path.isfile( os.path.join(source.clone.working_dir, AGGREGATE_DATA_FILE), ): source.clone.git.add(AGGREGATE_DATA_FILE) if source.clone.is_dirty(): # There's an assumption here that the dirty state is # due to a metadata refresh. This could be incorrect # if somebody makes local modifications and then runs # the refresh without --aggregate, but it's not clear # why one would use zkg for this as opposed to git # itself. 
source.clone.git.commit( "--no-verify", "--message", "Update aggregated metadata.", ) LOG.info('committed package source "%s" metadata update', name) source.clone.git.push("--no-verify") return self.SourceAggregationResults("", aggregation_issues) def refresh_installed_packages(self): """Fetch latest git information for installed packages. This retrieves information about outdated packages, but does not actually upgrade their installations. Raises: IOError: if the package manifest file can't be written """ for ipkg in self.installed_packages(): if ipkg.is_builtin(): LOG.debug( 'skipping refresh of built-in package "%s"', ipkg.package.name, ) continue clonepath = os.path.join(self.package_clonedir, ipkg.package.name) clone = git.Repo(clonepath) LOG.debug("fetch package %s", ipkg.package.qualified_name()) try: clone.git.fetch("--recurse-submodules=yes") except git.GitCommandError as error: LOG.warn( "failed to fetch package %s: %s", ipkg.package.qualified_name(), error, ) ipkg.status.is_outdated = _is_clone_outdated( clone, ipkg.status.current_version, ipkg.status.tracking_method, ) self._write_manifest() def upgrade(self, pkg_path): """Upgrade a package to the latest available version. Args: pkg_path (str): the full git URL of a package or the shortened path/name that refers to it within a package source. E.g. for a package source called "zeek" with package named "foo" in :file:`alice/zkg.index`, the following inputs may refer to the package: "foo", "alice/foo", or "zeek/alice/foo". Returns: str: an empty string if package upgrade succeeded else an error string explaining why it failed. Raises: IOError: if the manifest can't be written """ pkg_path = canonical_url(pkg_path) LOG.debug('upgrading "%s"', pkg_path) ipkg = self.find_installed_package(pkg_path) if not ipkg: LOG.info('upgrading "%s": no matching package', pkg_path) return "no such package installed" if ipkg.status.is_pinned: LOG.info('upgrading "%s": package is pinned', pkg_path) return "package is pinned" if not ipkg.status.is_outdated: LOG.info('upgrading "%s": package not outdated', pkg_path) return "package is not outdated" clonepath = os.path.join(self.package_clonedir, ipkg.package.name) clone = git.Repo(clonepath) if ipkg.status.tracking_method == TRACKING_METHOD_VERSION: version_tags = git_version_tags(clone) return self._install(ipkg.package, version_tags[-1]) elif ipkg.status.tracking_method == TRACKING_METHOD_BRANCH: git_pull(clone) return self._install(ipkg.package, ipkg.status.current_version) elif ipkg.status.tracking_method == TRACKING_METHOD_COMMIT: # The above check for whether the installed package is outdated # also should have already caught this situation. return "package is not outdated" else: raise NotImplementedError def remove(self, pkg_path): """Remove an installed package. Args: pkg_path (str): the full git URL of a package or the shortened path/name that refers to it within a package source. E.g. for a package source called "zeek" with package named "foo" in :file:`alice/zkg.index`, the following inputs may refer to the package: "foo", "alice/foo", or "zeek/alice/foo". Returns: bool: True if an installed package was removed, else False. 
Raises: IOError: if the package manifest file can't be written OSError: if the installed package's directory can't be deleted """ pkg_path = canonical_url(pkg_path) LOG.debug('removing "%s"', pkg_path) ipkg = self.find_installed_package(pkg_path) if not ipkg: LOG.info('removing "%s": could not find matching package', pkg_path) return False if ipkg.is_builtin(): LOG.error('cannot remove built-in package "%s"', pkg_path) return False self.unload(pkg_path) pkg_to_remove = ipkg.package delete_path(os.path.join(self.package_clonedir, pkg_to_remove.name)) delete_path(os.path.join(self.script_dir, pkg_to_remove.name)) delete_path(os.path.join(self.plugin_dir, pkg_to_remove.name)) delete_path(os.path.join(self.zeekpath(), pkg_to_remove.name)) for alias in pkg_to_remove.aliases(): delete_path(os.path.join(self.zeekpath(), alias)) for exe in self._get_executables(pkg_to_remove.metadata): link = os.path.join(self.bin_dir, os.path.basename(exe)) if os.path.islink(link): try: LOG.debug("removing link %s", link) os.unlink(link) except OSError as err: LOG.warn("cannot remove link for %s", err) del self.installed_pkgs[pkg_to_remove.name] self._write_manifest() LOG.debug('removed "%s"', pkg_path) return True def pin(self, pkg_path): """Pin a currently installed package to the currently installed version. Pinned packages are never upgraded when calling :meth:`upgrade()`. Args: pkg_path (str): the full git URL of a package or the shortened path/name that refers to it within a package source. E.g. for a package source called "zeek" with package named "foo" in :file:`alice/zkg.index`, the following inputs may refer to the package: "foo", "alice/foo", or "zeek/alice/foo". Returns: :class:`.package.InstalledPackage`: None if no matching installed package could be found, else the installed package that was pinned. Raises: IOError: when the manifest file can't be written """ pkg_path = canonical_url(pkg_path) LOG.debug('pinning "%s"', pkg_path) ipkg = self.find_installed_package(pkg_path) if not ipkg: LOG.info('pinning "%s": no matching package', pkg_path) return None if ipkg.status.is_pinned: LOG.debug('pinning "%s": already pinned', pkg_path) return ipkg ipkg.status.is_pinned = True self._write_manifest() LOG.debug('pinned "%s"', pkg_path) return ipkg def unpin(self, pkg_path): """Unpin a currently installed package and allow it to be upgraded. Args: pkg_path (str): the full git URL of a package or the shortened path/name that refers to it within a package source. E.g. for a package source called "zeek" with package named "foo" in :file:`alice/zkg.index`, the following inputs may refer to the package: "foo", "alice/foo", or "zeek/alice/foo". Returns: :class:`.package.InstalledPackage`: None if no matching installed package could be found, else the installed package that was unpinned. Raises: IOError: when the manifest file can't be written """ pkg_path = canonical_url(pkg_path) LOG.debug('unpinning "%s"', pkg_path) ipkg = self.find_installed_package(pkg_path) if not ipkg: LOG.info('unpinning "%s": no matching package', pkg_path) return None if not ipkg.status.is_pinned: LOG.debug('unpinning "%s": already unpinned', pkg_path) return ipkg ipkg.status.is_pinned = False self._write_manifest() LOG.debug('unpinned "%s"', pkg_path) return ipkg def load(self, pkg_path): """Mark an installed package as being "loaded". The collection of "loaded" packages is a convenient way for Zeek to more simply load a whole group of packages installed via the package manager. 
Args: pkg_path (str): the full git URL of a package or the shortened path/name that refers to it within a package source. E.g. for a package source called "zeek" with package named "foo" in :file:`alice/zkg.index`, the following inputs may refer to the package: "foo", "alice/foo", or "zeek/alice/foo". Returns: str: empty string if the package is successfully marked as loaded, else an explanation of why it failed. Raises: IOError: if the loader script or manifest can't be written """ pkg_path = canonical_url(pkg_path) LOG.debug('loading "%s"', pkg_path) ipkg = self.find_installed_package(pkg_path) if not ipkg: LOG.info('loading "%s": no matching package', pkg_path) return "no such package" if ipkg.status.is_loaded: LOG.debug('loading "%s": already loaded', pkg_path) return "" pkg_load_script = os.path.join( self.script_dir, ipkg.package.name, "__load__.zeek", ) if not os.path.exists(pkg_load_script) and not self.has_plugin(ipkg): LOG.debug( 'loading "%s": %s not found and package has no plugin', pkg_path, pkg_load_script, ) return "no __load__.zeek within package script_dir and no plugin included" ipkg.status.is_loaded = True self._write_autoloader() self._write_manifest() self._write_plugin_magic(ipkg) LOG.debug('loaded "%s"', pkg_path) return "" def loaded_package_states(self): """Save "loaded" state for all installed packages. Returns: dict: dictionary of "loaded" status for installed packages """ return { name: ipkg.status.is_loaded for name, ipkg in self.installed_pkgs.items() } def restore_loaded_package_states(self, saved_state): """Restores state for installed packages. Args: saved_state (dict): dictionary of saved "loaded" state for installed packages. """ for pkg_name, ipkg in self.installed_pkgs.items(): if ipkg.status.is_loaded == saved_state[pkg_name]: continue ipkg.status.is_loaded = saved_state[pkg_name] self._write_plugin_magic(ipkg) self._write_autoloader() self._write_manifest() def load_with_dependencies(self, pkg_name, visited=None): """Mark dependent (but previously installed) packages as being "loaded". Args: pkg_name (str): name of the package. visited (set(str)): set of packages visited along the recursive loading Returns: list(str, str): list of tuples containing dependent package name and whether it was marked as loaded or else an explanation of why the loading failed. """ if visited is None: visited = set() ipkg = self.find_installed_package(pkg_name) # skip loading a package if it is not installed. if not ipkg: return [(pkg_name, "Loading dependency failed. Package not installed.")] load_error = self.load(pkg_name) if load_error: return [(pkg_name, load_error)] retval = [] visited.add(pkg_name) for pkg in self.get_installed_package_dependencies(pkg_name): if _is_reserved_pkg_name(pkg): continue if pkg in visited: continue retval += self.load_with_dependencies(pkg, visited) return retval def list_depender_pkgs(self, pkg_path): """List of depender packages. If C depends on B and B depends on A, we represent the dependency chain as C -> B -> A. Thus, package C is dependent on A and B, while package B is dependent on just A. Example representation:: { 'A': set(), 'B': set([A, version_of_A]) 'C': set([B, version_of_B]) } Further, package A is a direct dependee for B (and implicitly for C), while B is a direct depender (and C is an implicit depender) for A. Args: pkg_path (str): the full git URL of a package or the shortened path/name that refers to it within a package source. E.g. 
for a package source called "zeek" with package named "foo" in :file:`alice/zkg.index`, the following inputs may refer to the package: "foo", "alice/foo", or "zeek/alice/foo". Returns: list: list of depender packages. """ depender_packages, pkg_name = set(), name_from_path(pkg_path) queue = deque([pkg_name]) pkg_dependencies = self.installed_package_dependencies() while queue: item = queue.popleft() for _pkg_name in pkg_dependencies: pkg_dependees = set(pkg_dependencies.get(_pkg_name)) if item in pkg_dependees: # check if there is a cyclic dependency if _pkg_name == pkg_name: return sorted([*list(depender_packages), [pkg_name]]) queue.append(_pkg_name) depender_packages.add(_pkg_name) return sorted(depender_packages) def unload_with_unused_dependers(self, pkg_name): """Unmark dependent (but previously installed packages) as being "loaded". Args: pkg_name (str): name of the package. Returns: list(str, str): list of tuples containing dependent package name and whether it was marked as unloaded or else an explanation of why the unloading failed. Raises: IOError: if the loader script or manifest can't be written """ def _has_all_dependers_unloaded(item, dependers): for depender in dependers: ipkg = self.find_installed_package(depender) if ipkg and ipkg.status.is_loaded: return False return True errors = [] queue = deque([pkg_name]) while queue: item = queue.popleft() deps = self.get_installed_package_dependencies(item) for pkg in deps: if _is_reserved_pkg_name(pkg): continue ipkg = self.find_installed_package(pkg) # it is possible that this dependency has been removed via zkg if not ipkg: errors.append((pkg, "Package not installed.")) return errors if ipkg.status.is_loaded: queue.append(pkg) ipkg = self.find_installed_package(item) # it is possible that this package has been removed via zkg if not ipkg: errors.append((item, "Package not installed.")) return errors if ipkg.status.is_loaded: dep_packages = self.list_depender_pkgs(item) # check if there is a cyclic dependency if item in dep_packages: for dep in dep_packages: if item != dep: ipkg = self.find_installed_package(dep) if ipkg and ipkg.status.is_loaded: self.unload(dep) errors.append((dep, "")) self.unload(item) errors.append((item, "")) continue # check if all dependers are unloaded elif _has_all_dependers_unloaded(item, dep_packages): self.unload(item) errors.append((item, "")) continue # package is in use else: dep_packages = self.list_depender_pkgs(pkg_name) dep_listing = "" for _name in dep_packages: dep_listing += f'"{_name}", ' errors.append( ( item, f"Package is in use by other packages --- {dep_listing[:-2]}.", ), ) return errors return errors def unload(self, pkg_path): """Unmark an installed package as being "loaded". The collection of "loaded" packages is a convenient way for Zeek to more simply load a whole group of packages installed via the package manager. Args: pkg_path (str): the full git URL of a package or the shortened path/name that refers to it within a package source. E.g. for a package source called "zeek" with package named "foo" in :file:`alice/zkg.index`, the following inputs may refer to the package: "foo", "alice/foo", or "zeek/alice/foo". Returns: bool: True if a package is successfully unmarked as loaded. 
Raises: IOError: if the loader script or manifest can't be written """ pkg_path = canonical_url(pkg_path) LOG.debug('unloading "%s"', pkg_path) ipkg = self.find_installed_package(pkg_path) if not ipkg: LOG.info('unloading "%s": no matching package', pkg_path) return False if not ipkg.status.is_loaded: LOG.debug('unloading "%s": already unloaded', pkg_path) return True ipkg.status.is_loaded = False self._write_autoloader() self._write_manifest() self._write_plugin_magic(ipkg) LOG.debug('unloaded "%s"', pkg_path) return True def bundle_info(self, bundle_file): """Retrieves information on all packages contained in a bundle. Args: bundle_file (str): the path to the bundle to inspect. Returns: (str, list of (str, str, :class:`.package.PackageInfo`)): a tuple with the the first element set to an empty string if the information successfully retrieved, else an error message explaining why the bundle file was invalid. The second element of the tuple is a list containing information on each package contained in the bundle: the exact git URL and version string from the bundle's manifest along with the package info object retrieved by inspecting git repo contained in the bundle. """ LOG.debug('getting bundle info for file "%s"', bundle_file) bundle_dir = os.path.join(self.scratch_dir, "bundle") delete_path(bundle_dir) make_dir(bundle_dir) infos = [] try: safe_tarfile_extractall(bundle_file, bundle_dir) except Exception as error: return (str(error), infos) manifest_file = os.path.join(bundle_dir, "manifest.txt") config = configparser.ConfigParser(delimiters="=") config.optionxform = str if not config.read(manifest_file): return ("invalid bundle: no manifest file", infos) if not config.has_section("bundle"): return ("invalid bundle: no [bundle] section in manifest file", infos) manifest = config.items("bundle") for git_url, version in manifest: package = Package( git_url=git_url, name=git_url.split("/")[-1], canonical=True, ) pkg_path = os.path.join(bundle_dir, package.name) LOG.debug('getting info for bundled package "%s"', package.name) pkg_info = self.info(pkg_path, version=version, prefer_installed=False) infos.append((git_url, version, pkg_info)) return ("", infos) def info(self, pkg_path, version="", prefer_installed=True): """Retrieves information about a package. Args: pkg_path (str): the full git URL of a package or the shortened path/name that refers to it within a package source. E.g. for a package source called "zeek" with package named "foo" in :file:`alice/zkg.index`, the following inputs may refer to the package: "foo", "alice/foo", or "zeek/alice/foo". version (str): may be a git version tag, branch name, or commit hash from which metadata will be pulled. If an empty string is given, then the latest git version tag is used (or the default branch like "main" or "master" if no version tags exist). prefer_installed (bool): if this is set, then the information from any current installation of the package is returned instead of retrieving the latest information from the package's git repo. The `version` parameter is also ignored when this is set as it uses whatever version of the package is currently installed. Returns: A :class:`.package.PackageInfo` object. """ pkg_path = canonical_url(pkg_path) name = name_from_path(pkg_path) if not is_valid_package_name(name): reason = f"Package name {name!r} is not valid." 
return PackageInfo(Package(git_url=pkg_path), invalid_reason=reason) LOG.debug('getting info on "%s"', pkg_path) # Handle built-in packages like installed packages # but avoid looking up the repository information. bpkg_info = self.find_builtin_package(pkg_path) if prefer_installed and bpkg_info: return bpkg_info ipkg = self.find_installed_package(pkg_path) if prefer_installed and ipkg: status = ipkg.status pkg_name = ipkg.package.name clonepath = os.path.join(self.package_clonedir, pkg_name) clone = git.Repo(clonepath) return _info_from_clone(clone, ipkg.package, status, status.current_version) else: status = None matches = self.match_source_packages(pkg_path) if not matches: package = Package(git_url=pkg_path) try: return self._info(package, status, version) except git.GitCommandError as error: LOG.info( 'getting info on "%s": invalid git repo path: %s', pkg_path, error, ) LOG.info('getting info on "%s": matched no source package', pkg_path) reason = ( "package name not found in sources and also" " not a usable git URL (invalid or inaccessible," " use -vvv for details)" ) return PackageInfo(package=package, invalid_reason=reason, status=status) if len(matches) > 1: matches_string = [match.qualified_name() for match in matches] LOG.info( 'getting info on "%s": matched multiple packages: %s', pkg_path, matches_string, ) reason = ( f'"{pkg_path}" matches multiple packages, ' f"try a more specific name from: {matches_string}" ) return PackageInfo(invalid_reason=reason, status=status) package = matches[0] try: return self._info(package, status, version) except git.GitCommandError as error: LOG.info('getting info on "%s": invalid git repo path: %s', pkg_path, error) reason = "git repository is either invalid or unreachable" return PackageInfo(package=package, invalid_reason=reason, status=status) def _info(self, package, status, version): """Retrieves information about a package. Returns: A :class:`.package.PackageInfo` object. Raises: git.GitCommandError: when failing to clone the package repo """ clonepath = os.path.join(self.scratch_dir, package.name) clone = _clone_package(package, clonepath, version) versions = git_version_tags(clone) if not version: if len(versions): version = versions[-1] else: version = git_default_branch(clone) try: git_checkout(clone, version) except git.GitCommandError: reason = f'no such commit, branch, or version tag: "{version}"' return PackageInfo(package=package, status=status, invalid_reason=reason) LOG.debug('checked out "%s", branch/version "%s"', package, version) return _info_from_clone(clone, package, status, version) def package_versions(self, installed_package): """Returns a list of version number tags available for a package. Args: installed_package (:class:`.package.InstalledPackage`): the package for which version number tags will be retrieved. Returns: list of str: the version number tags. """ name = installed_package.package.name clonepath = os.path.join(self.package_clonedir, name) clone = git.Repo(clonepath) return git_version_tags(clone) def validate_dependencies( self, requested_packages, ignore_installed_packages=False, ignore_suggestions=False, use_builtin_packages=True, ): """Validates package dependencies. Args: requested_packages (list of (str, str)): a list of (package name or git URL, version) string tuples validate. If the version string is empty, the latest available version of the package is used. ignore_installed_packages (bool): whether the dependency analysis should consider installed packages as satisfying dependency requirements. 
ignore_suggestions (bool): whether the dependency analysis should consider installing dependencies that are marked in another package's 'suggests' metadata field. use_builtin_packages (bool): whether package information from builtin packages is used for dependency resolution. Returns: (str, list of (:class:`.package.PackageInfo`, str, bool)): the first element of the tuple is an empty string if dependency graph was successfully validated, else an error string explaining what is invalid. In the case it was validated, the second element is a list of tuples, each representing a package, where: - The first element is a dependency package that would need to be installed in order to satisfy the dependencies of the requested packages. - The second element of tuples in the list is a version string of the associated package that satisfies dependency requirements. - The third element of the tuples in the list is a boolean value indicating whether the package is included in the list because it's merely suggested by another package. The list will not include any packages that are already installed or that are in the `requested_packages` argument. The list is sorted in dependency order: whenever a dependency in turn has dependencies, those are guaranteed to appear in order in the list. This means that reverse iteration of the list guarantees processing of dependencies prior to the depender packages. """ class Node: def __init__(self, name): self.name = name self.info = None self.requested_version = None # (tracking method, version) self.installed_version = None # (tracking method, version) self.dependers = {} # name -> version, name needs self at version self.dependees = {} # name -> version, self needs name at version self.is_suggestion = False def __str__(self): return ( f"{self.name}\n\t" f"requested: {self.requested_version}\n\t" f"installed: {self.installed_version}\n\t" f"dependers: {self.dependers}\n\t" f"suggestion: {self.is_suggestion}" ) graph = {} # Node.name -> Node, nodes store edges requests = [] # List of Node, just for requested packages # 1. Try to make nodes for everything in the dependency graph... # Add nodes for packages that are requested for installation for name, version in requested_packages: info = self.info(name, version=version, prefer_installed=False) if info.invalid_reason: return ( f'invalid package "{name}": {info.invalid_reason}', [], ) node = Node(info.package.qualified_name()) node.info = info method = node.info.version_type node.requested_version = PackageVersion(method, version) graph[node.name] = node requests.append(node) # Recursively add nodes for all dependencies of requested packages, to_process = copy.copy(graph) while to_process: (_, node) = to_process.popitem() dd = node.info.dependencies(field="depends") ds = node.info.dependencies(field="suggests") if dd is None: return ( f'package "{node.name}" has malformed "depends" field', [], ) all_deps = dd.copy() if not ignore_suggestions: if ds is None: return ( f'package "{node.name}" has malformed "suggests" field', [], ) all_deps.update(ds) for dep_name, _ in all_deps.items(): if dep_name == "zeek": # A zeek node will get added later. continue if dep_name == "zkg": # A zkg node will get added later. continue # Suggestion status propagates to 'depends' field of suggested packages. 
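                # For instance (hypothetical packages): if "A" is requested,
                # "A" suggests "B", and "B" depends on "C", then "B" and "C"
                # both enter the graph flagged as suggestions; "C" sheds the
                # flag only if some non-suggested package also requires it.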
is_suggestion = node.is_suggestion or ( dep_name in ds and dep_name not in dd ) # If a dependency can be fulfilled by a built-in package # use its PackageInfo directly instead of going through # self.info() to search for it in package sources, where # it may not actually exist. info = None if use_builtin_packages: info = self.find_builtin_package(dep_name) if info is None: info = self.info(dep_name, prefer_installed=False) if info.invalid_reason: return ( f'package "{node.name}" has invalid dependency "{dep_name}": {info.invalid_reason}', [], ) dep_name_orig = dep_name dep_name = info.package.qualified_name() LOG.debug( 'dependency "%s" of "%s" resolved to "%s"', dep_name_orig, node.name, dep_name, ) if dep_name in graph: if graph[dep_name].is_suggestion and not is_suggestion: # Suggestion found to be required by another package. graph[dep_name].is_suggestion = False continue if dep_name in to_process: if to_process[dep_name].is_suggestion and not is_suggestion: # Suggestion found to be required by another package. to_process[dep_name].is_suggestion = False continue node = Node(dep_name) node.info = info node.is_suggestion = is_suggestion graph[node.name] = node to_process[node.name] = node # Add nodes for things that are already installed (including zeek) if not ignore_installed_packages: zeek_version = get_zeek_version() if zeek_version: node = Node("zeek") node.installed_version = PackageVersion( TRACKING_METHOD_VERSION, zeek_version, ) graph["zeek"] = node else: LOG.warning('could not get zeek version: no "zeek-config" in PATH ?') node = Node("zkg") node.installed_version = PackageVersion( TRACKING_METHOD_VERSION, __version__, ) graph["zkg"] = node for ipkg in self.installed_packages(): name = ipkg.package.qualified_name() status = ipkg.status if name not in graph: info = self.info(name, prefer_installed=True) node = Node(name) node.info = info graph[node.name] = node graph[name].installed_version = PackageVersion( status.tracking_method, status.current_version, ) # 2. Fill in the edges of the graph with dependency information. for name, node in graph.items(): if name == "zeek": continue if name == "zkg": continue dd = node.info.dependencies(field="depends") ds = node.info.dependencies(field="suggests") if dd is None: return ( f'package "{node.name}" has malformed "depends" field', [], ) all_deps = dd.copy() if not ignore_suggestions: if ds is None: return ( f'package "{node.name}" has malformed "suggests" field', [], ) all_deps.update(ds) for dep_name, dep_version in all_deps.items(): if dep_name == "zeek": if "zeek" in graph: graph["zeek"].dependers[name] = dep_version node.dependees["zeek"] = dep_version elif dep_name == "zkg": if "zkg" in graph: graph["zkg"].dependers[name] = dep_version node.dependees["zkg"] = dep_version else: for _, dependency_node in graph.items(): if dependency_node.name == "zeek": continue if dependency_node.name == "zkg": continue if dependency_node.info.package.matches_path(dep_name): dependency_node.dependers[name] = dep_version node.dependees[dependency_node.name] = dep_version break # 3. Try to solve for a connected graph with no edge conflicts. # Traverse graph in breadth-first order, starting from artificial root # with all nodes requested by caller as child nodes. nodes_todo = requests # The resulting list of packages required to satisfy dependencies, # in depender -> dependent (i.e., root -> leaves in dependency tree) # order. 
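        #
        # Illustrative example: if the caller requested A, A depends on B, and
        # B depends on C, the traversal below appends B before C, so new_pkgs
        # ends up as [B, C] and reverse iteration (C, then B) always visits a
        # dependency before its depender.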
        new_pkgs = []

        while nodes_todo:
            node = nodes_todo.pop(0)

            for name in node.dependees:
                nodes_todo.append(graph[name])

            # Avoid cyclic dependencies: ensure we traverse these edges only
            # once. (The graph may well be a dag, so it's okay to encounter
            # specific nodes repeatedly.)
            node.dependees = []

            if not node.dependers:
                if node.installed_version:
                    # We can ignore packages already installed if nothing else
                    # depends on them.
                    continue

                if node.requested_version:
                    # Only the packages requested by the caller have a requested
                    # version. We skip those too if nothing depends on them.
                    continue

                # A new package nothing depends on -- odd?
                new_pkgs.append(
                    (node.info, node.info.best_version(), node.is_suggestion),
                )
                continue

            if node.requested_version:
                # Check that requested version doesn't conflict with dependers.
                for depender_name, version_spec in node.dependers.items():
                    msg, fullfills = node.requested_version.fullfills(version_spec)
                    if not fullfills:
                        return (
                            f'unsatisfiable dependency: requested "{node.name}" ({node.requested_version.version}),'
                            f' but "{depender_name}" requires {version_spec} ({msg})',
                            new_pkgs,
                        )
            elif node.installed_version:
                # Check that installed version doesn't conflict with dependers.
                # track_method, required_version = node.installed_version
                for depender_name, version_spec in node.dependers.items():
                    msg, fullfills = node.installed_version.fullfills(version_spec)
                    if not fullfills:
                        return (
                            f'unsatisfiable dependency: "{node.name}" ({node.installed_version.version}) is installed,'
                            f' but "{depender_name}" requires {version_spec} ({msg})',
                            new_pkgs,
                        )
            else:
                # Choose best version that satisfies constraints
                best_version = None
                need_branch = False
                need_version = False

                def no_best_version_string(node):
                    rval = f'"{node.name}" has no version satisfying dependencies:\n'
                    for depender_name, version_spec in node.dependers.items():
                        rval += f'\t"{depender_name}" requires: "{version_spec}"\n'
                    return rval

                for _, version_spec in node.dependers.items():
                    if version_spec.startswith("branch="):
                        need_branch = True
                    elif version_spec != "*":
                        need_version = True

                if need_branch and need_version:
                    return (no_best_version_string(node), new_pkgs)

                if need_branch:
                    branch_name = None

                    for _, version_spec in node.dependers.items():
                        if version_spec == "*":
                            continue

                        if not branch_name:
                            branch_name = version_spec[len("branch=") :]
                            continue

                        if branch_name != version_spec[len("branch=") :]:
                            return (no_best_version_string(node), new_pkgs)

                    if branch_name:
                        best_version = branch_name
                    else:
                        best_version = node.info.default_branch
                elif need_version:
                    for version in node.info.versions[::-1]:
                        normal_version = normalize_version_tag(version)
                        req_semver = semver.Version.coerce(normal_version)
                        satisfied = True

                        for depender_name, version_spec in node.dependers.items():
                            try:
                                semver_spec = semver.Spec(version_spec)
                            except ValueError:
                                return (
                                    f'package "{depender_name}" has invalid semver spec: {version_spec}',
                                    new_pkgs,
                                )

                            if req_semver not in semver_spec:
                                satisfied = False
                                break

                        if satisfied:
                            best_version = version
                            break

                    if not best_version:
                        return (no_best_version_string(node), new_pkgs)
                else:
                    # Must have been all '*' wildcards or no dependers
                    best_version = node.info.best_version()

                new_pkgs.append((node.info, best_version, node.is_suggestion))

        # Remove duplicate new nodes, preserving their latest (i.e. deepest-in-
        # tree) occurrences. Traversing the resulting list right-to-left
        # guarantees that we never visit a node before we've visited all of its
        # dependees.
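        #
        # Illustrative example: if both A and B depend on C, the traversal
        # above visits C once per depender, so new_pkgs can contain C twice;
        # only the last (deepest) occurrence is kept.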
        seen_nodes = set()
        res = []

        for it in reversed(new_pkgs):
            if it[0].package.name in seen_nodes:
                continue
            seen_nodes.add(it[0].package.name)
            res.insert(0, it)

        return ("", res)

    def bundle(self, bundle_file, package_list, prefer_existing_clones=False):
        """Creates a package bundle.

        Args:
            bundle_file (str): filesystem path of the bundle file to create
                (a gzipped tarball).

            package_list (list of (str, str)): a list of (git URL, version)
                string tuples to put in the bundle. If the version string is
                empty, the latest available version of the package is used.

            prefer_existing_clones (bool): if True and the package list
                contains a package at a version that is already installed,
                then the existing git clone of that package is put into the
                bundle instead of cloning from the remote repository.

        Returns:
            str: empty string if the bundle is successfully created,
            else an error string explaining what failed.
        """
        bundle_dir = os.path.join(self.scratch_dir, "bundle")
        delete_path(bundle_dir)
        make_dir(bundle_dir)
        manifest_file = os.path.join(bundle_dir, "manifest.txt")
        config = configparser.ConfigParser(delimiters="=")
        config.optionxform = str
        config.add_section("bundle")

        # To be placed into the meta section.
        builtin_packages = []

        def match_package_url_and_version(git_url, version):
            for ipkg in self.installed_packages():
                if ipkg.package.git_url != git_url:
                    continue

                if ipkg.status.current_version != version:
                    continue

                return ipkg

            return None

        for git_url, version in package_list:
            # Record built-in packages in the bundle's manifest, but
            # otherwise ignore them silently.
            if git_url.startswith(BUILTIN_SCHEME):
                builtin_packages.append((git_url, version))
                continue

            name = name_from_path(git_url)
            clonepath = os.path.join(bundle_dir, name)
            config.set("bundle", git_url, version)

            if prefer_existing_clones:
                ipkg = match_package_url_and_version(git_url, version)

                if ipkg:
                    src = os.path.join(self.package_clonedir, ipkg.package.name)
                    shutil.copytree(src, clonepath, symlinks=True)
                    clone = git.Repo(clonepath)
                    clone.git.reset(hard=True)
                    clone.git.clean("-f", "-x", "-d")

                    for modified_config in self.modified_config_files(ipkg):
                        dst = os.path.join(clonepath, modified_config[0])
                        shutil.copy2(modified_config[1], dst)

                    continue

            try:
                git_clone(git_url, clonepath, shallow=(not is_sha1(version)))
            except git.GitCommandError as error:
                return f"failed to clone {git_url}: {error}"

        # Record the built-in packages expected by this bundle (or simply
        # installed on the source system) in a new [meta] section to aid
        # debugging. This isn't interpreted, but if unbundle produces
        # warnings it may prove helpful.
        if builtin_packages:
            config.add_section("meta")
            entries = []

            for git_url, version in builtin_packages:
                entries.append(f"{name_from_path(git_url)}={version}")

            config.set("meta", "builtin_packages", ",".join(entries))

        with open(manifest_file, "w") as f:
            config.write(f)

        archive = shutil.make_archive(bundle_dir, "gztar", bundle_dir)
        delete_path(bundle_file)
        shutil.move(archive, bundle_file)
        return ""

    def unbundle(self, bundle_file):
        """Installs all packages contained within a bundle.

        Args:
            bundle_file (str): the path to the bundle to install.

        Returns:
            str: an empty string if the operation was successful, else an
            error message indicating what went wrong.
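
        The expected layout is the one produced by :meth:`bundle()`: a gzipped
        tarball containing one git clone per package plus a
        :file:`manifest.txt`, e.g. (illustrative values)::

            [bundle]
            https://github.com/zeek/foo = v1.0.0
            https://github.com/zeek/bar = main

        An optional ``[meta]`` section listing built-in packages from the
        system that created the bundle is informational only and is not
        interpreted here.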
""" LOG.debug('unbundle "%s"', bundle_file) bundle_dir = os.path.join(self.scratch_dir, "bundle") delete_path(bundle_dir) make_dir(bundle_dir) try: safe_tarfile_extractall(bundle_file, bundle_dir) except Exception as error: return str(error) manifest_file = os.path.join(bundle_dir, "manifest.txt") config = configparser.ConfigParser(delimiters="=") config.optionxform = str if not config.read(manifest_file): return "invalid bundle: no manifest file" if not config.has_section("bundle"): return "invalid bundle: no [bundle] section in manifest file" manifest = config.items("bundle") for git_url, version in manifest: package = Package( git_url=git_url, name=git_url.split("/")[-1], canonical=True, ) # Prepare the clonepath with the contents from the bundle. clonepath = os.path.join(self.package_clonedir, package.name) delete_path(clonepath) shutil.move(os.path.join(bundle_dir, package.name), clonepath) LOG.debug('unbundle installing "%s"', package.name) error = self._install(package, version, use_existing_clone=True) if error: return error # For all the packages that we've just unbundled, verify that their # dependencies are fulfilled through installed packages or built-in # packages and log a warning if not. # # Possible reasons are built-in packages on the source system missing # on the destination system or usage of --nodeps when creating the bundle. for git_url, _ in manifest: deps = self.get_installed_package_dependencies(git_url) if deps is None: LOG.warning('package "%s" not installed?', git_url) continue for dep, version_spec in deps.items(): ipkg = self.find_installed_package(dep) if ipkg is None: LOG.warning('dependency "%s" of bundled "%s" missing', dep, git_url) continue msg, fullfills = ipkg.fullfills(version_spec) if not fullfills: LOG.warning( 'dependency "%s" (%s) of "%s" not compatible with "%s"', dep, ipkg.status.current_version, git_url, version_spec, ) return "" def test(self, pkg_path, version="", test_dependencies=False): """Test a package. Args: pkg_path (str): the full git URL of a package or the shortened path/name that refers to it within a package source. E.g. for a package source called "zeek" with package named "foo" in :file:`alice/zkg.index`, the following inputs may refer to the package: "foo", "alice/foo", or "zeek/alice/foo". version (str): if not given, then the latest git version tag is used (or if no version tags exist, the default branch like "main" or "master" is used). If given, it may be either a git version tag or a git branch name. test_dependencies (bool): if True, any dependencies required for the given package will also get tested. Off by default, meaning such dependencies will get locally built and staged, but not tested. Returns: (str, bool, str): a tuple containing an error message string, a boolean indicating whether the tests passed, as well as a path to the directory in which the tests were run. In the case where tests failed, the directory can be inspected to figure out what went wrong. In the case where the error message string is not empty, the error message indicates the reason why tests could not be run. Absence of a test_command in the requested package is considered an error. 
""" pkg_path = canonical_url(pkg_path) LOG.debug('testing "%s"', pkg_path) pkg_info = self.info(pkg_path, version=version, prefer_installed=False) if pkg_info.invalid_reason: return (pkg_info.invalid_reason, "False", "") if "test_command" not in pkg_info.metadata: return ("Package does not specify a test_command", False, "") if not version: version = pkg_info.metadata_version package = pkg_info.package stage = Stage(self, os.path.join(self.package_testdir, package.name)) stage.populate() request = [(package.qualified_name(), version)] invalid_deps, new_pkgs = self.validate_dependencies(request, False) if invalid_deps: return (invalid_deps, False, stage.state_dir) env, err = stage.get_subprocess_env() if env is None: LOG.warning("%s when running tests for %s", err, package.name) return (err, False, stage.state_dir) pkgs = [] pkgs.append((pkg_info, version)) for info, version, _ in new_pkgs: pkgs.append((info, version)) # Clone all packages, checkout right version, and build/install to # staging area. for info, version in reversed(pkgs): LOG.debug( 'preparing "%s" for testing: version %s', info.package.name, version, ) clonepath = os.path.join(stage.clone_dir, info.package.name) # After we prepared the stage, the clonepath might exist (as a # symlink to the installed-version package clone) if we're testing # an alternative version of an installed package. Remove the # symlink. if os.path.islink(clonepath): delete_path(clonepath) try: clone = _clone_package(info.package, clonepath, version) except git.GitCommandError as error: LOG.warning("failed to clone git repo: %s", error) return ( f"failed to clone {info.package.git_url}", False, stage.state_dir, ) try: git_checkout(clone, version) except git.GitCommandError as error: LOG.warning("failed to checkout git repo version: %s", error) return ( f"failed to checkout {version} of {info.package.git_url}", False, stage.state_dir, ) fail_msg = self._stage(info.package, version, clone, stage, env) if fail_msg: return (fail_msg, False, self.state_dir) # Finally, run tests (with correct environment set) if test_dependencies: test_pkgs = pkgs else: test_pkgs = [(pkg_info, version)] for info, _ in reversed(test_pkgs): LOG.info('testing "%s"', package) # Interpolate the test command: metadata, invalid_reason = self._interpolate_package_metadata( info.metadata, stage, ) if invalid_reason: return (invalid_reason, False, stage.state_dir) if "test_command" not in metadata: LOG.info( 'Skipping unit tests for "%s": no test_command in metadata', info.package.qualified_name(), ) continue test_command = metadata["test_command"] cwd = os.path.join(stage.clone_dir, info.package.name) outfile = os.path.join(cwd, "zkg.test_command.stdout") errfile = os.path.join(cwd, "zkg.test_command.stderr") LOG.debug( 'running test_command for %s with cwd="%s", PATH="%s",' ' and ZEEKPATH="%s": %s', info.package.name, cwd, env["PATH"], env["ZEEKPATH"], test_command, ) with open(outfile, "w") as test_stdout, open(errfile, "w") as test_stderr: cmd = subprocess.Popen( test_command, shell=True, cwd=cwd, env=env, stdout=test_stdout, stderr=test_stderr, ) rc = cmd.wait() if rc != 0: return ( f"test_command failed with exit code {rc}", False, stage.state_dir, ) return ("", True, stage.state_dir) def _get_executables(self, metadata): return metadata.get("executables", "").split() def _stage(self, package, version, clone, stage, env=None): """Stage a package. Staging is the act of getting a package ready for use at a particular location in the file system, called a "stage". 
The stage may be the actual installation folders for the system's Zeek distribution, or one purely internal to zkg's stage management when testing a package. The steps involved in staging include cloning and checking out the package at the desired version, building it if it features a build_command, and installing script & plugin folders inside the requested stage. Args: package (:class:`.package.Package`): the package to stage version (str): the git tag, branch name, or commit hash of the package version to stage clone (:class:`git.Repo`): the on-disk clone of the package's git repository. stage (:class:`Stage`): the staging object describing the disk locations for installation. env (dict of str -> str): an optional environment to pass to the child process executing the package's build_command, if any. If None, the current environment is used. Returns: str: empty string if staging succeeded, otherwise an error string explaining why it failed. """ LOG.debug('staging "%s": version %s', package, version) metadata_file = _pick_metadata_file(clone.working_dir) metadata_parser = configparser.ConfigParser(interpolation=None) invalid_reason = _parse_package_metadata(metadata_parser, metadata_file) if invalid_reason: return invalid_reason metadata = _get_package_metadata(metadata_parser) metadata, invalid_reason = self._interpolate_package_metadata(metadata, stage) if invalid_reason: return invalid_reason build_command = metadata.get("build_command", "") if build_command: LOG.debug( 'building "%s": running build_command: %s', package, build_command, ) bufsize = 4096 build = subprocess.Popen( build_command, shell=True, cwd=clone.working_dir, env=env, bufsize=bufsize, stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) try: buildlog = self.package_build_log(clone.working_dir) with open(buildlog, "wb") as f: LOG.info( 'installing "%s": writing build log: %s', package, buildlog, ) f.write("=== STDERR ===\n".encode(std_encoding(sys.stderr))) while True: data = build.stderr.read(bufsize) if data: f.write(data) else: break f.write("=== STDOUT ===\n".encode(std_encoding(sys.stdout))) while True: data = build.stdout.read(bufsize) if data: f.write(data) else: break except OSError as error: LOG.warning( 'installing "%s": failed to write build log %s %s: %s', package, buildlog, error.errno, error.strerror, ) returncode = build.wait() if returncode != 0: return f"package build_command failed, see log in {buildlog}" pkg_script_dir = metadata.get("script_dir", "") script_dir_src = os.path.join(clone.working_dir, pkg_script_dir) script_dir_dst = os.path.join(stage.script_dir, package.name) if not os.path.exists(script_dir_src): return f"package's 'script_dir' does not exist: {pkg_script_dir}" pkgload = os.path.join(script_dir_src, "__load__.zeek") if os.path.isfile(pkgload): try: symlink_path = os.path.join( os.path.dirname(stage.script_dir), package.name, ) make_symlink(os.path.join("packages", package.name), symlink_path) for alias in aliases(metadata): symlink_path = os.path.join( os.path.dirname(stage.script_dir), alias, ) make_symlink(os.path.join("packages", package.name), symlink_path) except OSError as exception: error = f"could not create symlink at {symlink_path}" error += f": {type(exception).__name__}: {exception}" return error error = _copy_package_dir( package, "script_dir", script_dir_src, script_dir_dst, self.scratch_dir, ) if error: return error else: if "script_dir" in metadata: return f"no __load__.zeek file found in package's 'script_dir' : {pkg_script_dir}" else: LOG.warning( 'installing "%s": 
no __load__.zeek in implicit' " script_dir, skipped installing scripts", package, ) pkg_plugin_dir = metadata.get("plugin_dir", "build") plugin_dir_src = os.path.join(clone.working_dir, pkg_plugin_dir) plugin_dir_dst = os.path.join(stage.plugin_dir, package.name) if not os.path.exists(plugin_dir_src): LOG.info( 'installing "%s": package "plugin_dir" does not exist: %s', package, pkg_plugin_dir, ) if pkg_plugin_dir != "build": # It's common for a package to not have build directory for # plugins, so don't error out in that case, just log it. return f"package's 'plugin_dir' does not exist: {pkg_plugin_dir}" error = _copy_package_dir( package, "plugin_dir", plugin_dir_src, plugin_dir_dst, self.scratch_dir, ) if error: return error # Ensure any listed executables exist as advertised. for p in self._get_executables(metadata): full_path = os.path.join(clone.working_dir, p) if not os.path.isfile(full_path): return f"executable '{p}' is missing" if not os.access(full_path, os.X_OK): return f"file '{p}' is not executable" if stage.bin_dir is not None: make_symlink( full_path, os.path.join(stage.bin_dir, os.path.basename(p)), force=True, ) return "" def install(self, pkg_path, version=""): """Install a package. Args: pkg_path (str): the full git URL of a package or the shortened path/name that refers to it within a package source. E.g. for a package source called "zeek" with package named "foo" in :file:`alice/zkg.index`, the following inputs may refer to the package: "foo", "alice/foo", or "zeek/alice/foo". version (str): if not given, then the latest git version tag is installed (or if no version tags exist, the default branch like "main" or "master" is installed). If given, it may be either a git version tag, a git branch name, or a git commit hash. Returns: str: empty string if package installation succeeded else an error string explaining why it failed. Raises: IOError: if the manifest can't be written """ pkg_path = canonical_url(pkg_path) LOG.debug('installing "%s"', pkg_path) ipkg = self.find_installed_package(pkg_path) if ipkg: conflict = ipkg.package if conflict.qualified_name().endswith(pkg_path): LOG.debug('installing "%s": re-install: %s', pkg_path, conflict) clonepath = os.path.join(self.package_clonedir, conflict.name) _clone_package(conflict, clonepath, version) return self._install(conflict, version) else: LOG.info( 'installing "%s": matched already installed package: %s', pkg_path, conflict, ) return f'package with name "{conflict.name}" ({conflict}) is already installed' matches = self.match_source_packages(pkg_path) if not matches: try: package = Package(git_url=pkg_path) return self._install(package, version) except git.GitCommandError as error: LOG.info('installing "%s": invalid git repo path: %s', pkg_path, error) LOG.info('installing "%s": matched no source package', pkg_path) return "package not found in sources and also not a valid git URL" if len(matches) > 1: matches_string = [match.qualified_name() for match in matches] LOG.info( 'installing "%s": matched multiple packages: %s', pkg_path, matches_string, ) return ( f'"{pkg_path}" matches multiple packages, ' f"try a more specific name from: {matches_string}" ) try: return self._install(matches[0], version) except git.GitCommandError as error: LOG.warning('installing "%s": source package git repo is invalid', pkg_path) return f'failed to clone package "{pkg_path}": {error}' def _validate_alias_conflict(self, pkg, metadata_dict): """Check if there's an alias conflict. 
If any of the installed packages aliases collide with the package's name or its aliases, return a string describing the issue. Args: package (:class:`.package.Package`): the package to be installed metadata_dict (dict): The metadata for the given package. package.metadata may not be valid yet. Returns: str: empty string on success, else descriptive error message. """ package_names = {} alias_names = {} for ipkg in self.installed_packages(): if ipkg.package == pkg: continue qn = ipkg.package.qualified_name() package_names[ipkg.package.name] = qn for ipkg_alias in ipkg.package.aliases(): alias_names[ipkg_alias] = qn # Is the new package's name the same as an existing alias? if pkg.name in alias_names: qn = alias_names[pkg.name] return f'name "{pkg.name}" conflicts with alias from "{qn}"' # Any of the aliases matching another package's name or another alias? for alias in aliases(metadata_dict): if alias in package_names: qn = package_names[alias] return ( f'alias "{alias}" conflicts with name of installed package "{qn}"' ) if alias in alias_names: qn = alias_names[alias] return ( f'alias "{alias}" conflicts with alias of installed package "{qn}"' ) return "" def _install(self, package, version, use_existing_clone=False): """Install a :class:`.package.Package`. Returns: str: empty string if package installation succeeded else an error string explaining why it failed. Raises: git.GitCommandError: if the git repo is invalid IOError: if the package manifest file can't be written """ clonepath = os.path.join(self.package_clonedir, package.name) ipkg = self.find_installed_package(package.name) if use_existing_clone or ipkg: clone = git.Repo(clonepath) else: clone = _clone_package(package, clonepath, version) status = PackageStatus() status.is_loaded = ipkg.status.is_loaded if ipkg else False status.is_pinned = ipkg.status.is_pinned if ipkg else False version_tags = git_version_tags(clone) if version: if _is_commit_hash(clone, version): status.tracking_method = TRACKING_METHOD_COMMIT elif version in version_tags: status.tracking_method = TRACKING_METHOD_VERSION else: branches = _get_branch_names(clone) if version in branches: status.tracking_method = TRACKING_METHOD_BRANCH else: LOG.info( 'branch "%s" not in available branches: %s', version, branches, ) return f'no such branch or version tag: "{version}"' else: if len(version_tags): version = version_tags[-1] status.tracking_method = TRACKING_METHOD_VERSION else: version = git_default_branch(clone) status.tracking_method = TRACKING_METHOD_BRANCH status.current_version = version git_checkout(clone, version) status.current_hash = clone.head.object.hexsha status.is_outdated = _is_clone_outdated(clone, version, status.tracking_method) metadata_file = _pick_metadata_file(clone.working_dir) metadata_parser = configparser.ConfigParser(interpolation=None) invalid_reason = _parse_package_metadata(metadata_parser, metadata_file) if invalid_reason: return invalid_reason raw_metadata = _get_package_metadata(metadata_parser) invalid_reason = self._validate_alias_conflict(package, raw_metadata) if invalid_reason: return invalid_reason # A dummy stage that uses the actual installation folders; # we do not need to populate() it. stage = Stage(self) fail_msg = self._stage(package, version, clone, stage) if fail_msg: return fail_msg if not package.source: # If installing directly from git URL, see if it actually is found # in a package source and fill in those details. 
for pkg in self.source_packages(): if pkg.git_url == package.git_url: package.source = pkg.source package.directory = pkg.directory package.metadata = pkg.metadata break package.metadata = raw_metadata self.installed_pkgs[package.name] = InstalledPackage(package, status) self._write_manifest() self._refresh_bin_dir(self.bin_dir) LOG.debug('installed "%s"', package) return "" def _interpolate_package_metadata(self, metadata, stage): # This is a bit circular: we need to parse the user variables, if any, # from the metadata before we can substitute them into other package # metadata. requested_user_vars = UserVar.parse_dict(metadata) if requested_user_vars is None: return None, "package has malformed 'user_vars' metadata field" substitutions = { "zeek_dist": self.zeek_dist, "package_base": stage.clone_dir, } substitutions.update(self.user_vars) for uvar in requested_user_vars: val_from_env = os.environ.get(uvar.name()) if val_from_env: substitutions[uvar.name()] = val_from_env if uvar.name() not in substitutions: substitutions[uvar.name()] = uvar.val() # Now apply the substitutions via a new config parser: metadata_parser = configparser.ConfigParser(defaults=substitutions) metadata_parser.read_dict({"package": metadata}) return _get_package_metadata(metadata_parser), None # Ensure we have links in bin_dir for all executables coming with any of # the currently installed packages. def _refresh_bin_dir(self, bin_dir, prev_bin_dir=None): for ipkg in self.installed_pkgs.values(): for exe in self._get_executables(ipkg.package.metadata): # Put symlinks in place that are missing in current directory src = os.path.join(self.package_clonedir, ipkg.package.name, exe) dst = os.path.join(bin_dir, os.path.basename(exe)) if ( not os.path.exists(dst) or not os.path.islink(dst) or os.path.realpath(src) != os.path.realpath(dst) ): LOG.debug("creating link %s -> %s", src, dst) make_symlink(src, dst, force=True) else: LOG.debug("link %s is up to date", dst) # Remove all links in bin_dir that are associated with executables # coming with any of the currently installed package. 
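    # Illustrative example: a package whose metadata contains
    # "executables = build/foo-tool" has a link <bin_dir>/foo-tool pointing
    # into its clone; that link is removed here if it is still a symlink.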
    def _clear_bin_dir(self, bin_dir):
        for ipkg in self.installed_pkgs.values():
            for exe in self._get_executables(ipkg.package.metadata):
                old = os.path.join(bin_dir, os.path.basename(exe))
                if os.path.islink(old):
                    try:
                        os.unlink(old)
                        LOG.debug("removed link %s", old)
                    except Exception:
                        LOG.warning("failed to remove link %s", old)


def _get_branch_names(clone):
    rval = []

    for ref in clone.references:
        branch_name = str(ref.name)

        if not branch_name.startswith("origin/"):
            continue

        rval.append(branch_name.split("origin/")[1])

    return rval


def _is_version_outdated(clone, version):
    version_tags = git_version_tags(clone)
    latest = normalize_version_tag(version_tags[-1])
    return normalize_version_tag(version) != latest


def _is_branch_outdated(clone, branch):
    it = clone.iter_commits(f"{branch}..origin/{branch}")
    num_commits_behind = sum(1 for c in it)
    return num_commits_behind > 0


def _is_clone_outdated(clone, ref_name, tracking_method):
    if tracking_method == TRACKING_METHOD_VERSION:
        return _is_version_outdated(clone, ref_name)
    elif tracking_method == TRACKING_METHOD_BRANCH:
        return _is_branch_outdated(clone, ref_name)
    elif tracking_method == TRACKING_METHOD_COMMIT:
        return False
    else:
        raise NotImplementedError


def _is_commit_hash(clone, text):
    try:
        commit = clone.commit(text)
        return commit.hexsha.startswith(text)
    except Exception:
        return False


def _copy_package_dir(package, dirname, src, dst, scratch_dir):
    """Copy a directory from a package to its installation location.

    Returns:
        str: empty string if package dir copy succeeded else an error string
        explaining why it failed.
    """
    if not os.path.exists(src):
        return ""

    if os.path.isfile(src) and tarfile.is_tarfile(src):
        tmp_dir = os.path.join(scratch_dir, "untar")
        delete_path(tmp_dir)
        make_dir(tmp_dir)

        try:
            safe_tarfile_extractall(src, tmp_dir)
        except Exception as error:
            return str(error)

        ld = os.listdir(tmp_dir)

        if len(ld) != 1:
            # Apple `tar` might store HFS+ extended metadata in tar files.
            # These metadata files have the names `._FOO` for each entry `FOO`.
            # Since we expect a single top-level directory for the extracted
            # plugin, ignore the metadata file if we see it.
            ld.sort()
            if len(ld) == 2 and ld[0] == f"._{ld[1]}":
                ld = ld[1:]
            else:
                return f"failed to copy package {dirname}: invalid tarfile"

        src = os.path.join(tmp_dir, ld[0])

    if not os.path.isdir(src):
        return f"failed to copy package {dirname}: not a dir or tarfile"

    def ignore(_, files):
        rval = []

        for f in files:
            if f in {".git", "bro-pkg.meta", "zkg.meta"}:
                rval.append(f)

        return rval

    try:
        copy_over_path(src, dst, ignore=ignore)
    except shutil.Error as error:
        errors = error.args[0]
        reasons = ""

        for err in errors:
            src, dst, msg = err
            reason = f"failed to copy {dirname}: {src} -> {dst}: {msg}"
            reasons += "\n" + reason
            LOG.warning('installing "%s": %s', package, reason)

        return f"failed to copy package {dirname}: {reasons}"

    return ""


def _create_readme(file_path):
    if os.path.exists(file_path):
        return

    with open(file_path, "w") as f:
        f.write("WARNING: This directory is managed by zkg.\n")
        f.write("Don't make direct modifications to anything within it.\n")


def _clone_package(package, clonepath, version):
    """Clone a :class:`.package.Package` git repo.
Returns: git.Repo: the cloned package Raises: git.GitCommandError: if the git repo is invalid """ delete_path(clonepath) shallow = not is_sha1(version) return git_clone(package.git_url, clonepath, shallow=shallow) def _get_package_metadata(parser): metadata = {item[0]: item[1] for item in parser.items("package")} return metadata def _pick_metadata_file(directory): rval = os.path.join(directory, METADATA_FILENAME) if os.path.exists(rval): return rval return os.path.join(directory, LEGACY_METADATA_FILENAME) def _parse_package_metadata(parser, metadata_file): """Return string explaining why metadata is invalid, or '' if valid.""" if not parser.read(metadata_file): LOG.warning("%s: missing metadata file", metadata_file) return ( f"missing {METADATA_FILENAME} (or {LEGACY_METADATA_FILENAME}) metadata file" ) if not parser.has_section("package"): LOG.warning("%s: metadata missing [package]", metadata_file) return f"{os.path.basename(metadata_file)} is missing [package] section" for a in aliases(_get_package_metadata(parser)): if not is_valid_package_name(a): return f'invalid alias "{a}"' return "" _legacy_metadata_warnings = set() def _info_from_clone(clone, package, status, version): """Retrieves information about a package. Returns: A :class:`.package.PackageInfo` object. """ versions = git_version_tags(clone) default_branch = git_default_branch(clone) if _is_commit_hash(clone, version): version_type = TRACKING_METHOD_COMMIT elif version in versions: version_type = TRACKING_METHOD_VERSION else: version_type = TRACKING_METHOD_BRANCH metadata_file = _pick_metadata_file(clone.working_dir) metadata_parser = configparser.ConfigParser(interpolation=None) invalid_reason = _parse_package_metadata(metadata_parser, metadata_file) if invalid_reason: return PackageInfo( package=package, invalid_reason=invalid_reason, status=status, versions=versions, metadata_version=version, version_type=version_type, metadata_file=metadata_file, default_branch=default_branch, ) if ( os.path.basename(metadata_file) == LEGACY_METADATA_FILENAME and package.qualified_name() not in _legacy_metadata_warnings ): LOG.warning( "Package %s is using the legacy bro-pkg.meta metadata file. " "While bro-pkg.meta still functions, it is recommended to " "use zkg.meta instead for future-proofing. Please report this " "to the package maintainers.", package.qualified_name(), ) _legacy_metadata_warnings.add(package.qualified_name()) metadata = _get_package_metadata(metadata_parser) return PackageInfo( package=package, invalid_reason=invalid_reason, status=status, metadata=metadata, versions=versions, metadata_version=version, version_type=version_type, metadata_file=metadata_file, default_branch=default_branch, ) def _is_reserved_pkg_name(name): return name == "zeek" or name == "zkg"
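

# For quick reference, a hypothetical zkg.meta showing the metadata fields the
# code above interprets (values are made up; only the [package] section is
# read):
#
#   [package]
#   script_dir = scripts
#   plugin_dir = build
#   build_command = ./configure --zeek-dist=%(zeek_dist)s && make
#   test_command = cd testing && btest -d
#   executables = build/foo-tool
#   depends =
#       zeek >=4.0.0
#   aliases = foo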