Patrick Kelley 8fd444092b initial
2025-05-07 15:35:15 -04:00

653 lines
22 KiB
C++

// Copyright (c) 2020-now by the Zeek Project. See LICENSE for details.
#pragma once
#include <cxxabi.h>
#include <algorithm>
#include <list>
#include <optional>
#include <set>
#include <string>
#include <string_view>
#include <tuple>
#include <utility>
#include <vector>
#include <hilti/rt/3rdparty/ArticleEnumClass-v2/EnumClass.h>
#include <hilti/rt/exception.h>
#include <hilti/rt/filesystem.h>
#include <hilti/rt/result.h>
#include <hilti/rt/types/set_fwd.h>
#include <hilti/rt/types/time.h>
#include <hilti/rt/types/vector_fwd.h>
/**
 * Helper to create runtime type with enum semantics.
 *
 * This macro should only be used if the enum type needs to default to a value
 * other than `Undef` which is usually expected by users. All other cases
 * should use `HILTI_RT_ENUM`.
 *
 * The macro expands to a `struct name` wrapping a single `int64_t`. The
 * generated type provides:
 *
 *   - a nested unscoped `enum Value : int64_t` holding the enumerators;
 *   - a non-explicit `constexpr` constructor from `int64_t` that defaults to
 *     `default_`, so the type can be created implicitly from integers and
 *     enumerators;
 *   - a friend factory function `Enum(Value)`;
 *   - the comparison operators `==`, `!=` and `<`, all comparing the stored
 *     integer;
 *   - an accessor `value()` returning the stored integer.
 *
 * Note that the expansion does not end in a semicolon; the user of the macro
 * supplies it.
 *
 * @param name name of the type to create.
 * @param default_ enum value to use for default-constructed values.
 * @param __VA_ARGS__ comma-separated list of enumerator definitions, either
 * identifier or identifier with initializer
 */
#define HILTI_RT_ENUM_WITH_DEFAULT(name, default_, ...) \
struct name { \
enum Value : int64_t { __VA_ARGS__ }; \
constexpr name(int64_t value = default_) noexcept : _value(value) {} \
friend name Enum(Value value) { return name(value); } \
friend constexpr bool operator==(const name& a, const name& b) noexcept { return a.value() == b.value(); } \
friend constexpr bool operator!=(const name& a, const name& b) noexcept { return ! (a == b); } \
friend constexpr bool operator<(const name& a, const name& b) noexcept { return a.value() < b.value(); } \
constexpr int64_t value() const { return _value; } \
int64_t _value; \
}
/**
* Helper to create runtime type with enum semantics with default value `Undef`.
*
* This macro is the preferred way to create runtime types for enums since
* types created with this macro have the usual semantics of defaulting values
* to `Undef`.
*
* @param name name of the type to create.
* @param __VA_ARGS__ comma-separated list of enumerator definitions, either
* identifier or identifier with initializer. This list should contain
* an enumerator `Undef`.
*/
#define HILTI_RT_ENUM(name, ...) HILTI_RT_ENUM_WITH_DEFAULT(name, Undef, __VA_ARGS__)
/**
* On Linux `__thread` is faster than C++'s `thread_local`. However, on macOS
* `__thread` doesn't work. Also see this for a lot of detail:
* https://maskray.me/blog/2021-02-14-all-about-thread-local-storage.
*/
#if defined(__linux__)
#define HILTI_THREAD_LOCAL __thread
#else
#define HILTI_THREAD_LOCAL thread_local
#endif
namespace hilti::rt {
/** Reports an internal error and aborts execution. */
void internalError(std::string_view msg) __attribute__((noreturn));
} // namespace hilti::rt
#undef TINYFORMAT_ERROR
#define TINYFORMAT_ERROR(reason) throw ::hilti::rt::FormattingError(reason)
#include <hilti/rt/3rdparty/tinyformat/tinyformat.h>
#include <hilti/rt/extension-points.h>
#include <hilti/rt/fmt.h>
namespace hilti::rt {
/** Returns a string identifying the version of the runtime library. */
extern std::string version();
/** Dumps a backtrace to stderr and then aborts execution. */
extern void abort_with_backtrace() __attribute__((noreturn));
/** Aborts with an internal error saying we should not be where we are. */
extern void cannot_be_reached() __attribute__((noreturn));
/**
 * Statistics about resource usage.
 *
 * A plain aggregate of counters; an instance is returned by
 * `resource_usage()`.
 */
struct ResourceUsage {
// Note when changing this, update `resource_usage()`.
double user_time; ///< user time since runtime initialization
double system_time; ///< system time since runtime initialization
uint64_t memory_heap; ///< current size of heap in bytes
uint64_t num_fibers; ///< number of fibers currently in use
uint64_t max_fibers; ///< high-water mark for number of fibers in use
uint64_t max_fiber_stack_size; ///< global high-water mark for fiber stack size
uint64_t cached_fibers; ///< number of fibers currently cached for reuse
};
/** Returns statistics about the current resource usage. */
ResourceUsage resource_usage();
/** Returns the value of an environment variable, if set. */
extern std::optional<std::string> getenv(const std::string& name);
/**
* Creates a temporary file in the system temporary directory.
*
* @param prefix prefix to use for the file's basename
* @return a valid path or an error
* */
hilti::rt::Result<hilti::rt::filesystem::path> createTemporaryFile(const std::string& prefix = "");
/** Turns a path into an absolute path with all dots removed. */
hilti::rt::filesystem::path normalizePath(const hilti::rt::filesystem::path& p);
/**
 * Strips from the end of a string view every character that is contained in
 * a given set.
 *
 * @param s view to trim
 * @param chars set of characters to remove from the end of `s`
 * @return the trimmed view; it is empty if `s` consists only of characters
 * from `chars`
 *
 * \note This function is not UTF8-aware.
 */
inline std::string_view rtrim(std::string_view s, const std::string& chars) noexcept {
    // Index of the last character we keep, or `npos` if there is none.
    const auto last_kept = s.find_last_not_of(chars);

    std::string_view::size_type keep = 0;
    if ( last_kept != std::string_view::npos )
        keep = last_kept + 1;

    s.remove_suffix(s.size() - keep);
    return s;
}
/**
 * Strips from the beginning of a string view every character that is
 * contained in a given set.
 *
 * @param s view to trim
 * @param chars set of characters to remove from the beginning of `s`
 * @return the trimmed view; it is empty if `s` consists only of characters
 * from `chars`
 *
 * \note This function is not UTF8-aware.
 */
inline std::string_view ltrim(std::string_view s, const std::string& chars) noexcept {
    // Index of the first character we keep, or `npos` if there is none.
    const auto first_kept = s.find_first_not_of(chars);

    if ( first_kept == std::string_view::npos )
        s.remove_prefix(s.size());
    else
        s.remove_prefix(first_kept);

    return s;
}
/**
 * Strips from both ends of a string view every character that is contained
 * in a given set.
 *
 * @param s view to trim
 * @param chars set of characters to remove from both ends of `s`
 * @return the trimmed view
 *
 * \note This function is not UTF8-aware.
 */
inline std::string_view trim(std::string_view s, const std::string& chars) noexcept {
    // Trim the tail first, then the head of what remains.
    const std::string_view without_tail = rtrim(s, chars);
    return ltrim(without_tail, chars);
}
namespace detail {
constexpr char whitespace_chars[] = " \t\f\v\n\r";
} // namespace detail
/**
* Returns a string view with all trailing white space removed.
*
* \note This function is not UTF8-aware.
*/
inline std::string_view rtrim(std::string_view s) noexcept { return rtrim(s, detail::whitespace_chars); }
/**
* Returns a string view with all leading white space removed.
*
* \note This function is not UTF8-aware.
*/
inline std::string_view ltrim(std::string_view s) noexcept { return ltrim(s, detail::whitespace_chars); }
/**
* Returns a string view with all leading & trailing white space removed.
*
* \note This function is not UTF8-aware.
*/
inline std::string_view trim(std::string_view s) noexcept { return trim(s, detail::whitespace_chars); }
/**
* Splits a string at all occurrences of a delimiter. Successive occurrences
* of the delimiter will be split into multiple pieces.
*
* \note This function is not UTF8-aware.
*/
std::vector<std::string_view> split(std::string_view s, std::string_view delim);
/**
* Splits a string at all occurrences of successive white space.
*
* \note This function is not UTF8-aware.
*/
std::vector<std::string_view> split(std::string_view s);
/**
* Splits a string once at the 1st occurrence of successive whitespace. Leaves
* the 2nd element of the result pair unset if whitespace does not occur.
*
* \note This function is not UTF8-aware.
*/
extern std::pair<std::string, std::string> split1(std::string s);
/**
* Splits a string once at the last occurrence of successive whitespace. Leaves
* the 2nd element of the result pair unset if whitespace does not occur.
*
* \note This function is not UTF8-aware.
*/
extern std::pair<std::string, std::string> rsplit1(std::string s);
/**
* Splits a string once at the 1st occurrence of a delimiter. Leaves the 2nd
* element of the result pair unset if the delimiter does not occur.
*
* \note This function is not UTF8-aware.
*/
extern std::pair<std::string, std::string> split1(std::string s, const std::string& delim);
/**
* Splits a string once at the last occurrence of a delimiter. Leaves the 1st
* element of the result pair unset if the delimiter does not occur.
*
* \note This function is not UTF8-aware.
*/
extern std::pair<std::string, std::string> rsplit1(std::string s, const std::string& delim);
/**
* Replaces all occurrences of one string with another.
*
* \note This function is not UTF8-aware.
*/
std::string replace(std::string s, std::string_view o, std::string_view n);
/**
* Returns true if a string begins with another.
*
* \note This function is not UTF8-aware.
*/
bool startsWith(std::string_view s, std::string_view prefix);
/**
* Returns true if a string ends with another.
*
* \note This function is not UTF8-aware.
*/
bool endsWith(std::string_view s, std::string_view suffix);
/**
 * Python-style enumerate() that wraps an iterable so that iterating over the
 * result yields pairs `(index, val)`, with the index counting up from zero.
 * From http://reedbeta.com/blog/python-like-enumerate-in-cpp17/.
 *
 * The returned wrapper stores `iterable` as a `T`: a reference if an lvalue
 * was passed in, and a value (moved in) if an rvalue was passed in.
 *
 * @param iterable range to iterate over; must work with `std::begin()` and
 * `std::end()`
 * @return wrapper object usable in a range-based `for` loop
 */
template<typename T, typename TIter = decltype(std::begin(std::declval<T>())),
         typename = decltype(std::end(std::declval<T>()))>
constexpr auto enumerate(T&& iterable) {
    // Pairs the underlying iterator with a running counter.
    struct Cursor {
        size_t index;
        TIter pos;

        // Only the underlying position takes part in the comparison; the
        // counter of an end cursor is meaningless.
        bool operator!=(const Cursor& rhs) const { return pos != rhs.pos; }

        void operator++() {
            ++index;
            ++pos;
        }

        // Yields a tuple of references to the counter and the current element.
        auto operator*() const { return std::tie(index, *pos); }
    };

    // Owns, or refers to, the range being enumerated.
    struct Enumerated {
        T source;

        auto begin() { return Cursor{0, std::begin(source)}; }
        auto end() { return Cursor{0, std::end(source)}; }
    };

    return Enumerated{std::forward<T>(iterable)};
}
/**
* Expands escape sequences in a UTF8 string. The following escape sequences
* are supported:
*
* ============ ============================
* Escape Result
* ============ ============================
* \\ Backslash
* \\n Line feed
* \\r Carriage return
* \\t Tabulator
* \\uXXXX 16-bit Unicode codepoint
* \\UXXXXXXXX 32-bit Unicode codepoint
* \\xXX 8-bit hex value
* ============ ============================
*
* @param s string to expand
* @return A UTF8 string with escape sequences expanded
*/
std::string expandUTF8Escapes(std::string s);
namespace render_style {
/**
* Flags specifying escaping style when rendering raw data for printing. The
* default style renders all non-printable characters as hex escapes (`\xNN`)
* and escapes backslashes with a second backslash. Any specified flags modify
* the default style accordingly.
*/
enum class Bytes {
Default = 0, /**< name for unmodified default style */
EscapeQuotes = (1U << 1U), /**< escape double quotes with backslashes */
UseOctal = (1U << 2U), /**< escape non-printables with `\NNN` instead of `\xNN` */
NoEscapeBackslash = (1U << 3U), /**< do not escape backslashes */
};
/**
* Flags specifying escaping style when rendering UTF8 strings for printing.
* The default style escapes control characters and null bytes with
* corresponding C-style control escapes (e.g., `\n`, `\0`), and escapes any
* backslashes with a second backslash. Any specified flags modify the default
* style accordingly. If not otherwise noted, any escapings are reversible
* through `expandUTF8Escapes()`.
*/
enum class UTF8 {
Default = 0, /**< name for unmodified default style */
EscapeQuotes = (1U << 1U), /**< escape double quotes with backslashes */
NoEscapeBackslash = (1U << 2U), /**< do not escape backslashes; this may leave the result non-reversible */
NoEscapeControl = (1U << 3U), /**< do not escape control characters and null bytes */
NoEscapeHex =
(1U << 4U), /**< do not escape already existing `\xNN` escape codes; this may leave the result non-reversible */
};
} // namespace render_style
} // namespace hilti::rt
enableEnumClassBitmask(hilti::rt::render_style::Bytes); // must be in global scope
enableEnumClassBitmask(hilti::rt::render_style::UTF8); // must be in global scope
namespace hilti::rt {
/**
* Escapes non-printable characters in a raw string. This produces a new
* string that can be reverted by expandEscapes().
*
* @param s string to escape
* @param style flags selecting the escaping style (see `render_style::Bytes`),
* e.g., whether to also escape quote characters, or whether to use `\NNN`
* instead of `\xNN` (needed for C++)
* @return escaped string
*/
std::string escapeBytes(std::string_view s, bitmask<render_style::Bytes> style = render_style::Bytes::Default);
/**
* Escapes non-printable and control characters in an UTF8 string. Unless
* noted otherwise for a style flag, this produces a new string that can be
* reverted by expandUTF8Escapes().
*
* @param s string to escape
* @param style flags selecting the escaping style (see `render_style::UTF8`),
* e.g., whether to also escape quote characters, to leave control characters
* unescaped, or to leave already existing `\xNN` escape codes untouched
* @return escaped std::string
*/
std::string escapeUTF8(std::string_view s, bitmask<render_style::UTF8> style = render_style::UTF8::Default);
/**
 * Concatenates the elements of a container into a single string, placing a
 * delimiter between each pair of neighboring elements.
 *
 * @param l container to join; each element must be explicitly convertible to
 * `std::string`
 * @param delim separator inserted between elements; defaults to the empty
 * string
 * @return the joined string; empty if `l` has no elements
 */
template<typename T>
std::string join(const T& l, const std::string& delim = "") {
    std::string joined;

    // Empty ahead of the first element, `delim` from then on.
    std::string_view separator;

    for ( const auto& element : l ) {
        joined += separator;
        joined += std::string(element);
        separator = delim;
    }

    return joined;
}
namespace detail {
/** Helper template to detect whether a type is a `Vector`. */
template<typename T>
struct is_Vector : std::false_type {};
/** Specialization matching every `Vector<T, Allocator>` instantiation. */
template<typename T, typename Allocator>
struct is_Vector<Vector<T, Allocator>> : std::true_type {};
/**
 * Helper which given some container `C` of `X` returns a default constructed
 * container of the same type class as `C` but with element type `Y`.
 *
 * The function argument is unnamed and never inspected; the decision is made
 * purely on the type `C` at compile time:
 *
 *   - `std::vector<X>` yields `std::vector<Y>`
 *   - `std::set<X>` yields `std::set<Y>`
 *   - any `Vector` instantiation yields `Vector<Y>`
 *   - `Set<X>` yields `Set<Y>`
 *   - everything else yields `std::vector<Y>`
 *
 * The `std::is_same_v` checks compare against the container spelled with
 * only its element type, so e.g. a `std::vector` with an explicitly given
 * non-default allocator takes the fallback branch.
 */
template<typename C, typename Y>
constexpr auto transform_result_value(const C&) {
using X = typename C::value_type;
if constexpr ( std::is_same_v<C, std::vector<X>> ) {
return std::vector<Y>();
}
else if constexpr ( std::is_same_v<C, std::set<X>> ) {
return std::set<Y>();
}
else if constexpr ( is_Vector<C>::value ) {
// We do not preserve the allocator since a proper custom one could depend on `Y`.
return Vector<Y>();
}
else if constexpr ( std::is_same_v<C, Set<X>> ) {
return Set<Y>();
}
else
return std::vector<Y>(); // fallback
}
} // namespace detail
/**
 * Applies a function to each element of a container and collects the results
 * in a new container.
 *
 * The type of the returned container is chosen by
 * `detail::transform_result_value()` based on the type of `x`; its element
 * type is the type `f` returns.
 *
 * @param x container to iterate over
 * @param f callable invoked once per element of `x`
 * @return container holding the results of calling `f`
 */
template<typename C, typename F>
auto transform(const C& x, F f) {
    // `x` is `const`, so `std::transform` below hands `f` const elements.
    // Deduce the result type for exactly that call; a callable overloaded on
    // constness could otherwise make us pick the wrong element type.
    using Y = std::invoke_result_t<F, const typename C::value_type&>;

    auto y = detail::transform_result_value<C, Y>(x);
    std::transform(std::begin(x), std::end(x), std::inserter(y, std::end(y)), f);
    return y;
}
class OutOfRange;
/**
 * Parses a numerical value from a character sequence into an
 * integer. Character sequences can start with `+` or `-` to
 * denote the sign.
 *
 * Digits are `0`-`9`, followed by the letters `a`-`z` (or `A`-`Z`) for the
 * values 10 through 35. Parsing stops at the first character that is not a
 * valid digit for `base`. No overflow checking is performed.
 *
 * Users should check the returned iterator to detect how many
 * characters were extracted. If the returned iterator is
 * different from `s` the extracted numerical value was stored in
 * the memory pointed to by `result`; otherwise `result` remains
 * unchanged.
 *
 * @pre The input sequence must not be empty, i.e., we require `s != e`.
 * @pre Base must be in the inclusive range [2, 36].
 *
 * @par s beginning of the input range.
 * @par e end of the input range.
 * @par base base of the input range.
 * @par result address of the memory location to used for storing
 * a possible parsed result.
 * @return iterator to the first character not used in value
 * extraction.
 * @throw `OutOfRange` if `base` is outside of [2, 36]
 * @throw `InvalidArgument` if the input range is empty
 */
template<class Iter, typename Result>
inline Iter atoi_n(Iter s, Iter e, uint8_t base, Result* result) {
    if ( base < 2 || base > 36 )
        throw OutOfRange("base for numerical conversion must be between 2 and 36");

    if ( s == e )
        throw InvalidArgument("cannot decode from empty range");

    std::optional<Result> n = std::nullopt;
    bool neg = false;
    auto it = s;

    if ( *it == '-' ) {
        neg = true;
        ++it;
    }
    else if ( *it == '+' ) {
        neg = false;
        ++it;
    }

    for ( ; it != e; ++it ) {
        auto c = *it;

        // Map the character to its digit value first and only then compare
        // against the base. Testing `c < '0' + base` directly would accept
        // the characters following '9' (':', ';', ...) as digits for any
        // base larger than 10.
        int digit;
        if ( c >= '0' && c <= '9' )
            digit = c - '0';
        else if ( c >= 'a' && c <= 'z' )
            digit = c - 'a' + 10;
        else if ( c >= 'A' && c <= 'Z' )
            digit = c - 'A' + 10;
        else
            break;

        if ( digit >= base )
            break;

        n = n.value_or(Result()) * base + static_cast<Result>(digit);
    }

    // No digit consumed (possibly just a sign): report that nothing was parsed.
    if ( ! n )
        return s;

    s = it;

    if ( neg )
        *result = -*n;
    else
        *result = *n;

    return s;
}
/**
 * Computes integer powers through repeated squaring.
 *
 * @param base value to raise
 * @param exp exponent; NOTE(review): the loop only terminates once `exp`
 * has been shifted down to zero, so this looks like it expects a
 * non-negative exponent -- confirm against callers
 * @return `base` raised to the power of `exp`
 */
template<typename I1, typename I2>
inline I1 pow(I1 base, I2 exp) {
    I1 acc = 1;

    // Consume the exponent bit by bit. `base` is squared only if further
    // bits remain, so no superfluous multiplication happens at the end.
    for ( ;; base *= base ) {
        if ( exp & 1 )
            acc *= base;

        exp >>= 1;

        if ( ! exp )
            return acc;
    }
}
// Tuple for-each, from
// https://stackoverflow.com/questions/40212085/type-erasure-for-objects-containing-a-stdtuple-in-c11
namespace detail {
/**
 * Implementation helper for `map_tuple()`: calls `f` on each element of `tup`
 * selected by the index pack `Is` and collects the results through
 * `std::make_tuple`.
 *
 * The calls to `f` are arguments of a single function call, so the language
 * does not specify the order in which they are evaluated.
 *
 * @param tup tuple to read elements from
 * @param f callable applied to each selected element
 * @return tuple of the values returned by `f`
 */
template<typename T, typename F, std::size_t... Is>
constexpr auto map_tuple(T&& tup, F& f, std::index_sequence<Is...> /*unused*/) {
return std::make_tuple(f(std::get<Is>(std::forward<T>(tup)))...);
}
} // namespace detail
/**
 * Generic tuple for-each that runs a callback on each element of a tuple, in
 * order of increasing index.
 *
 * @tparam F type of the callback
 * @tparam I index of the element to start at; users normally leave this at
 * its default of zero
 * @param tup tuple to iterate over
 * @param func callback to invoke; it must accept every element type that is
 * visited
 */
template<typename F, std::size_t I = 0, typename... Ts>
void tuple_for_each(const std::tuple<Ts...>& tup, F func) {
    // Visit element `I`, then recurse for the remaining ones; the recursion
    // bottoms out once `I` has reached the size of the tuple.
    if constexpr ( I != sizeof...(Ts) ) {
        func(std::get<I>(tup));
        tuple_for_each<F, I + 1>(tup, func);
    }
}
/**
 * Produces a new tuple by applying a transformation function to every
 * element of an existing tuple.
 *
 * @param tup tuple to transform
 * @param f callable applied to each element of `tup`
 * @return tuple holding the values returned by `f`
 */
template<typename T, typename F, std::size_t TupSize = std::tuple_size_v<std::decay_t<T>>>
constexpr auto map_tuple(T&& tup, F f) {
    // One index per tuple element, expanded by the helper.
    using Indices = std::make_index_sequence<TupSize>;
    return detail::map_tuple(std::forward<T>(tup), f, Indices{});
}
/** Available byte orders. */
HILTI_RT_ENUM(ByteOrder, Little, Big, Network, Host, Undef = -1);
/**
* Returns the byte order of the system we're running on. The result is
* either `ByteOrder::Little` or `ByteOrder::Big`.
*/
extern ByteOrder systemByteOrder();
namespace detail::adl {
std::string to_string(const ByteOrder& x, tag /*unused*/);
}
/** Formats a time according to user-specified format string.
*
* This function uses the currently active locale and timezone to format
* values. Formatted strings cannot exceed 128 bytes.
*
* @param format a POSIX-conformant format string, see
* https://pubs.opengroup.org/onlinepubs/009695399/functions/strftime.html
* for the available format specifiers
* @param time timestamp to format
* @return formatted timestamp
* @throw `InvalidArgument` if the timestamp could not be formatted
*/
std::string strftime(const std::string& format, const Time& time);
/** Parse time from string.
*
* This function uses the currently active locale and timezone to parse values.
*
* @param buf string to parse
* @param format format string dictating how to interpret `buf`, see
* https://pubs.opengroup.org/onlinepubs/009695399/functions/strptime.html
* for the available format specifiers.
* @return parsed time value
* @throw `InvalidArgument` if the time value could not be parsed
* @throw `OutOfRange` if the parsed time value cannot be represented
*/
Time strptime(const std::string& buf, const std::string& format);
// RAII helper to create a temporary directory.
class TemporaryDirectory {
public:
TemporaryDirectory() {
const auto tmpdir = hilti::rt::filesystem::temp_directory_path();
auto template_ = (tmpdir / "hilti-rt-test-XXXXXX").native();
auto path = ::mkdtemp(template_.data());
if ( ! path )
throw RuntimeError("cannot create temporary directory");
_path = path;
}
TemporaryDirectory(const TemporaryDirectory& other) = delete;
TemporaryDirectory(TemporaryDirectory&& other) noexcept { _path = std::move(other._path); }
~TemporaryDirectory() {
// In general, ignore errors in this function.
std::error_code ec;
if ( ! hilti::rt::filesystem::exists(_path, ec) )
return;
// Make sure we have permissions to remove the directory.
hilti::rt::filesystem::permissions(_path, hilti::rt::filesystem::perms::all, ec);
// The desugared loop contains an iterator increment which could throw (no automagic call of
// `std::filesystem::recursive_directory_iterator::increment`), see LWG3013 for the "fix".
// Ignore errors from that.
try {
for ( const auto& entry : hilti::rt::filesystem::recursive_directory_iterator(_path, ec) )
hilti::rt::filesystem::permissions(entry, hilti::rt::filesystem::perms::all, ec);
} catch ( ... ) {
; // Ignore error.
}
hilti::rt::filesystem::remove_all(_path, ec); // ignore errors
}
const auto& path() const { return _path; }
TemporaryDirectory& operator=(const TemporaryDirectory& other) = delete;
TemporaryDirectory& operator=(TemporaryDirectory&& other) noexcept {
_path = std::move(other._path);
return *this;
}
private:
hilti::rt::filesystem::path _path;
};
/**
 * Combines two or more hashes into a single one.
 *
 * Hashes are folded in from left to right: the second hash is shifted left
 * by one bit and XORed into the first, and the outcome then takes the role
 * of the first hash for combining with the next one.
 *
 * @param hash1 first hash
 * @param hash2 second hash
 * @param hashes further hashes to mix in, if any
 * @return combined hash
 */
template<typename... Hashes>
constexpr std::size_t hashCombine(std::size_t hash1, std::size_t hash2, Hashes... hashes) {
    const std::size_t combined = hash1 ^ (hash2 << 1);

    if constexpr ( sizeof...(hashes) == 0 )
        return combined;
    else
        return hashCombine(combined, hashes...);
}
} // namespace hilti::rt