#! /usr/bin/env python3 # # https://pypi.org/project/argcomplete/#global-completion # PYTHON_ARGCOMPLETE_OK """This script provides tooling to administrate the github.com/zeek organization.""" import argparse import json import os import sys from enum import IntEnum from pathlib import Path # This is the high-level error for API problems, per # https://github.com/fastai/ghapi/issues/138 and confirmed # in manual testing from urllib.error import HTTPError def print_err(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) try: import yaml except ImportError: # PyYAML is optional; we will see below whether we need it as we retrieve # the auth token. pass try: from ghapi.all import GhApi, paged except ImportError: print_err("This requires the Python Github API package: https://ghapi.fast.ai") sys.exit(1) try: # Argcomplete provides command-line completion for users of argparse. # We support it if available, but don't complain when it isn't. import argcomplete except ImportError: pass GITHUB_SERVER = "github.com" GITHUB_ORG = "zeek" # There's a mismatch between how Github shows access levels in the UI vs how # they work in the API. We use the UI-level ones here and provide helper # functions to translate. The discrepancies are "read" being "pull" in the API, # and "write" being "push". The others match. AccessLevel = IntEnum("AccessLevel", ["READ", "TRIAGE", "WRITE", "MAINTAIN", "ADMIN"]) def get_api_token(args): if args.auth_token: return args.auth_token # This is the ghapi-recommended way to configure your token and works # transparently: token = os.getenv("GITHUB_TOKEN") if token: return token # Try somewhat hard to locate gh's config file: gh_host_config = ( Path(os.getenv("XDG_CONFIG_HOME", default=Path.home() / ".config")) / "gh" / "hosts.yml" ) if gh_host_config.is_file(): if "yaml" not in sys.modules: print_err( "This requires PyYAML to use your existing gh config. See: https://pyyaml.org" ) print_err( "Please set the GITHUB_TOKEN environment variable to your Github API token." ) sys.exit(1) try: with open(gh_host_config) as stream: ymldata = yaml.safe_load(stream) return ymldata[GITHUB_SERVER]["oauth_token"] except (OSError, yaml.YAMLError, KeyError) as error: print_err(f"Unexpected gh configuration ({error}).") print_err( "Please set the GITHUB_TOKEN environment variable to your Github API token." ) sys.exit(1) print_err( "Please provide a Github API token via --auth-token or set the GITHUB_TOKEN environment variable" ) sys.exit(1) def get_access_level_string(perms): res = get_access_level(perms) if not res: return None return res.name.lower() def access_level_to_github(level): if level == AccessLevel.ADMIN: return "admin" if level == AccessLevel.MAINTAIN: return "maintain" if level == AccessLevel.WRITE: return "push" if level == AccessLevel.TRIAGE: return "triage" if level == AccessLevel.READ: return "pull" return None def access_level_from_github(level): if level == "admin": return AccessLevel.ADMIN if level == "maintain": return AccessLevel.MAINTAIN if level == "push": return AccessLevel.WRITE if level == "triage": return AccessLevel.TRIAGE if level == "pull": return AccessLevel.READ return None def get_access_level(perm): """Returns the access level for a given user/team as shown on the access settings page of a repo, based on the given set of permissions. This returns an AccessLevel, or None when no access level was identifiable. """ # This is a bit messy because permissions are presented differently in # different corners of the API. Sometimes it's a string, sometimes an # object with individual permission bits. For now just check if it's a # string and handle that separately. if isinstance(perm, str): return access_level_from_github(perm) try: if perm.admin: return AccessLevel.ADMIN if perm.maintain: return AccessLevel.MAINTAIN if perm.push: return AccessLevel.WRITE if perm.triage: return AccessLevel.TRIAGE if perm.pull: return AccessLevel.READ except AttributeError: pass return None def get_all_repos(api): """Returns a list of all repos in the GITHUB_ORG, sorted by name.""" repos = [] for page in paged(api.repos.list_for_org, org=GITHUB_ORG, per_page=100): repos.extend(page) repos.sort(key=lambda repo: repo.name) return repos def cmd_repos_list_access(api, args): result = [] for repo in get_all_repos(api): teams = api.repos.list_teams(repo.name) teams_results = [] for team in sorted(teams, key=lambda t: t.name): teams_results.append( { "name": team.name, "access": get_access_level_string(team.permissions), } ) # "direct" here means we don't list each individual member of teams that # also have access to the repo: users = api.repos.list_collaborators(repo.name, affiliation="direct") users_results = [] for user in sorted(users, key=lambda u: u.login): users_results.append( { "name": user.login, "access": get_access_level_string(user.permissions), } ) result.append( { "repo": repo.name, "teams": teams_results, "users": users_results, } ) print(json.dumps(result, sort_keys=True)) return True def cmd_repos_list_redundant_users(api, args): result = [] for repo in get_all_repos(api): teams = api.repos.list_teams(repo.name) teams_data = {} for team in teams: teams_data[team.name] = { "permission": team.permission, "users": [ member.login for member in api.teams.list_members_in_org(GITHUB_ORG, team.slug) ], } # "direct" here means we don't list each individual member of teams that # also have access to the repo: users = api.repos.list_collaborators(repo.name, affiliation="direct") for user in sorted(users, key=lambda u: u.login): for team, data in teams_data.items(): if user.login not in data["users"]: continue # This repo gives access to a team that already includes the # explicitly listed user. See if the user is added with # greater permissions, otherwise this user is redundant. team_access = get_access_level(data["permission"]) user_access = get_access_level(user.permissions) if user_access <= team_access: result.append( { "repo": repo.name, "user": user.login, "redundancy": team, } ) print(json.dumps(result, sort_keys=True)) return True def cmd_repos_remove_redundant_users(api, args): result = [] outcome = True for repo in get_all_repos(api): teams = api.repos.list_teams(repo.name) teams_data = {} for team in teams: teams_data[team.name] = { "permission": team.permission, "users": [ member.login for member in api.teams.list_members_in_org(GITHUB_ORG, team.slug) ], } # "direct" here means we don't list each individual member of teams that # also have access to the repo: users = api.repos.list_collaborators(repo.name, affiliation="direct") for user in sorted(users, key=lambda u: u.login): for team, data in teams_data.items(): if user.login not in data["users"]: continue # This repo gives access to a team that already includes the # explicitly listed user. See if the user is added with # greater permissions, otherwise this user is redundant. team_access = get_access_level(data["permission"]) user_access = get_access_level(user.permissions) if user_access <= team_access: try: if not args.dry_run: api.repos.remove_collaborator(repo.name, user.login) result.append( { "repo": repo.name, "user": user.login, "success": True, } ) except HTTPError as err: result.append( { "repo": repo.name, "user": user.login, "success": False, "code": err.code, "reason": err.reason, } ) outcome = False print(json.dumps(result, sort_keys=True)) return outcome def cmd_repos_add_team(api, args): if args.access_level not in [level.name.lower() for level in AccessLevel]: print_err( f"The given access level '{args.access_level}' is not valid. See --help.\n" ) return False access_level = access_level_to_github(AccessLevel[args.access_level.upper()]) # Resolve the given team name to a slug: team_slug = None teams = sorted(api.teams.list(GITHUB_ORG), key=lambda t: t.name) for team in teams: if ( team.name.lower() == args.team.lower() or team.slug.lower() == args.team.lower() ): team_slug = team.slug break if not team_slug: print_err( f"Team '{args.team}' is not valid for the '{GITHUB_ORG}' organization." ) print_err(f"Must be one of {', '.join(["'" + t.name + "'" for t in teams])}.") return False result = [] outcome = True for repo in get_all_repos(api): try: if not args.dry_run: api.teams.add_or_update_repo_permissions_in_org( GITHUB_ORG, team_slug, repo.name, access_level ) result.append( { "repo": repo.name, "success": True, } ) except HTTPError as err: result.append( { "repo": repo.name, "success": False, "code": err.code, "reason": err.reason, } ) outcome = False print(json.dumps(result, sort_keys=True)) return outcome def main(): top_parser = argparse.ArgumentParser( description="Helpers for automated Zeek repo administration on Github." ) top_parser.add_argument( "--auth-token", default=None, help="Your Github API token. You can also use the GITHUB_TOKEN environment " "variable to provide this. As a fallback, this script tries to use your " "gh configuration, if available.", ) cmd_parser = top_parser.add_subparsers( title="commands", dest="command", help="See `%(prog)s -h` for per-command usage info.", ) repos_parser = cmd_parser.add_parser( "repos", help="Do something for every repo in the Zeek organization." ) repos_cmd_parser = repos_parser.add_subparsers( title="commands", dest="command", help="See `%(prog)s -h` for per-command usage info.", ) repos_list_access_cmd_parser = repos_cmd_parser.add_parser( "list-access", help="List all users and teams that have access to a repo, along with the access level.", ) repos_list_access_cmd_parser.set_defaults(run_cmd=cmd_repos_list_access) repos_list_redundant_users_cmd_parser = repos_cmd_parser.add_parser( "list-redundant-users", help="List users already covered via teams providing at least the same access level.", ) repos_list_redundant_users_cmd_parser.set_defaults( run_cmd=cmd_repos_list_redundant_users ) repos_remove_redundant_user_cmd_parser = repos_cmd_parser.add_parser( "remove-redundant-users", help="Remove users already included via a team that provides at least the same access level.", ) repos_remove_redundant_user_cmd_parser.set_defaults( run_cmd=cmd_repos_remove_redundant_users ) repos_remove_redundant_user_cmd_parser.add_argument( "--dry-run", action="store_true", help="Don't make any changes, just pretend API calls succeed", ) repos_add_team_cmd_parser = repos_cmd_parser.add_parser( "add-team", help="Add the given team to each repository at the given access level. Teams that already have access are updated to the provided level.", ) repos_add_team_cmd_parser.set_defaults(run_cmd=cmd_repos_add_team) repos_add_team_cmd_parser.add_argument( "--dry-run", action="store_true", help="Don't make any changes, just pretend API calls succeed", ) repos_add_team_cmd_parser.add_argument( "--team", required=True, help="The name or slug of the team. Case-insensitive." ) repos_add_team_cmd_parser.add_argument( "--access-level", required=True, help=f"The access level. One of {', '.join(["'" + level.name.lower() + "'" for level in AccessLevel])}.", ) if "argcomplete" in sys.modules: argcomplete.autocomplete(top_parser) args = top_parser.parse_args() api = GhApi(owner=GITHUB_ORG, token=get_api_token(args)) try: return args.run_cmd(api, args) except HTTPError as err: sys.stderr.write(f"API error at {err.url}: {err.code}, {err.reason}\n") return False except KeyboardInterrupt: return False if __name__ == "__main__": sys.exit(0 if main() else 1)