#!/bin/bash

## Copyright (C) 2026 - 2026 ENCRYPTED SUPPORT LLC <adrelanos@whonix.org>
## See the file COPYING for copying conditions.

## AI-Assisted

## Fork every selected repository from a source GitHub user/org into
## a target user/org. Idempotent. Optional flags re-apply per-repo
## settings on the target side. Run `github-org-fork --help` for full
## usage.

## Strict mode.
##   errexit  - abort on an unhandled non-zero exit status.
##   nounset  - expanding an unset variable is an error.
##   pipefail - a pipeline fails when any of its stages fails.
##   errtrace - an ERR trap is inherited by functions, command
##              substitutions and subshells.
set -o errexit -o nounset -o pipefail -o errtrace
##   inherit_errexit - command substitutions keep errexit enabled.
##   shift_verbose   - 'shift' past the end of argv prints an error.
shopt -s inherit_errexit shift_verbose

# shellcheck source=../libexec/developer-meta-files/github-org-lib.bsh
source /usr/libexec/developer-meta-files/github-org-lib.bsh
## R-082: source every helper-scripts file used directly.
# shellcheck source=../../../helper-scripts/usr/libexec/helper-scripts/log_run_die.sh
source "${HELPER_SCRIPTS_PATH:-}"/usr/libexec/helper-scripts/log_run_die.sh
# shellcheck source=../../../helper-scripts/usr/libexec/helper-scripts/has.sh
source "${HELPER_SCRIPTS_PATH:-}"/usr/libexec/helper-scripts/has.sh
# shellcheck source=../../../helper-scripts/usr/libexec/helper-scripts/strings.bsh
source "${HELPER_SCRIPTS_PATH:-}"/usr/libexec/helper-scripts/strings.bsh

## Defaults for every command-line option. The option parser below
## overwrites these; the functions further down read them as globals.

## Source-side selection: which repos are considered for forking.
include_private=false
include_archived=false
include_forks=false
## Stacked --include / --exclude regexes; empty means "none given".
include_re=""
exclude_re=""

## Target-side per-repo settings.
disable_issues=false
disable_wiki=false
disable_projects=false
## One of: enable | disable | leave
actions_state=leave
## One of: read | write | leave
workflow_perms=leave
sync_branches=false

# G-030: --apply or --dry-run required.
## mode_set becomes 1 once either mode flag was seen; dry_run becomes 1
## only for --dry-run.
mode_set=0
dry_run=0
verbose=0

## Print the usage text to stdout. Callers redirect to stderr when the
## help is shown because of a usage error. The heredoc delimiter is
## quoted, so '${GITHUB_TOKEN}' below is printed literally.
show_help() {
  cat <<'EOF'
Fork every selected repo from a source GitHub user/org into a target
user/org. Idempotent: existing forks are left in place, only missing
ones are created. Optional flags also (re-)apply per-repo settings on
the target side.

Usage:
  github-org-fork --apply   [OPTIONS] <source-owner> <target-owner>
  github-org-fork --dry-run [OPTIONS] <source-owner> <target-owner>

Options:
  --apply              perform fork creation + per-repo configuration
  --dry-run            report planned actions, do nothing
  --include-private    include private repos       (default: skip)
  --include-archived   include archived repos      (default: skip)
  --include-forks      include forks of other repos (default: skip)
  --include REGEX      only fork repos whose name matches REGEX
  --exclude REGEX      skip forking repos whose name matches REGEX
  --disable-issues     turn off issues on each target repo
  --disable-wiki       turn off wiki on each target repo
  --disable-projects   turn off projects on each target repo
  --actions {enable|disable|leave}
                       set Actions state on each target repo
                       (default: leave settings unchanged)
  --workflow-perms {read|write|leave}
                       set default GITHUB_TOKEN permissions in target
                       workflows (default: leave)
  --sync-branches      after fork-create + configure, fast-forward each
                       target repo's default branch from upstream.
  --debug              enable shell command tracing (set -x)
  -v, --verbose        print each API call's effect
  -h, --help           show this help and exit

--include and --exclude can be passed multiple times. All includes and
excludes stack on top of each other.

Auth: ${GITHUB_TOKEN} env var, or ~/.config/github-token with
permissions 0600. The token must have admin access to the
<target-owner> org so that github-org-fork can create forks under it.
EOF
}

## Option parsing. Consumes leading options from argv and stops at '--'
## (which is itself consumed) or at the first word that does not start
## with '-'. Whatever is left afterwards is the positional arguments.
while [ "$#" -gt 0 ]; do
  case "$1" in
    --debug)
      set -x
      shift
      ;;

    ## Source-side selection switches.
    --include-private)  include_private='true';  shift ;;
    --include-archived) include_archived='true'; shift ;;
    --include-forks)    include_forks='true';    shift ;;

    ## --include / --exclude may each be given several times. The
    ## includes are OR-ed together into one whitelist regex, the
    ## excludes into one blacklist regex that is applied to names the
    ## whitelist let through. An empty REGEX is silently ignored.
    --include)
      if [ "$#" -lt 2 ]; then
        die 64 "missing value for --include"
      fi
      if [ -n "$2" ]; then
        include_re="${include_re:+${include_re}|}(${2})"
      fi
      shift 2
      ;;
    --exclude)
      if [ "$#" -lt 2 ]; then
        die 64 "missing value for --exclude"
      fi
      if [ -n "$2" ]; then
        exclude_re="${exclude_re:+${exclude_re}|}(${2})"
      fi
      shift 2
      ;;

    ## Target-side feature switches.
    --disable-issues)   disable_issues='true';   shift ;;
    --disable-wiki)     disable_wiki='true';     shift ;;
    --disable-projects) disable_projects='true'; shift ;;

    ## Enumerated values: reject anything outside the documented set.
    --actions)
      if [ "$#" -lt 2 ]; then
        die 64 "missing value for --actions"
      fi
      case "$2" in
        enable|disable|leave) actions_state="$2" ;;
        *) die 64 "invalid --actions value: '$2'" ;;
      esac
      shift 2
      ;;
    --workflow-perms)
      if [ "$#" -lt 2 ]; then
        die 64 "missing value for --workflow-perms"
      fi
      case "$2" in
        read|write|leave) workflow_perms="$2" ;;
        *) die 64 "invalid --workflow-perms value: '$2'" ;;
      esac
      shift 2
      ;;

    --sync-branches)
      sync_branches='true'
      shift
      ;;

    ## Mode flags are mutually exclusive and may appear only once in
    ## total; only --dry-run flips dry_run.
    --apply|--dry-run)
      if [ "${mode_set}" -ne 0 ]; then
        die 64 'conflicting mode flags; specify exactly one of --apply / --dry-run'
      fi
      if [ "$1" = '--dry-run' ]; then
        dry_run=1
      fi
      mode_set=1
      shift
      ;;

    -v|--verbose)
      verbose=1
      shift
      ;;
    -h|--help)
      show_help
      exit 0
      ;;

    ## Explicit end of options.
    --)
      shift
      break
      ;;
    -*)
      die 64 "unknown option: '$1'"
      ;;
    ## First positional argument: leave it in argv.
    *)
      break
      ;;
  esac
done

## Exactly two positional arguments must remain after option parsing:
## the source owner followed by the target owner. Anything else is a
## usage error (exit status 64).
if (( $# != 2 )); then
  show_help >&2
  exit 64
fi

source_owner="$1"
target_owner="$2"

## One of --apply / --dry-run is mandatory.
if [ "${mode_set}" -ne 1 ]; then
  show_help >&2
  die 64 'specify exactly one of --apply / --dry-run'
fi

## Dependency check first, then both owner names are validated with
## the 'user' rule set of ghorg_validate_name.
ghorg_require_deps
ghorg_validate_name "${source_owner}" user
ghorg_validate_name "${target_owner}" user

## Create one fork via POST /repos/{owner}/{repo}/forks.
##
## Arguments: $1 - repository name below ${source_owner}.
## Globals:   source_owner, target_owner, dry_run (all read-only).
## Returns:   0 when the fork request was accepted (or only reported
##            in dry-run mode), 1 on validation failure, auth mismatch
##            or an unexpected HTTP status.
##
## The request body names the target through its 'organization' field,
## which is only meaningful for an Organization. For a User target the
## field has to be left out and GitHub forks into the *authenticated*
## account no matter what target_owner says - the API offers no way to
## pick another user. Therefore the authenticated login is compared
## with target_owner and the fork is refused on mismatch.
fork_one() {
  local repo_name owner_kind request_json login api_reply http_code

  repo_name="$1"

  if ! ghorg_validate_name "${repo_name}" repo; then
    return 1
  fi

  owner_kind="$(ghorg_account_type "${target_owner}")" || return 1

  case "${owner_kind}" in
    Organization)
      request_json="$(jq -n --arg org "${target_owner}" -- \
        '{organization: $org, default_branch_only: false}')"
      ;;
    *)
      login="$(ghorg_authenticated_user)" || return 1
      if [ "${login}" != "${target_owner}" ]; then
        log error "target user '${target_owner}' does not match auth user '${login}'; refusing to fork"
        return 1
      fi
      request_json="$(jq -n -- '{default_branch_only: false}')"
      ;;
  esac

  ## Dry-run stops here: every check above has run, nothing is sent.
  if [ "${dry_run}" = '1' ]; then
    log notice "DRY-RUN: fork ${source_owner}/${repo_name} -> ${target_owner}/${repo_name}"
    return 0
  fi

  api_reply="$(ghorg_api POST "/repos/${source_owner}/${repo_name}/forks" "${request_json}")"
  http_code="$(ghorg_status_of "${api_reply}")"

  ## The documentation lists 202 only; 201 also shows up in practice
  ## when the fork was already in-flight. Both count as success.
  if [ "${http_code}" != '202' ] && [ "${http_code}" != '201' ]; then
    log error "fork '${source_owner}/${repo_name}': HTTP '${http_code}'"
    return 1
  fi
  log notice "forked: ${source_owner}/${repo_name} -> ${target_owner}/${repo_name}"
}

## Detect a name collision on the target side.
##
## For an already existing ${target_owner}/$1 the parent repository is
## looked up. A parent other than ${source_owner}/$1 means two distinct
## upstream repos ended up sharing one mirror name. That is reported
## as a warning only; a failed or empty parent lookup is ignored.
##
## Arguments: $1 - repository name.
check_collision() {
  local repo actual_parent wanted_parent

  repo="$1"
  wanted_parent="${source_owner}/${repo}"

  if ! actual_parent="$(ghorg_repo_parent "${target_owner}" "${repo}")"; then
    return 0
  fi

  ## No parent reported, or the expected one: nothing to report.
  if [ -z "${actual_parent}" ] || [ "${actual_parent}" = "${wanted_parent}" ]; then
    return 0
  fi

  log warn "collision: '${target_owner}/${repo}' exists as fork of '${actual_parent}', expected '${wanted_parent}'"
}

## Apply the optional --disable-* / --actions / --workflow-perms
## settings to a single target repo. Each of the three sections is
## skipped when its flags were not passed, so without any flag this is
## a no-op.
##
## Arguments: $1 - repository name below ${target_owner}.
## Globals:   target_owner, dry_run, verbose, disable_issues,
##            disable_wiki, disable_projects, actions_state,
##            workflow_perms (all read-only).
## Returns:   0 once every requested setting has been attempted. An
##            API call answered with an unexpected HTTP status is
##            logged as a warning and does not change the return
##            status, so the result no longer depends on --verbose.
configure_one() {
  local repo patch enabled body result status response_body

  repo="$1"

  ## Section 1: feature toggles. A single PATCH carries only the has_*
  ## keys whose --disable-* flag was given. The disable_* globals hold
  ## the strings 'true' / 'false', which are valid JSON booleans for
  ## --argjson.
  if [ "${disable_issues}" = 'true' ] \
    || [ "${disable_wiki}" = 'true' ] \
    || [ "${disable_projects}" = 'true' ]; then
    patch="$(jq -n \
      --argjson issues "${disable_issues}" \
      --argjson wiki "${disable_wiki}" \
      --argjson projects "${disable_projects}" -- \
      '(if $issues then {has_issues: false} else {} end)
        + (if $wiki then {has_wiki: false} else {} end)
        + (if $projects then {has_projects: false} else {} end)')"
    if [ "${dry_run}" = '1' ]; then
      log notice "DRY-RUN: PATCH ${target_owner}/${repo} ${patch}"
    else
      result="$(ghorg_api PATCH "/repos/${target_owner}/${repo}" "${patch}")"
      status="$(ghorg_status_of "${result}")"
      if [ "${status}" = '200' ]; then
        if [ "${verbose}" = '1' ]; then
          log notice "configured: ${target_owner}/${repo} ${patch}"
        fi
      else
        ## Only the first 512 characters of the response body are
        ## logged.
        response_body="$(ghorg_body_of "${result}")"
        log warn "PATCH '${target_owner}/${repo}': HTTP '${status}': ${response_body:0:512}"
      fi
    fi
  fi

  ## Section 2: Actions on/off.
  if [ "${actions_state}" != 'leave' ]; then
    if [ "${actions_state}" = 'enable' ]; then
      enabled='true'
    else
      enabled='false'
    fi
    body="$(jq -n --argjson e "${enabled}" -- '{enabled: $e, allowed_actions: "all"}')"
    if [ "${dry_run}" = '1' ]; then
      log notice "DRY-RUN: actions ${actions_state} on ${target_owner}/${repo}"
    else
      result="$(ghorg_api PUT "/repos/${target_owner}/${repo}/actions/permissions" "${body}")"
      status="$(ghorg_status_of "${result}")"
      case "${status}" in
        ## Docs document 204 only; 200 is observed in practice. Accept
        ## both.
        204|200)
          if [ "${verbose}" = '1' ]; then
            log notice "actions=${actions_state} on ${target_owner}/${repo}"
          fi
          ;;
        *)
          response_body="$(ghorg_body_of "${result}")"
          log warn "actions ${actions_state} on ${target_owner}/${repo}: HTTP ${status}: ${response_body:0:512}"
          ;;
      esac
    fi
  fi

  ## Section 3: default GITHUB_TOKEN permissions for workflows.
  if [ "${workflow_perms}" != 'leave' ]; then
    body="$(jq -n --arg p "${workflow_perms}" -- \
      '{default_workflow_permissions: $p, can_approve_pull_request_reviews: false}')"
    if [ "${dry_run}" = '1' ]; then
      log notice "DRY-RUN: workflow-perms ${workflow_perms} on ${target_owner}/${repo}"
    else
      result="$(ghorg_api PUT \
        "/repos/${target_owner}/${repo}/actions/permissions/workflow" "${body}")"
      status="$(ghorg_status_of "${result}")"
      case "${status}" in
        ## Docs document 204 only; 200 is observed in practice. Accept
        ## both.
        204|200)
          if [ "${verbose}" = '1' ]; then
            log notice "workflow-perms=${workflow_perms} on ${target_owner}/${repo}"
          fi
          ;;
        *)
          response_body="$(ghorg_body_of "${result}")"
          log warn "workflow-perms ${workflow_perms} on ${target_owner}/${repo}: HTTP ${status}: ${response_body:0:512}"
          ;;
      esac
    fi
  fi

  return 0
}

## Fast-forward ${target_owner}/$1's default branch from upstream via
## POST /repos/{owner}/{repo}/merge-upstream:
##   200 - synced (or already up-to-date); .merge_type tells the form.
##   409 - upstream and fork diverged in a way merge-upstream cannot
##         fast-forward (someone pushed to the mirror directly).
##   422 - merge-upstream validation failed; the cause is not narrowed
##         down here, the response body is logged instead.
## Every non-200 status is logged as a warning so a single repo's
## divergence does not abort.
##
## Default branch via /repos/{owner}/{repo}.default_branch. Most are
## 'master', some 'main' or other; do not hard-code.
##
## Arguments: $1 - repository name below ${target_owner}.
## Globals:   target_owner, dry_run, verbose (all read-only).
## Returns:   0 after a sync attempt (including warned statuses and
##            dry-run), 1 when a ghorg_api call itself fails. The
##            status no longer depends on --verbose.
##
## WARNING: This function may cause GitHub to create merge commits
## remotely. It should therefore NEVER be used on official production
## repos.
sync_one() {
  local repo result status info default_branch body merge_type response_body

  repo="$1"

  if [ "${dry_run}" = '1' ]; then
    log notice "DRY-RUN: sync-branches ${target_owner}/${repo}"
    return 0
  fi

  info="$(ghorg_api GET "/repos/${target_owner}/${repo}")" || return 1
  status="$(ghorg_status_of "${info}")"
  if [ "${status}" != '200' ]; then
    log warn "sync-branches: lookup '${target_owner}/${repo}': HTTP '${status}'"
    return 0
  fi
  default_branch="$(ghorg_body_of "${info}" | jq -r -- '.default_branch')"
  ## NOTE: 'null' is a valid branch name. Probably not worth working
  ## around this.
  if [ -z "${default_branch}" ] || [ "${default_branch}" = 'null' ]; then
    log warn "sync-branches: '${target_owner}/${repo}' has no default branch"
    return 0
  fi
  ## Default branch is API-derived; passes into the request body (jq
  ## escapes safely) and into log() (which sanitizes internally).
  ##
  ## Use git's own ref-name validator instead of an inline allowlist.
  ## Branch names can legitimately contain '+' etc.
  ##
  ## 'git check-ref-format' does not support '--' as end-of-options on
  ## any of its forms. Validate as a full refname instead, prefixing
  ## 'refs/heads/' ourselves - the prefix ensures the resulting argv
  ## element never starts with '-'.
  if ! git check-ref-format "refs/heads/${default_branch}" >/dev/null 2>&1; then
    log warn "sync-branches: '${target_owner}/${repo}' default branch invalid: '${default_branch}'"
    return 0
  fi

  body="$(jq -n --arg b "${default_branch}" -- '{branch: $b}')"
  result="$(ghorg_api POST \
    "/repos/${target_owner}/${repo}/merge-upstream" "${body}")" || return 1
  status="$(ghorg_status_of "${result}")"
  case "${status}" in
    200)
      ## https://docs.github.com/en/rest/branches/branches?apiVersion=2026-03-10
      ## does not document any "merged_commits" return value for the
      ## merge-upstream endpoint.
      ##
      ## merge_type is only needed for the verbose log line, so it is
      ## extracted only then.
      if [ "${verbose}" = '1' ]; then
        merge_type="$(ghorg_body_of "${result}" | jq -r -- '.merge_type')"
        log notice \
          "synced: ${target_owner}/${repo}/${default_branch} merge_type='${merge_type}'"
      fi
      ;;
    409)
      response_body="$(ghorg_body_of "${result}")"
      log warn "sync-branches: '${target_owner}/${repo}/${default_branch}' diverged from upstream (HTTP 409): ${response_body:0:512}"
      ;;
    422)
      ## 422 means 'merge-upstream validation failed' - upstream
      ## branch missing, target not a fork, target not behind, etc.
      ## Don't claim a specific cause; let the operator inspect.
      response_body="$(ghorg_body_of "${result}")"
      log warn "sync-branches: '${target_owner}/${repo}/${default_branch}' could not be synced (HTTP 422 validation failed): ${response_body:0:512}"
      ;;
    *)
      response_body="$(ghorg_body_of "${result}")"
      log warn "sync-branches: '${target_owner}/${repo}/${default_branch}': HTTP '${status}': ${response_body:0:512}"
      ;;
  esac

  return 0
}

## Fill an associative array with repository names.
##
## Arguments: $1 - name of the caller's associative array; it is
##                 checked with check_variable_name before use.
##            $2 - repository names, one per line; empty lines are
##                 skipped.
## Every name becomes a key with the value '1'. Existing keys of the
## array are left in place.
## Returns:   1 when $1 is rejected by check_variable_name.
build_repo_set() {
  local names_blob entry
  local -a entries=()
  local -n repo_set_ref

  check_variable_name "$1" || return 1
  repo_set_ref="$1"
  names_blob="$2"

  ## The here-string always ends in a newline, so mapfile yields at
  ## least one (possibly empty) element.
  mapfile -t entries <<< "${names_blob}"
  for entry in "${entries[@]}"; do
    [ -n "${entry}" ] || continue
    repo_set_ref["${entry}"]='1'
  done
}

## Poll GET /repos/{owner}/{repo}/commits?per_page=1 until the fork
## is past the empty-repo state (a freshly-POSTed /forks repo
## returns 200 on /repos lookup but 409 on /commits until the
## initial mirror finishes). Bounded by
## GHORG_FORK_READY_TIMEOUT_SECONDS (falls back to 300s here when the
## variable is unset or empty); on timeout the caller's '|| true'
## moves on and the next idempotent run picks the configure up.
##
## Arguments: $1 - repository name below ${target_owner}.
## Globals:   target_owner, dry_run, verbose, GHORG_MOCK,
##            GHORG_FORK_READY_TIMEOUT_SECONDS (all read-only).
## Returns:   0 when the fork answers 200 (or when polling is skipped
##            for mock / dry-run mode), 1 on timeout. Dies when the
##            timeout is not a whole number.
wait_for_fork_ready() {
  local repo result status elapsed wait deadline now started timeout
  local remaining nap

  repo="$1"

  ## Mock-mode tests don't simulate async fork creation; skip so they
  ## don't have to ship a /commits fixture for every forked repo.
  if [ "${GHORG_MOCK:-false}" = 'true' ]; then
    return 0
  fi

  ## In dry-run mode no fork was created, so polling could only ever
  ## run into the timeout - once per repo.
  if [ "${dry_run}" = '1' ]; then
    return 0
  fi

  timeout="${GHORG_FORK_READY_TIMEOUT_SECONDS:-300}"
  is_whole_number "${timeout}" \
    || die 1 "GHORG_FORK_READY_TIMEOUT_SECONDS='${timeout}' not a whole number"

  started="$(date -u +%s)"
  deadline=$(( started + timeout ))
  wait=1
  while true; do
    ## A failing API call counts as "not ready yet".
    result="$(ghorg_api GET "/repos/${target_owner}/${repo}/commits?per_page=1")" \
      || result=''
    status="$(ghorg_status_of "${result}")"
    if [ "${status}" = '200' ]; then
      if [ "${verbose}" = '1' ]; then
        log notice "fork ready: ${target_owner}/${repo}"
      fi
      return 0
    fi
    now="$(date -u +%s)"
    if [ "${now}" -ge "${deadline}" ]; then
      log warn "fork-ready wait: '${target_owner}/${repo}' not ready after '${timeout}'s (last HTTP '${status}'); a later run will pick up the configure"
      return 1
    fi
    elapsed=$(( now - started ))
    ## Never sleep past the deadline; 'remaining' is at least 1 here
    ## because now < deadline.
    remaining=$(( deadline - now ))
    nap="${wait}"
    if [ "${nap}" -gt "${remaining}" ]; then
      nap="${remaining}"
    fi
    if [ "${verbose}" = '1' ]; then
      log info "fork-ready wait: ${target_owner}/${repo} HTTP '${status}' after '${elapsed}'s; retry in '${nap}'s"
    fi
    sleep -- "${nap}"
    ## Exponential back-off, capped at 30 seconds between polls.
    wait=$(( wait * 2 ))
    if [ "${wait}" -gt 30 ]; then
      wait=30
    fi
  done
}

## Orchestrate one run:
##   1. list the selected source repos and all target repos,
##   2. fork every source repo that has no same-named target repo,
##   3. in apply mode, wait until the new forks are usable,
##   4. re-list the target and run check_collision, configure_one and
##      (with --sync-branches) sync_one on every selected repo that
##      exists there.
##
## Globals: source_owner, target_owner, the include_* / *_re filters,
##          sync_branches, dry_run (all read-only).
##
## fork_one is called unguarded, so with errexit a failed fork ends
## the run; steps 3 and 4 are best-effort per repository.
main() {
  local target_type auth_user source_repos target_repos repo_name
  local -A target_set=()
  local -a missing=() to_configure=()

  ## sync_one calls 'git check-ref-format'.
  die_if_not_has git

  log notice "source: ${source_owner} -> target: ${target_owner}"

  ## If the target is a User, GitHub forks to the authenticated user
  ## regardless of target_owner. Verify equivalence up front.
  target_type="$(ghorg_account_type "${target_owner}")"
  if [ "${target_type}" = 'User' ]; then
    auth_user="$(ghorg_authenticated_user)"
    [ "${auth_user}" = "${target_owner}" ] \
      || die 1 "target user '${target_owner}' does not match auth user '${auth_user}'"
  fi

  source_repos="$(ghorg_list_repos "${source_owner}" \
    "${include_private}" "${include_archived}" "${include_forks}" \
    | ghorg_filter_names "${include_re}" "${exclude_re}" \
    | sort --unique)"
  ## Target side: do NOT exclude forks - target forks ARE the mirrors.
  target_repos="$(ghorg_list_repos "${target_owner}" 'true' 'true' 'true' \
    | sort --unique)"

  if [ -z "${source_repos}" ]; then
    log notice 'no source repos matched.'
    return 0
  fi

  ## missing = selected source repos without a same-named target repo.
  build_repo_set target_set "${target_repos}"
  while IFS= read -r repo_name; do
    [ -n "${repo_name}" ] || continue
    if [ -z "${target_set[${repo_name}]:-}" ]; then
      missing+=( "${repo_name}" )
    fi
  done <<< "${source_repos}"

  if [ "${#missing[@]}" -eq 0 ]; then
    log notice 'no new forks needed.'
  else
    for repo_name in "${missing[@]}"; do
      fork_one "${repo_name}"
    done
    ## Readiness polling only makes sense for forks that were really
    ## requested. In dry-run mode fork_one created nothing, so polling
    ## would just wait for the timeout once per repo.
    if [ "${dry_run}" != '1' ]; then
      for repo_name in "${missing[@]}"; do
        wait_for_fork_ready "${repo_name}" || true
      done
    fi
  fi

  ## Re-list the target so that freshly created forks are included.
  target_repos="$(ghorg_list_repos "${target_owner}" 'true' 'true' 'true' \
    | sort --unique)"

  target_set=()
  build_repo_set target_set "${target_repos}"
  while IFS= read -r repo_name; do
    [ -n "${repo_name}" ] || continue
    if [ -n "${target_set[${repo_name}]:-}" ]; then
      to_configure+=( "${repo_name}" )
    fi
  done <<< "${source_repos}"

  ## Best-effort: a single repo's check_collision or configure_one
  ## failing must not abort the whole run. Common transient cause:
  ## right after POST /forks returns 202, the fork's Actions subsystem
  ## may not yet accept PUT /actions/permissions and returns 404/403.
  ## Without '|| true', errexit would skip every subsequent repo. A
  ## second run picks failures up idempotently.
  if [ "${#to_configure[@]}" -gt 0 ]; then
    for repo_name in "${to_configure[@]}"; do
      check_collision "${repo_name}" || true
      configure_one "${repo_name}" || true
      if [ "${sync_branches}" = 'true' ]; then
        sync_one "${repo_name}" || true
      fi
    done
  fi
}

## Entry point. Option parsing and argument validation above have
## already run; main takes no arguments and reads the globals they
## set.
main
