#!/usr/bin/env bash
#/ Usage: ghe-restore-repositories <host>
#/ Restore repositories from an rsync snapshot of all Git repository data.
#/
#/ Note: This script typically isn't called directly. It's invoked by the
#/ ghe-restore command.
set -e

# Load the backup configuration that lives alongside this script
# shellcheck source=share/github-backup-utils/ghe-backup-config
source "$(dirname "${BASH_SOURCE[0]}")/ghe-backup-config"

# Make sure moreutils parallel is installed and working before doing any work
ghe_parallel_check

# No arguments at all: show usage and bail
if [ -z "$*" ]; then
  print_usage
fi

bm_start "$(basename $0)"

# The first argument is the host to restore to
GHE_HOSTNAME="$1"

# ghe-restore normally sets the snapshot to restore; fall back to "current"
# so this script can also be run directly.
GHE_RESTORE_SNAPSHOT="${GHE_RESTORE_SNAPSHOT:-current}"

# Collect the repository network directories found in the snapshot, one per
# line and relative to the snapshot's "repositories" directory
# (e.g. a/nw/a8/3f/02/100000855):
#   - find every *.git entry 6 to 7 levels below "repositories",
#   - keep the directory containing it (dirname) and drop adjacent duplicates,
#   - keep only lines containing "nw" and strip the leading "repositories/".
# The snapshot path is quoted so that a data directory containing whitespace
# or glob characters is passed to cd as a single word.
network_paths=$(
  cd "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/" &&
    find repositories -mindepth 6 -maxdepth 7 -name '*.git' -exec dirname {} \; |
    uniq |
    grep nw |
    cut -d / -f2-
)

# Nothing to restore when the snapshot holds no repository networks.
if [ -z "$network_paths" ]; then
  log_warn "Warning: Repositories backup missing. Skipping ..."
  exit 0
fi

increment-progress-total-count 5

# Run the remote host check; this also establishes the GHE_REMOTE_XXX variables.
ghe_remote_version_required "$GHE_HOSTNAME"
ghe_remote_parallel

# Generate SSH config for forwarding
# Break the host:port argument into its two parts
port=$(ssh_port_part "$GHE_HOSTNAME")
host=$(ssh_host_part "$GHE_HOSTNAME")

# Login user for the -l option: whatever precedes the last "@" in the host
# part, or "admin" when the host part carries no user prefix.
case "$host" in
  *@*) user="${host%@*}" ;;
  *)   user="admin" ;;
esac

# Scratch space: one temporary directory locally and one on the remote host
tempdir=$(mktemp -d -t backup-utils-restore-XXXXXX)
remote_tempdir=$(ghe-ssh "$GHE_HOSTNAME" -- mktemp -d -t backup-utils-restore-XXXXXX)

# Work files inside the local scratch directory
tmp_list="$tempdir/tmp_list"
to_restore="$tempdir/to_restore"
routes_list="$tempdir/routes_list"

# Work files inside the remote scratch directory
remote_tmp_list="$remote_tempdir/remote_tmp_list"
remote_to_restore="$remote_tempdir/remote_to_restore"
remote_routes_list="$remote_tempdir/remote_routes_list"
remote_warnings="$remote_tempdir/repo_warnings"

# Single-host defaults; the cluster branch below replaces them
hostnames=$host
ssh_config_file_opt=
opts="$GHE_EXTRA_SSH_OPTS"

if $CLUSTER; then
  # In a cluster, talk to every git-server node through a generated ssh
  # config file and relax the host key / password options accordingly.
  ssh_config_file="$tempdir/ssh_config"
  ssh_config_file_opt="-F $ssh_config_file"
  opts+=" -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -o PasswordAuthentication=no"
  hostnames=$(ghe-cluster-find-nodes "$GHE_HOSTNAME" "git-server")
  ghe-ssh-config "$GHE_HOSTNAME" "$hostnames" > "$ssh_config_file"
fi

#######################################
# Exit handler: re-enable remote GC and remove the scratch directories.
# Globals (read): hostnames, port, ssh_config_file_opt, GHE_HOSTNAME,
#                 remote_tempdir, tempdir
# Arguments: none
# Returns:   exit status of the final local rm
#
# Every remote step is best-effort. The script runs under `set -e`, which is
# also in effect while the EXIT trap executes, so an unguarded failure of the
# remote rm would abort the handler and leave the local temp dir behind.
#######################################
cleanup() {
  local hostname

  for hostname in $hostnames; do
    # $ssh_config_file_opt is intentionally unquoted: it is either empty or
    # "-F <file>" and must split into two words.
    ghe-gc-enable $ssh_config_file_opt "$hostname:$port" || true
  done

  # Remote scratch directory: ignore failures (e.g. host unreachable) so the
  # local cleanup below always runs.
  ghe-ssh "$GHE_HOSTNAME" -- rm -rf "$remote_tempdir" || true

  # Quoted so a temp path containing whitespace is removed as one operand.
  rm -rf -- "$tempdir"
}
# Install the exit handler first so that cleanup (which re-enables GC) runs
# on every exit path from here on.
trap cleanup EXIT

# Turn off remote GC operations on each target host for the duration of the
# restore.
for gc_host in $hostnames; do
  ghe-gc-disable $ssh_config_file_opt $gc_host:$port
done

# Build a list of network paths to send to the server to calculate
# the restore routes, something like:
#
# a/nw/a8/3f/02/100000855
# a/nw/a8/bc/8d/100000880
# a/nw/a5/06/81/100000659
# a/nw/a5/84/6f/100000708
# a/nw/a5/e0/01/146
# ...
#
# One network path per line.
bm_start "$(basename $0) - Building network list"
# Split $network_paths on newlines only while iterating; IFS is restored
# right after the loop.
OLDIFS=$IFS; IFS=$'\n'
for path in $network_paths; do
  ghe_verbose "* Adding network_path $path to the list of networks to send"
  # Write the path verbatim. A quoted printf avoids the glob expansion an
  # unquoted `echo $path` would apply to the value.
  printf '%s\n' "$path"
done > "$tmp_list"
IFS=$OLDIFS
bm_end "$(basename $0) - Building network list"

# In cluster environments, we need to ensure that all repository networks are replicated back to the
# same Spokes nodes that they were present on when the backup was taken. For this, the list of
# routes of each repository network is first obtained. Afterward, an rsync file list is created for
# each Spokes node including only those repository networks for which there was a route to the
# respective Spokes node.
if $CLUSTER; then
  log_info "* Restoring repository networks to cluster nodes according to Spokes routes" 1>&3

  # The server returns a list of routes:
  #
  # a/nw/a8/3f/02/100000855 dgit-node1 dgit-node2 dgit-node3
  # a/nw/a8/bc/8d/100000880 dgit-node1 dgit-node2 dgit-node4
  # a/nw/a5/06/81/100000659 dgit-node3 dgit-node2 dgit-node4
  # ...
  #
  # One route per line.
  #
  # NOTE: The route generation is performed on the appliance as it is considerably
  # more performant than performing over an SSH pipe.
  #
  # Step 1: upload the network list to the remote scratch directory.
  bm_start "$(basename $0) - Transferring network list"
  cat $tmp_list | ghe-ssh "$GHE_HOSTNAME" -- sponge $remote_tmp_list
  cat $tmp_list | ghe_debug
  bm_end "$(basename $0) - Transferring network list"

  # Step 2: compute the routes remotely. The dedicated
  # ghe-restore-network-routes script is used when the appliance ships it,
  # otherwise the dgit-cluster-restore-routes fallback. Only lines mentioning
  # a "git-server-" node are kept.
  bm_start "$(basename $0) - Generating routes"
  restore_routes_script="github-env ./bin/dgit-cluster-restore-routes"
  if ghe-ssh "$GHE_HOSTNAME" test -e /usr/local/share/enterprise/ghe-restore-network-routes; then
    restore_routes_script="/usr/local/share/enterprise/ghe-restore-network-routes"
  fi
  echo "cat $remote_tmp_list | $restore_routes_script | grep 'git-server-' > $remote_routes_list" | ghe-ssh "$GHE_HOSTNAME" -- /bin/bash
  ghe-ssh "$GHE_HOSTNAME" -- cat $remote_routes_list | ghe_debug
  bm_end "$(basename $0) - Generating routes"

  # Step 3: download the routes, gzip-compressed on the wire.
  bm_start "$(basename $0) - Fetching routes"
  ghe-ssh "$GHE_HOSTNAME" -- gzip -c $remote_routes_list | gzip -d > $routes_list
  cat $routes_list | ghe_debug
  bm_end "$(basename $0) - Fetching routes"

  # Step 4: turn the routes into work files.
  bm_start "$(basename $0) - Processing routes"

  # One rsync file list per node: every network path ($1) is written to
  # "$tempdir/<node>.rsync" for each node named in fields 2..NF.
  cat $routes_list | awk -v tempdir="$tempdir" '{ for(i=2;i<=NF;i++){ print $1 > (tempdir"/"$i".rsync") }}'
  # Restore list: "<network id> /data/repositories/<path> <node> <node> ...",
  # where the network id is the last component of the path. The path data is
  # passed to printf as arguments rather than as the format string, so a "%"
  # in a path cannot be misread as a conversion specifier.
  cat $routes_list | awk '{ n = split($1, p, "/"); printf "%s /data/repositories/%s", p[n], $1; $1=""; print $0 }' > $to_restore
  ghe_debug "\n$(find "$tempdir" -maxdepth 1 -name '*.rsync')"
  bm_end "$(basename $0) - Processing routes"
# There is no need to collect routes and split them by Spokes server in noncluster setups because
# we need to transfer all repository networks to the primary instance unconditionally, regardless of
# the Spokes route list captured during the backup. As we already have the list of all repository
# network paths, we can simply use that as the rsync file list in noncluster environments.
else
  log_info "* Restoring all repository networks to target host unconditionally" 1>&3

  cp "$tmp_list" "$tempdir/git-server-primary.rsync"
fi

# Continue only when at least one rsync file list was produced in the local
# scratch directory; otherwise there is nothing to transfer.
if [ -n "$(find "$tempdir" -maxdepth 1 -name '*.rsync')" ]; then
  increment-progress-total-count 3
else
  log_warn "Warning: no routes found, skipping repositories restore ..."
  exit 0
fi

# rsync all the repository networks to the git server where they belong.
# One rsync invocation per server available.
#
# Each element of rsync_commands is a self-contained shell snippet that is
# executed later (via eval or the parallel command). Values such as $server,
# $file_list, $opts and $port are expanded now, while the snippet is built.
# The timestamps are escaped (\$(date ...)) so they are evaluated when the
# snippet runs; expanded at build time, BEGIN and END would both carry the
# same instant from before the transfer started.
bm_start "$(basename $0) - Restoring repository networks"
rsync_commands=()
for file_list in "$tempdir"/git-server-*.rsync; do
  if $CLUSTER; then
    # In a cluster the file list is named after the node it belongs to.
    server=$(basename "$file_list" .rsync)
  else
    server=$host
  fi

  rsync_commands+=("
    if [ -n \"$GHE_VERBOSE\" ]; then
      echo \"* Transferring repository networks to $server ($file_list) ...\" 1>&3
    fi
    echo \"\$(date -u '+%FT%TZ') RSYNC BEGIN: repositories rsync\" 1>&3
    ghe-rsync -avrR --delete \
      -e \"ssh -q $opts -p $port $ssh_config_file_opt -l $user\" \
      --rsync-path=\"sudo -u git rsync\" \
      --files-from=$file_list \
      \"$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/repositories/./\" \
      \"$server:$GHE_REMOTE_DATA_USER_DIR/repositories/\" 1>&3
    echo \"\$(date -u '+%FT%TZ') RSYNC END: repositories rsync\" 1>&3
    ")
done

# Execute the prepared rsync snippets: hand them all to the parallel command
# when parallel mode is enabled, otherwise evaluate them one after another in
# this shell.
case "$GHE_PARALLEL_ENABLED" in
  yes)
    "$GHE_PARALLEL_COMMAND" "${GHE_PARALLEL_RSYNC_COMMAND_OPTIONS[@]}" -- "${rsync_commands[@]}"
    ;;
  *)
    for rsync_command in "${rsync_commands[@]}"; do
      eval "$rsync_command"
    done
    ;;
esac

bm_end "$(basename $0) - Restoring repository networks"

# Tell dgit about the repositories restored
#
# Cluster only. The restore list in $to_restore is uploaded to the remote
# scratch directory and a script is fed to a remote bash over stdin, which:
#   1. splits the list into chunk files of 1000 lines each,
#   2. collects the chunk file names,
#   3. pipes every chunk into dgit-cluster-restore-finalize through
#      $PARALLEL_CMD, appending stderr to $remote_warnings (that file is read
#      back near the end of this script).
# The heredoc delimiter is unquoted, so $remote_to_restore, $remote_tempdir,
# $PARALLEL_CMD and $remote_warnings are expanded locally before sending,
# while the backslash-escaped \$(find ...) and \$chunks are evaluated by the
# remote shell.
# NOTE(review): $PARALLEL_CMD is not assigned in this file - presumably set
# by ghe_remote_parallel; confirm.
if $CLUSTER; then
  bm_start "$(basename $0) - Finalizing routes"
  ghe_verbose "Finalizing routes"
  cat $to_restore | ghe-ssh "$GHE_HOSTNAME" -- sponge $remote_to_restore
  ghe-ssh "$GHE_HOSTNAME" -- /bin/bash >&3 <<EOF
    split -l 1000 $remote_to_restore $remote_tempdir/chunk
    chunks=\$(find $remote_tempdir/ -name chunk\*)
    $PARALLEL_CMD -i /bin/sh -c "cat {} | github-env ./bin/dgit-cluster-restore-finalize 2>>$remote_warnings" -- \$chunks
EOF
  increment-progress-total-count 1
  bm_end "$(basename $0) - Finalizing routes"
fi

# Bring /data/repositories/info on the target in line with the snapshot:
# when the snapshot contains an info directory it is rsynced to every host,
# otherwise the remote info files are removed.
bm_start "$(basename $0) - Updating repository info data"
# The path is quoted: unquoted, a data directory containing whitespace makes
# the test error out, which would be treated as "no info directory" and send
# us down the branch that deletes the remote info data.
if [ -d "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/repositories/info" ]; then
  ghe_verbose "* Transferring repository info data"
  for hostname in $hostnames; do
    # Best-effort per host: a failed transfer is reported on stderr and the
    # loop moves on to the next host.
    if ! ghe-rsync -av --delete \
      -e "ssh -q $opts -p $port $ssh_config_file_opt -l $user" \
      --rsync-path="sudo -u git rsync" \
      "$GHE_DATA_DIR/$GHE_RESTORE_SNAPSHOT/repositories/info/" \
      "$hostname:$GHE_REMOTE_DATA_USER_DIR/repositories/info" 1>&3; then
      echo "Error restoring /data/repositories/info to $hostname" 1>&2
    fi
  done
else
  ghe_verbose "* Removing repository info data"
  # The glob is quoted so the local shell never expands it against the local
  # filesystem; it is passed through unchanged for the remote side to expand.
  if $CLUSTER; then
    ghe-ssh "$GHE_HOSTNAME" ghe-cluster-each -r git -- rm -f '/data/repositories/info/*'
  else
    ghe-ssh "$GHE_HOSTNAME" -- sudo -u git rm -f '/data/repositories/info/*'
  fi
fi
bm_end "$(basename $0) - Updating repository info data"

# Read back whatever was collected in the remote warnings file. Errors from
# the remote cat (for instance when the file does not exist) are silenced and
# do not abort the script.
restore_warnings=$(ghe-ssh "$GHE_HOSTNAME" -- cat "$remote_warnings" 2>/dev/null) || true

# Surface the collected warnings, if any, after a summary line.
if [ -n "$restore_warnings" ]; then
  log_warn "Warning: One or more repository networks failed to restore successfully. Please contact GitHub Enterprise Support for assistance."
  echo "$restore_warnings"
fi

bm_end "$(basename $0)"
