#!/usr/bin/env bash

# This script checks a project, or every project of an organization, for
# Dataproc clusters with unsupported image versions (1.0, 1.1, 1.2, 1.3, 1.4,
# 1.5, and 2.0.0 through 2.0.26).
#
# Usage examples:
#   check-unsupported-dataproc-clusters.sh --project my-project
#   check-unsupported-dataproc-clusters.sh --organization 123456
#
# Notes:
# To run this script, you need the required IAM permissions to list and get
# clusters in the project. If running against an organization, you also need the
# permission to list projects in the organization.

# Exactly two arguments are required: one flag and its value. Anything else is
# an invalid invocation, so the usage text goes to stderr and the script exits
# with status 1.
if [[ "$#" -ne 2 ]]; then
  cat >&2 <<EOF
Usage: check-unsupported-dataproc-clusters.sh <options...>
Options (mutually exclusive)
  --project <project-name>: for a single project
  --organization <organization-id>: for all the projects of the organization
EOF
  exit 1
fi

# Accumulators filled in while clusters are being checked.
total_cluster_count=0
unsupported_clusters=()

echo "Listing regions"
# No --project flag is passed here, so gcloud resolves the project from its
# active configuration when enumerating the Compute Engine regions.
regions=()
if region_names="$(gcloud compute regions list --format 'value(NAME)')"; then
  # Split the output on whitespace without pathname expansion. "read -d ''"
  # consumes the whole input and returns non-zero at end of input even though
  # the array has been filled, hence the "|| true".
  read -r -d '' -a regions <<<"${region_names}" || true
else
  echo "WARNING: could not list compute regions; only the 'global' region will be checked" >&2
fi
# Dataproc clusters can also live in the special 'global' region.
regions+=('global')
echo "Found ${#regions[@]} regions"
echo ""

# Tells whether a Dataproc image version is unsupported.
#
# Unsupported versions are 1.0 through 1.5 (with or without a subminor) and
# 2.0.0 through 2.0.26.
#
# Arguments:
#   $1 - image version string, e.g. "1.3.95-debian10"
# Returns:
#   0 if the version is unsupported, 1 otherwise (including empty or
#   unrecognized version strings).
function is_unsupported_version() {
  local -r version="$1"

  # Image versions ending in "-s8s-spark" are always treated as supported.
  if [[ "${version}" == *-s8s-spark ]]; then
    return 1
  fi

  # Drop the OS/flavor suffix: "1.3.95-debian10" -> "1.3.95"
  local -r numeric="${version%%-*}"

  case "${numeric}" in
    1.[0-5] | 1.[0-5].*)
      # Every 1.0 - 1.5 image is unsupported, whatever the subminor.
      return 0
      ;;
    2.0.*)
      # "2.0.26" -> "26"
      local -r subminor="${numeric##*.}"
      # Compare only when the subminor is made of digits, so that arbitrary
      # text is never evaluated as an arithmetic expression. "10#" forces
      # base 10 so that a leading zero ("08") is not parsed as octal.
      if [[ "${subminor}" =~ ^[0-9]+$ ]] && ((10#${subminor} < 27)); then
        return 0
      fi
      ;;
  esac

  return 1
}

# Checks the image version of every Dataproc cluster of one project, region by
# region, and records the clusters whose version is unsupported.
#
# Globals:
#   regions              (read)     regions to scan
#   total_cluster_count  (updated)  incremented by the number of clusters found
#   unsupported_clusters (appended) one description per unsupported cluster
# Arguments:
#   $1 - project ID
# Outputs:
#   Progress messages on stdout; a warning on stderr when the image version of
#   a cluster cannot be read.
function check_clusters_in_project() {
  local -r project="$1"
  # Loop variables are local so they cannot clobber same-named variables of
  # the caller.
  local region
  local cluster
  local clusters
  local version

  for region in "${regions[@]}"; do
    echo "Listing clusters in project=${project} region=${region}"
    # gcloud's stderr is discarded here, so a region where the listing fails is
    # reported as having 0 clusters.
    # The output is split on whitespace without pathname expansion; read
    # returns non-zero at end of input even though the array has been filled.
    clusters=()
    read -r -d '' -a clusters < <(gcloud dataproc clusters list \
      --project "${project}" \
      --region "${region}" \
      --format "value(NAME)" \
      --quiet 2>/dev/null) || true
    echo "Found ${#clusters[@]} cluster(s) in project=${project} region=${region}"
    total_cluster_count=$((total_cluster_count + ${#clusters[@]}))

    for cluster in "${clusters[@]}"; do
      echo "Checking cluster ${cluster} in project=${project} region=${region}"
      version="$(gcloud dataproc clusters describe "${cluster}" \
        --project "${project}" \
        --region "${region}" \
        --format 'value(config.softwareConfig.imageVersion)' 2>/dev/null)"
      # An empty version means the describe call produced nothing; such a
      # cluster cannot be classified, so say so instead of staying silent.
      if [[ -z "${version}" ]]; then
        echo "WARNING: could not determine the image version of cluster ${cluster} in project=${project} region=${region}" >&2
      fi
      if is_unsupported_version "${version}"; then
        unsupported_clusters+=("project=${project}, region=${region}, cluster=${cluster}, version=${version}")
      fi
      echo "Checked cluster ${cluster} version=${version} in project=${project} region=${region}"
    done
    echo "Checked ${#clusters[@]} cluster(s) in project=${project} region=${region}"
    echo ""
  done
}

# Checks the Dataproc clusters of every project found in an organization.
#
# Arguments:
#   $1 - organization ID
# Outputs:
#   Progress messages on stdout; an error on stderr when the projects cannot
#   be listed.
# Returns:
#   1 if the project listing command fails; otherwise the status of the last
#   project check (0 when no project is found).
function check_clusters_in_organization() {
  local -r org="$1"
  # Local so the loop cannot clobber a same-named variable of the caller.
  local project
  local project_ids
  local projects=()

  echo "Listing projects in organization ${org}"
  if ! project_ids="$(gcloud asset search-all-resources \
      --asset-types="cloudresourcemanager.googleapis.com/Project" \
      --scope="organizations/${org}" \
      --format='value(additionalAttributes.projectId)')"; then
    # Without the project list nothing can be checked; do not report this as
    # "0 projects found".
    echo "ERROR: could not list the projects of organization ${org}" >&2
    return 1
  fi
  # Split the output on whitespace without pathname expansion; read returns
  # non-zero at end of input even though the array has been filled.
  read -r -d '' -a projects <<<"${project_ids}" || true
  echo "Found ${#projects[@]} project(s) in organization ${org}"
  echo ""

  for project in "${projects[@]}"; do
    check_clusters_in_project "${project}"
  done
}

# Dispatches on the flag given on the command line, runs the matching check and
# prints the list of clusters with an unsupported image version.
#
# Globals:
#   unsupported_clusters (read)
# Arguments:
#   $1 - "--project" or "--organization"
#   $2 - project ID or organization ID
# Outputs:
#   The summary on stdout; the unknown-flag error on stderr.
# Exits with status 1 on an unknown flag or when the selected check fails.
function main() {
  # Local so the loop cannot clobber a same-named variable of the caller.
  local unsupported

  case "$1" in
    --project)
      check_clusters_in_project "$2" || exit 1
      ;;
    --organization)
      check_clusters_in_organization "$2" || exit 1
      ;;
    *)
      echo "Unknown flag: $1" >&2
      exit 1
      ;;
  esac

  echo "Finished checking, found ${#unsupported_clusters[@]} cluster(s) with unsupported image version"
  for unsupported in "${unsupported_clusters[@]}"; do
    echo "${unsupported}"
  done
}

# Script entry point: pass the command-line arguments to main unchanged.
main "$@"

