#!/bin/bash
# Copyright 2020 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Given a project, a GCE instance ID, and an Ops agent policy ID, this script
# automatically collects the information needed to help diagnose issues with
# the policy:
#
# * The OS Config agent version
# * The underlying OS Config guest policy
# * The policies that are applicable to this GCE instance
# * The agent package repos that are pulled onto a GCE instance
#
# Sample usage:
#    bash diagnose.sh --project-id=PROJECT_ID --gce-instance-id=GCE_INSTANCE_ID --policy-id=POLICY_ID > diagnose.txt
#
# Ignore the return code of command substitution in variables.
# shellcheck disable=SC2155

# Exit immediately when a command returns a non-zero status, unless that
# status is consumed by a condition or an `||` / `&&` list.
set -e

# Print the one-line command synopsis on stdout.
# Arguments: none are read; anything passed in is ignored.
show_usage() {
  printf '%s\n' "Usage: bash diagnose.sh --project-id=PROJECT_ID --gce-instance-id=GCE_INSTANCE_ID --policy-id=POLICY_ID"
}

# Require at least three arguments (one per mandatory option). Usage text is
# written to stderr so it stays visible when stdout is redirected to a file,
# as in the sample invocation.
if [[ $# -le 2 ]]; then
  show_usage 0 >&2
  exit 1
fi

# Normalize the command line with getopt. The assignment is tested directly by
# the `if`, so a getopt failure reaches the error message below instead of
# terminating the script first because of `set -e`.
if ! OPTS="$(getopt -o vhns: --long project-id:,gce-instance-id:,policy-id: -n 'policy-diagnose' -- "$@")"; then
  echo "Failed parsing options." >&2
  exit 1
fi

# Print the normalized option string on stdout, then install it as the
# positional parameters (getopt emits shell-quoted words, hence the eval).
echo "$OPTS"
eval set -- "$OPTS"

# Consume the recognized long options; stop at the `--` separator or at the
# first word that is not one of them.
while true; do
  case "$1" in
    --project-id)
      PROJECT_ID="$2"; shift 2 ;;
    --gce-instance-id)
      GCE_INSTANCE_ID="$2"; shift 2 ;;
    --policy-id)
      POLICY_ID="$2"; shift 2 ;;
    --) shift; break ;;
    *) break ;;
  esac
done

# The argument-count check above does not guarantee that each option was
# actually supplied (e.g. `--project-id=p x y`), so verify every value before
# any gcloud command is run with an empty argument.
if [[ -z "${PROJECT_ID:-}" || -z "${GCE_INSTANCE_ID:-}" || -z "${POLICY_ID:-}" ]]; then
  echo "Missing required option: --project-id, --gce-instance-id and --policy-id must all be set." >&2
  show_usage 0 >&2
  exit 1
fi

# Each step prints a heading and then the raw output of the command(s) it
# runs. Exit statuses that are an expected part of a diagnosis (no grep match,
# inactive service, missing repo file) are tolerated so that `set -e` does not
# cut the report short; other gcloud/ssh failures still stop the script.

echo "Step 1: Get the OS Config agent version on the problematic GCE instances."
# grep exits non-zero when nothing matches; report that and carry on.
gcloud beta compute instances os-inventory \
    --project "$PROJECT_ID" \
    describe "$GCE_INSTANCE_ID" \
    | grep OSConfigAgentVersion \
    || echo "OSConfigAgentVersion was not found in the OS inventory output."

echo "Step 2: Get the underlying guest policy."
gcloud beta compute os-config guest-policies describe \
    --project "$PROJECT_ID" \
    "$POLICY_ID"

echo "Step 3: Look up the policies that are applicable to this specific GCE instance."
gcloud beta compute os-config guest-policies \
    --project "$PROJECT_ID" \
    lookup "$GCE_INSTANCE_ID"

echo "Step 4: Get the OS Config agent status."
# `systemctl status` exits non-zero when the unit is not active or not found.
# Its output is still wanted in that case, so the remote command ignores the
# status (an ssh connection failure is still fatal).
gcloud compute ssh "$GCE_INSTANCE_ID" \
    --project "$PROJECT_ID" \
    -- "systemctl status google-osconfig-agent || true"

echo "Step 5: Get the OS Config agent log."
# Only one of the two log files normally exists, depending on the distro; a
# grep without matches is not an error.
gcloud compute ssh "$GCE_INSTANCE_ID" \
    --project "$PROJECT_ID" \
    -- "if [ -f '/var/log/messages' ]; then sudo grep 'OSConfigAgent\|google-fluentd\|stackdriver-agent' /var/log/messages || true; fi"
gcloud compute ssh "$GCE_INSTANCE_ID" \
    --project "$PROJECT_ID" \
    -- "if [ -f '/var/log/syslog' ]; then sudo grep 'OSConfigAgent\|google-fluentd\|stackdriver-agent' /var/log/syslog || true; fi"

echo "Step 6: Get the agent package repos that are pulled on to a GCE instance."
# A missing repo file is itself a finding: let cat print its error message but
# do not abort the remaining steps.
gcloud compute ssh "$GCE_INSTANCE_ID" \
    --project "$PROJECT_ID" \
    -- "if [ -d '/etc/yum.repos.d' ]; then sudo cat /etc/yum.repos.d/google_osconfig_managed.repo || true; fi"
# apt source files in sources.list.d use the `.list` suffix (not `.repo`).
gcloud compute ssh "$GCE_INSTANCE_ID" \
    --project "$PROJECT_ID" \
    -- "if [ -d '/etc/apt/sources.list.d' ]; then sudo cat /etc/apt/sources.list.d/google_osconfig_managed.list || true; fi"

echo "Step 7: Get the logging and monitoring agent status."
# The commands are chained with `;`, so the remote exit status is that of the
# last one, which is forced to success by `|| true`.
gcloud compute ssh "$GCE_INSTANCE_ID" \
    --project "$PROJECT_ID" \
    -- "sudo service google-fluentd status; sudo service stackdriver-agent status; rpm --query google-fluentd; rpm --query stackdriver-agent || true"

echo "Step 8: Ensure gcloud command tool contains the ops-agents command group."
if ! gcloud beta compute instances ops-agents --help; then
  echo "You must upgrade your gcloud command tool - use command: gcloud components update"
fi
