TerraformでLinux Azure VMにHeartbeatの死活監視を組み込む

タイトルの件、検索してもあまり見つからなかったので書き残しておきます。

いきなりTerraformの定義です。ドキュメントを元に作ってます。

www.terraform.io

# Input variables. All three are plain strings supplied by the caller
# (e.g. via terraform.tfvars or TF_VAR_* environment variables).

# Consumed by the azurerm provider as subscription_id.
variable "azure_subscription_id" {
  type        = string
  description = "ID of the Azure subscription to deploy into."
}

# Consumed by the azurerm provider as tenant_id.
variable "azure_tenant_id" {
  type        = string
  description = "ID of the Azure AD tenant that owns the subscription."
}

# Consumed by the action group's email_receiver.
variable "mail_address" {
  type        = string
  description = "E-mail address that receives the heartbeat alert notifications."
}

# Require a Terraform CLI newer than 0.12.23.
terraform {
  required_version = ">0.12.23"
}

# azurerm provider, authenticated against the subscription/tenant passed in
# through the input variables.
provider "azurerm" {
  # No provider feature overrides are configured.
  features {}

  tenant_id       = var.azure_tenant_id
  subscription_id = var.azure_subscription_id

  # Accept any provider release newer than 2.7.0.
  version = ">2.7.0"
}


# Resource group that holds every resource created by this configuration.
resource "azurerm_resource_group" "example" {
  location = "Japan East"
  name     = "example-resources"
}

# Random 8-byte value; its decimal form is used as a suffix for the
# workspace name below.
resource "random_id" "example" {
  byte_length = 8
}

# Log Analytics workspace that the VM agent reports to and that the
# heartbeat metric alert is scoped to.
resource "azurerm_log_analytics_workspace" "example" {
  name = "example-log-${random_id.example.dec}"
  sku  = "PerGB2018"

  # Placed in the same resource group and region as everything else.
  location            = azurerm_resource_group.example.location
  resource_group_name = azurerm_resource_group.example.name
}

# Action group triggered by the heartbeat alert: a single e-mail receiver
# addressed to var.mail_address.
resource "azurerm_monitor_action_group" "example" {
  resource_group_name = azurerm_resource_group.example.name
  name                = "example-alert-action-group"
  short_name          = "examplealert"

  email_receiver {
    email_address           = var.mail_address
    name                    = "example"
    use_common_alert_schema = true
  }
}

# Virtual network (10.0.0.0/16) for the monitored VM.
resource "azurerm_virtual_network" "example" {
  name          = "example-network"
  address_space = ["10.0.0.0/16"]

  resource_group_name = azurerm_resource_group.example.name
  location            = azurerm_resource_group.example.location
}

# Subnet (10.0.2.0/24) inside the example virtual network; the VM's NIC is
# attached here.
resource "azurerm_subnet" "example" {
  name = "internal"

  # NOTE(review): newer azurerm releases appear to deprecate address_prefix in
  # favour of address_prefixes — confirm against the provider version in use.
  address_prefix = "10.0.2.0/24"

  virtual_network_name = azurerm_virtual_network.example.name
  resource_group_name  = azurerm_resource_group.example.name
}

# Network interface for the VM, with a dynamically allocated private IP on
# the "internal" subnet. No public IP is configured here.
resource "azurerm_network_interface" "example" {
  name                = "example-nic"
  resource_group_name = azurerm_resource_group.example.name
  location            = azurerm_resource_group.example.location

  ip_configuration {
    name                          = "internal"
    private_ip_address_allocation = "Dynamic"
    subnet_id                     = azurerm_subnet.example.id
  }
}

# The Linux VM whose heartbeat is monitored. Its name is also used as the
# "Computer" dimension value in the metric alert, so renaming the VM changes
# what the alert filters on.
resource "azurerm_linux_virtual_machine" "example" {
  name                = "example-machine"
  resource_group_name = azurerm_resource_group.example.name
  location            = azurerm_resource_group.example.location
  size                = "Standard_B1s"
  admin_username      = "adminuser"
  network_interface_ids = [
    azurerm_network_interface.example.id,
  ]

  # SSH public key installed for the admin user.
  admin_ssh_key {
    username = "adminuser"
    # Fixed: the key is read from the conventional ~/.ssh directory
    # (the original path "~/ssh/id_rsa.pub" was missing the leading dot).
    public_key = file("~/.ssh/id_rsa.pub")
  }

  os_disk {
    caching              = "ReadWrite"
    storage_account_type = "Standard_LRS"
  }

  # Ubuntu Server 16.04 LTS marketplace image, latest published version.
  source_image_reference {
    publisher = "Canonical"
    offer     = "UbuntuServer"
    sku       = "16.04-LTS"
    version   = "latest"
  }
}

# Installs the OMS (Log Analytics) agent on the VM and connects it to the
# workspace, which is what makes the VM emit Heartbeat records.
resource "azurerm_virtual_machine_extension" "example" {
  name = "OmsAgentForLinux"

  # Referencing the VM here already orders this extension after the VM, so no
  # explicit depends_on is needed.
  virtual_machine_id = azurerm_linux_virtual_machine.example.id

  publisher                  = "Microsoft.EnterpriseCloud.Monitoring"
  type                       = "OmsAgentForLinux"
  type_handler_version       = "1.9"
  auto_upgrade_minor_version = true

  # Built with jsonencode rather than a hand-written heredoc so the result is
  # always valid JSON and values are escaped correctly.
  settings = jsonencode({
    workspaceId     = azurerm_log_analytics_workspace.example.workspace_id
    azureResourceId = azurerm_linux_virtual_machine.example.id
  })

  # The workspace key is a secret, so it goes into protected_settings.
  protected_settings = jsonencode({
    workspaceKey = azurerm_log_analytics_workspace.example.primary_shared_key
  })
}

# Metric alert on the workspace's "Heartbeat" metric, filtered to this VM.
# It fires when the heartbeat count for the VM is 0 within the evaluation
# window and notifies the action group.
resource "azurerm_monitor_metric_alert" "example" {
  name                = "alert-heartbeat-${azurerm_linux_virtual_machine.example.name}"
  resource_group_name = azurerm_resource_group.example.name
  scopes              = [azurerm_log_analytics_workspace.example.id]
  # Fixed typo: "Heatbeat" -> "Heartbeat" (this text appears in notifications).
  description = "Heartbeat Alert ${azurerm_linux_virtual_machine.example.name}"

  # Evaluate every minute over a one-minute window.
  frequency   = "PT1M"
  window_size = "PT1M"

  criteria {
    metric_namespace = "Microsoft.OperationalInsights/workspaces"
    metric_name      = "Heartbeat"
    aggregation      = "Count"
    operator         = "LessThanOrEqual"
    threshold        = 0

    # Restrict the metric to heartbeats whose Computer dimension equals the
    # VM name.
    dimension {
      name     = "Computer"
      operator = "Include"
      values   = [azurerm_linux_virtual_machine.example.name]
    }
  }

  action {
    action_group_id = azurerm_monitor_action_group.example.id
  }

  depends_on = [azurerm_linux_virtual_machine.example]
}

アラートに設定できる値は、実際の設定画面と以下の公式ページで確認できます。

www.terraform.io

criteria の部分に関しては、実際にアラートを設定してみて、以下のコマンド等で見てみるのも早いかもしれません。

$ az monitor metrics alert list --query '[].criteria'

Terraformを実行すると、以下のようなリソースが作成されます。

Azure Monitorを確認するとAlert Ruleに設定が入っていることが確認できます。

ここで一通メールが飛んできます。これは「メール通知が設定されました」という内容のメールです。

VMを落としてみます。

数分すると以下のようなアラートメールが飛んできます。