Deploy Ethereum RPC Provider Load Balancer with HAProxy in Kubernetes (AWS EKS)

To achieve high availability and better performance, we can put a HAProxy load balancer in front of multiple Ethereum RPC providers and automatically adjust traffic weights based on the latency and block timestamp of each RPC endpoint.

ref:
https://www.haproxy.org/

Configurations

In haproxy.cfg, we define a backend named rpc-backend with two RPC endpoints, quicknode and alchemy, as its upstream servers.

global
    log stdout format raw local0 info
    stats socket ipv4@*:9999 level admin expose-fd listeners
    stats timeout 5s

defaults
    log global
    mode http
    option httplog
    option dontlognull
    timeout connect 10s
    timeout client 60s
    timeout server 60s
    timeout http-request 60s

frontend stats
    bind *:8404
    stats enable
    stats uri /
    stats refresh 10s

frontend http
    bind *:8000
    option forwardfor
    default_backend rpc-backend

backend rpc-backend
    balance leastconn
    server quicknode 127.0.0.1:8001 weight 100
    server alchemy 127.0.0.1:8002 weight 100

frontend quicknode-frontend
    bind *:8001
    option dontlog-normal
    default_backend quicknode-backend

backend quicknode-backend
    balance roundrobin
    http-request set-header Host xxx.quiknode.pro
    http-request set-path /xxx
    server quicknode xxx.quiknode.pro:443 sni str(xxx.quiknode.pro) check-ssl ssl verify none

frontend alchemy-frontend
    bind *:8002
    option dontlog-normal
    default_backend alchemy-backend

backend alchemy-backend
    balance roundrobin
    http-request set-header Host xxx.alchemy.com
    http-request set-path /xxx
    server alchemy xxx.alchemy.com:443 sni str(xxx.alchemy.com) check-ssl ssl verify none

ref:
https://docs.haproxy.org/2.7/configuration.html
https://www.haproxy.com/documentation/hapee/latest/configuration/

Test it on local:

docker run --rm -v $PWD:/usr/local/etc/haproxy \
-p 8000:8000 \
-p 8404:8404 \
-p 9999:9999 \
-i -t --name haproxy haproxy:2.7.0
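
Once the container is running, you can send a JSON-RPC request to the frontend to verify that traffic is actually proxied to an upstream provider (this quick check assumes the xxx placeholders in haproxy.cfg have been replaced with your real endpoints):

curl -s -X POST http://127.0.0.1:8000 \
-H "Content-Type: application/json" \
-d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}'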

docker exec -i -t -u 0 haproxy bash

echo "show stat" | socat stdio TCP:127.0.0.1:9999
echo "set weight rpc-backend/quicknode 0" | socat stdio TCP:127.0.0.1:9999

# if you're using a unix socket file instead of a TCP socket
apt update
apt install socat -y
echo "set weight rpc-backend/alchemy 0" | socat stdio /var/lib/haproxy/haproxy.sock

ref:
https://www.redhat.com/sysadmin/getting-started-socat

Healthcheck

Then comes the important part: we're going to run a simple but flexible healthcheck script, called the node weighter, as a sidecar container, so that it can reach the HAProxy admin socket of the HAProxy container through 127.0.0.1:9999.

The node weighter can be written in any language. Here is a TypeScript example:

In HAProxyConnector.ts, which sets weights through the HAProxy admin socket:

import net from "net"

export interface ServerWeight {
    backendName: string
    serverName: string
    weight: number
}

export class HAProxyConnector {
    constructor(readonly adminHost = "127.0.0.1", readonly adminPort = 9999) {}

    setWeights(serverWeights: ServerWeight[]) {
        const scaledServerWeights = this.scaleWeights(serverWeights)

        const commands = scaledServerWeights.map(server => {
            return `set weight ${server.backendName}/${server.serverName} ${server.weight}\n`
        })

        const client = net.createConnection({ host: this.adminHost, port: this.adminPort }, () => {
            console.log("HAProxyAdminSocketConnected")
        })
        client.on("error", err => {
            console.log("HAProxyAdminSocketError")
        })
        client.on("data", data => {
            console.log("HAProxyAdminSocketData")
            console.log(data.toString().trim())
        })

        client.write(commands.join(""))
    }

    scaleWeights(serverWeights: ServerWeight[]) {
        // HAProxy weights range from 0 to 256, so scale the weights proportionally
        const totalWeight = serverWeights.reduce((total, server) => total + server.weight, 0)

        return serverWeights.map(server => {
            server.weight = Math.floor((server.weight / totalWeight) * 256)
            return server
        })
    }
}
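
A minimal usage sketch of the connector, for example to manually drain traffic away from one endpoint (the backend and server names match haproxy.cfg above):

const connector = new HAProxyConnector("127.0.0.1", 9999)
connector.setWeights([
    { backendName: "rpc-backend", serverName: "quicknode", weight: 100 },
    { backendName: "rpc-backend", serverName: "alchemy", weight: 0 },
])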

In RPCProxyWeighter.ts, which calculates weights based on custom healthcheck logic:

import { ethers } from "ethers"

import { HAProxyConnector } from "./connectors/HAProxyConnector"
import config from "./config.json"

// minimal helper and types assumed by the code below; replace them with your own utilities if you have them
const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms))

export interface HealthInfo {
    blockNumber: number
    blockTimestamp: number
    blockTimestampDelayMsec: number
    isBlockTooOld: boolean
    latency: number
    normalizedLatency: number
    isLatencyTooHigh: boolean
}

export interface Server {
    backendName: string
    serverName: string
    serverUrl: string
}

export interface ServerWithWeight {
    backendName: string
    serverName: string
    weight: number
    [metadata: string]: any
}

export class RPCProxyWeighter {
    protected readonly connector: HAProxyConnector

    protected readonly ADJUST_INTERVAL_SEC = 60 // 60 seconds
    protected readonly MAX_BLOCK_TIMESTAMP_DELAY_MSEC = 150 * 1000 // 150 seconds
    protected readonly MAX_LATENCY_MSEC = 3 * 1000 // 3 seconds
    protected shouldScale = false
    protected totalWeight = 0

    constructor() {
        this.connector = new HAProxyConnector(config.admin.host, config.admin.port)
    }

    async start() {
        while (true) {
            let serverWithWeights = await this.calculateWeights(config.servers)
            if (this.shouldScale) {
                serverWithWeights = this.connector.scaleWeights(serverWithWeights)
            }
            this.connector.setWeights(serverWithWeights)

            await sleep(1000 * this.ADJUST_INTERVAL_SEC)
        }
    }

    async calculateWeights(servers: Server[]) {
        this.totalWeight = 0

        const serverWithWeights = await Promise.all(
            servers.map(async server => {
                try {
                    return await this.calculateWeight(server)
                } catch (err: any) {
                    return {
                        backendName: server.backendName,
                        serverName: server.serverName,
                        weight: 0,
                    }
                }
            }),
        )

        // if all endpoints are unhealthy, overwrite weights to 100
        if (this.totalWeight === 0) {
            for (const server of serverWithWeights) {
                server.weight = 100
            }
        }

        return serverWithWeights
    }

    async calculateWeight(server: Server) {
        const healthInfo = await this.getHealthInfo(server.serverUrl)

        const serverWithWeight: ServerWithWeight = {
            ...{
                backendName: server.backendName,
                serverName: server.serverName,
                weight: 0,
            },
            ...healthInfo,
        }

        if (healthInfo.isBlockTooOld || healthInfo.isLatencyTooHigh) {
            return serverWithWeight
        }

        // normalizedLatency: the lower the better
        // blockTimestampDelayMsec: the lower the better
        // both units are milliseconds at the same scale
        // serverWithWeight.weight = 1 / healthInfo.normalizedLatency + 1 / healthInfo.blockTimestampDelayMsec

        // NOTE: if we're using `balance source` in HAProxy, the weight can only be 100% or 0%,
        // therefore, as long as the RPC endpoint is healthy, we always set the same weight
        serverWithWeight.weight = 100

        this.totalWeight += serverWithWeight.weight

        return serverWithWeight
    }

    protected async getHealthInfo(serverUrl: string): Promise<HealthInfo> {
        const provider = new ethers.providers.StaticJsonRpcProvider(serverUrl)

        // TODO: add timeout
        const start = Date.now()
        const blockNumber = await provider.getBlockNumber()
        const end = Date.now()

        const block = await provider.getBlock(blockNumber)

        const blockTimestamp = block.timestamp
        const blockTimestampDelayMsec = Math.floor(Date.now() / 1000 - blockTimestamp) * 1000
        const isBlockTooOld = blockTimestampDelayMsec >= this.MAX_BLOCK_TIMESTAMP_DELAY_MSEC

        const latency = end - start
        const normalizedLatency = this.normalizeLatency(latency)
        const isLatencyTooHigh = latency >= this.MAX_LATENCY_MSEC

        return {
            blockNumber,
            blockTimestamp,
            blockTimestampDelayMsec,
            isBlockTooOld,
            latency,
            normalizedLatency,
            isLatencyTooHigh,
        }
    }

    protected normalizeLatency(latency: number) {
        if (latency <= 40) {
            return 1
        }

        const digits = Math.floor(latency).toString().length
        const base = Math.pow(10, digits - 1)
        return Math.floor(latency / base) * base
    }
}
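
The weighter can then be started from a small entrypoint, for example an index.ts compiled to the index.js referenced by the Deployment below (this file is a sketch and not part of the original code):

import { RPCProxyWeighter } from "./RPCProxyWeighter"

const weighter = new RPCProxyWeighter()
weighter.start().catch(err => {
    console.error(err)
    process.exit(1)
})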

In config.json:

Technically, we don't need this config file; we could read the actual URLs directly from the HAProxy admin socket. However, a JSON file containing the URLs is much simpler.

{
    "admin": {
        "host": "127.0.0.1",
        "port": 9999
    },
    "servers": [
        {
            "backendName": "rpc-backend",
            "serverName": "quicknode",
            "serverUrl": "https://xxx.quiknode.pro/xxx"
        },
        {
            "backendName": "rpc-backend",
            "serverName": "alchemy",
            "serverUrl": "https://xxx.alchemy.com/xxx"
        }
    ]
}

ref:
https://www.haproxy.com/documentation/hapee/latest/api/runtime-api/set-weight/
https://sleeplessbeastie.eu/2020/01/29/how-to-use-haproxy-stats-socket/

Deployments

apiVersion: v1
kind: ConfigMap
metadata:
  name: rpc-proxy-config-file
data:
  haproxy.cfg: |
    ...
  config.json: |
    ...
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: rpc-proxy
spec:
  replicas: 2
  selector:
    matchLabels:
      app: rpc-proxy
  template:
    metadata:
      labels:
        app: rpc-proxy
    spec:
      volumes:
        - name: rpc-proxy-config-file
          configMap:
            name: rpc-proxy-config-file
      containers:
        - name: haproxy
          image: haproxy:2.7.0
          ports:
            - containerPort: 8000
              protocol: TCP
          resources:
            requests:
              cpu: 200m
              memory: 256Mi
            limits:
              cpu: 1000m
              memory: 256Mi
          volumeMounts:
            - name: rpc-proxy-config-file
              subPath: haproxy.cfg
              mountPath: /usr/local/etc/haproxy/haproxy.cfg
              readOnly: true
        - name: node-weighter
          image: your-node-weighter
          command: ["node", "./index.js"]
          resources:
            requests:
              cpu: 200m
              memory: 256Mi
            limits:
              cpu: 1000m
              memory: 256Mi
          volumeMounts:
            - name: rpc-proxy-config-file
              subPath: config.json
              mountPath: /path/to/build/config.json
              readOnly: true
---
apiVersion: v1
kind: Service
metadata:
  name: rpc-proxy
spec:
  clusterIP: None
  selector:
    app: rpc-proxy
  ports:
    - name: http
      port: 8000
      targetPort: 8000

The RPC load balancer can then be accessed through http://rpc-proxy.default.svc.cluster.local:8000 inside the Kubernetes cluster.
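
To verify it from inside the cluster, you could run a one-off pod and send a JSON-RPC request through the Service (the test image here is just an example):

kubectl run rpc-proxy-test --rm -i -t --restart=Never --image=curlimages/curl --command -- \
curl -s -X POST http://rpc-proxy.default.svc.cluster.local:8000 \
-H "Content-Type: application/json" \
-d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}'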

ref:
https://www.containiq.com/post/kubernetes-sidecar-container
https://hub.docker.com/_/haproxy

Deploy graph-node (The Graph) in Kubernetes (AWS EKS)

graph-node is open source software that indexes blockchain data, also known as an indexer. However, the cost of running a self-hosted graph node can be pretty high. We're going to deploy a self-hosted graph node on Amazon Elastic Kubernetes Service (EKS).

In this article, we cover two approaches to deploying a self-hosted graph node:

  1. A single graph node with a single PostgreSQL database
  2. A graph node cluster with a primary-secondary PostgreSQL cluster
    • A graph node cluster consists of one index node and multiple query nodes
    • For the database, we will simply use a Multi-AZ DB cluster on AWS RDS

ref:
https://github.com/graphprotocol/graph-node

Create a PostgreSQL Database on AWS RDS

Hardware requirements for running a graph-node:
https://thegraph.com/docs/en/network/indexing/#what-are-the-hardware-requirements
https://docs.thegraph.academy/official-docs/indexer/testnet/graph-protocol-testnet-baremetal/1_architectureconsiderations

A Single DB Instance

We use the following settings on staging.

  • Version: PostgreSQL 13.7-R1
  • Template: Dev/Test
  • Deployment option: Single DB instance
  • DB instance identifier: graph-node
  • Master username: graph_node
  • Auto generate a password: Yes
  • DB instance class: db.t3.medium (2 vCPU 4G RAM)
  • Storage type: gp2
  • Allocated storage: 500 GB
  • Enable storage autoscaling: No
  • Compute resource: Don’t connect to an EC2 compute resource
  • Network type: IPv4
  • VPC: eksctl-perp-staging-cluster/VPC
  • DB Subnet group: default-vpc
  • Public access: Yes
  • VPC security group: graph-node
  • Availability Zone: ap-northeast-1d
  • Initial database name: graph_node

A Multi-AZ DB Cluster

We use the following settings on production.

  • Version: PostgreSQL 13.7-R1
  • Template: Production
  • Deployment option: Multi-AZ DB Cluster
  • DB instance identifier: graph-node-cluster
  • Master username: graph_node
  • Auto generate a password: Yes
  • DB instance class: db.m6gd.2xlarge (8 vCPU 32G RAM)
  • Storage type: io1
  • Allocated storage: 500 GB
  • Provisioned IOPS: 1000
  • Enable storage autoscaling: No
  • Compute resource: Don’t connect to an EC2 compute resource
  • VPC: eksctl-perp-production-cluster/VPC
  • DB Subnet group: default-vpc
  • Public access: Yes
  • VPC security group: graph-node

Unfortunately, AWS currently does not offer Reserved Instances (RIs) for Multi-AZ DB clusters. Use a "Multi-AZ DB instance" or "Single DB instance" instead if cost is a big concern for you.

RDS Remote Access

You can test remote access to your database as follows. Also, make sure the security group's inbound rules allow port 5432 for PostgreSQL.

brew install postgresql

psql --host=YOUR_RDB_ENDPOINT --port=5432 --username=graph_node --password --dbname=postgres
# or
createdb -h YOUR_RDB_ENDPOINT -p 5432 -U graph_node graph_node

Create a Dedicated EKS Node Group

This step is optional.

eksctl --profile=perp create nodegroup \
--cluster perp-production \
--region ap-southeast-1 \
--name "managed-graph-node-m5-xlarge" \
--node-type "m5.xlarge" \
--nodes 3 \
--nodes-min 3 \
--nodes-max 3 \
--managed \
--asg-access \
--alb-ingress-access

Deploy graph-node in Kubernetes

Deployments for a Single DB Instance

apiVersion: v1
kind: Service
metadata:
  name: graph-node
spec:
  clusterIP: None
  selector:
    app: graph-node
  ports:
    - name: jsonrpc
      port: 8000
      targetPort: 8000
    - name: websocket
      port: 8001
      targetPort: 8001
    - name: admin
      port: 8020
      targetPort: 8020
    - name: index-node
      port: 8030
      targetPort: 8030
    - name: metrics
      port: 8040
      targetPort: 8040
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: graph-node-config
data:
  # https://github.com/graphprotocol/graph-node/blob/master/docs/environment-variables.md
  GRAPH_LOG: info
  EXPERIMENTAL_SUBGRAPH_VERSION_SWITCHING_MODE: synced
---
apiVersion: v1
kind: Secret
metadata:
  name: graph-node-secret
type: Opaque
stringData: # use stringData for plain-text values; the data field requires base64-encoded values
  postgres_host: xxx
  postgres_user: xxx
  postgres_db: xxx
  postgres_pass: xxx
  ipfs: https://ipfs.network.thegraph.com
  ethereum: optimism:https://YOUR_RPC_ENDPOINT_1 optimism:https://YOUR_RPC_ENDPOINT_2
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: graph-node
spec:
  replicas: 1
  selector:
    matchLabels:
      app: graph-node
  serviceName: graph-node
  template:
    metadata:
      labels:
        app: graph-node
      annotations:
        "cluster-autoscaler.kubernetes.io/safe-to-evict": "false"
    spec:
      containers:
        # https://github.com/graphprotocol/graph-node/releases
        # https://hub.docker.com/r/graphprotocol/graph-node
        - name: graph-node
          image: graphprotocol/graph-node:v0.29.0
          envFrom:
            - secretRef:
                name: graph-node-secret
            - configMapRef:
                name: graph-node-config
          ports:
            - containerPort: 8000
              protocol: TCP
            - containerPort: 8001
              protocol: TCP
            - containerPort: 8020
              protocol: TCP
            - containerPort: 8030
              protocol: TCP
            - containerPort: 8040
              protocol: TCP
          resources:
            requests:
              cpu: 2000m
              memory: 4G
            limits:
              cpu: 4000m
              memory: 4G

Kubernetes Secrets are, by default, stored unencrypted in the API server's underlying data store (etcd), so anyone with API access can retrieve or modify them; they're not really secret at all. Instead of storing sensitive data in Secrets, you might want to use the Secrets Store CSI Driver.

ref:
https://github.com/graphprotocol/graph-node/blob/master/docs/environment-variables.md
https://hub.docker.com/r/graphprotocol/graph-node
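
Once the pod is running, a quick sanity check is to query graph-node's index-node status API on port 8030 (the indexingStatuses query below is part of that status API; the field selection is just an example):

kubectl port-forward graph-node-0 8030:8030

curl -s http://127.0.0.1:8030/graphql \
-H "Content-Type: application/json" \
-d '{"query": "{ indexingStatuses { subgraph synced health } }"}'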

Deployments for a Multi-AZ DB Cluster

There are two types of nodes in a graph node cluster:

  • Index Node: Only indexing data from the blockchain, not serving queries at all
  • Query Node: Only serving GraphQL queries, not indexing data at all

Indexing subgraphs doesn't require much CPU or memory, but serving queries does, especially when you enable GraphQL caching.

Index Node

Technically, we could further split an index node into Ingestor and Indexer: the former fetches blockchain data from RPC providers periodically, and the latter indexes entities based on mappings. That's another story though.

apiVersion: v1
kind: ConfigMap
metadata:
  name: graph-node-cluster-index-config-file
data:
  config.toml: |
    [store]
    [store.primary]
    connection = "postgresql://USER:PASSWORD@RDS_WRITER_ENDPOINT/DB"
    weight = 1
    pool_size = 2

    [chains]
    ingestor = "graph-node-cluster-index-0"
    [chains.optimism]
    shard = "primary"
    provider = [
      { label = "optimism-rpc-proxy", url = "http://rpc-proxy-for-graph-node.default.svc.cluster.local:8000", features = ["archive"] }
      # { label = "optimism-quicknode", url = "https://YOUR_RPC_ENDPOINT_1", features = ["archive"] },
      # { label = "optimism-alchemy", url = "https://YOUR_RPC_ENDPOINT_2", features = ["archive"] },
      # { label = "optimism-infura", url = "https://YOUR_RPC_ENDPOINT_3", features = ["archive"] }
    ]

    [deployment]
    [[deployment.rule]]
    indexers = [ "graph-node-cluster-index-0" ]
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: graph-node-cluster-index-config-env
data:
  # https://github.com/graphprotocol/graph-node/blob/master/docs/environment-variables.md
  GRAPH_LOG: "info"
  GRAPH_KILL_IF_UNRESPONSIVE: "true"
  ETHEREUM_POLLING_INTERVAL: "100"
  ETHEREUM_BLOCK_BATCH_SIZE: "10"
  GRAPH_STORE_WRITE_QUEUE: "50"
  EXPERIMENTAL_SUBGRAPH_VERSION_SWITCHING_MODE: "synced"
---
apiVersion: v1
kind: Service
metadata:
  name: graph-node-cluster-index
spec:
  clusterIP: None
  selector:
    app: graph-node-cluster-index
  ports:
    - name: jsonrpc
      port: 8000
      targetPort: 8000
    - name: websocket
      port: 8001
      targetPort: 8001
    - name: admin
      port: 8020
      targetPort: 8020
    - name: index-node
      port: 8030
      targetPort: 8030
    - name: metrics
      port: 8040
      targetPort: 8040
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: graph-node-cluster-index
spec:
  replicas: 1
  selector:
    matchLabels:
      app: graph-node-cluster-index
  serviceName: graph-node-cluster-index
  template:
    metadata:
      labels:
        app: graph-node-cluster-index
      annotations:
        "cluster-autoscaler.kubernetes.io/safe-to-evict": "false"
    spec:
      terminationGracePeriodSeconds: 10
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: alpha.eksctl.io/nodegroup-name
                    operator: In
                    values:
                      - managed-graph-node-m5-xlarge
          preferredDuringSchedulingIgnoredDuringExecution:
            - weight: 100
              preference:
                matchExpressions:
                  - key: topology.kubernetes.io/zone
                    operator: In
                    values:
                      # schedule the index node to the zone where the RDS writer instance is located
                      - ap-southeast-1a
      volumes:
        - name: graph-node-cluster-index-config-file
          configMap:
            name: graph-node-cluster-index-config-file
      containers:
        # https://github.com/graphprotocol/graph-node/releases
        # https://hub.docker.com/r/graphprotocol/graph-node
        - name: graph-node-cluster-index
          image: graphprotocol/graph-node:v0.29.0
          command:
            [
              "/bin/sh",
              "-c",
              'graph-node --node-id $HOSTNAME --config "/config.toml" --ipfs "https://ipfs.network.thegraph.com"',
            ]
          envFrom:
            - configMapRef:
                name: graph-node-cluster-index-config-env
          ports:
            - containerPort: 8000
              protocol: TCP
            - containerPort: 8001
              protocol: TCP
            - containerPort: 8020
              protocol: TCP
            - containerPort: 8030
              protocol: TCP
            - containerPort: 8040
              protocol: TCP
          resources:
            requests:
              cpu: 1000m
              memory: 1G
            limits:
              cpu: 2000m
              memory: 1G
          volumeMounts:
            - name: graph-node-cluster-index-config-file
              subPath: config.toml
              mountPath: /config.toml
              readOnly: true

ref:
https://github.com/graphprotocol/graph-node/blob/master/docs/config.md
https://github.com/graphprotocol/graph-node/blob/master/docs/environment-variables.md

The key factors to the efficiency of syncing/indexing subgraphs are:

  1. The latency of the RPC provider
  2. The write throughput of the database

I didn't find any graph-node configs or environment variables that noticeably speed up the syncing process. If you know of any, please tell me.

ref:
https://github.com/graphprotocol/graph-node/issues/3756

If you're interested in building an RPC proxy with healthchecks for block number and latency, see Deploy Ethereum RPC Provider Load Balancer with HAProxy in Kubernetes (AWS EKS). graph-node itself cannot detect whether an RPC provider's blocks are delayed.

Query Nodes

The most important config is DISABLE_BLOCK_INGESTOR: "true", which essentially turns the node into a query node.

apiVersion: v1
kind: ConfigMap
metadata:
  name: graph-node-cluster-query-config-env
data:
  # https://github.com/graphprotocol/graph-node/blob/master/docs/environment-variables.md
  DISABLE_BLOCK_INGESTOR: "true" # this node won't ingest blockchain data
  GRAPH_LOG_QUERY_TIMING: "gql"
  GRAPH_GRAPHQL_QUERY_TIMEOUT: "600"
  GRAPH_QUERY_CACHE_BLOCKS: "60"
  GRAPH_QUERY_CACHE_MAX_MEM: "2000" # the actual used memory could be 3x
  GRAPH_QUERY_CACHE_STALE_PERIOD: "100"
  EXPERIMENTAL_SUBGRAPH_VERSION_SWITCHING_MODE: "synced"
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: graph-node-cluster-query-config-file
data:
  config.toml: |
    [store]
    [store.primary]
    connection = "postgresql://USER:PASSWORD@RDS_WRITER_ENDPOINT/DB" # connect to RDS writer instance
    weight = 0
    pool_size = 2
    [store.primary.replicas.repl1]
    connection = "postgresql://USER:PASSWORD@RDS_READER_ENDPOINT/DB" # connect to RDS reader instances (multiple)
    weight = 1
    pool_size = 100

    [chains]
    ingestor = "graph-node-cluster-index-0"

    [deployment]
    [[deployment.rule]]
    indexers = [ "graph-node-cluster-index-0" ]

    [general]
    query = "graph-node-cluster-query*"
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: graph-node-cluster-query
spec:
  replicas: 3
  strategy:
    type: RollingUpdate
    rollingUpdate:
      maxSurge: 1
      maxUnavailable: 0
  minReadySeconds: 5
  selector:
    matchLabels:
      app: graph-node-cluster-query
  template:
    metadata:
      labels:
        app: graph-node-cluster-query
    spec:
      terminationGracePeriodSeconds: 40
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: alpha.eksctl.io/nodegroup-name
                    operator: In
                    values:
                      - managed-graph-node-m5-xlarge
      volumes:
        - name: graph-node-cluster-query-config-file
          configMap:
            name: graph-node-cluster-query-config-file
      containers:
        # https://github.com/graphprotocol/graph-node/releases
        # https://hub.docker.com/r/graphprotocol/graph-node
        - name: graph-node-cluster-query
          image: graphprotocol/graph-node:v0.29.0
          command:
            [
              "/bin/sh",
              "-c",
              'graph-node --node-id $HOSTNAME --config "/config.toml" --ipfs "https://ipfs.network.thegraph.com"',
            ]
          envFrom:
            - configMapRef:
                name: graph-node-cluster-query-config-env
          ports:
            - containerPort: 8000
              protocol: TCP
            - containerPort: 8001
              protocol: TCP
            - containerPort: 8030
              protocol: TCP
          resources:
            requests:
              cpu: 1000m
              memory: 8G
            limits:
              cpu: 2000m
              memory: 8G
          volumeMounts:
            - name: graph-node-cluster-query-config-file
              subPath: config.toml
              mountPath: /config.toml
              readOnly: true

ref:
https://github.com/graphprotocol/graph-node/blob/master/docs/config.md
https://github.com/graphprotocol/graph-node/blob/master/docs/environment-variables.md

It's also strongly recommended to mark subgraph entities as immutable with @entity(immutable: true). Immutable entities are much faster to write and to query, so they should be used whenever possible. Query time dropped by 80% in our case.
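
For example, in schema.graphql (a hypothetical Transfer entity, only to illustrate the annotation):

type Transfer @entity(immutable: true) {
  id: ID!
  from: Bytes!
  to: Bytes!
  amount: BigInt!
  blockNumber: BigInt!
}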

ref:
https://thegraph.com/docs/en/developing/creating-a-subgraph/#defining-entities

Setup an Ingress for graph-node

WebSocket connections are inherently sticky. If the client requests a connection upgrade to WebSockets, the target that returns an HTTP 101 status code to accept the connection upgrade is the target used in the WebSockets connection. After the WebSockets upgrade is complete, cookie-based stickiness is not used. You don't need to enable stickiness for ALB.

apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: graph-node-ingress
  annotations:
    kubernetes.io/ingress.class: alb
    alb.ingress.kubernetes.io/scheme: internet-facing
    alb.ingress.kubernetes.io/ssl-policy: ELBSecurityPolicy-FS-1-2-Res-2020-10
    alb.ingress.kubernetes.io/certificate-arn: arn:aws:acm:XXX
    alb.ingress.kubernetes.io/target-type: ip
    alb.ingress.kubernetes.io/target-group-attributes: stickiness.enabled=false
    alb.ingress.kubernetes.io/load-balancer-attributes: idle_timeout.timeout_seconds=3600,deletion_protection.enabled=true
    alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS": 443}]'
    alb.ingress.kubernetes.io/actions.subgraph-api: >
      {"type": "forward", "forwardConfig": {"targetGroups": [
        {"serviceName": "graph-node-cluster-query", "servicePort": 8000, "weight": 100}
      ]}}
    alb.ingress.kubernetes.io/actions.subgraph-ws: >
      {"type": "forward", "forwardConfig": {"targetGroups": [
        {"serviceName": "graph-node-cluster-query", "servicePort": 8001, "weight": 100}
      ]}}
    alb.ingress.kubernetes.io/actions.subgraph-hc: >
      {"type": "forward", "forwardConfig": {"targetGroups": [
        {"serviceName": "graph-node-cluster-query", "servicePort": 8030, "weight": 100}
      ]}}
spec:
  rules:
    - host: "subgraph-api.example.com"
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: subgraph-api
                port:
                  name: use-annotation
    - host: "subgraph-ws.example.com"
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: subgraph-ws
                port:
                  name: use-annotation
    - host: "subgraph-hc.example.com"
      http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: subgraph-hc
                port:
                  name: use-annotation

Deploy a Subgraph

kubectl port-forward service/graph-node-cluster-index 8020:8020
npx graph create your_org/your_subgraph --node http://127.0.0.1:8020
graph deploy --node http://127.0.0.1:8020 your_org/your_subgraph
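
Once the subgraph is synced, you can query it through a query node, for example (the subgraph name and the query itself are placeholders):

kubectl port-forward deployment/graph-node-cluster-query 8000:8000

curl -s -X POST http://127.0.0.1:8000/subgraphs/name/your_org/your_subgraph \
-H "Content-Type: application/json" \
-d '{"query": "{ _meta { block { number } } }"}'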

ref:
https://github.com/graphprotocol/graph-cli

Here are also some useful commands for maintenance tasks:

kubectl logs -l app=graph-node-cluster-query -f | grep "query_time_ms"

# show total pool connections according to config file
graphman --config config.toml config pools graph-node-cluster-query-zone-a-78b467665d-tqw7g

# show info
graphman --config config.toml info QmbCm38nL6C7v3FS5snzjnQyDMvV8FkM72C3o5RCfGpM9P

# reassign a deployment to an index node
graphman --config config.toml reassign QmbCm38nL6C7v3FS5snzjnQyDMvV8FkM72C3o5RCfGpM9P graph-node-cluster-index-pending-0
graphman --config config.toml reassign QmbCm38nL6C7v3FS5snzjnQyDMvV8FkM72C3o5RCfGpM9P graph-node-cluster-index-0

# stop indexing a deployment
graphman --config config.toml unassign QmbCm38nL6C7v3FS5snzjnQyDMvV8FkM72C3o5RCfGpM9P

# remove unused deployments
graphman --config config.toml unused record
graphman --config config.toml unused remove -d QmdoXB4zJVD5n38omVezMaAGEwsubhKdivgUmpBCUxXKDh

ref:
https://github.com/graphprotocol/graph-node/blob/master/docs/graphman.md

Amazon EKS: Setup Cluster Autoscaler

The Kubernetes Cluster Autoscaler automatically adjusts the number of nodes in your cluster when pods fail or are rescheduled onto other nodes. Here we're going to deploy the AWS implementation, which carries out the Cluster Autoscaler's decisions by communicating with AWS products and services such as Amazon EC2.

ref:
https://docs.aws.amazon.com/eks/latest/userguide/cluster-autoscaler.html

Configure IAM Permissions

If your existing node groups were created with eksctl create nodegroup --asg-access, then this policy already exists and you can skip this step.

In cluster-autoscaler-policy-staging.json:

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": [
                "autoscaling:SetDesiredCapacity",
                "autoscaling:TerminateInstanceInAutoScalingGroup"
            ],
            "Resource": "*",
            "Condition": {
                "StringEquals": {
                    "aws:ResourceTag/k8s.io/cluster-autoscaler/perp-staging": "owned"
                }
            }
        },
        {
            "Sid": "VisualEditor1",
            "Effect": "Allow",
            "Action": [
                "autoscaling:DescribeAutoScalingGroups",
                "autoscaling:DescribeAutoScalingInstances",
                "autoscaling:DescribeLaunchConfigurations",
                "autoscaling:DescribeTags",
                "autoscaling:SetDesiredCapacity",
                "autoscaling:TerminateInstanceInAutoScalingGroup",
                "ec2:DescribeLaunchTemplateVersions"
            ],
            "Resource": "*"
        }
    ]
}

aws --profile perp iam create-policy \
  --policy-name AmazonEKSClusterAutoscalerPolicyStaging \
  --policy-document file://cluster-autoscaler-policy-staging.json

eksctl --profile=perp create iamserviceaccount \
  --cluster=perp-staging \
  --namespace=kube-system \
  --name=cluster-autoscaler \
  --attach-policy-arn=arn:aws:iam::xxx:policy/AmazonEKSClusterAutoscalerPolicyStaging \
  --override-existing-serviceaccounts \
  --approve

ref:
https://docs.aws.amazon.com/eks/latest/userguide/autoscaling.html

Deploy Cluster Autoscaler

Download the deployment yaml of cluster-autoscaler:

curl -o cluster-autoscaler-autodiscover.yaml \
https://raw.githubusercontent.com/kubernetes/autoscaler/master/cluster-autoscaler/cloudprovider/aws/examples/cluster-autoscaler-autodiscover.yaml

Before you apply the file, it's recommended to check that the cluster-autoscaler version matches your cluster's Kubernetes major and minor version. You can find the version number on the cluster-autoscaler GitHub releases page.
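
For example, compare your cluster's server version with the image tag you're about to deploy (the version numbers below are only illustrative):

kubectl version --short
# Server Version: v1.21.x -> use a cluster-autoscaler v1.21.y image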

kubectl apply -f cluster-autoscaler-autodiscover.yaml
# or
kubectl set image deployment cluster-autoscaler \
  -n kube-system \
  cluster-autoscaler=k8s.gcr.io/autoscaling/cluster-autoscaler:v1.21.3

Do some tweaks:

  • --balance-similar-node-groups ensures that there is enough available compute across all availability zones.
  • --skip-nodes-with-system-pods=false ensures that there are no problems with scaling to zero.

kubectl patch deployment cluster-autoscaler \
  -n kube-system \
  -p '{"spec":{"template":{"metadata":{"annotations":{"cluster-autoscaler.kubernetes.io/safe-to-evict": "false"}}}}}'

kubectl -n kube-system edit deployment.apps/cluster-autoscaler
# change the command to the following:
# spec:
#   containers:
#   - command
#     - ./cluster-autoscaler
#     - --v=4
#     - --stderrthreshold=info
#     - --cloud-provider=aws
#     - --skip-nodes-with-local-storage=false
#     - --expander=least-waste
#     - --node-group-auto-discovery=asg:tag=k8s.io/cluster-autoscaler/enabled,k8s.io/cluster-autoscaler/perp-staging
#     - --balance-similar-node-groups
#     - --skip-nodes-with-system-pods=false
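
After updating the deployment, you can confirm that the autoscaler discovered your Auto Scaling groups and watch its scaling decisions in the logs, for instance:

kubectl -n kube-system logs deployment/cluster-autoscaler -f | grep -iE "scale|node group"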

ref:
https://aws.github.io/aws-eks-best-practices/cluster-autoscaling/
https://github.com/kubernetes/autoscaler/tree/master/charts/cluster-autoscaler#additional-configuration

Amazon EKS: Setup kubernetes-external-secrets with AWS Secret Manager

kubernetes-external-secrets allows you to use external secret management systems, like AWS Secrets Manager, to securely add secrets to Kubernetes, so Pods can consume them as regular Secrets.

ref:
https://github.com/external-secrets/kubernetes-external-secrets

AWS Secrets Manager

For instance, we create a secret named YOUR_SECRET in AWS Secrets Manager, in the same region as our EKS cluster, using DefaultEncryptionKey as the encryption key. The content of the secret looks like:

{
  "KEY_1": "VALUE_1",
  "KEY_2": "VALUE_2",
}
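
The secret can also be created from the CLI (the name and key/value pairs here are placeholders):

aws secretsmanager create-secret --profile=perp \
--region ap-northeast-1 \
--name YOUR_SECRET \
--secret-string '{"KEY_1": "VALUE_1", "KEY_2": "VALUE_2"}'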

We can retrieve the secret value:

aws secretsmanager get-secret-value --profile=perp \
--region ap-northeast-1 \
--secret-id YOUR_SECRET

kubernetes-external-secrets

For kubernetes-external-secrets to work properly, it must be granted access to AWS Secrets Manager. To achieve that, we need to create an IAM role for kubernetes-external-secrets' service account.

ref:
https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html

Configure Secrets Backends

Create an IAM OIDC provider for the cluster:

eksctl utils associate-iam-oidc-provider --profile=perp \
--region ap-northeast-1 \
--cluster perp-staging \
--approve

aws iam list-open-id-connect-providers --profile=perp

ref:
https://docs.aws.amazon.com/eks/latest/userguide/enable-iam-roles-for-service-accounts.html

Create an IAM policy that allows the role to access all secrets we created in AWS Secrets Manager:

AWS_ACCOUNT_ID=$(aws sts get-caller-identity --profile=perp --query "Account" --output text)

cat <<EOF > policy.json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "secretsmanager:GetResourcePolicy",
        "secretsmanager:GetSecretValue",
        "secretsmanager:DescribeSecret",
        "secretsmanager:ListSecretVersionIds"
      ],
      "Resource": [
        "arn:aws:secretsmanager:ap-northeast-1:${AWS_ACCOUNT_ID}:secret:*"
      ]
    }
  ]
}
EOF

aws iam create-policy --profile=perp \
--policy-name perp-staging-secrets-policy --policy-document file://policy.json

Attach the above IAM policy to an IAM role, and define an AssumeRole trust policy for the service account external-secrets-kubernetes-external-secrets, which will be created later:

AWS_ACCOUNT_ID=$(aws sts get-caller-identity --profile=perp --query "Account" --output text)
OIDC_PROVIDER=$(aws eks describe-cluster --profile=perp --name perp-staging --region ap-northeast-1 --query "cluster.identity.oidc.issuer" --output text | sed -e "s/^https:\/\///") 

cat <<EOF > trust.json
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Federated": "arn:aws:iam::${AWS_ACCOUNT_ID}:oidc-provider/${OIDC_PROVIDER}"
      },
      "Action": "sts:AssumeRoleWithWebIdentity",
      "Condition": {
        "StringEquals": {
          "${OIDC_PROVIDER}:aud": "sts.amazonaws.com",
          "${OIDC_PROVIDER}:sub": "system:serviceaccount:default:external-secrets-kubernetes-external-secrets"
        }
      }
    }
  ]
}
EOF

aws iam create-role --profile=perp \
--role-name perp-staging-secrets-role \
--assume-role-policy-document file://trust.json

aws iam attach-role-policy --profile=perp \
--role-name perp-staging-secrets-role \
--policy-arn YOUR_POLICY_ARN

ref:
https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html
https://gist.github.com/lukaszbudnik/f1f42bd5a57430e3c25034200ba44c2e

Deploy kubernetes-external-secrets Controller

helm repo add external-secrets https://external-secrets.github.io/kubernetes-external-secrets/

helm install external-secrets \
external-secrets/kubernetes-external-secrets \
--skip-crds \
--set env.AWS_REGION=ap-northeast-1 \
--set securityContext.fsGroup=65534 \
--set serviceAccount.annotations."eks\.amazonaws\.com/role-arn"='YOUR_ROLE_ARN'

helm list

It automatically creates a service account named external-secrets-kubernetes-external-secrets in Kubernetes.
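
You can verify that the service account exists and carries the IAM role annotation:

kubectl get serviceaccount external-secrets-kubernetes-external-secrets -o yaml
# the eks.amazonaws.com/role-arn annotation should point to YOUR_ROLE_ARN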

ref:
https://github.com/external-secrets/kubernetes-external-secrets/tree/master/charts/kubernetes-external-secrets

Deploy ExternalSecret

An ExternalSecret generates a Secret object with the same name. For example, the following ExternalSecret creates a Secret named example-secret, which the Deployment then consumes via envFrom:

apiVersion: kubernetes-client.io/v1
kind: ExternalSecret
metadata:
  name: example-secret
spec:
  backendType: secretsManager
  region: ap-northeast-1
  dataFrom:
    - YOUR_SECRET
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: example-app
spec:
  replicas: 3
  selector:
    matchLabels:
      app: example-app
  template:
    metadata:
      labels:
        app: example-app
    spec:
      containers:
      - name: example-app
        image: busybox:latest
        envFrom:
        - secretRef:
            name: example-secret

kubectl get secret example-secret -o jsonpath="{.data.KEY_1}" | base64 --decode

ref:
https://gist.github.com/lukaszbudnik/f1f42bd5a57430e3c25034200ba44c2e

Amazon EKS: Create a Kubernetes cluster via ClusterConfig

Amazon Elastic Kubernetes Service (Amazon EKS) is a managed Kubernetes service on AWS. IMAO, Google Cloud's GKE is still the best choice for a managed Kubernetes service if you're not stuck with AWS.

ref:
https://docs.aws.amazon.com/eks/latest/userguide/eks-ug.pdf
https://docs.aws.amazon.com/eks/latest/userguide/getting-started.html

Also see:
https://vinta.ws/code/the-complete-guide-to-google-kubernetes-engine-gke.html

Installation

We need to install some command-line tools: aws, eksctl and kubectl.

brew tap weaveworks/tap
brew install awscli weaveworks/tap/eksctl kubernetes-cli

k9s and fubectl are also recommended; they provide fancy terminal UIs for interacting with your Kubernetes clusters.

brew install k9s

curl -LO https://rawgit.com/kubermatic/fubectl/master/fubectl.source
source <path-to>/fubectl.source

ref:
https://github.com/derailed/k9s
https://github.com/kubermatic/fubectl

Create Cluster

We use a ClusterConfig to define our cluster.

apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig
metadata:
  name: perp-staging
  region: ap-northeast-1
# All workloads in the "fargate" Kubernetes namespace will be scheduled onto Fargate
fargateProfiles:
  - name: fp-default
    selectors:
      - namespace: fargate
# https://eksctl.io/usage/schema/
managedNodeGroups:
  - name: managed-ng-m5-4xlarge
    instanceType: m5.4xlarge
    instancePrefix: m5-4xlarge
    minSize: 1
    maxSize: 5
    desiredCapacity: 3
    volumeSize: 100
    iam:
      withAddonPolicies:
        cloudWatch: true
        albIngress: true
        ebs: true
        efs: true
# Enable envelope encryption for Kubernetes Secrets
secretsEncryption:
  keyARN: "arn:aws:kms:YOUR_KMS_ARN"
# Enable CloudWatch logging for Kubernetes components
cloudWatch:
  clusterLogging:
    enableTypes: ["*"]

Create the cluster.

eksctl create cluster --profile=perp -f clusterconfig.yaml

ref:
https://eksctl.io/usage/creating-and-managing-clusters/
https://github.com/weaveworks/eksctl/tree/master/examples

We can also use the same config file to update our cluster, though not all configuration changes are currently supported.

eksctl upgrade cluster --profile=perp -f clusterconfig.yaml

Access Cluster

aws eks --profile=perp update-kubeconfig \
--region ap-northeast-1 \
--name perp-staging \
--alias vinta@perp-staging
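
Then verify that the new context works:

kubectl config current-context
kubectl get nodes -o wide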

ref:
https://docs.aws.amazon.com/eks/latest/userguide/create-kubeconfig.html

Delete Cluster

# You might need to manually delete/detach following resources first:
# Detach non-default policies for FargatePodExecutionRole and NodeInstanceRole
# Fargate Profile
# EC2 Network Interfaces 
# EC2 ALB
eksctl delete cluster --profile=perp \
--region ap-northeast-1 \
--name perp-staging

Then you can delete the CloudFormation stack on AWS Management Console.

Cluster Authentication

kubectl get configmap aws-auth -n kube-system -o yaml

We must copy mapRoles from the above ConfigMap, and add the mapUsers section:

apiVersion: v1
kind: ConfigMap
metadata:
  name: aws-auth
  namespace: kube-system
data:
  # NOTE: mapRoles are copied from "kubectl get configmap aws-auth -n kube-system -o yaml"
  mapRoles: |
    - rolearn: YOUR_ARN_FargatePodExecutionRole
      username: system:node:{{SessionName}}
      groups:
      - system:bootstrappers
      - system:nodes
      - system:node-proxier
    - rolearn: YOUR_ARN_NodeInstanceRole
      username: system:node:{{EC2PrivateDNSName}}
      groups:
        - system:bootstrappers
        - system:nodes
  # Only IAM users listed here can access this cluster
  mapUsers: |
    - userarn: YOUR_USER_ARN
      username: YOUR_AWS_USERNAME
      groups:
        - system:masters

kubectl apply -f aws-auth.yaml
kubectl describe configmap -n kube-system aws-auth

ref:
https://docs.aws.amazon.com/eks/latest/userguide/add-user-role.html
https://aws.amazon.com/premiumsupport/knowledge-center/amazon-eks-cluster-access/

Setup Container Logging for Fargate Nodes

Create an IAM policy and attach it to the pod execution role specified for your Fargate profile. The --role-name should be the name of FargatePodExecutionRole, which you can find under the "Resources" tab in the CloudFormation stack of your EKS cluster.

curl -so permissions.json https://raw.githubusercontent.com/aws-samples/amazon-eks-fluent-logging-examples/mainline/examples/fargate/cloudwatchlogs/permissions.json

aws iam create-policy --profile=perp \
--policy-name eks-fargate-logging-policy \
--policy-document file://permissions.json

aws iam attach-role-policy --profile=perp \
--policy-arn arn:aws:iam::XXX:policy/eks-fargate-logging-policy \
--role-name eksctl-perp-staging-cluste-FargatePodExecutionRole-XXX

Configure Kubernetes to send container logs on Fargate nodes to CloudWatch via Fluent Bit.

kind: Namespace
apiVersion: v1
metadata:
  name: aws-observability
  labels:
    aws-observability: enabled
---
kind: ConfigMap
apiVersion: v1
metadata:
  name: aws-logging
  namespace: aws-observability
data:
  output.conf: |
    [OUTPUT]
        Name cloudwatch_logs
        Match *
        region ap-northeast-1
        log_group_name /aws/eks/perp-staging/containers
        log_stream_prefix fluent-bit-
        auto_create_group On

kubectl apply -f aws-logs-fargate.yaml

ref:
https://docs.aws.amazon.com/eks/latest/userguide/fargate-logging.html
https://docs.fluentbit.io/manual/pipeline/outputs/cloudwatch

Setup Container Logging for EC2 Nodes (CloudWatch Container Insights)

Deploy Fluent Bit as DaemonSet to send container logs to CloudWatch Logs.

ClusterName=perp-staging
RegionName=ap-northeast-1
FluentBitHttpPort='2020'
FluentBitReadFromHead='Off'
[[ ${FluentBitReadFromHead} = 'On' ]] && FluentBitReadFromTail='Off'|| FluentBitReadFromTail='On'
[[ -z ${FluentBitHttpPort} ]] && FluentBitHttpServer='Off' || FluentBitHttpServer='On'
curl -s https://raw.githubusercontent.com/aws-samples/amazon-cloudwatch-container-insights/latest/k8s-deployment-manifest-templates/deployment-mode/daemonset/container-insights-monitoring/quickstart/cwagent-fluent-bit-quickstart.yaml | sed 's/{{cluster_name}}/'${ClusterName}'/;s/{{region_name}}/'${RegionName}'/;s/{{http_server_toggle}}/"'${FluentBitHttpServer}'"/;s/{{http_server_port}}/"'${FluentBitHttpPort}'"/;s/{{read_from_head}}/"'${FluentBitReadFromHead}'"/;s/{{read_from_tail}}/"'${FluentBitReadFromTail}'"/' > aws-logs-ec2.yaml
kubectl apply -f aws-logs-ec2.yaml

ref:
https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/Container-Insights-setup-EKS-quickstart.html