Installing Karpenter
Karpenter automatically launches just the right compute resources to handle your cluster's applications. It is designed to let you take full advantage of the cloud with fast and simple compute provisioning for Kubernetes clusters. It is a replacement for the Cluster Autoscaler, which has some issues in AWS.
Updated 13/12/2022: The installation is now easier with the new Terraform module - see here for an example
At the moment this example does not work on an A Cloud Guru sandbox account. The supporting files can be found on GitHub.
Deploy an EKS Cluster
# Deploy a small EKS cluster for the Karpenter demo.
# Requires: aws CLI (authenticated) and eksctl on the PATH.
export CLUSTER_NAME="karpenter-demo"
export AWS_DEFAULT_REGION="eu-west-1"
export AWS_ACCOUNT_ID="$(aws sts get-caller-identity --query Account --output text)"

# The karpenter.sh/discovery tag is what the Provisioner's subnet/security-group
# selectors match on later, so it must equal the cluster name.
# OIDC must be enabled so IAM roles for service accounts (IRSA) work.
eksctl create cluster -f - << EOF
---
apiVersion: eksctl.io/v1alpha5
kind: ClusterConfig
metadata:
  name: ${CLUSTER_NAME}
  region: ${AWS_DEFAULT_REGION}
  version: "1.21"
  tags:
    karpenter.sh/discovery: ${CLUSTER_NAME}
managedNodeGroups:
  - instanceType: t3.small
    name: ${CLUSTER_NAME}-ng
    desiredCapacity: 1
    minSize: 1
    maxSize: 2
iam:
  withOIDC: true
EOF

# Point kubectl at the new cluster.
aws eks update-kubeconfig --name "${CLUSTER_NAME}" --region "${AWS_DEFAULT_REGION}"
Install and setup Karpenter
# Install and configure Karpenter v0.6.3 on the cluster created above.
# Assumes CLUSTER_NAME, AWS_DEFAULT_REGION and AWS_ACCOUNT_ID are still exported.

export CLUSTER_ENDPOINT="$(aws eks describe-cluster --name "${CLUSTER_NAME}" --query "cluster.endpoint" --output text)"

TEMPOUT=$(mktemp)

# Provision the node IAM role, instance profile and controller policy via the
# CloudFormation template published with this Karpenter release.
curl -fsSL https://karpenter.sh/v0.6.3/getting-started/cloudformation.yaml > "${TEMPOUT}" \
&& aws cloudformation deploy \
  --stack-name "Karpenter-${CLUSTER_NAME}" \
  --template-file "${TEMPOUT}" \
  --capabilities CAPABILITY_NAMED_IAM \
  --parameter-overrides "ClusterName=${CLUSTER_NAME}"

# Let nodes launched with the Karpenter node role join the cluster
# (adds the role to the aws-auth ConfigMap).
eksctl create iamidentitymapping \
  --username system:node:{{EC2PrivateDNSName}} \
  --cluster "${CLUSTER_NAME}" \
  --arn "arn:aws:iam::${AWS_ACCOUNT_ID}:role/KarpenterNodeRole-${CLUSTER_NAME}" \
  --group system:bootstrappers \
  --group system:nodes

# Create the IRSA role the Karpenter controller assumes; --role-only because
# the Helm chart creates the ServiceAccount itself and only needs the annotation.
eksctl create iamserviceaccount \
  --cluster "${CLUSTER_NAME}" --name karpenter --namespace karpenter \
  --role-name "${CLUSTER_NAME}-karpenter" \
  --attach-policy-arn "arn:aws:iam::${AWS_ACCOUNT_ID}:policy/KarpenterControllerPolicy-${CLUSTER_NAME}" \
  --role-only \
  --approve

export KARPENTER_IAM_ROLE_ARN="arn:aws:iam::${AWS_ACCOUNT_ID}:role/${CLUSTER_NAME}-karpenter"

# Needed once per account so Karpenter can launch spot instances;
# '|| true' keeps this idempotent — the call fails if the role already exists.
aws iam create-service-linked-role --aws-service-name spot.amazonaws.com || true

helm repo add karpenter https://charts.karpenter.sh/
helm repo update

# --wait: let the defaulting webhook come up before we create a Provisioner.
helm upgrade --install --namespace karpenter --create-namespace \
  karpenter karpenter/karpenter \
  --version v0.6.3 \
  --set serviceAccount.annotations."eks\.amazonaws\.com/role-arn"="${KARPENTER_IAM_ROLE_ARN}" \
  --set clusterName="${CLUSTER_NAME}" \
  --set clusterEndpoint="${CLUSTER_ENDPOINT}" \
  --set aws.defaultInstanceProfile="KarpenterNodeInstanceProfile-${CLUSTER_NAME}" \
  --wait

# Default Provisioner: spot-only capacity, 1000 vCPU ceiling, and nodes are
# reclaimed 30s after they become empty. Subnets/SGs are discovered via the
# karpenter.sh/discovery tag applied at cluster creation.
cat <<EOF | kubectl apply -f -
apiVersion: karpenter.sh/v1alpha5
kind: Provisioner
metadata:
  name: default
spec:
  requirements:
    - key: karpenter.sh/capacity-type
      operator: In
      values: ["spot"]
  limits:
    resources:
      cpu: 1000
  provider:
    subnetSelector:
      karpenter.sh/discovery: ${CLUSTER_NAME}
    securityGroupSelector:
      karpenter.sh/discovery: ${CLUSTER_NAME}
  ttlSecondsAfterEmpty: 30
EOF
Deploy a test workload and watch the nodes deploy
# Deploy a pause-container workload that only requests CPU, then scale it up
# so the pending pods force Karpenter to provision a node.
cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: inflate
spec:
  replicas: 0
  selector:
    matchLabels:
      app: inflate
  template:
    metadata:
      labels:
        app: inflate
    spec:
      terminationGracePeriodSeconds: 0
      containers:
        - name: inflate
          image: public.ecr.aws/eks-distro/kubernetes/pause:3.2
          resources:
            requests:
              cpu: 1
EOF
# 5 pods x 1 vCPU cannot fit on the single t3.small node, so they go Pending
# and Karpenter launches capacity; follow the controller log to watch it.
kubectl scale deployment inflate --replicas 5
kubectl logs -f -n karpenter -l app.kubernetes.io/name=karpenter -c controller
1➜ eks git:(main) ✗ (⎈ |neil.armitage@amido.com@karpenter-demo.eu-west-1.eksctl.io:default) kubectl logs -f -n karpenter -l app.kubernetes.io/name=karpenter -c controller
22022-05-13T10:26:47.308Z INFO controller.controller.nodemetrics Starting workers {"commit": "fd19ba2", "reconciler group": "", "reconciler kind": "Node", "worker count": 1}
32022-05-13T10:26:47.308Z INFO controller.controller.provisioning Starting workers {"commit": "fd19ba2", "reconciler group": "karpenter.sh", "reconciler kind": "Provisioner", "worker count": 10}
42022-05-13T10:26:59.993Z INFO controller.provisioning Waiting for unschedulable pods {"commit": "fd19ba2", "provisioner": "default"}
52022-05-13T10:28:22.951Z INFO controller.provisioning Batched 5 pods in 1.176501626s {"commit": "fd19ba2", "provisioner": "default"}
62022-05-13T10:28:23.156Z INFO controller.provisioning Computed packing of 1 node(s) for 5 pod(s) with instance type option(s) [c1.xlarge c4.2xlarge c3.2xlarge c5d.2xlarge c6i.2xlarge c5a.2xlarge c5.2xlarge c6a.2xlarge c5ad.2xlarge c5n.2xlarge m3.2xlarge m5ad.2xlarge t3.2xlarge m5d.2xlarge m5.2xlarge m5zn.2xlarge m5n.2xlarge t3a.2xlarge m5dn.2xlarge m4.2xlarge] {"commit": "fd19ba2", "provisioner": "default"}
72022-05-13T10:28:25.448Z INFO controller.provisioning Launched instance: i-0addfe38342f0960a, hostname: ip-192-168-111-106.eu-west-1.compute.internal, type: t3.2xlarge, zone: eu-west-1c, capacityType: spot {"commit": "fd19ba2", "provisioner": "default"}
82022-05-13T10:28:25.515Z INFO controller.provisioning Bound 5 pod(s) to node ip-192-168-111-106.eu-west-1.compute.internal {"commit": "fd19ba2", "provisioner": "default"}
92022-05-13T10:28:25.515Z INFO controller.provisioning Waiting for unschedulable pods {"commit": "fd19ba2", "provisioner": "default"}
102022-05-13T10:28:26.517Z INFO controller.provisioning Batched 1 pods in 1.00078258s {"commit": "fd19ba2", "provisioner": "default"}
112022-05-13T10:28:26.519Z INFO controller.provisioning Waiting for unschedulable pods {"commit": "fd19ba2", "provisioner": "default"}
Scale the deployment down and watch the nodes terminate
# Remove the workload; once the Karpenter-launched node is empty its
# ttlSecondsAfterEmpty (30s) kicks in and the node is terminated.
kubectl delete deployment inflate
kubectl logs -f -n karpenter -l app.kubernetes.io/name=karpenter -c controller
1deployment.apps "inflate" deleted
22022-05-13T10:26:47.308Z INFO controller.controller.provisioning Starting workers {"commit": "fd19ba2", "reconciler group": "karpenter.sh", "reconciler kind": "Provisioner", "worker count": 10}
32022-05-13T10:26:59.993Z INFO controller.provisioning Waiting for unschedulable pods {"commit": "fd19ba2", "provisioner": "default"}
42022-05-13T10:28:22.951Z INFO controller.provisioning Batched 5 pods in 1.176501626s {"commit": "fd19ba2", "provisioner": "default"}
52022-05-13T10:28:23.156Z INFO controller.provisioning Computed packing of 1 node(s) for 5 pod(s) with instance type option(s) [c1.xlarge c4.2xlarge c3.2xlarge c5d.2xlarge c6i.2xlarge c5a.2xlarge c5.2xlarge c6a.2xlarge c5ad.2xlarge c5n.2xlarge m3.2xlarge m5ad.2xlarge t3.2xlarge m5d.2xlarge m5.2xlarge m5zn.2xlarge m5n.2xlarge t3a.2xlarge m5dn.2xlarge m4.2xlarge] {"commit": "fd19ba2", "provisioner": "default"}
62022-05-13T10:28:25.448Z INFO controller.provisioning Launched instance: i-0addfe38342f0960a, hostname: ip-192-168-111-106.eu-west-1.compute.internal, type: t3.2xlarge, zone: eu-west-1c, capacityType: spot {"commit": "fd19ba2", "provisioner": "default"}
72022-05-13T10:28:25.515Z INFO controller.provisioning Bound 5 pod(s) to node ip-192-168-111-106.eu-west-1.compute.internal {"commit": "fd19ba2", "provisioner": "default"}
82022-05-13T10:28:25.515Z INFO controller.provisioning Waiting for unschedulable pods {"commit": "fd19ba2", "provisioner": "default"}
92022-05-13T10:28:26.517Z INFO controller.provisioning Batched 1 pods in 1.00078258s {"commit": "fd19ba2", "provisioner": "default"}
102022-05-13T10:28:26.519Z INFO controller.provisioning Waiting for unschedulable pods {"commit": "fd19ba2", "provisioner": "default"}
112022-05-13T10:31:51.183Z INFO controller.node Added TTL to empty node {"commit": "fd19ba2", "node": "ip-192-168-111-106.eu-west-1.compute.internal"}
122022-05-13T10:32:21.001Z INFO controller.node Triggering termination after 30s for empty node {"commit": "fd19ba2", "node": "ip-192-168-111-106.eu-west-1.compute.internal"}
132022-05-13T10:32:21.031Z INFO controller.termination Cordoned node {"commit": "fd19ba2", "node": "ip-192-168-111-106.eu-west-1.compute.internal"}
142022-05-13T10:32:21.239Z INFO controller.termination Deleted node {"commit": "fd19ba2", "node": "ip-192-168-111-106.eu-west-1.compute.internal"}
Tidy up the cluster
# Tear everything down in reverse order of creation.
helm uninstall karpenter --namespace karpenter
# NOTE(review): delete-role fails while the controller policy is still
# attached — the CloudFormation stack deletion below removes the policy;
# re-run this line afterwards if it errors. TODO confirm ordering.
aws iam delete-role --role-name "${CLUSTER_NAME}-karpenter"
aws cloudformation delete-stack --stack-name "Karpenter-${CLUSTER_NAME}"
# Karpenter creates EC2 launch templates outside CloudFormation; find and
# delete any named after this cluster so they are not orphaned.
aws ec2 describe-launch-templates \
  | jq -r ".LaunchTemplates[].LaunchTemplateName" \
  | grep -i -- "Karpenter-${CLUSTER_NAME}" \
  | xargs -I{} aws ec2 delete-launch-template --launch-template-name {}
eksctl delete cluster --name "${CLUSTER_NAME}"
comments powered by Disqus