smore stuff
Signed-off-by: Frank Villaro-Dixon <frank.villarodixon@merkle.com>
This commit is contained in:
parent
b604483aef
commit
0fe539c32f
1 changed files with 149 additions and 30 deletions
|
@ -27,12 +27,15 @@ import (
|
||||||
|
|
||||||
batch "k8s.io/api/batch/v1"
|
batch "k8s.io/api/batch/v1"
|
||||||
coordination "k8s.io/api/coordination/v1"
|
coordination "k8s.io/api/coordination/v1"
|
||||||
core "k8s.io/api/core/v1"
|
corev1 "k8s.io/api/core/v1"
|
||||||
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||||
"k8s.io/apimachinery/pkg/runtime"
|
"k8s.io/apimachinery/pkg/runtime"
|
||||||
ctrl "sigs.k8s.io/controller-runtime"
|
ctrl "sigs.k8s.io/controller-runtime"
|
||||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||||
"sigs.k8s.io/controller-runtime/pkg/log"
|
"sigs.k8s.io/controller-runtime/pkg/log"
|
||||||
|
|
||||||
|
"github.com/google/uuid"
|
||||||
|
|
||||||
anesthesiav1alpha1 "gitlab.k3s.fr/k8s/kube-anesthesia-operator/api/v1alpha1"
|
anesthesiav1alpha1 "gitlab.k3s.fr/k8s/kube-anesthesia-operator/api/v1alpha1"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -48,10 +51,12 @@ var (
|
||||||
deadLeaseTimedelta = 1 * time.Minute
|
deadLeaseTimedelta = 1 * time.Minute
|
||||||
defaultCidrV4 = "/24"
|
defaultCidrV4 = "/24"
|
||||||
defaultCidrV6 = "/56"
|
defaultCidrV6 = "/56"
|
||||||
|
wolImage = "registry.k3s.fr/k8s/docker-wakeonlan-image/main:latest"
|
||||||
|
namespace = "default"
|
||||||
)
|
)
|
||||||
|
|
||||||
func (r *AnesthesiaNodeReconciler) getV1Node(an *anesthesiav1alpha1.AnesthesiaNode) (*core.Node, error) {
|
func (r *AnesthesiaNodeReconciler) getV1Node(an *anesthesiav1alpha1.AnesthesiaNode) (*corev1.Node, error) {
|
||||||
var node core.Node
|
var node corev1.Node
|
||||||
nodeName := client.ObjectKey{
|
nodeName := client.ObjectKey{
|
||||||
Name: an.ObjectMeta.Name,
|
Name: an.ObjectMeta.Name,
|
||||||
Namespace: "",
|
Namespace: "",
|
||||||
|
@ -63,7 +68,7 @@ func (r *AnesthesiaNodeReconciler) getV1Node(an *anesthesiav1alpha1.AnesthesiaNo
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *AnesthesiaNodeReconciler) lastHeartbeat(n *core.Node) (*time.Time, error) {
|
func (r *AnesthesiaNodeReconciler) lastHeartbeat(n *corev1.Node) (*time.Time, error) {
|
||||||
var lease coordination.Lease
|
var lease coordination.Lease
|
||||||
leaseName := client.ObjectKey{
|
leaseName := client.ObjectKey{
|
||||||
Name: n.Name,
|
Name: n.Name,
|
||||||
|
@ -88,11 +93,50 @@ func isNodeAlive(lastLeaseTime *time.Time) bool {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// s returns a pointer to a copy of value. It is a convenience for populating
// optional *string fields from a string expression.
//
// Every call yields a distinct pointer; writing through it never affects the
// caller's original string variable.
func s(value string) *string {
	// value is this call's private copy of the argument, so taking its address
	// is safe.
	return &value
}
|
||||||
|
|
||||||
|
func jobLabels(node *anesthesiav1alpha1.AnesthesiaNode) map[string]string {
|
||||||
|
return map[string]string{
|
||||||
|
"anesthesia.k3s.fr/node": node.Name,
|
||||||
|
"anesthesia.k3s.fr/type": "wakeup",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *AnesthesiaNodeReconciler) WolJob(runnerNode *corev1.Node, wakeupNode *anesthesiav1alpha1.AnesthesiaNode) *batch.Job {
|
||||||
|
randid := uuid.New().String()[0:6]
|
||||||
|
|
||||||
|
job := &batch.Job{
|
||||||
|
ObjectMeta: metav1.ObjectMeta{
|
||||||
|
Name: "wakeup-" + wakeupNode.Name + "-" + randid,
|
||||||
|
Namespace: namespace,
|
||||||
|
Labels: jobLabels(wakeupNode),
|
||||||
|
},
|
||||||
|
Spec: batch.JobSpec{
|
||||||
|
Template: corev1.PodTemplateSpec{
|
||||||
|
Spec: corev1.PodSpec{
|
||||||
|
NodeName: runnerNode.Name,
|
||||||
|
RestartPolicy: "OnFailure",
|
||||||
|
Containers: []corev1.Container{
|
||||||
|
{
|
||||||
|
Image: wolImage,
|
||||||
|
Name: "wakeup",
|
||||||
|
Command: []string{"sh", "-c", "for i in `seq 20`; do wol wake " + wakeupNode.Spec.Wakeup.Wol.MacAddr + "; sleep 2; done"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
return job
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// Ensures that there is a WOL pod running on a targetNode (that is on the same subnet)
|
// Creates the given WOL job in the cluster and returns any error from the API call.
|
||||||
func (r *AnesthesiaNodeReconciler) ensureWolJob(an *anesthesiav1alpha1.AnesthesiaNode, targetNode string) error {
|
func (r *AnesthesiaNodeReconciler) ensureWolJob(job *batch.Job) error {
|
||||||
job := batch.Job{}
|
err := r.Create(context.TODO(), job)
|
||||||
_ = job
|
return err
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns true if both IPs are on the same /cidr subnet
|
// Returns true if both IPs are on the same /cidr subnet
|
||||||
|
@ -113,10 +157,10 @@ func areIpsOnSameNet(ip1 string, ip2 string, cidr string) bool {
|
||||||
return ip1net.IP.Equal(ip2net.IP)
|
return ip1net.IP.Equal(ip2net.IP)
|
||||||
}
|
}
|
||||||
|
|
||||||
func areNodesOnSameL2(n1 *core.Node, n2 *core.Node) bool {
|
func areNodesOnSameL2(n1 *corev1.Node, n2 *corev1.Node) bool {
|
||||||
// This is O(n^2) but I don't care one bit
|
// This is O(n^2) but I don't care one bit
|
||||||
for _, a1 := range n1.Status.Addresses {
|
for _, a1 := range n1.Status.Addresses {
|
||||||
if a1.Type == core.NodeHostName {
|
if a1.Type == corev1.NodeHostName {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -144,9 +188,9 @@ func areNodesOnSameL2(n1 *core.Node, n2 *core.Node) bool {
|
||||||
|
|
||||||
// Returns all the nodes of the cluster that are on the same L2 than node.
|
// Returns all the nodes of the cluster that are on the same L2 as the given node.
|
||||||
// +kubebuilder:rbac:groups=v1,resources=nodes,verbs=get;list
|
// +kubebuilder:rbac:groups=v1,resources=nodes,verbs=get;list
|
||||||
func (r *AnesthesiaNodeReconciler) findNodesOnSameL2(n *core.Node) ([]core.Node, error) {
|
func (r *AnesthesiaNodeReconciler) findNodesOnSameL2(n *corev1.Node) ([]*corev1.Node, error) {
|
||||||
var allNodes core.NodeList
|
var allNodes corev1.NodeList
|
||||||
var sameL2Nodes []core.Node
|
var sameL2Nodes []*corev1.Node
|
||||||
|
|
||||||
err := r.List(context.TODO(), &allNodes)
|
err := r.List(context.TODO(), &allNodes)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -159,12 +203,70 @@ func (r *AnesthesiaNodeReconciler) findNodesOnSameL2(n *core.Node) ([]core.Node,
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if areNodesOnSameL2(n, &nn) {
|
if areNodesOnSameL2(n, &nn) {
|
||||||
sameL2Nodes = append(sameL2Nodes, nn)
|
sameL2Nodes = append(sameL2Nodes, &nn)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return sameL2Nodes, nil
|
return sameL2Nodes, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getCandidateNodeFromList(nodes []*corev1.Node) *corev1.Node {
|
||||||
|
// XXX FIXME this is PoC-style level
|
||||||
|
|
||||||
|
for _, node := range nodes {
|
||||||
|
// Check that the node is running
|
||||||
|
return node
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// XXX kubebuilder
|
||||||
|
func (r *AnesthesiaNodeReconciler) listWakeupJobs(wakeupNode *anesthesiav1alpha1.AnesthesiaNode) (*batch.JobList, error) {
|
||||||
|
nodeJobLabels := jobLabels(wakeupNode)
|
||||||
|
var nodeJobs batch.JobList
|
||||||
|
|
||||||
|
err := r.List(context.TODO(), &nodeJobs, client.InNamespace(namespace), client.MatchingLabels(nodeJobLabels))
|
||||||
|
return &nodeJobs, err
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *AnesthesiaNodeReconciler) areRunningWakeupJobs(wakeupNode *anesthesiav1alpha1.AnesthesiaNode) (bool, error) {
|
||||||
|
nodeJobs, err := r.listWakeupJobs(wakeupNode)
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, job := range nodeJobs.Items {
|
||||||
|
if job.Status.Active > 0 {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false, nil
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *AnesthesiaNodeReconciler) removeOldJobs(wakeupNode *anesthesiav1alpha1.AnesthesiaNode) error {
|
||||||
|
|
||||||
|
nodeJobs, err := r.listWakeupJobs(wakeupNode)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("Could not list wakeupJobs: %v\n", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, job := range nodeJobs.Items {
|
||||||
|
if job.Status.Succeeded >= 1 {
|
||||||
|
fmt.Printf("Will remove the fucking job %v\n", job)
|
||||||
|
err = r.Delete(context.TODO(), &job)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("Could not delete fucking job ! %v\n", err)
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
//+kubebuilder:rbac:groups=anesthesia.k3s.fr,resources=anesthesianodes,verbs=get;list;watch;create;update;patch;delete
|
//+kubebuilder:rbac:groups=anesthesia.k3s.fr,resources=anesthesianodes,verbs=get;list;watch;create;update;patch;delete
|
||||||
//+kubebuilder:rbac:groups=anesthesia.k3s.fr,resources=anesthesianodes/status,verbs=get;update;patch
|
//+kubebuilder:rbac:groups=anesthesia.k3s.fr,resources=anesthesianodes/status,verbs=get;update;patch
|
||||||
//+kubebuilder:rbac:groups=anesthesia.k3s.fr,resources=anesthesianodes/finalizers,verbs=update
|
//+kubebuilder:rbac:groups=anesthesia.k3s.fr,resources=anesthesianodes/finalizers,verbs=update
|
||||||
|
@ -183,7 +285,7 @@ func (r *AnesthesiaNodeReconciler) Reconcile(ctx context.Context, req ctrl.Reque
|
||||||
|
|
||||||
var node anesthesiav1alpha1.AnesthesiaNode
|
var node anesthesiav1alpha1.AnesthesiaNode
|
||||||
|
|
||||||
if req.Name != "think03" && req.Name != "think02" {
|
if req.Name != "think02" {
|
||||||
return ctrl.Result{}, nil
|
return ctrl.Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -207,31 +309,50 @@ func (r *AnesthesiaNodeReconciler) Reconcile(ctx context.Context, req ctrl.Reque
|
||||||
nodeAlive := isNodeAlive(renewTime)
|
nodeAlive := isNodeAlive(renewTime)
|
||||||
|
|
||||||
var newActualState string
|
var newActualState string
|
||||||
if node.Spec.State == "on" && nodeAlive {
|
requeue := 2 * time.Second
|
||||||
|
if node.Spec.State == "on" && nodeAlive { // FIXME
|
||||||
newActualState = "running"
|
newActualState = "running"
|
||||||
|
err = r.removeOldJobs(&node)
|
||||||
|
if err != nil {
|
||||||
|
log.Error(err, "Couldn't remove old jobs")
|
||||||
|
return ctrl.Result{}, err
|
||||||
|
}
|
||||||
|
|
||||||
} else if node.Spec.State == "on" && !nodeAlive {
|
} else if node.Spec.State == "on" && !nodeAlive { // FIXME
|
||||||
newActualState = "starting"
|
newActualState = "starting"
|
||||||
|
|
||||||
nodes, err := r.findNodesOnSameL2(v1node)
|
runningJobs, err := r.areRunningWakeupJobs(&node)
|
||||||
|
if err != nil {
|
||||||
|
log.Error(err, "Could not get running wakeup jobs")
|
||||||
|
return ctrl.Result{}, err
|
||||||
|
}
|
||||||
|
if runningJobs {
|
||||||
|
log.Info("Wakeup job still running")
|
||||||
|
return ctrl.Result{RequeueAfter: time.Minute}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
l2Nodes, err := r.findNodesOnSameL2(v1node)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error(err, "Could not find nodes on same L2")
|
log.Error(err, "Could not find nodes on same L2")
|
||||||
|
return ctrl.Result{}, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// XXX here we should filter for running nodes
|
// XXX here we should filter for running nodes
|
||||||
if len(nodes) == 0 {
|
// i.e. we should have at least 1 running node before suspending it
|
||||||
|
if len(l2Nodes) == 0 {
|
||||||
log.Info("No nodes on the same L2 as our node")
|
log.Info("No nodes on the same L2 as our node")
|
||||||
return ctrl.Result{RequeueAfter: time.Minute}, nil
|
return ctrl.Result{RequeueAfter: time.Minute}, nil
|
||||||
}
|
}
|
||||||
var allNames []string
|
|
||||||
for _, n := range nodes {
|
candidateNode := getCandidateNodeFromList(l2Nodes)
|
||||||
allNames = append(allNames, n.Name)
|
log.Info("Candidate node: " + candidateNode.Name)
|
||||||
|
woljob := r.WolJob(candidateNode, &node)
|
||||||
|
err = r.ensureWolJob(woljob)
|
||||||
|
if err != nil {
|
||||||
|
log.Error(err, "Could not ensure WOL Job")
|
||||||
|
return ctrl.Result{RequeueAfter: 2 * time.Minute}, err
|
||||||
}
|
}
|
||||||
|
requeue = 2 * time.Minute
|
||||||
fmt.Printf("Sames nodes on same L2 as %v: %v", v1node.Name, allNames)
|
|
||||||
|
|
||||||
//r.ensureWolJob(&node, "TODO")
|
|
||||||
// TODO
|
|
||||||
|
|
||||||
} else if node.Spec.State == "off" && nodeAlive {
|
} else if node.Spec.State == "off" && nodeAlive {
|
||||||
newActualState = "shutting-down"
|
newActualState = "shutting-down"
|
||||||
|
@ -254,11 +375,9 @@ func (r *AnesthesiaNodeReconciler) Reconcile(ctx context.Context, req ctrl.Reque
|
||||||
r.Status().Update(ctx, &node)
|
r.Status().Update(ctx, &node)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Info(fmt.Sprintf("The last renew time of %v was %v", req.Name, renewTime))
|
|
||||||
|
|
||||||
// TODO(user): your logic here
|
// TODO(user): your logic here
|
||||||
|
|
||||||
return ctrl.Result{RequeueAfter: 2 * time.Second}, nil
|
return ctrl.Result{RequeueAfter: requeue}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// SetupWithManager sets up the controller with the Manager.
|
// SetupWithManager sets up the controller with the Manager.
|
||||||
|
|
Loading…
Reference in a new issue