// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package hello

import (
    "context"
    "crypto/md5"
    // "encoding/base64"
    "encoding/json"
    "errors"
    "fmt"
    "os"
    "os/exec"
    "path/filepath"
    "regexp"
    "strconv"
    "strings"
    "time"

    "github.com/hashicorp/consul-template/signals"
    "github.com/hashicorp/go-hclog"
    "github.com/hashicorp/nomad/drivers/shared/eventer"
    "github.com/hashicorp/nomad/drivers/shared/executor"
    "github.com/hashicorp/nomad/plugins/base"
    "github.com/hashicorp/nomad/plugins/drivers"
    "github.com/hashicorp/nomad/plugins/shared/hclspec"
    "github.com/hashicorp/nomad/plugins/shared/structs"
)

const (
    // pluginName is the name of the plugin
    // this is used for logging and (along with the version) for uniquely
    // identifying plugin binaries fingerprinted by the client
    pluginName = "hello-world-example"

    // pluginVersion allows the client to identify and use newer versions of
    // an installed plugin
    pluginVersion = "v0.1.0"

    // fingerprintPeriod is the interval at which the plugin will send
    // fingerprint responses
    fingerprintPeriod = 30 * time.Second

    // taskHandleVersion is the version of task handle which this plugin sets
    // and understands how to decode
    // this is used to allow modification and migration of the task schema
    // used by the plugin
    taskHandleVersion = 1

    // TODO: should these be made configurable?
    tapDeviceBaseName = "miragetap"
    iptablesChainBase = "MIRAGE"

    // TODO: make this configurable
    // addresses in the bridge are <bridgeRangeHi>.<bridgeRangeLo>.0.0/16
    bridgeRangeHi = 10
    bridgeRangeLo = 99
)

var (
    // pluginInfo describes the plugin
    pluginInfo = &base.PluginInfoResponse{
        Type:              base.PluginTypeDriver,
        PluginApiVersions: []string{drivers.ApiVersion010},
        PluginVersion:     pluginVersion,
        Name:              pluginName,
    }

    // configSpec is the specification of the plugin's configuration
    // this is used to validate the configuration specified for the plugin
    // on the client.
    // this is not global, but can be specified on a per-client basis.
    configSpec = hclspec.NewObject(map[string]*hclspec.Spec{
        // TODO: define plugin's agent configuration schema.
        //
        // The schema should be defined using HCL specs and it will be used to
        // validate the agent configuration provided by the user in the
        // `plugin` stanza (https://www.nomadproject.io/docs/configuration/plugin.html).
        //
        // For example, for the schema below a valid configuration would be:
        //
        //   plugin "hello-driver-plugin" {
        //     config {
        //       bridge = "mirage"
        //     }
        //   }
        "bridge": hclspec.NewDefault(
            hclspec.NewAttr("bridge", "string", false),
            hclspec.NewLiteral(`"mirage"`),
        ),
    })

    // taskConfigSpec is the specification of the plugin's configuration for
    // a task
    // this is used to validate the configuration specified for the plugin
    // when a job is submitted.
    taskConfigSpec = hclspec.NewObject(map[string]*hclspec.Spec{
        // TODO: define plugin's task configuration schema
        //
        // The schema should be defined using HCL specs and it will be used to
        // validate the task configuration provided by the user when they
        // submit a job.
        //
        // For example, for the schema below a valid task would be:
        //
        //   job "example" {
        //     group "example" {
        //       task "say-hi" {
        //         driver = "hello-driver-plugin"
        //         config {
        //           unikernel = "/path/to/hello.hvt"
        //         }
        //       }
        //     }
        //   }
        "unikernel": hclspec.NewDefault(
            hclspec.NewAttr("unikernel", "string", true),
            hclspec.NewLiteral(`""`),
        ),
    })

    // capabilities indicates what optional features this driver supports
    // this should be set according to the target run time.
    capabilities = &drivers.Capabilities{
        // TODO: set plugin's capabilities
        //
        // The plugin's capabilities signal Nomad which extra functionalities
        // are supported. For a list of available options check the docs page:
        // https://godoc.org/github.com/hashicorp/nomad/plugins/drivers#Capabilities
        SendSignals: true,
        Exec:        false,
        FSIsolation: drivers.FSIsolationNone,
        NetIsolationModes: []drivers.NetIsolationMode{
            drivers.NetIsolationModeTask,
        },
        MountConfigs: drivers.MountConfigSupportNone,
    }
)

// Config contains configuration information for the plugin
type Config struct {
    // TODO: create decoded plugin configuration struct
    //
    // This struct is the decoded version of the schema defined in the
    // configSpec variable above. It's used to convert the HCL configuration
    // passed by the Nomad agent into Go constructs.
    Bridge string `codec:"bridge"`
}

// TaskConfig contains configuration information for a task that runs with
// this plugin
type TaskConfig struct {
    // TODO: create decoded plugin task configuration struct
    //
    // This struct is the decoded version of the schema defined in the
    // taskConfigSpec variable above. It's used to convert the string
    // configuration for the task into Go constructs.
    Unikernel string `codec:"unikernel"`
}

// TaskState is the runtime state which is encoded in the handle returned to
// Nomad client.
// This information is needed to rebuild the task state and handler during
// recovery.
type TaskState struct {
    ReattachConfig *structs.ReattachConfig
    TaskConfig     *drivers.TaskConfig
    StartedAt      time.Time

    // TODO: add any extra important values that must be persisted in order
    // to restore a task.
    //
    // The plugin keeps track of its running tasks in an in-memory data
    // structure. If the plugin crashes, this data will be lost, so Nomad
    // will respawn a new instance of the plugin and try to restore its
    // in-memory representation of the running tasks using the RecoverTask()
    // method below.
    Pid        int
    NetDevices map[string]NetDeviceConfig
}

type NetDeviceConfig struct {
    TapDevice string
    Ip        string
    Mac       string
    Ports     []NetDevicePortConfig
}

type NetDevicePortConfig struct {
    From int // port on the host
    To   int // port inside the bridge
}

// HelloDriverPlugin is an example driver plugin. When provisioned in a job,
// the task will output a greeting specified by the user.
type HelloDriverPlugin struct {
    // eventer is used to handle multiplexing of TaskEvents calls such that an
    // event can be broadcast to all callers
    eventer *eventer.Eventer

    // config is the plugin configuration set by the SetConfig RPC
    config *Config

    // nomadConfig is the client config from Nomad
    nomadConfig *base.ClientDriverConfig

    // tasks is the in memory datastore mapping taskIDs to driver handles
    tasks *taskStore

    // ctx is the context for the driver. It is passed to other subsystems to
    // coordinate shutdown
    ctx context.Context

    // signalShutdown is called when the driver is shutting down and cancels
    // the ctx passed to any subsystems
    signalShutdown context.CancelFunc

    // logger will log to the Nomad agent
    logger hclog.Logger
}

// A unikernel manifest. Obtained by deserializing JSON returned by solo5-elftool.
type unikernelManifest struct {
    Type    string            `json:"type"`
    Version int               `json:"version"`
    Devices []unikernelDevice `json:"devices"`
}

type unikernelDevice struct {
    Name string `json:"name"`
    Type string `json:"type"` // BLOCK_BASIC, NET_BASIC
}

// NewPlugin returns a new example driver plugin
func NewPlugin(logger hclog.Logger) drivers.DriverPlugin {
    ctx, cancel := context.WithCancel(context.Background())
    logger = logger.Named(pluginName)

    return &HelloDriverPlugin{
        eventer:        eventer.NewEventer(ctx, logger),
        config:         &Config{},
        tasks:          newTaskStore(),
        ctx:            ctx,
        signalShutdown: cancel,
        logger:         logger,
    }
}

// PluginInfo returns information describing the plugin.
func (d *HelloDriverPlugin) PluginInfo() (*base.PluginInfoResponse, error) {
    return pluginInfo, nil
}

// ConfigSchema returns the plugin configuration schema.
func (d *HelloDriverPlugin) ConfigSchema() (*hclspec.Spec, error) {
    return configSpec, nil
}

func gatewayIp() string {
    return fmt.Sprintf("%d.%d.0.1", bridgeRangeHi, bridgeRangeLo)
}

func broadcastIp() string {
    return fmt.Sprintf("%d.%d.0.255", bridgeRangeHi, bridgeRangeLo)
}

func iptablesChainOut() string {
    return iptablesChainBase + "_OUT"
}

func iptablesChainIn() string {
    return iptablesChainBase + "_IN"
}
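
// Illustration only: with the constants defined above (bridgeRangeHi = 10,
// bridgeRangeLo = 99, iptablesChainBase = "MIRAGE"), these helpers evaluate to
//
//   gatewayIp()        == "10.99.0.1"
//   broadcastIp()      == "10.99.0.255"
//   iptablesChainOut() == "MIRAGE_OUT"
//   iptablesChainIn()  == "MIRAGE_IN"
//
// i.e. unikernels live in the 10.99.0.0/16 range behind the bridge, with the
// bridge address acting as their gateway.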

// SetConfig is called by the client to pass the configuration for the plugin.
func (d *HelloDriverPlugin) SetConfig(cfg *base.Config) error {
    var config Config
    if len(cfg.PluginConfig) != 0 {
        if err := base.MsgPackDecode(cfg.PluginConfig, &config); err != nil {
            return err
        }
    }

    // Save the configuration to the plugin
    d.config = &config

    // TODO: parse and validate any configuration value if necessary.
    //
    // If your driver agent configuration requires any complex validation
    // (some dependency between attributes) or special data parsing (the
    // string "10s" into a time.Interval) you can do it here and update the
    // value in d.config.
    //
    // In the example below we check if the shell specified by the user is
    // supported by the plugin.
    // shell := d.config.Shell
    // if shell != "bash" && shell != "fish" {
    //     return fmt.Errorf("invalid shell %s", d.config.Shell)
    // }

    // Save the Nomad agent configuration
    if cfg.AgentConfig != nil {
        d.nomadConfig = cfg.AgentConfig.Driver
    }

    // TODO: initialize any extra requirements if necessary.
    //
    // Here you can use the config values to initialize any resources that are
    // shared by all tasks that use this driver, such as a daemon process.

    // create the bridge and iptables chains if they do not exist
    // TODO: handle errors

    // echo "1" > /proc/sys/net/ipv4/ip_forward
    // ip link add name mybridge type bridge
    // ip link set dev mybridge up
    // ip address add 10.0.0.1/24 broadcast 10.0.0.255 dev mybridge
    exec.Command("sh", "-c", `echo 1 > /proc/sys/net/ipv4/ip_forward`).Run()
    exec.Command("ip", "link", "add", "name", d.config.Bridge, "type", "bridge").Run()
    exec.Command("ip", "link", "set", "dev", d.config.Bridge, "up").Run()
    // TODO: remove broadcast?
    exec.Command("ip", "address", "add", gatewayIp()+"/16", "broadcast",
        broadcastIp(), "dev", d.config.Bridge).Run()

    // iptables -t nat -N MIRAGE_OUT
    // iptables -t nat -A POSTROUTING -j MIRAGE_OUT
    // + a rule for each unikernel...
    exec.Command("iptables", "-t", "nat", "-N", iptablesChainOut()).Run()
    // FIXME: this one gets added unconditionally even if it already exists
    exec.Command("iptables", "-t", "nat", "-A", "POSTROUTING", "-j", iptablesChainOut()).Run()

    // iptables -t nat -N MIRAGE_IN
    // iptables -t nat -A PREROUTING -m addrtype --dst-type LOCAL -j MIRAGE_IN
    // + a rule for each unikernel...
    exec.Command("iptables", "-t", "nat", "-N", iptablesChainIn()).Run()
    // FIXME: this one gets added unconditionally even if it already exists
    exec.Command("iptables", "-t", "nat", "-A", "PREROUTING",
        "-m", "addrtype", "--dst-type", "LOCAL",
        "-j", iptablesChainIn()).Run()

    // TODO:
    // iptables -t nat -I OUTPUT -o lo -j MIRAGE_IN
    // sysctl -w net.ipv4.conf.all.route_localnet=1

    return nil
}

// TaskConfigSchema returns the HCL schema for the configuration of a task.
func (d *HelloDriverPlugin) TaskConfigSchema() (*hclspec.Spec, error) {
    return taskConfigSpec, nil
}

// Capabilities returns the features supported by the driver.
func (d *HelloDriverPlugin) Capabilities() (*drivers.Capabilities, error) {
    return capabilities, nil
}

// Fingerprint returns a channel that will be used to send health information
// and other driver specific node attributes.
func (d *HelloDriverPlugin) Fingerprint(ctx context.Context) (<-chan *drivers.Fingerprint, error) {
    ch := make(chan *drivers.Fingerprint)
    go d.handleFingerprint(ctx, ch)
    return ch, nil
}

// handleFingerprint manages the channel and the flow of fingerprint data.
func (d *HelloDriverPlugin) handleFingerprint(ctx context.Context, ch chan<- *drivers.Fingerprint) {
    defer close(ch)

    // Nomad expects the initial fingerprint to be sent immediately
    ticker := time.NewTimer(0)
    for {
        select {
        case <-ctx.Done():
            return
        case <-d.ctx.Done():
            return
        case <-ticker.C:
            // after the initial fingerprint we can set the proper fingerprint
            // period
            ticker.Reset(fingerprintPeriod)
            ch <- d.buildFingerprint()
        }
    }
}

// buildFingerprint returns the driver's fingerprint data
func (d *HelloDriverPlugin) buildFingerprint() *drivers.Fingerprint {
    fp := &drivers.Fingerprint{
        Attributes:        map[string]*structs.Attribute{},
        Health:            drivers.HealthStateHealthy,
        HealthDescription: drivers.DriverHealthy,
    }

    // TODO: implement fingerprinting logic to populate health and driver
    // attributes.
    //
    // Fingerprinting is used by the plugin to relay two important pieces of
    // information to Nomad: health state and node attributes.
    //
    // If the plugin reports to be unhealthy, or doesn't send any fingerprint
    // data in the expected interval of time, Nomad will restart it.
    //
    // Node attributes can be used to report any relevant information about
    // the node in which the plugin is running (specific library availability,
    // installed software versions, etc.). These attributes can then be used
    // by an operator to set job constraints.

    // FIXME: is this the right place to check this?
    // do we need to check for the binaries' existence every N seconds?
    // should we do these checks in SetConfig() instead?

    // check that solo5-hvt exists
    hvtCmd := exec.Command("which", "solo5-hvt")
    hvtPath, hvtErr := hvtCmd.Output()
    if hvtErr != nil {
        return &drivers.Fingerprint{
            Health:            drivers.HealthStateUndetected,
            HealthDescription: "solo5-hvt not found",
        }
    }

    // check that solo5-elftool exists
    elftoolCmd := exec.Command("which", "solo5-elftool")
    elftoolPath, elftoolErr := elftoolCmd.Output()
    if elftoolErr != nil {
        return &drivers.Fingerprint{
            Health:            drivers.HealthStateUndetected,
            HealthDescription: "solo5-elftool not found",
        }
    }

    // We set their paths as attributes (trimming the trailing newline that
    // `which` prints after the path)
    fp.Attributes["driver.hello.solo5-hvt"] = structs.NewStringAttribute(strings.TrimSpace(string(hvtPath)))
    fp.Attributes["driver.hello.solo5-elftool"] = structs.NewStringAttribute(strings.TrimSpace(string(elftoolPath)))

    return fp
}

func getFreshTapDeviceName() (string, error) {
    // a single Output() call both runs the command and captures its stdout
    cmd := exec.Command("ip", "tuntap", "show")
    tuntapOut, err := cmd.Output()
    if err != nil {
        return "", fmt.Errorf("running '%s' failed: %v", cmd.String(), err)
    }

    var tuntapLines []string
    if string(tuntapOut) == "" {
        tuntapLines = []string{}
    } else {
        tuntapLines = strings.Split(string(tuntapOut), "\n")
    }

    maxId := 0
    for _, line := range tuntapLines {
        tapName, _, found := strings.Cut(line, ":")
        if !found {
            continue
        }
        // skip tap devices that do not carry our base name
        tapIdStr, found := strings.CutPrefix(tapName, tapDeviceBaseName)
        if !found {
            continue
        }
        tapId, err := strconv.Atoi(tapIdStr)
        if err != nil {
            continue
        }
        if tapId > maxId {
            maxId = tapId
        }
    }

    return fmt.Sprintf("%s%d", tapDeviceBaseName, maxId+1), nil
}

func destroyTapDevice(name string) {
    exec.Command("ip", "tuntap", "del", name, "mode", "tap").Run()
}

func createTapDevice(name string, bridge string) error {
    // ip tuntap add <tap> mode tap
    cmd := exec.Command("ip", "tuntap", "add", name, "mode", "tap")
    if err := cmd.Run(); err != nil {
        return fmt.Errorf("'%s' failed: %v", cmd.String(), err)
    }

    // ip link set dev <tap> up
    cmd = exec.Command("ip", "link", "set", "dev", name, "up")
    if err := cmd.Run(); err != nil {
        destroyTapDevice(name)
        return fmt.Errorf("'%s' failed: %v", cmd.String(), err)
    }

    // ip link set dev <tap> master <bridge>
    cmd = exec.Command("ip", "link", "set", "dev", name, "master", bridge)
    if err := cmd.Run(); err != nil {
        destroyTapDevice(name)
        return fmt.Errorf("'%s' failed: %v", cmd.String(), err)
    }

    return nil
}

func getMacAddr(unikernel string, bridge string) string {
    // deterministic mac address computation
    // follow Albatross and use the VEB Kombinat Robotron prefix
    // (thanks again, friends!)
    ours := md5.Sum([]byte(bridge + unikernel))
    // %02x keeps every octet zero-padded to two hex digits
    return fmt.Sprintf(
        "%02x:%02x:%02x:%02x:%02x:%02x",
        0x00, 0x80, 0x41,
        ours[0], ours[1], ours[2],
    )
}
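
// Illustration only: the first three octets are the fixed 00:80:41 prefix and
// the last three come from md5(bridge + unikernel), so a given unikernel always
// gets the same address on a given bridge, e.g. something of the form
// "00:80:41:a3:7f:02" (the trailing bytes depend on the md5 digest).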

func nextIp(ip [2]int) ([2]int, error) {
    nip := [2]int{ip[0], ip[1]}
    // FIXME: x.x.255.255 is the broadcast address of the /16 and should be
    // excluded here
    if nip[1] < 255 {
        nip[1]++
        return nip, nil
    }
    nip[1] = 0
    nip[0]++

    if nip[0] < 256 {
        return nip, nil
    }
    return [2]int{}, fmt.Errorf("nextIp")
}
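
// Illustration only: nextIp walks the low two bytes of the address, so
// nextIp([2]int{0, 254}) yields {0, 255}, nextIp([2]int{0, 255}) wraps to
// {1, 0}, and nextIp([2]int{255, 255}) reports that the range is exhausted.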

func getFreshIp() (string, error) {
    // iptables -t nat -L MIRAGE_OUT
    // then grep for: XX.XX.XX.XX to find existing ips
    // (where the first two bytes correspond to the range of our bridge)
    cmd := exec.Command("iptables", "-t", "nat", "-L", iptablesChainOut())
    chainStr, err := cmd.Output()
    if err != nil {
        return "", fmt.Errorf("'%s' failed: %v", cmd.String(), err)
    }
    chainLines := strings.Split(string(chainStr), "\n")

    r, _ := regexp.Compile(
        strconv.Itoa(bridgeRangeHi) + "." +
            strconv.Itoa(bridgeRangeLo) + `.(\d+).(\d+)`)

    usedIps := make(map[[2]int]bool)
    // the first two lines of the listing are the chain header
    for i := 2; i < len(chainLines); i++ {
        line := chainLines[i]
        matches := r.FindStringSubmatch(line)
        if matches == nil {
            // no address from our range on this line
            continue
        }
        b1, _ := strconv.Atoi(matches[1])
        b2, _ := strconv.Atoi(matches[2])
        usedIps[[2]int{b1, b2}] = true
    }

    // XX.XX.0.1 is the gateway; start at XX.XX.0.2
    ip, err := [2]int{0, 2}, error(nil)
    for err == nil {
        if !usedIps[ip] {
            break
        }
        ip, err = nextIp(ip)
    }

    if err != nil {
        return "", fmt.Errorf("no available ip found")
    }
    return fmt.Sprintf("%d.%d.%d.%d", bridgeRangeHi, bridgeRangeLo, ip[0], ip[1]), nil
}

func getNetDevicesPorts(
    netDevicesNames []string,
    taskCfg TaskConfig,
    cfg *drivers.TaskConfig,
) map[string][]NetDevicePortConfig {
    ports := make(map[string][]NetDevicePortConfig)

    if len(netDevicesNames) == 0 {
        return ports
    }

    // TODO: extract port<->device assignation from taskCfg
    // for now, assume that this config is empty, and implement the default
    // strategy

    for _, name := range netDevicesNames {
        ports[name] = []NetDevicePortConfig{}
    }

    var defaultDevice string
    if _, found := ports["service"]; found {
        // use 'service' as the default device name when it exists
        defaultDevice = "service"
    } else {
        // otherwise use the first one in the list
        defaultDevice = netDevicesNames[0]
    }

    for _, portMapping := range *cfg.Resources.Ports {
        // only the default case for now
        ports[defaultDevice] = append(ports[defaultDevice],
            NetDevicePortConfig{
                From: portMapping.Value,
                To:   portMapping.To,
            })
    }

    return ports
}

// fields are "" if not specified
type NetDeviceArgs struct {
    Ip      string
    Gateway string
}

func getNetDevicesArgs(
    netDevicesNames []string,
    taskCfg TaskConfig,
) map[string]NetDeviceArgs {
    args := make(map[string]NetDeviceArgs)
    // TODO: extract the flag<->device assignation from taskCfg
    // for now, always assign --ipv4 to service and fail if there is another device
    for _, name := range netDevicesNames {
        if name == "service" {
            args["service"] = NetDeviceArgs{
                Ip:      "--ipv4",
                Gateway: "--ipv4-gateway",
            }
        }
    }
    return args
}

func rollbackIptablesRules(rules [][]string) {
    for j := len(rules) - 1; j >= 0; j-- {
        args := append([]string{"-t", "nat", "-D"}, rules[j]...)
        exec.Command("iptables", args...).Run()
    }
}

// run a list of iptables commands of the form "iptables -t nat -A ..." as a
// single transaction: if one rule fails, we roll back earlier rules before
// returning an error
func execIptablesRules(rules [][]string) error {
    for i, rule := range rules {
        args := append([]string{"-t", "nat", "-A"}, rule...)
        cmd := exec.Command("iptables", args...)
        if err := cmd.Run(); err != nil {
            rollbackIptablesRules(rules[0:i])
            return fmt.Errorf("'%s' failed: %v", cmd.String(), err)
        }
    }
    return nil
}

func netDeviceIptablesRules(ip string, ports []NetDevicePortConfig) [][]string {
    // iptables rules to talk to the outside and setup incoming port redirections
    rr := [][]string{}
    addrule := func(args ...string) { rr = append(rr, args) }

    // allow the ip to talk to the outside
    // iptables -t nat -A MIRAGE_OUT -s <ip>/32 -j MASQUERADE
    addrule(iptablesChainOut(), "-s", ip+"/32", "-j", "MASQUERADE")

    // NAT port redirections
    // iptables -t nat -A MIRAGE_IN ! -s <ip>/32 -p tcp -m tcp --dport 53
    //   -j DNAT --to-destination <ip>:53
    // iptables -t nat -A MIRAGE_IN ! -s <ip>/32 -p udp -m udp --dport 53
    //   -j DNAT --to-destination <ip>:53
    for _, port := range ports {
        addrule(iptablesChainIn(), "!", "-s", ip+"/32", "-p", "tcp",
            "--dport", strconv.Itoa(port.From), /* TODO: check */
            "-j", "DNAT", "--to-destination", ip+":"+strconv.Itoa(port.To))
        addrule(iptablesChainIn(), "!", "-s", ip+"/32", "-p", "udp",
            "--dport", strconv.Itoa(port.From), /* TODO: check from/to */
            "-j", "DNAT", "--to-destination", ip+":"+strconv.Itoa(port.To))
    }
    return rr
}
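
// Illustration only: for ip "10.99.0.2" and a single port mapping
// {From: 8080, To: 80} (values picked for the example), the rules built above
// correspond to:
//
//   iptables -t nat -A MIRAGE_OUT -s 10.99.0.2/32 -j MASQUERADE
//   iptables -t nat -A MIRAGE_IN ! -s 10.99.0.2/32 -p tcp --dport 8080 -j DNAT --to-destination 10.99.0.2:80
//   iptables -t nat -A MIRAGE_IN ! -s 10.99.0.2/32 -p udp --dport 8080 -j DNAT --to-destination 10.99.0.2:80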

func setupNetDevice(
    deviceName string,
    unikernel string,
    bridge string,
    ports []NetDevicePortConfig,
    logger hclog.Logger,
) (NetDeviceConfig, error) {
    // list existing tap interfaces, and get a fresh tap interface number
    tapDevice, err := getFreshTapDeviceName()
    if err != nil {
        return NetDeviceConfig{}, fmt.Errorf("could not get a fresh tap device name: %v", err)
    }
    // hclog expects key/value pairs after the message
    logger.Debug("got tap device name", "tap", tapDevice)

    // create new tap device
    if err := createTapDevice(tapDevice, bridge); err != nil {
        return NetDeviceConfig{}, fmt.Errorf("could not create tap device: %v", err)
    }
    logger.Debug("successfully created tap device")

    // get a mac address for the unikernel
    macAddr := getMacAddr(unikernel, bridge)
    logger.Debug("mac address", "mac", macAddr)

    // find an unused ip address
    ipAddr, err := getFreshIp()
    if err != nil {
        destroyTapDevice(tapDevice)
        return NetDeviceConfig{}, fmt.Errorf("could not get a fresh ip address: %v", err)
    }
    logger.Debug("got ip address", "ip", ipAddr)

    // execute iptables rules
    if err := execIptablesRules(netDeviceIptablesRules(ipAddr, ports)); err != nil {
        destroyTapDevice(tapDevice)
        return NetDeviceConfig{}, err
    }

    return NetDeviceConfig{
        TapDevice: tapDevice,
        Ip:        ipAddr,
        Mac:       macAddr,
        Ports:     ports,
    }, nil
}

func destroyNetDevice(device NetDeviceConfig) {
    rollbackIptablesRules(netDeviceIptablesRules(device.Ip, device.Ports))
    destroyTapDevice(device.TapDevice)
}

func destroyNetDevices(devices map[string]NetDeviceConfig) {
    for _, d := range devices {
        destroyNetDevice(d)
    }
}

func setupNetDevices(
    netDevicesPorts map[string][]NetDevicePortConfig,
    unikernel string,
    bridge string,
    logger hclog.Logger,
) (map[string]NetDeviceConfig, error) {
    netDevices := make(map[string]NetDeviceConfig)
    for deviceName, devicePorts := range netDevicesPorts {
        deviceCfg, err :=
            setupNetDevice(deviceName, unikernel, bridge, devicePorts, logger)
        if err != nil {
            destroyNetDevices(netDevices)
            return nil,
                fmt.Errorf("failed to setup net device %s: %v",
                    deviceName, err)
        }
        netDevices[deviceName] = deviceCfg
    }
    return netDevices, nil
}

// TODO: also return the list of block device names, when we handle block
// devices
func readUnikernelManifest(
    unikernel string,
    logger hclog.Logger,
) ([]string /* net devices */, error) {
    manifestCmd := exec.Command("solo5-elftool", "query-manifest", unikernel)
    manifestStr, manifestErr := manifestCmd.Output()
    if manifestErr != nil {
        return nil, fmt.Errorf("failed to query unikernel manifest: %v", manifestErr)
    }

    logger.Debug("unikernel manifest", "manifest", string(manifestStr))

    var manifest unikernelManifest
    if err := json.Unmarshal(manifestStr, &manifest); err != nil {
        return nil, fmt.Errorf("could not parse manifest: %v", err)
    }

    netDevicesNames := []string{}
    for _, d := range manifest.Devices {
        switch d.Type {
        case "NET_BASIC":
            netDevicesNames = append(netDevicesNames, d.Name)
        // case "BLOCK_BASIC":
        //     TODO
        default:
            return nil, fmt.Errorf("unhandled device type: %s", d.Type)
        }
    }
    return netDevicesNames, nil
}
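
// Illustration only: the manifest JSON is decoded into unikernelManifest
// above, so the output of `solo5-elftool query-manifest` is expected to look
// roughly like
//
//   {
//     "type": "solo5.manifest",
//     "version": 1,
//     "devices": [ { "name": "service", "type": "NET_BASIC" } ]
//   }
//
// (field values shown here are only an example).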

// StartTask returns a task handle and a driver network if necessary.
func (d *HelloDriverPlugin) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) {
    if _, ok := d.tasks.Get(cfg.ID); ok {
        return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID)
    }

    var driverConfig TaskConfig
    if err := cfg.DecodeDriverConfig(&driverConfig); err != nil {
        return nil, nil, fmt.Errorf("failed to decode driver config: %v", err)
    }

    d.logger.Info("starting task", "driver_cfg", hclog.Fmt("%+v", driverConfig))
    handle := drivers.NewTaskHandle(taskHandleVersion)
    handle.Config = cfg

    // TODO: implement driver specific mechanism to start the task.
    //
    // Once the task is started you will need to store any relevant runtime
    // information in a taskHandle and TaskState. The taskHandle will be
    // stored in-memory in the plugin and will be used to interact with the
    // task.
    //
    // The TaskState will be returned to the Nomad client inside a
    // drivers.TaskHandle instance. This TaskHandle will be sent back to plugin
    // if the task ever needs to be recovered, so the TaskState should contain
    // enough information to handle that.
    //
    // In the example below we use an executor to fork a process to run our
    // unikernel. The executor is then stored in the handle so we can access it
    // later and the plugin.Client is used to generate a reattach
    // configuration that can be used to recover communication with the task.
    executorConfig := &executor.ExecutorConfig{
        LogFile:  filepath.Join(cfg.TaskDir().Dir, "executor.out"),
        LogLevel: "debug",
    }

    logger := d.logger.With("task_name", handle.Config.Name, "alloc_id", handle.Config.AllocID)

    netDevicesNames, err := readUnikernelManifest(driverConfig.Unikernel, logger)
    if err != nil {
        return nil, nil, err
    }
    netDevicesPorts := getNetDevicesPorts(netDevicesNames, driverConfig, cfg)
    netDevicesArgs := getNetDevicesArgs(netDevicesNames, driverConfig)
    netDevices, err :=
        setupNetDevices(netDevicesPorts, driverConfig.Unikernel,
            d.config.Bridge, logger)
    if err != nil {
        return nil, nil, err
    }

    solo5Args := []string{}
    unikernelArgs := []string{}
    // --net:<service from manifest>=<tapDevice>
    // --net-mac:<service from manifest>=<macAddr>
    for name, device := range netDevices {
        solo5Args = append(solo5Args,
            fmt.Sprintf("--net:%s=%s", name, device.TapDevice))
        solo5Args = append(solo5Args,
            fmt.Sprintf("--net-mac:%s=%s", name, device.Mac))
    }
    // --ipv4=<ipAddr>/16 --ipv4-gateway=<gatewayIp()> (by default)
    // + user-specified equivalent arguments for additional net devices
    for name, arg := range netDevicesArgs {
        if arg.Ip != "" {
            unikernelArgs = append(unikernelArgs,
                fmt.Sprintf("%s=%s/16", arg.Ip, netDevices[name].Ip))
        }
        if arg.Gateway != "" {
            unikernelArgs = append(unikernelArgs,
                fmt.Sprintf("%s=%s", arg.Gateway, gatewayIp()))
        }
    }

    // TODO: collect additional arguments from the config file

    execu, pluginClient, err := executor.CreateExecutor(logger, d.nomadConfig, executorConfig)
    if err != nil {
        return nil, nil, fmt.Errorf("failed to create executor: %v", err)
    }

    cfgJson, _ := json.MarshalIndent(cfg, "", " ")
    // cfgStr := base64.StdEncoding.EncodeToString(cfgJson)
    logger.Debug("task config", "config", string(cfgJson))

    netCfgStr, _ := exec.Command("ip", "a").Output()
    logger.Debug("host network config", "config", string(netCfgStr))

    args := append(solo5Args, []string{"--", driverConfig.Unikernel}...)
    args = append(args, unikernelArgs...)

    logger.Debug("solo5 command", "command", exec.Command("solo5-hvt", args...).String())

    execCmd := &executor.ExecCommand{
        Cmd:        "solo5-hvt",
        Args:       args,
        StdoutPath: cfg.StdoutPath,
        StderrPath: cfg.StderrPath,
    }

    ps, err := execu.Launch(execCmd)
    if err != nil {
        pluginClient.Kill()
        return nil, nil, fmt.Errorf("failed to launch command with executor: %v", err)
    }

    h := &taskHandle{
        exec:         execu,
        pid:          ps.Pid,
        netDevices:   netDevices,
        pluginClient: pluginClient,
        taskConfig:   cfg,
        procState:    drivers.TaskStateRunning,
        startedAt:    time.Now().Round(time.Millisecond),
        logger:       d.logger,
    }

    driverState := TaskState{
        ReattachConfig: structs.ReattachConfigFromGoPlugin(pluginClient.ReattachConfig()),
        Pid:            ps.Pid,
        NetDevices:     netDevices,
        TaskConfig:     cfg,
        StartedAt:      h.startedAt,
    }

    if err := handle.SetDriverState(&driverState); err != nil {
        d.logger.Error("failed to start task, error setting driver state", "error", err)
        return nil, nil, fmt.Errorf("failed to set driver state: %v", err)
    }

    d.tasks.Set(cfg.ID, h)
    go h.run()
    return handle, nil, nil
}

// RecoverTask recreates the in-memory state of a task from a TaskHandle.
func (d *HelloDriverPlugin) RecoverTask(handle *drivers.TaskHandle) error {
    if handle == nil {
        return errors.New("error: handle cannot be nil")
    }

    if _, ok := d.tasks.Get(handle.Config.ID); ok {
        return nil
    }

    var taskState TaskState
    if err := handle.GetDriverState(&taskState); err != nil {
        return fmt.Errorf("failed to decode task state from handle: %v", err)
    }

    var driverConfig TaskConfig
    if err := taskState.TaskConfig.DecodeDriverConfig(&driverConfig); err != nil {
        return fmt.Errorf("failed to decode driver config: %v", err)
    }

    // TODO: implement driver specific logic to recover a task.
    //
    // Recovering a task involves recreating and storing a taskHandle as if the
    // task was just started.
    //
    // In the example below we use the executor to re-attach to the process
    // that was created when the task first started.
    plugRC, err := structs.ReattachConfigToGoPlugin(taskState.ReattachConfig)
    if err != nil {
        return fmt.Errorf("failed to build ReattachConfig from taskConfig state: %v", err)
    }

    execImpl, pluginClient, err := executor.ReattachToExecutor(plugRC, d.logger)
    if err != nil {
        return fmt.Errorf("failed to reattach to executor: %v", err)
    }

    h := &taskHandle{
        exec:         execImpl,
        pid:          taskState.Pid,
        netDevices:   taskState.NetDevices,
        pluginClient: pluginClient,
        taskConfig:   taskState.TaskConfig,
        procState:    drivers.TaskStateRunning,
        startedAt:    taskState.StartedAt,
        exitResult:   &drivers.ExitResult{},
    }

    d.tasks.Set(taskState.TaskConfig.ID, h)

    go h.run()
    return nil
}

// WaitTask returns a channel used to notify Nomad when a task exits.
func (d *HelloDriverPlugin) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) {
    handle, ok := d.tasks.Get(taskID)
    if !ok {
        return nil, drivers.ErrTaskNotFound
    }

    ch := make(chan *drivers.ExitResult)
    go d.handleWait(ctx, handle, ch)
    return ch, nil
}

func (d *HelloDriverPlugin) handleWait(ctx context.Context, handle *taskHandle, ch chan *drivers.ExitResult) {
    defer close(ch)
    var result *drivers.ExitResult

    // TODO: implement driver specific logic to notify Nomad the task has been
    // completed and what was the exit result.
    //
    // When a result is sent in the result channel Nomad will stop the task and
    // emit an event that an operator can use to get an insight on why the task
    // stopped.
    //
    // In the example below we block and wait until the executor finishes
    // running, at which point we send the exit code and signal in the result
    // channel.
    ps, err := handle.exec.Wait(ctx)
    if err != nil {
        result = &drivers.ExitResult{
            Err: fmt.Errorf("executor: error waiting on process: %v", err),
        }
    } else {
        result = &drivers.ExitResult{
            ExitCode: ps.ExitCode,
            Signal:   ps.Signal,
        }
    }

    for {
        select {
        case <-ctx.Done():
            return
        case <-d.ctx.Done():
            return
        case ch <- result:
        }
    }
}

// StopTask stops a running task with the given signal and within the timeout window.
func (d *HelloDriverPlugin) StopTask(taskID string, timeout time.Duration, signal string) error {
    handle, ok := d.tasks.Get(taskID)
    if !ok {
        return drivers.ErrTaskNotFound
    }

    // TODO: implement driver specific logic to stop a task.
    //
    // The StopTask function is expected to stop a running task by sending the
    // given signal to it. If the task does not stop during the given timeout,
    // the driver must forcefully kill the task.
    //
    // In the example below we let the executor handle the task shutdown
    // process for us, but you might need to customize this for your own
    // implementation.
    if err := handle.exec.Shutdown(signal, timeout); err != nil {
        if handle.pluginClient.Exited() {
            return nil
        }
        return fmt.Errorf("executor Shutdown failed: %v", err)
    }

    return nil
}

// DestroyTask cleans up and removes a task that has terminated.
func (d *HelloDriverPlugin) DestroyTask(taskID string, force bool) error {
    handle, ok := d.tasks.Get(taskID)
    if !ok {
        return drivers.ErrTaskNotFound
    }

    if handle.IsRunning() && !force {
        return errors.New("cannot destroy running task")
    }

    // TODO: implement driver specific logic to destroy a complete task.
    //
    // Destroying a task includes removing any resources used by task and any
    // local references in the plugin. If force is set to true the task should
    // be destroyed even if it's currently running.
    //
    // In the example below we use the executor to force shutdown the task
    // (timeout equals 0).
    if !handle.pluginClient.Exited() {
        if err := handle.exec.Shutdown("", 0); err != nil {
            handle.logger.Error("destroying executor failed", "err", err)
        }

        handle.pluginClient.Kill()
    }

    // remove the relevant iptables rules and the tap device
    destroyNetDevices(handle.netDevices)

    // TODO: destroy the bridge and the iptables chains if no one is using them
    // anymore

    d.tasks.Delete(taskID)
    return nil
}

// InspectTask returns detailed status information for the referenced taskID.
func (d *HelloDriverPlugin) InspectTask(taskID string) (*drivers.TaskStatus, error) {
    handle, ok := d.tasks.Get(taskID)
    if !ok {
        return nil, drivers.ErrTaskNotFound
    }

    return handle.TaskStatus(), nil
}

// TaskStats returns a channel which the driver should send stats to at the given interval.
func (d *HelloDriverPlugin) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) {
    handle, ok := d.tasks.Get(taskID)
    if !ok {
        return nil, drivers.ErrTaskNotFound
    }

    // TODO: implement driver specific logic to send task stats.
    //
    // This function returns a channel that Nomad will use to listen for task
    // stats (e.g., CPU and memory usage) in a given interval. It should send
    // stats until the context is canceled or the task stops running.
    //
    // In the example below we use the Stats function provided by the executor,
    // but you can build a set of functions similar to the fingerprint process.
    return handle.exec.Stats(ctx, interval)
}

// TaskEvents returns a channel that the plugin can use to emit task related events.
func (d *HelloDriverPlugin) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) {
    return d.eventer.TaskEvents(ctx)
}

// SignalTask forwards a signal to a task.
// This is an optional capability.
func (d *HelloDriverPlugin) SignalTask(taskID string, signal string) error {
    handle, ok := d.tasks.Get(taskID)
    if !ok {
        return drivers.ErrTaskNotFound
    }

    // TODO: implement driver specific signal handling logic.
    //
    // The given signal must be forwarded to the target taskID. If this plugin
    // doesn't support receiving signals (capability SendSignals is set to
    // false) you can just return nil.
    sig := os.Interrupt
    if s, ok := signals.SignalLookup[signal]; ok {
        sig = s
    } else {
        d.logger.Warn("unknown signal to send to task, using SIGINT instead", "signal", signal, "task_id", handle.taskConfig.ID)
    }
    return handle.exec.Signal(sig)
}

// ExecTask returns the result of executing the given command inside a task.
// This is an optional capability.
func (d *HelloDriverPlugin) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) {
    // TODO: implement driver specific logic to execute commands in a task.
    return nil, errors.New("this driver does not support exec")
}