1. 准备工作
1.1 准备环境
root@nicktming:~/go/src/github.com/nicktming/mydocker# git clone https://github.com/nicktming/mydocker.git
root@nicktming:~/go/src/github.com/nicktming/mydocker# git checkout code-5.3
root@nicktming:~/go/src/github.com/nicktming/mydocker# git checkout -b dev-5.4
1.2 准备busybox.tar
// 前提条件
root@nicktming:/nicktming# pwd
/nicktming
root@nicktming:/nicktming# ls
busybox.tar
1.3 预期效果
root@nicktming:~/go/src/github.com/nicktming/mydocker# git clone https://github.com/nicktming/mydocker.git
root@nicktming:~/go/src/github.com/nicktming/mydocker# git checkout code-5.4
root@nicktming:~/go/src/github.com/nicktming/mydocker# go build .
root@nicktming:~/go/src/github.com/nicktming/mydocker# ./mydocker run -d /bin/top
root@nicktming:~/go/src/github.com/nicktming/mydocker# ./mydocker ps
missing mydocker_pid env skip nsenter
ID NAME PID STATUS COMMAND CREATED
15552033304408860601 15552033304408860601 21338 running /bin/top 2019-04-14 08:55:30
root@nicktming:~/go/src/github.com/nicktming/mydocker# ps -ef | grep 21338
root 21338 1 0 08:55 pts/3 00:00:00 /bin/top
root 31541 29996 0 10:43 pts/4 00:00:00 grep --color=auto 21338
root@nicktming:~/go/src/github.com/nicktming/mydocker# ./mydocker exec 15552033304408860601 /bin/sh
missing mydocker_pid env skip nsenter
2019/04/14 10:41:44 containerName:15552033304408860601,command:/bin/sh
got mydocker_pid=21338
got mydocker_cmd=/bin/sh
setns on ipc namespace succeeded
setns on uts namespace succeeded
setns on net namespace succeeded
setns on pid namespace succeeded
setns on mnt namespace succeeded
/ # ps -l
PID USER TIME COMMAND
1 root 0:00 /bin/top
7 root 0:00 /bin/sh
8 root 0:00 ps -l
/ # ps -ef
PID USER TIME COMMAND
1 root 0:00 /bin/top
7 root 0:00 /bin/sh
9 root 0:00 ps -ef
/ # exit
root@nicktming:~/go/src/github.com/nicktming/mydocker#
2. cgo
cgo 允许直接在 Go 源代码里写 C 代码: 把 C 代码写在 Go 文件的注释中, 并在注释之后紧接着加上 import "C" 即可.
例子1
在
test/cgo/test-1.go
测试代码如下:
package main
/*
int add(int a, int b) {
return a + b;
}
*/
import "C"
import "fmt"
func main() {
a := C.int(1)
b := C.int(2)
value := C.add(a, b)
fmt.Printf("value:%d\n", int(value))
}
运行结果:
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# go run test-1.go
value:3
例子2
__attribute__ constructor/destructor
若函数被设定为constructor
属性,则该函数会在main
函数执行之前被自动的执行.拥有此类属性的函数经常隐式的用在程序的初始化数据方面.
package main
/*
#include<stdio.h>
__attribute__((constructor)) void before_main() {
printf("before main\n");
}
*/
import "C"
import "log"
func main() {
log.Printf("hello world!")
}
执行如下:
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# go run test-8.go
before main
2019/04/14 09:33:43 hello world!
3. 命令实现
setns 是一个系统调用, 可以根据提供的 PID 再次进入到指定的 Namespace 中. 它需要先打开 /proc/[pid]/ns/ 文件夹下对应的文件, 然后使当前进程进入到指定的 Namespace 中. 系统调用描述非常简单, 但是有一点对于 Go 来说很麻烦: 对于 Mount Namespace 来说, 一个具有多线程的进程是无法使用 setns 调用进入到对应的命名空间的. 而 Go 每启动一个程序就会进入多线程状态, 因此无法简单地在 Go 里面直接调用系统调用使当前进程进入对应的 Mount Namespace, 这里需要借助 C 来实现这个功能. (来自《自己动手写Docker》)
3.1 创建一个namespace
隔离的进程
test/cgo/test-3.go
如下:
// main starts /bin/sh in new UTS, PID, mount, network and IPC namespaces,
// attaches it to the current process's standard streams, and waits for it
// to exit. Any error from running the shell is fatal.
func main() {
	const isolation = syscall.CLONE_NEWUTS |
		syscall.CLONE_NEWPID |
		syscall.CLONE_NEWNS |
		syscall.CLONE_NEWNET |
		syscall.CLONE_NEWIPC

	shell := exec.Command("/bin/sh")
	shell.SysProcAttr = &syscall.SysProcAttr{Cloneflags: isolation}
	shell.Stdin, shell.Stdout, shell.Stderr = os.Stdin, os.Stdout, os.Stderr

	err := shell.Run()
	if err != nil {
		log.Fatal(err)
	}
}
运行如下:
---------------------------------------------------terminal 01---------------------------------------------------
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# pwd
/root/go/src/github.com/nicktming/mydocker/test/cgo
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# ls
test-1.go test-2.go test-3.go
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# echo $$
18438
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# ls -l /proc/18438/ns
total 0
lrwxrwxrwx 1 root root 0 Apr 13 15:43 ipc -> ipc:[4026531839]
lrwxrwxrwx 1 root root 0 Apr 13 15:43 mnt -> mnt:[4026531840]
lrwxrwxrwx 1 root root 0 Apr 13 15:43 net -> net:[4026531956]
lrwxrwxrwx 1 root root 0 Apr 13 15:43 pid -> pid:[4026531836]
lrwxrwxrwx 1 root root 0 Apr 13 15:43 user -> user:[4026531837]
lrwxrwxrwx 1 root root 0 Apr 13 15:43 uts -> uts:[4026531838]
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# go run test-3.go
# echo $$
1
# ps -l
F S UID PID PPID C PRI NI ADDR SZ WCHAN TTY TIME CMD
4 S 0 18438 18403 0 80 0 - 5361 wait pts/4 00:00:00 bash
4 S 0 21994 18438 0 80 0 - 62084 wait pts/4 00:00:00 go
4 S 0 22013 21994 0 80 0 - 808 wait pts/4 00:00:00 test-3
0 S 0 22017 22013 0 80 0 - 1111 wait pts/4 00:00:00 sh
0 R 0 22033 22017 0 80 0 - 2185 - pts/4 00:00:00 ps
# ls -l /proc/22017/ns
total 0
lrwxrwxrwx 1 root root 0 Apr 13 15:44 ipc -> ipc:[4026532167]
lrwxrwxrwx 1 root root 0 Apr 13 15:44 mnt -> mnt:[4026532165]
lrwxrwxrwx 1 root root 0 Apr 13 15:44 net -> net:[4026532170]
lrwxrwxrwx 1 root root 0 Apr 13 15:44 pid -> pid:[4026532168]
lrwxrwxrwx 1 root root 0 Apr 13 15:44 user -> user:[4026531837]
lrwxrwxrwx 1 root root 0 Apr 13 15:44 uts -> uts:[4026532166]
可以看到只有 user namespace 与宿主 shell 相同, 其余 namespace 都已经不同. 接下来测试一下是否可以进入到该进程的 namespace.
3.2 进入到隔离容器
---------------------------------------------------terminal 02---------------------------------------------------
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# export mydocker_pid=22017
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# go run test-2.go
setns on ipc namespace succeeded
setns on uts namespace succeeded
setns on net namespace succeeded
setns on pid namespace succeeded
setns on mnt namespace failed: Invalid argument
# ls -l /proc/self/ns
total 0
lrwxrwxrwx 1 root root 0 Apr 13 15:48 ipc -> ipc:[4026532167]
lrwxrwxrwx 1 root root 0 Apr 13 15:48 mnt -> mnt:[4026531840]
lrwxrwxrwx 1 root root 0 Apr 13 15:48 net -> net:[4026532170]
lrwxrwxrwx 1 root root 0 Apr 13 15:48 pid -> pid:[4026532168]
lrwxrwxrwx 1 root root 0 Apr 13 15:48 user -> user:[4026531837]
lrwxrwxrwx 1 root root 0 Apr 13 15:48 uts -> uts:[4026532166]
#
可以看到除了 mnt 外, 其余 namespace 都已进入 22017 进程对应的 namespace. mnt 失败的原因即第 3 节开头所述: Go 程序启动后就是多线程的, 而多线程进程无法通过 setns 进入 Mount Namespace, 因此后面改用 C 代码来实现.
3.3 利用c代码直接测试
--------------------------------------terminal 01-----------------------------------
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# echo $$
17464
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# ls -l /proc/17464/ns
total 0
lrwxrwxrwx 1 root root 0 Apr 13 20:42 ipc -> ipc:[4026531839]
lrwxrwxrwx 1 root root 0 Apr 13 20:42 mnt -> mnt:[4026531840]
lrwxrwxrwx 1 root root 0 Apr 13 20:42 net -> net:[4026531956]
lrwxrwxrwx 1 root root 0 Apr 13 20:42 pid -> pid:[4026531836]
lrwxrwxrwx 1 root root 0 Apr 13 20:42 user -> user:[4026531837]
lrwxrwxrwx 1 root root 0 Apr 13 20:42 uts -> uts:[4026531838]
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# go run test-3.go
# echo $$
1
# ps -l
F S UID PID PPID C PRI NI ADDR SZ WCHAN TTY TIME CMD
4 S 0 17464 17428 0 80 0 - 5368 wait pts/1 00:00:00 bash
4 S 0 18982 17464 0 80 0 - 29316 futex_ pts/1 00:00:00 go
4 S 0 19001 18982 0 80 0 - 808 wait pts/1 00:00:00 test-3
0 S 0 19005 19001 0 80 0 - 1111 wait pts/1 00:00:00 sh
0 R 0 19014 19005 0 80 0 - 2185 - pts/1 00:00:00 ps
# ls -l /proc/self/ns
total 0
lrwxrwxrwx 1 root root 0 Apr 13 20:43 ipc -> ipc:[4026532283]
lrwxrwxrwx 1 root root 0 Apr 13 20:43 mnt -> mnt:[4026532281]
lrwxrwxrwx 1 root root 0 Apr 13 20:43 net -> net:[4026532286]
lrwxrwxrwx 1 root root 0 Apr 13 20:43 pid -> pid:[4026532284]
lrwxrwxrwx 1 root root 0 Apr 13 20:43 user -> user:[4026531837]
lrwxrwxrwx 1 root root 0 Apr 13 20:43 uts -> uts:[4026532282]
可以看到除了
user
其余的namespace
都不同.
// Needed so that <sched.h> declares setns(); must precede every #include.
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/*
 * Joins the ipc, uts, net, pid and mnt namespaces of the process whose PID
 * is given in the mydocker_pid environment variable, then starts /bin/sh.
 *
 * A namespace that cannot be opened or joined is reported on stderr and
 * skipped; the shell is still started afterwards.
 *
 * Returns 0 after the shell exits, or 1 if mydocker_pid is not set.
 */
int main(void) {
	static const char *const namespaces[] = { "ipc", "uts", "net", "pid", "mnt" };
	const size_t count = sizeof(namespaces) / sizeof(namespaces[0]);
	char nspath[1024];
	size_t i;

	const char *mydocker_pid = getenv("mydocker_pid");
	if (mydocker_pid == NULL) {
		// Passing NULL to %s is undefined behaviour, so bail out early.
		fprintf(stderr, "mydocker_pid is not set\n");
		return 1;
	}
	fprintf(stdout, "mydocker_pid: %s\n", mydocker_pid);

	for (i = 0; i < count; i++) {
		// snprintf bounds the write even for an oversized PID string.
		snprintf(nspath, sizeof(nspath), "/proc/%s/ns/%s", mydocker_pid, namespaces[i]);

		int fd = open(nspath, O_RDONLY);
		if (fd == -1) {
			fprintf(stderr, "open %s failed: %s\n", nspath, strerror(errno));
			continue;
		}

		if (setns(fd, 0) == -1) {
			fprintf(stderr, "setns on %s namespace failed: %s\n", namespaces[i], strerror(errno));
		} else {
			fprintf(stdout, "setns on %s namespace succeeded\n", namespaces[i]);
		}
		close(fd);
	}

	// Emit our own messages before the shell starts writing to the same stream.
	fflush(stdout);
	(void)system("/bin/sh");
	return 0;
}
编译测试:
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# export mydocker_pid=19005
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# gcc -o test-7 test-7.c
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# ./test-7
mydocker_pid: 19005
setns on ipc namespace succeeded
setns on uts namespace succeeded
setns on net namespace succeeded
setns on pid namespace succeeded
setns on mnt namespace succeeded
3.4 用cgo实现
增加
test/ccode/test.go
文件
package ccode
/*
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
__attribute__((constructor)) void enter_namespace(void) {
char *mydocker_pid;
mydocker_pid = getenv("mydocker_pid");
int i;
char nspath[1024];
char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt" };
for (i=0; i<5; i++) {
sprintf(nspath, "/proc/%s/ns/%s", mydocker_pid, namespaces[i]);
int fd = open(nspath, O_RDONLY);
if (setns(fd, 0) == -1) {
fprintf(stderr, "setns on %s namespace failed: %s\n", namespaces[i], strerror(errno));
} else {
fprintf(stdout, "setns on %s namespace succeeded\n", namespaces[i]);
}
close(fd);
}
int res = system("/bin/sh");
exit(0);
return;
}
*/
import "C"
接着
test/cgo/test-6.go
中
package main
import (
"log"
_ "github.com/nicktming/mydocker/test/ccode"
)
func main() {
log.Printf("hello world!")
}
测试如下:
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# export mydocker_pid=19005
root@nicktming:~/go/src/github.com/nicktming/mydocker/test/cgo# go run test-6.go
setns on ipc namespace succeeded
setns on uts namespace succeeded
setns on net namespace succeeded
setns on pid namespace succeeded
setns on mnt namespace succeeded
# exit
4. 实现
在理解了上面的内容后, 实现就会变得比较简单一些.
1. 根据容器名去
/var/run/mydocker/容器名/config.json
中寻找pid
.
2. 从命令行中获取exec
需要执行的命令command
.
3. 执行C
代码进入pid
中的namespace
并且调用command
执行. 最终command
fork
出来的进程会与pid
拥有一样的namespace
(mnt ipc uts net pid
)
4.1 增加C 代码
新增
nsenter/nsenter.go
如下:
package nsenter
/*
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
__attribute__((constructor)) void enter_namespace(void) {
char *mydocker_pid;
mydocker_pid = getenv("mydocker_pid");
if (mydocker_pid) {
fprintf(stdout, "got mydocker_pid=%s\n", mydocker_pid);
} else {
fprintf(stdout, "missing mydocker_pid env skip nsenter");
return;
}
char *mydocker_cmd;
mydocker_cmd = getenv("mydocker_cmd");
if (mydocker_cmd) {
fprintf(stdout, "got mydocker_cmd=%s\n", mydocker_cmd);
} else {
fprintf(stdout, "missing mydocker_cmd env skip nsenter");
return;
}
int i;
char nspath[1024];
char *namespaces[] = { "ipc", "uts", "net", "pid", "mnt" };
for (i=0; i<5; i++) {
sprintf(nspath, "/proc/%s/ns/%s", mydocker_pid, namespaces[i]);
int fd = open(nspath, O_RDONLY);
if (setns(fd, 0) == -1) {
fprintf(stderr, "setns on %s namespace failed: %s\n", namespaces[i], strerror(errno));
} else {
fprintf(stdout, "setns on %s namespace succeeded\n", namespaces[i]);
}
close(fd);
}
int res = system(mydocker_cmd);
exit(0);
return;
}
*/
import "C"
4.2 增加exec
命令
在
command/command.go
中增加该命令.
// ExecCommand implements `mydocker exec <container name> <command>`.
//
// The action is entered twice per user invocation: once in the process the
// user started, which looks up the container and re-executes the binary via
// Exec, and once in that re-executed process, where mydocker_pid is already
// set and there is nothing left to do.
var ExecCommand = cli.Command{
	Name: "exec",
	Action: func(c *cli.Context) error {
		// Re-executed process: the environment is already prepared, so do
		// not look up the container or re-execute again.
		if os.Getenv("mydocker_pid") != "" {
			log.Printf("pid callback pid %d", os.Getpid())
			return nil
		}

		// Both the container name and the command are required; without
		// them Exec would be called with empty strings.
		if c.NArg() < 2 {
			log.Printf("usage: mydocker exec <container name> <command>")
			return nil
		}

		containerName := c.Args().Get(0)
		command := c.Args().Get(1)
		log.Printf("containerName:%s,command:%s\n", containerName, command)
		Exec(containerName, command)
		return nil
	},
}
新增加
command/exec.go
并添加函数:
package command

import (
	"fmt"
	"os"
	"os/exec"
)

// Exec runs command inside the namespaces of the container containerName.
//
// It looks up the container's PID with GetContainerInfo, exports it together
// with the command as the mydocker_pid and mydocker_cmd environment
// variables, and re-executes the current binary (/proc/self/exe exec) with
// the caller's standard streams attached. The child inherits the two
// variables from this process's environment.
//
// Failures are reported on stderr; the function returns nothing.
func Exec(containerName, command string) {
	containerInfo, err := GetContainerInfo(containerName)
	if err != nil {
		fmt.Fprintf(os.Stderr, "GetContainerInfo error:%v\n", err)
		return
	}

	if err := os.Setenv("mydocker_pid", containerInfo.Pid); err != nil {
		fmt.Fprintf(os.Stderr, "Exec container %s error %v\n", containerName, err)
		return
	}
	if err := os.Setenv("mydocker_cmd", command); err != nil {
		fmt.Fprintf(os.Stderr, "Exec container %s error %v\n", containerName, err)
		return
	}

	cmd := exec.Command("/proc/self/exe", "exec")
	cmd.Stdin = os.Stdin
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr

	if err := cmd.Run(); err != nil {
		fmt.Fprintf(os.Stderr, "Exec container %s error %v\n", containerName, err)
	}
}
4.3 修改main方法
在
main
方法中加入了ExecCommand
命令并且引入_ "github.com/nicktming/mydocker/nsenter"
, 这样每次运行main
方法的时候都需要运行那段C
代码.
package main
import (
...
// Blank import: the package is pulled in only for its side effect, so that
// its C constructor is linked into the binary and runs on every invocation.
_ "github.com/nicktming/mydocker/nsenter"
)
func main() {
...
app.Commands = []cli.Command{
...
// Registers the exec subcommand alongside the existing commands.
command.ExecCommand,
}
...
}
4.4 测试
root@nicktming:~/go/src/github.com/nicktming/mydocker# ./mydocker run -d /bin/top
missing mydocker_pid env skip nsenter
2019/04/14 08:55:30 rootPath:
2019/04/14 08:55:30 rootPath is empaty, set cmd.Dir by default: /nicktming/mnt
2019/04/14 08:55:30 containerId:15552033304408860601
2019/04/14 08:55:30 jsonInfo:{"pid":"21338","id":"15552033304408860601","name":"15552033304408860601","command":"/bin/top","createTime":"2019-04-14 08:55:30","status":"running"}
2019/04/14 08:55:30 remove /nicktming/writerLayer, err:remove /nicktming/writerLayer: device or resource busy
root@nicktming:~/go/src/github.com/nicktming/mydocker# ./mydocker ps
missing mydocker_pid env skip nsenter
ID NAME PID STATUS COMMAND CREATED
15552033304408860601 15552033304408860601 21338 running /bin/top 2019-04-14 08:55:30
root@nicktming:~/go/src/github.com/nicktming/mydocker# ./mydocker exec 15552033304408860601 /bin/sh
missing mydocker_pid env skip nsenter
2019/04/14 08:55:52 containerName:15552033304408860601,command:/bin/sh
got mydocker_pid=21338
got mydocker_cmd=/bin/sh
setns on ipc namespace succeeded
setns on uts namespace succeeded
setns on net namespace succeeded
setns on pid namespace succeeded
setns on mnt namespace succeeded
/ # ps -l
PID USER TIME COMMAND
1 root 0:00 /bin/top
4 root 0:00 /bin/sh
5 root 0:00 ps -l
/ # ps -ef
PID USER TIME COMMAND
1 root 0:00 /bin/top
4 root 0:00 /bin/sh
6 root 0:00 ps -ef
/ # exit
root@nicktming:~/go/src/github.com/nicktming/mydocker#
可以看到每次执行命令时都会先执行那段 C 代码, 这样就比较好理解为什么在 Exec 方法中需要通过 cmd := exec.Command("/proc/self/exe", "exec") 再次执行自身: 第一次执行 exec 命令时, C 代码先于 Go 代码运行, 此时环境变量尚未设置, 所以 C 代码直接跳过, Go 代码只负责设置环境变量; 第二次执行 exec 命令时环境变量已经设置好, C 代码会进入容器的 namespace 并通过 system(mydocker_cmd) 执行用户命令. 也就是说第二次的 exec 只是为了执行 C 代码, ExecCommand 中 Action 的后续逻辑无须再执行, 因此通过判断环境变量是否已设置 (if os.Getenv("mydocker_pid") != "") 直接返回即可.
6. 时序图
enter-namesapce.png
7. 参考
1. https://www.cnblogs.com/alantu2018/p/8465919.html
2. 自己动手写docker.(基本参考此书,加入一些自己的理解,加深对docker
的理解)
8. 全部内容
mydocker.png
1. [mydocker]---环境说明
2. [mydocker]---urfave cli 理解
3. [mydocker]---Linux Namespace
4. [mydocker]---Linux Cgroup
5. [mydocker]---构造容器01-实现run命令
6. [mydocker]---构造容器02-实现资源限制01
7. [mydocker]---构造容器02-实现资源限制02
8. [mydocker]---构造容器03-实现增加管道
9. [mydocker]---通过例子理解存储驱动AUFS
10. [mydocker]---通过例子理解chroot 和 pivot_root
11. [mydocker]---一步步实现使用busybox创建容器
12. [mydocker]---一步步实现使用AUFS包装busybox
13. [mydocker]---一步步实现volume操作
14. [mydocker]---实现保存镜像
15. [mydocker]---实现容器的后台运行
16. [mydocker]---实现查看运行中容器
17. [mydocker]---实现查看容器日志
18. [mydocker]---实现进入容器Namespace
19. [mydocker]---实现停止容器
20. [mydocker]---实现删除容器
21. [mydocker]---实现容器层隔离
22. [mydocker]---实现通过容器制作镜像
23. [mydocker]---实现cp操作
24. [mydocker]---实现容器指定环境变量
25. [mydocker]---网际协议IP
26. [mydocker]---网络虚拟设备veth bridge iptables
27. [mydocker]---docker的四种网络模型与原理实现(1)
28. [mydocker]---docker的四种网络模型与原理实现(2)
29. [mydocker]---容器地址分配
30. [mydocker]---网络net/netlink api 使用解析
31. [mydocker]---网络实现
32. [mydocker]---网络实现测试