盒子
盒子
文章目录
  1. 0x01 Struct Relation
  2. 0x02 Code Analyze
    1. 0x0A inet_init
    2. 0x0B Socket Create
    3. 0x0C Setsockopt Syscall
  3. 0x03 Summary

Linux Kernel源码分析 - socket create && setsockopt

0x01 Struct Relation

以tcp socket为例子,udp等都类似。并以系统调用setsockopt为入口做代码分析。

相关的结构体有这么几种(tcp_sock、inet_connection_sock、inet_sock、sock):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
/*
 * Outermost structure of the nesting chain discussed in this article
 * (abridged excerpt). Because inet_conn is the first member, a pointer to
 * a tcp_sock is also a valid pointer to its inet_connection_sock.
 */
struct tcp_sock {
/* inet_connection_sock has to be the first member of tcp_sock */
struct inet_connection_sock inet_conn;
u16 tcp_header_len; /* Bytes of tcp header to send */
u16 gso_segs; /* Max number of segs per GSO packet */
//...
};

/*
 * Layer that embeds an inet_sock as its first member (abridged excerpt;
 * the remaining members are elided at the "//..." marker).
 */
struct inet_connection_sock {
/* inet_sock has to be the first member! */
struct inet_sock icsk_inet;
struct request_sock_queue icsk_accept_queue;
struct inet_bind_bucket *icsk_bind_hash;
unsigned long icsk_timeout;
#define ICSK_CA_PRIV_SIZE (13 * sizeof(u64))
//...
};

/*
 * Layer that embeds the generic struct sock as its first member (abridged
 * excerpt). The inet_* macros below are shorthands for fields that live in
 * sk.__sk_common.
 */
struct inet_sock {
/* sk and pinet6 has to be the first two members of inet_sock */
struct sock sk;
#if IS_ENABLED(CONFIG_IPV6)
struct ipv6_pinfo *pinet6;
#endif
/* Socket demultiplex comparisons on incoming packets. */
#define inet_daddr sk.__sk_common.skc_daddr
#define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr
#define inet_dport sk.__sk_common.skc_dport
#define inet_num sk.__sk_common.skc_num
//...
};

/*
 * Generic socket object at the bottom of the nesting chain (abridged
 * excerpt). It starts with a sock_common; the sk_* macros are shorthands
 * for fields inside __sk_common. sk_socket points to the struct socket
 * associated with this sock.
 */
struct sock {
/*
* Now struct inet_timewait_sock also uses sock_common, so please just
* don't add nothing before this first member (__sk_common) --acme
*/
struct sock_common __sk_common;
#define sk_node __sk_common.skc_node
#define sk_nulls_node __sk_common.skc_nulls_node
#define sk_refcnt __sk_common.skc_refcnt
#define sk_tx_queue_mapping __sk_common.skc_tx_queue_mapping
#ifdef CONFIG_XPS
#define sk_rx_queue_mapping __sk_common.skc_rx_queue_mapping
#endif

struct socket *sk_socket;
//...
};

这里我要说一嘴,日常开发中socket结构体用的是最多的,但是在内核代码中,socket结构体没有上述的几个结构体重要,它更像是一个连接用户态和内核态的存在,处于中间地带,这里在后续的分析中会体现出来。

以上的代码基本就可以看出相互之间的父子关系了(tcp_sock -> inet_connection_sock -> inet_sock -> sock -> sock_common),如下图所示:

结构体关系图

有了以上的基础,再来继续看后续的代码。

0x02 Code Analyze

0x0A inet_init

该函数是注册内核网络中一系列结构体和回调函数的初始化函数,相当于网络模块的初始化函数。看如下代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
/*
 * inet_init - register the IPv4 socket infrastructure (abridged excerpt).
 *
 * Performs the three registrations this walkthrough follows:
 *   (1) proto_register()        - registers tcp_prot
 *   (2) sock_register()         - registers inet_family_ops
 *   (3) inet_register_protosw() - links every inetsw_array[] entry into
 *                                 the inetsw[] lists
 *
 * Returns 0 on success or the negative errno reported by proto_register().
 */
static int __init inet_init(void)
{
	struct inet_protosw *q;
	struct list_head *r;
	int rc = -EINVAL;

	sock_skb_cb_check_size(sizeof(struct inet_skb_parm));

	rc = proto_register(&tcp_prot, 1); // (1)
	if (rc)
		goto out;

	/*
	 * Tell SOCKET that we are alive...
	 */

	(void)sock_register(&inet_family_ops); // (2)

	/* Register the socket-side information for inet_create. */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q); // (3)

out:
	/* A non-void function must return a value on every path. */
	return rc;
}

// (1)
/*
 * proto_register - make a transport protocol known to the socket layer
 * (abridged excerpt).
 *
 * @prot:       protocol descriptor to register (tcp_prot in this article)
 * @alloc_slab: non-zero to create a dedicated slab cache for the
 *              protocol's sock objects
 *
 * Returns 0 on success, -ENOBUFS if the slab cache cannot be created, or
 * the error reported by assign_proto_idx().
 */
int proto_register(struct proto *prot, int alloc_slab)
{
	int ret = -ENOBUFS;

	if (alloc_slab) {
		/*
		 * Create the cache that sock objects of this protocol are
		 * later allocated from. For tcp_prot, obj_size is
		 * sizeof(struct tcp_sock), so every object handed out by the
		 * cache is large enough for the whole nested
		 * tcp_sock -> ... -> sock layout. No socket object is
		 * allocated at this point; only the cache is set up.
		 */
		prot->slab = kmem_cache_create_usercopy(prot->name,
					prot->obj_size, 0,
					SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
					prot->slab_flags,
					prot->useroffset, prot->usersize,
					NULL);
		if (!prot->slab)
			goto out;
	}

	/* Find a free index and store it in prot->inuse_idx. */
	ret = assign_proto_idx(prot);
	if (ret)
		goto out_free_slab;

	/* Link the protocol (tcp_prot here) into the global proto_list. */
	list_add(&prot->node, &proto_list);
	return ret;

out_free_slab:
	/* Do not leave a half-registered protocol holding a slab cache. */
	if (alloc_slab) {
		kmem_cache_destroy(prot->slab);
		prot->slab = NULL;
	}
out:
	return ret;
}

/*
 * Protocol descriptor for TCP (abridged excerpt). It is the object passed
 * to proto_register() and supplies the per-protocol callbacks and the
 * object size used for the slab cache.
 */
struct proto tcp_prot = {
.name = "TCP",
.owner = THIS_MODULE,
.close = tcp_close,
.pre_connect = tcp_v4_pre_connect,
.connect = tcp_v4_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
.init = tcp_v4_init_sock,
.sockets_allocated = &tcp_sockets_allocated,
.max_header = MAX_TCP_HEADER,
.obj_size = sizeof(struct tcp_sock),
.slab_flags = SLAB_TYPESAFE_BY_RCU,

#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_tcp_setsockopt, // set/getsockopt handlers used in x86 compat mode
.compat_getsockopt = compat_tcp_getsockopt,
#endif
};

// (2)
/*
 * sock_register - publish a protocol family in the global net_families[]
 * table (abridged excerpt).
 *
 * Returns 0 if the slot for ops->family was free and has been filled, or
 * -EEXIST if a handler was already registered for that family. The
 * pr_info() message is printed in both cases.
 */
int sock_register(const struct net_proto_family *ops)
{
int err;

spin_lock(&net_family_lock);
if (rcu_dereference_protected(net_families[ops->family],
lockdep_is_held(&net_family_lock))) // is a handler already registered for this family?
err = -EEXIST;
else {
rcu_assign_pointer(net_families[ops->family], ops); // store ops (inet_family_ops in this article) in the global net_families[] array
err = 0;
}
spin_unlock(&net_family_lock);

pr_info("NET: Registered protocol family %d\n", ops->family);
return err;
}

/*
 * Family descriptor for PF_INET. Its ->create hook is inet_create, which
 * is what __sock_create() invokes through pf->create.
 */
static const struct net_proto_family inet_family_ops = {
.family = PF_INET,
.create = inet_create,
.owner = THIS_MODULE,
};

// (3)
/*
 * inet_register_protosw - insert a protocol switch entry into inetsw[]
 * (abridged excerpt).
 *
 * @p: entry to register; p->type selects the list, p->protocol is the
 *     protocol number checked against existing permanent entries.
 *
 * The entry is inserted after the last permanent entry of its list. If a
 * permanent entry with the same protocol already exists, the request is
 * rejected with an error message and nothing is inserted.
 */
void inet_register_protosw(struct inet_protosw *p)
{
	struct list_head *lh;
	struct inet_protosw *answer;
	int protocol = p->protocol;
	struct list_head *last_perm;

	spin_lock_bh(&inetsw_lock);

	/* If we are trying to override a permanent protocol, bail. */
	last_perm = &inetsw[p->type]; /* inetsw is a global array of lists; take the head for this type */
	list_for_each(lh, &inetsw[p->type]) { /* walk the doubly linked list at that index */
		/* Recover the inet_protosw that contains this list node. */
		answer = list_entry(lh, struct inet_protosw, list);
		/* Check only the non-wild match. */
		if ((INET_PROTOSW_PERMANENT & answer->flags) == 0)
			break; /* first non-permanent entry: stop scanning */
		if (protocol == answer->protocol)
			goto out_permanent;
		last_perm = lh;
	}

	/* Link the new entry in right after the last permanent one. */
	list_add_rcu(&p->list, last_perm);
out:
	spin_unlock_bh(&inetsw_lock);

	return;

out_permanent:
	pr_err("Attempt to override permanent protocol %d\n", protocol);
	goto out;
}

/* Global array holding one list head per socket type (SOCK_MAX entries). */
static struct list_head inetsw[SOCK_MAX];
/* An array of doubly linked lists, roughly like this:
-------------------------------
| ----- ----- ----- |
-> | | -> | | -> | | --
| 0 | | | | |
-- | | <- | | <- | | <-
| ----- ----- ----- |
|_____________________________|


--------------------
| ----- ----- |
-> | | -> | | --
| 1 | | -|-----> (struct inet_protosw->list)
-- | | <- | | <-
| ----- ----- |
|___________________|
*/

/*
 * Static table of protocol switch entries that inet_init() feeds to
 * inet_register_protosw() (abridged excerpt; only the TCP entry is shown).
 */
static struct inet_protosw inetsw_array[] =
{
{
.type = SOCK_STREAM,
.protocol = IPPROTO_TCP,
.prot = &tcp_prot, // references tcp_prot
.ops = &inet_stream_ops, // the important socket-level callbacks
.flags = INET_PROTOSW_PERMANENT |
INET_PROTOSW_ICSK,
},
//...
};

/*
 * Socket-level operations table referenced by the SOCK_STREAM entry of
 * inetsw_array (abridged excerpt). The set/getsockopt entries are the
 * sock_common_* helpers and, under CONFIG_COMPAT, their compat variants.
 */
const struct proto_ops inet_stream_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
.release = inet_release,
.bind = inet_bind,
.connect = inet_stream_connect,
.setsockopt = sock_common_setsockopt,
.getsockopt = sock_common_getsockopt,
#ifdef CONFIG_MMU
.mmap = tcp_mmap,
#endif
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
.compat_ioctl = inet_compat_ioctl,
#endif
};

inet_init函数简单来说就是注册了三种结构体(tcp_prot、inet_family_ops、inetsw_array),如下图所示:

inet_init

本文主要分析setsockopt系统调用,setsockopt是需要传入一个socket参数的,因此还需要分析socket的起源。

0x0B Socket Create

先上整体代码的关键部分流程图,后续审计代码可以结合该图一起看:

socket_create代码流程图

调用流程为net/socket.c: SYSCALL_DEFINE3 -> __sys_socket -> sock_create,看如下代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
int __sys_socket(int family, int type, int protocol)
{
int retval;
struct socket *sock;
int flags;

retval = sock_create(family, type, protocol, &sock); // (1)

return sock_map_fd(sock, flags & (O_CLOEXEC | O_NONBLOCK)); // 通过初始化好的socket来申请相应的file结构体,并互相索引,最终file绑定fd(文件描述符)
}

/*
 * sock_create - thin wrapper around __sock_create() that supplies the
 * calling task's network namespace and passes 0 for the kern argument.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
}

/*
 * __sock_create - allocate a struct socket and let the address family
 * initialise it (abridged excerpt).
 *
 * On return *res holds the new socket.
 *
 * NOTE(review): this excerpt does not check the results of sock_alloc(),
 * rcu_dereference() or pf->create(), and always returns 0; the error
 * handling appears to have been elided for brevity.
 */
int __sock_create(struct net *net, int family, int type, int protocol,
struct socket **res, int kern)
{
int err;
struct socket *sock;
const struct net_proto_family *pf;

sock = sock_alloc(); // allocate a struct socket (per the article, it is obtained from an inode via container_of)

sock->type = type;


if (rcu_access_pointer(net_families[family]) == NULL) // no handler registered for this family in net_families[] yet
request_module("net-pf-%d", family);

rcu_read_lock();
pf = rcu_dereference(net_families[family]); // fetch the net_proto_family for this family; for PF_INET this should be inet_family_ops
err = -EAFNOSUPPORT;

rcu_read_unlock();

err = pf->create(net, sock, protocol, kern); // for PF_INET this calls inet_family_ops.create, i.e. inet_create

*res = sock;

return 0;
}

上面的代码中pf->create这一块比较关键,主要是创建sock相关的结构体并初始化,看如下代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/*
 * inet_create - create and initialise the sock behind a PF_INET socket
 * (abridged excerpt).
 *
 * Looks up the inet_protosw entry matching sock->type and protocol,
 * installs its ops on the socket, allocates the sock with the entry's
 * proto, initialises the inet_sock fields and finally calls the
 * protocol's ->init hook.
 *
 * Returns 0 on success, -ENOBUFS if sk_alloc() fails, or the result of
 * the ->init hook.
 *
 * NOTE(review): in this excerpt the result of the lookup loop is
 * overwritten without being checked, and lookup_protocol and
 * try_loading_module are unused; the corresponding error handling appears
 * to have been elided for brevity.
 */
static int inet_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
struct sock *sk;
struct inet_protosw *answer;
struct inet_sock *inet;
struct proto *answer_prot;
unsigned char answer_flags;
int try_loading_module = 0;
int err;

sock->state = SS_UNCONNECTED;

/* Look for the requested type/protocol pair. */
lookup_protocol:
err = -ESOCKTNOSUPPORT;
rcu_read_lock();
list_for_each_entry_rcu(answer, &inetsw[sock->type], list) { // walk the inet_protosw entries for this socket type (the inetsw_array entries registered earlier); SOCK_STREAM is the case of interest

err = 0;
/* Check the non-wild match. */
if (protocol == answer->protocol) {
if (protocol != IPPROTO_IP)
break;
} else {
/* Check for the two wild cases. */
if (IPPROTO_IP == protocol) {
protocol = answer->protocol;
break;
}
if (IPPROTO_IP == answer->protocol)
break;
}
err = -EPROTONOSUPPORT;
}

err = -EPERM;

sock->ops = answer->ops; // for the TCP entry this installs inet_stream_ops on the socket
answer_prot = answer->prot;
answer_flags = answer->flags;
rcu_read_unlock();

WARN_ON(!answer_prot->slab);

err = -ENOBUFS;
sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern); // allocate the sock; per the article the key effect is sk->sk_prot = answer_prot (tcp_prot here)
if (!sk)
goto out;

err = 0;

inet = inet_sk(sk); // view the sock as an inet_sock
inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
inet->nodefrag = 0;
inet->inet_id = 0;

sock_init_data(sock, sk); // important: per the article, initialises the fields of sk (the sock), copies some values from sock (the socket) into sk, and makes the sock and the socket point at each other

inet->uc_ttl = -1;
inet->mc_loop = 1;
inet->mc_ttl = 1;

if (sk->sk_prot->init) {
err = sk->sk_prot->init(sk); // for tcp_prot, ->init is tcp_v4_init_sock
}

out:
return err;
}

/*
 * tcp_v4_init_sock - the ->init hook of tcp_prot (abridged excerpt).
 *
 * Runs tcp_init_sock() and installs the IPv4 address-family operations
 * on the connection sock. Always returns 0.
 */
static int tcp_v4_init_sock(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk); // view the sock as an inet_connection_sock

tcp_init_sock(sk);

icsk->icsk_af_ops = &ipv4_specific; // install the table of address-family callbacks

#ifdef CONFIG_TCP_MD5SIG
tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
#endif

return 0;
}

/*
 * IPv4 address-family operations installed by tcp_v4_init_sock() as
 * icsk->icsk_af_ops (abridged excerpt). The set/getsockopt entries are
 * the IP-level handlers and, under CONFIG_COMPAT, their compat variants.
 */
const struct inet_connection_sock_af_ops ipv4_specific = {
.net_header_len = sizeof(struct iphdr),
.setsockopt = ip_setsockopt,
.getsockopt = ip_getsockopt,
.sockaddr_len = sizeof(struct sockaddr_in),
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
#endif
};

sock_init_data中可以看出用户态赋值给socket结构体的属性值,又悄悄地转移给了sock,最终用户态数据的体现显而易见的是sock结构体,该结构体也是后续与其他关键结构体交互的关键(inet_sock等),因此我会说socket更像是连接用户和内核的连接体。

inet_create申请完sock结构体并初始化属性值,又从sock转变为inet_sock初始化属性值,再从sock转变为inet_connection_sock初始化属性值,最终还从sock转变为tcp_sock初始化属性值。基本上可以得出:inet_create函数将从tcp_sock到sock这一整条由父到子的结构体链都做了一遍初始化(结构体属性值赋值、回调函数赋值),以供后续的操作使用。这里的后续操作指的便是setsockopt了。

0x0C Setsockopt Syscall

这里以兼容模式(32位下)举例,setsockopt系统调用的流程为net/compat.c:COMPAT_SYSCALL_DEFINE5 -> __compat_sys_setsockopt,代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
/*
 * Compat (32-bit) entry point of the setsockopt system call; it forwards
 * all five arguments unchanged to __compat_sys_setsockopt().
 */
COMPAT_SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
char __user *, optval, unsigned int, optlen)
{
return __compat_sys_setsockopt(fd, level, optname, optval, optlen);
}

/*
 * __compat_sys_setsockopt - dispatch a compat setsockopt() call
 * (abridged excerpt).
 *
 * @fd:      file descriptor of the socket
 * @level:   option level (SOL_SOCKET, SOL_IP, ...)
 * @optname: option name
 * @optval:  user-space pointer to the option value
 * @optlen:  length of the option value
 *
 * Returns the handler's result, or the error stored by sockfd_lookup()
 * when fd does not resolve to a socket.
 */
static int __compat_sys_setsockopt(int fd, int level, int optname,
				   char __user *optval, unsigned int optlen)
{
	int err;
	struct socket *sock;

	/* Resolve fd to its file and from the file to the struct socket. */
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		if (level == SOL_SOCKET)
			err = compat_sock_setsockopt(sock, level,
					optname, optval, optlen); // (1)
		else if (sock->ops->compat_setsockopt)
			err = sock->ops->compat_setsockopt(sock, level,
					optname, optval, optlen); // (2)
		else
			err = sock->ops->setsockopt(sock, level,
					optname, optval, optlen); // (3)
		sockfd_put(sock);
	}
	return err;
}

这里假设用户态的代码为setsockopt(fd, SOL_IP, IPT_SO_SET_REPLACE, &data, sizeof(data));,由于level不是SOL_SOCKET,且sock->ops->compat_setsockopt不为空,上面的代码就走向了(2)。根据前面分析的代码,sock->ops已经被赋值为inet_stream_ops,后续的调用链如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
/*
 * Recap of inet_stream_ops, reduced to the compat sockopt handlers that
 * matter for this call path: sock->ops->compat_setsockopt resolves to
 * compat_sock_common_setsockopt.
 */
const struct proto_ops inet_stream_ops = {
.family = PF_INET,
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_sock_common_setsockopt,
.compat_getsockopt = compat_sock_common_getsockopt,
#endif
};

/*
 * Recap of ipv4_specific, reduced to the compat sockopt handlers that
 * matter for this call path: icsk_af_ops->compat_setsockopt resolves to
 * compat_ip_setsockopt.
 */
const struct inet_connection_sock_af_ops ipv4_specific = {
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
#endif
};

/*
 * compat_sock_common_setsockopt - socket-level compat setsockopt handler
 * that forwards to the protocol (sk->sk_prot).
 *
 * Uses the protocol's compat handler when it provides one and falls back
 * to its regular setsockopt handler otherwise, so that every path returns
 * a value.
 */
int compat_sock_common_setsockopt(struct socket *sock, int level, int optname,
				  char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk; /* get the sock behind the socket */

	/* For a TCP socket sk_prot is tcp_prot. */
	if (sk->sk_prot->compat_setsockopt != NULL)
		return sk->sk_prot->compat_setsockopt(sk, level, optname,
						      optval, optlen);
	return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen);
}

/*
 * compat_tcp_setsockopt - the compat setsockopt handler of tcp_prot.
 *
 * Options that are not at SOL_TCP level are passed on to the connection
 * sock layer; SOL_TCP options are handled by do_tcp_setsockopt().
 */
int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
			  char __user *optval, unsigned int optlen)
{
	if (level != SOL_TCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_tcp_setsockopt(sk, level, optname, optval, optlen);
}

/*
 * inet_csk_compat_setsockopt - forward a compat setsockopt request to the
 * address-family operations of the connection sock.
 *
 * Uses the compat handler when the address family provides one and falls
 * back to its regular setsockopt handler otherwise, so that every path
 * returns a value.
 */
int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
			       char __user *optval, unsigned int optlen)
{
	/* View the sock as an inet_connection_sock. */
	const struct inet_connection_sock *icsk = inet_csk(sk);

	/* For an IPv4 TCP socket icsk_af_ops is ipv4_specific. */
	if (icsk->icsk_af_ops->compat_setsockopt)
		return icsk->icsk_af_ops->compat_setsockopt(sk, level, optname,
							    optval, optlen);
	return icsk->icsk_af_ops->setsockopt(sk, level, optname,
					     optval, optlen);
}

/*
 * compat_ip_setsockopt - IP-level compat setsockopt handler (abridged
 * excerpt).
 *
 * Tries do_ip_setsockopt() first. Under CONFIG_NETFILTER, if that reports
 * -ENOPROTOOPT and the option is not one of the excluded IP options, the
 * request is handed to the netfilter sockopt layer.
 */
int compat_ip_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen)
{
int err;

err = do_ip_setsockopt(sk, level, optname, optval, optlen);
#ifdef CONFIG_NETFILTER
if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
optname != IP_IPSEC_POLICY &&
optname != IP_XFRM_POLICY &&
!ip_mroute_opt(optname))
err = compat_nf_setsockopt(sk, PF_INET, optname, optval,
optlen); // the call this walkthrough follows next
#endif
return err;
}

/*
 * compat_nf_setsockopt - "set" wrapper around compat_nf_sockopt(); passes
 * the address of len and 0 for the get argument.
 */
int compat_nf_setsockopt(struct sock *sk, u_int8_t pf,
int val, char __user *opt, unsigned int len)
{
return compat_nf_sockopt(sk, pf, val, opt, &len, 0);
}

/*
 * compat_nf_sockopt - common back end of the compat netfilter get/set
 * sockopt calls.
 *
 * @sk:  socket the option is applied to
 * @pf:  protocol family
 * @val: option number
 * @opt: user-space option buffer
 * @len: pointer to the option length
 * @get: non-zero for getsockopt, zero for setsockopt
 *
 * Returns the handler's result, or the error encoded in the pointer
 * returned by nf_sockopt_find() when no matching handler is found.
 */
static int compat_nf_sockopt(struct sock *sk, u_int8_t pf, int val,
			     char __user *opt, int *len, int get)
{
	struct nf_sockopt_ops *ops;
	int ret;

	/*
	 * Per the article: searches the global nf_sockopts list for the
	 * callback table matching this request; the get/set callback of
	 * that table is then invoked below.
	 */
	ops = nf_sockopt_find(sk, pf, val, get);
	if (IS_ERR(ops))
		return PTR_ERR(ops);

	/* Prefer the compat callback, fall back to the native one. */
	if (get) {
		if (ops->compat_get)
			ret = ops->compat_get(sk, val, opt, len);
		else
			ret = ops->get(sk, val, opt, len);
	} else {
		if (ops->compat_set)
			ret = ops->compat_set(sk, val, opt, *len);
		else
			ret = ops->set(sk, val, opt, *len);
	}

	module_put(ops->owner);
	return ret;
}

这一部分主要结合前面的socket create代码部分一起,梳理清楚整体函数调用流程。具体详细的代码分析这里就略过了,不再细说。

socket关键属性值

0x03 Summary

简单说一下,其实从上面socket create到setsockopt前期阶段的这一部分就可以看出来,为什么在用户态编写c程序的时候,需要先创建socket后使用setsockopt,并且在setsockopt的参数中需要有socket的描述符:因为在setsockopt的代码中出现的一些回调函数都是在socket create过程中创建并关联起来的,有一个先来后到的顺序。在漏洞利用的过程当中其实就是个逆推写代码的过程,通过分析内核代码,去推导出poc的c程序该怎么构造。

其次,不单是socket这一块的内核源码,别的模块的源码也是同样的错综复杂的。结构体、回调函数、父子继承等等都是有许多关联性的,而且都分散在各个代码文件中,需要仔细捋清楚,这是需要耐心和细心的。即使再复杂的代码也一定能读明白。

最后,代码审计还是有一些小技巧的,例如看代码要抓关键点,不全看,也就是说代码块的阅读的详细程度需要把控好。其次是代码和原理结合起来一起看,先理清代码程序执行流程,再结合着理论原理一起看,基本都能够理解清楚代码块的含义…

支持一下
扫一扫,支持v1nke
  • 微信扫一扫
  • 支付宝扫一扫