github.com/nicocha30/gvisor-ligolo@v0.0.0-20230726075806-989fa2c0a413/pkg/sentry/fsimpl/proc/tasks_sys.go (about) 1 // Copyright 2019 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package proc 16 17 import ( 18 "bytes" 19 "fmt" 20 "math" 21 22 "github.com/nicocha30/gvisor-ligolo/pkg/abi/linux" 23 "github.com/nicocha30/gvisor-ligolo/pkg/context" 24 "github.com/nicocha30/gvisor-ligolo/pkg/errors/linuxerr" 25 "github.com/nicocha30/gvisor-ligolo/pkg/hostarch" 26 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/fsimpl/kernfs" 27 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/inet" 28 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel" 29 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/kernel/auth" 30 "github.com/nicocha30/gvisor-ligolo/pkg/sentry/vfs" 31 "github.com/nicocha30/gvisor-ligolo/pkg/sync" 32 "github.com/nicocha30/gvisor-ligolo/pkg/tcpip/network/ipv4" 33 "github.com/nicocha30/gvisor-ligolo/pkg/usermem" 34 ) 35 36 // +stateify savable 37 type tcpMemDir int 38 39 const ( 40 tcpRMem tcpMemDir = iota 41 tcpWMem 42 ) 43 44 // newSysDir returns the dentry corresponding to /proc/sys directory. 45 func (fs *filesystem) newSysDir(ctx context.Context, root *auth.Credentials, k *kernel.Kernel) kernfs.Inode { 46 return fs.newStaticDir(ctx, root, map[string]kernfs.Inode{ 47 "kernel": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{ 48 "hostname": fs.newInode(ctx, root, 0444, &hostnameData{}), 49 "sem": fs.newInode(ctx, root, 0444, newStaticFile(fmt.Sprintf("%d\t%d\t%d\t%d\n", linux.SEMMSL, linux.SEMMNS, linux.SEMOPM, linux.SEMMNI))), 50 "shmall": fs.newInode(ctx, root, 0444, ipcData(linux.SHMALL)), 51 "shmmax": fs.newInode(ctx, root, 0444, ipcData(linux.SHMMAX)), 52 "shmmni": fs.newInode(ctx, root, 0444, ipcData(linux.SHMMNI)), 53 "msgmni": fs.newInode(ctx, root, 0444, ipcData(linux.MSGMNI)), 54 "msgmax": fs.newInode(ctx, root, 0444, ipcData(linux.MSGMAX)), 55 "msgmnb": fs.newInode(ctx, root, 0444, ipcData(linux.MSGMNB)), 56 "yama": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{ 57 "ptrace_scope": fs.newYAMAPtraceScopeFile(ctx, k, root), 58 }), 59 }), 60 "vm": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{ 61 "max_map_count": fs.newInode(ctx, root, 0444, newStaticFile("2147483647\n")), 62 "mmap_min_addr": fs.newInode(ctx, root, 0444, &mmapMinAddrData{k: k}), 63 "overcommit_memory": fs.newInode(ctx, root, 0444, newStaticFile("0\n")), 64 }), 65 "net": fs.newSysNetDir(ctx, root, k), 66 }) 67 } 68 69 // newSysNetDir returns the dentry corresponding to /proc/sys/net directory. 70 func (fs *filesystem) newSysNetDir(ctx context.Context, root *auth.Credentials, k *kernel.Kernel) kernfs.Inode { 71 var contents map[string]kernfs.Inode 72 73 // TODO(gvisor.dev/issue/1833): Support for using the network stack in the 74 // network namespace of the calling process. 75 if stack := k.RootNetworkNamespace().Stack(); stack != nil { 76 contents = map[string]kernfs.Inode{ 77 "ipv4": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{ 78 "ip_forward": fs.newInode(ctx, root, 0444, &ipForwarding{stack: stack}), 79 "ip_local_port_range": fs.newInode(ctx, root, 0644, &portRange{stack: stack}), 80 "tcp_recovery": fs.newInode(ctx, root, 0644, &tcpRecoveryData{stack: stack}), 81 "tcp_rmem": fs.newInode(ctx, root, 0644, &tcpMemData{stack: stack, dir: tcpRMem}), 82 "tcp_sack": fs.newInode(ctx, root, 0644, &tcpSackData{stack: stack}), 83 "tcp_wmem": fs.newInode(ctx, root, 0644, &tcpMemData{stack: stack, dir: tcpWMem}), 84 85 // The following files are simple stubs until they are implemented in 86 // netstack, most of these files are configuration related. We use the 87 // value closest to the actual netstack behavior or any empty file, all 88 // of these files will have mode 0444 (read-only for all users). 89 "ip_local_reserved_ports": fs.newInode(ctx, root, 0444, newStaticFile("")), 90 "ipfrag_time": fs.newInode(ctx, root, 0444, newStaticFile("30")), 91 "ip_nonlocal_bind": fs.newInode(ctx, root, 0444, newStaticFile("0")), 92 "ip_no_pmtu_disc": fs.newInode(ctx, root, 0444, newStaticFile("1")), 93 94 // tcp_allowed_congestion_control tell the user what they are able to 95 // do as an unprivledged process so we leave it empty. 96 "tcp_allowed_congestion_control": fs.newInode(ctx, root, 0444, newStaticFile("")), 97 "tcp_available_congestion_control": fs.newInode(ctx, root, 0444, newStaticFile("reno")), 98 "tcp_congestion_control": fs.newInode(ctx, root, 0444, newStaticFile("reno")), 99 100 // Many of the following stub files are features netstack doesn't 101 // support. The unsupported features return "0" to indicate they are 102 // disabled. 103 "tcp_base_mss": fs.newInode(ctx, root, 0444, newStaticFile("1280")), 104 "tcp_dsack": fs.newInode(ctx, root, 0444, newStaticFile("0")), 105 "tcp_early_retrans": fs.newInode(ctx, root, 0444, newStaticFile("0")), 106 "tcp_fack": fs.newInode(ctx, root, 0444, newStaticFile("0")), 107 "tcp_fastopen": fs.newInode(ctx, root, 0444, newStaticFile("0")), 108 "tcp_fastopen_key": fs.newInode(ctx, root, 0444, newStaticFile("")), 109 "tcp_invalid_ratelimit": fs.newInode(ctx, root, 0444, newStaticFile("0")), 110 "tcp_keepalive_intvl": fs.newInode(ctx, root, 0444, newStaticFile("0")), 111 "tcp_keepalive_probes": fs.newInode(ctx, root, 0444, newStaticFile("0")), 112 "tcp_keepalive_time": fs.newInode(ctx, root, 0444, newStaticFile("7200")), 113 "tcp_mtu_probing": fs.newInode(ctx, root, 0444, newStaticFile("0")), 114 "tcp_no_metrics_save": fs.newInode(ctx, root, 0444, newStaticFile("1")), 115 "tcp_probe_interval": fs.newInode(ctx, root, 0444, newStaticFile("0")), 116 "tcp_probe_threshold": fs.newInode(ctx, root, 0444, newStaticFile("0")), 117 "tcp_retries1": fs.newInode(ctx, root, 0444, newStaticFile("3")), 118 "tcp_retries2": fs.newInode(ctx, root, 0444, newStaticFile("15")), 119 "tcp_rfc1337": fs.newInode(ctx, root, 0444, newStaticFile("1")), 120 "tcp_slow_start_after_idle": fs.newInode(ctx, root, 0444, newStaticFile("1")), 121 "tcp_synack_retries": fs.newInode(ctx, root, 0444, newStaticFile("5")), 122 "tcp_syn_retries": fs.newInode(ctx, root, 0444, newStaticFile("3")), 123 "tcp_timestamps": fs.newInode(ctx, root, 0444, newStaticFile("1")), 124 }), 125 "core": fs.newStaticDir(ctx, root, map[string]kernfs.Inode{ 126 "default_qdisc": fs.newInode(ctx, root, 0444, newStaticFile("pfifo_fast")), 127 "message_burst": fs.newInode(ctx, root, 0444, newStaticFile("10")), 128 "message_cost": fs.newInode(ctx, root, 0444, newStaticFile("5")), 129 "optmem_max": fs.newInode(ctx, root, 0444, newStaticFile("0")), 130 "rmem_default": fs.newInode(ctx, root, 0444, newStaticFile("212992")), 131 "rmem_max": fs.newInode(ctx, root, 0444, newStaticFile("212992")), 132 "somaxconn": fs.newInode(ctx, root, 0444, newStaticFile("128")), 133 "wmem_default": fs.newInode(ctx, root, 0444, newStaticFile("212992")), 134 "wmem_max": fs.newInode(ctx, root, 0444, newStaticFile("212992")), 135 }), 136 } 137 } 138 139 return fs.newStaticDir(ctx, root, contents) 140 } 141 142 // mmapMinAddrData implements vfs.DynamicBytesSource for 143 // /proc/sys/vm/mmap_min_addr. 144 // 145 // +stateify savable 146 type mmapMinAddrData struct { 147 kernfs.DynamicBytesFile 148 149 k *kernel.Kernel 150 } 151 152 var _ dynamicInode = (*mmapMinAddrData)(nil) 153 154 // Generate implements vfs.DynamicBytesSource.Generate. 155 func (d *mmapMinAddrData) Generate(ctx context.Context, buf *bytes.Buffer) error { 156 fmt.Fprintf(buf, "%d\n", d.k.Platform.MinUserAddress()) 157 return nil 158 } 159 160 // hostnameData implements vfs.DynamicBytesSource for /proc/sys/kernel/hostname. 161 // 162 // +stateify savable 163 type hostnameData struct { 164 kernfs.DynamicBytesFile 165 } 166 167 var _ dynamicInode = (*hostnameData)(nil) 168 169 // Generate implements vfs.DynamicBytesSource.Generate. 170 func (*hostnameData) Generate(ctx context.Context, buf *bytes.Buffer) error { 171 utsns := kernel.UTSNamespaceFromContext(ctx) 172 buf.WriteString(utsns.HostName()) 173 buf.WriteString("\n") 174 return nil 175 } 176 177 // tcpSackData implements vfs.WritableDynamicBytesSource for 178 // /proc/sys/net/tcp_sack. 179 // 180 // +stateify savable 181 type tcpSackData struct { 182 kernfs.DynamicBytesFile 183 184 stack inet.Stack `state:"wait"` 185 enabled *bool 186 } 187 188 var _ vfs.WritableDynamicBytesSource = (*tcpSackData)(nil) 189 190 // Generate implements vfs.DynamicBytesSource.Generate. 191 func (d *tcpSackData) Generate(ctx context.Context, buf *bytes.Buffer) error { 192 if d.enabled == nil { 193 sack, err := d.stack.TCPSACKEnabled() 194 if err != nil { 195 return err 196 } 197 d.enabled = &sack 198 } 199 200 val := "0\n" 201 if *d.enabled { 202 // Technically, this is not quite compatible with Linux. Linux stores these 203 // as an integer, so if you write "2" into tcp_sack, you should get 2 back. 204 // Tough luck. 205 val = "1\n" 206 } 207 _, err := buf.WriteString(val) 208 return err 209 } 210 211 // Write implements vfs.WritableDynamicBytesSource.Write. 212 func (d *tcpSackData) Write(ctx context.Context, _ *vfs.FileDescription, src usermem.IOSequence, offset int64) (int64, error) { 213 if offset != 0 { 214 // No need to handle partial writes thus far. 215 return 0, linuxerr.EINVAL 216 } 217 if src.NumBytes() == 0 { 218 return 0, nil 219 } 220 221 // Limit the amount of memory allocated. 222 src = src.TakeFirst(hostarch.PageSize - 1) 223 224 var v int32 225 n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) 226 if err != nil { 227 return 0, err 228 } 229 if d.enabled == nil { 230 d.enabled = new(bool) 231 } 232 *d.enabled = v != 0 233 return n, d.stack.SetTCPSACKEnabled(*d.enabled) 234 } 235 236 // tcpRecoveryData implements vfs.WritableDynamicBytesSource for 237 // /proc/sys/net/ipv4/tcp_recovery. 238 // 239 // +stateify savable 240 type tcpRecoveryData struct { 241 kernfs.DynamicBytesFile 242 243 stack inet.Stack `state:"wait"` 244 } 245 246 var _ vfs.WritableDynamicBytesSource = (*tcpRecoveryData)(nil) 247 248 // Generate implements vfs.DynamicBytesSource.Generate. 249 func (d *tcpRecoveryData) Generate(ctx context.Context, buf *bytes.Buffer) error { 250 recovery, err := d.stack.TCPRecovery() 251 if err != nil { 252 return err 253 } 254 255 _, err = buf.WriteString(fmt.Sprintf("%d\n", recovery)) 256 return err 257 } 258 259 // Write implements vfs.WritableDynamicBytesSource.Write. 260 func (d *tcpRecoveryData) Write(ctx context.Context, _ *vfs.FileDescription, src usermem.IOSequence, offset int64) (int64, error) { 261 if offset != 0 { 262 // No need to handle partial writes thus far. 263 return 0, linuxerr.EINVAL 264 } 265 if src.NumBytes() == 0 { 266 return 0, nil 267 } 268 269 // Limit the amount of memory allocated. 270 src = src.TakeFirst(hostarch.PageSize - 1) 271 272 var v int32 273 n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) 274 if err != nil { 275 return 0, err 276 } 277 if err := d.stack.SetTCPRecovery(inet.TCPLossRecovery(v)); err != nil { 278 return 0, err 279 } 280 return n, nil 281 } 282 283 // tcpMemData implements vfs.WritableDynamicBytesSource for 284 // /proc/sys/net/ipv4/tcp_rmem and /proc/sys/net/ipv4/tcp_wmem. 285 // 286 // +stateify savable 287 type tcpMemData struct { 288 kernfs.DynamicBytesFile 289 290 dir tcpMemDir 291 stack inet.Stack `state:"wait"` 292 293 // mu protects against concurrent reads/writes to FDs based on the dentry 294 // backing this byte source. 295 mu sync.Mutex `state:"nosave"` 296 } 297 298 var _ vfs.WritableDynamicBytesSource = (*tcpMemData)(nil) 299 300 // Generate implements vfs.DynamicBytesSource.Generate. 301 func (d *tcpMemData) Generate(ctx context.Context, buf *bytes.Buffer) error { 302 d.mu.Lock() 303 defer d.mu.Unlock() 304 305 size, err := d.readSizeLocked() 306 if err != nil { 307 return err 308 } 309 _, err = buf.WriteString(fmt.Sprintf("%d\t%d\t%d\n", size.Min, size.Default, size.Max)) 310 return err 311 } 312 313 // Write implements vfs.WritableDynamicBytesSource.Write. 314 func (d *tcpMemData) Write(ctx context.Context, _ *vfs.FileDescription, src usermem.IOSequence, offset int64) (int64, error) { 315 if offset != 0 { 316 // No need to handle partial writes thus far. 317 return 0, linuxerr.EINVAL 318 } 319 if src.NumBytes() == 0 { 320 return 0, nil 321 } 322 d.mu.Lock() 323 defer d.mu.Unlock() 324 325 // Limit the amount of memory allocated. 326 src = src.TakeFirst(hostarch.PageSize - 1) 327 size, err := d.readSizeLocked() 328 if err != nil { 329 return 0, err 330 } 331 buf := []int32{int32(size.Min), int32(size.Default), int32(size.Max)} 332 n, err := usermem.CopyInt32StringsInVec(ctx, src.IO, src.Addrs, buf, src.Opts) 333 if err != nil { 334 return 0, err 335 } 336 newSize := inet.TCPBufferSize{ 337 Min: int(buf[0]), 338 Default: int(buf[1]), 339 Max: int(buf[2]), 340 } 341 if err := d.writeSizeLocked(newSize); err != nil { 342 return 0, err 343 } 344 return n, nil 345 } 346 347 // Precondition: d.mu must be locked. 348 func (d *tcpMemData) readSizeLocked() (inet.TCPBufferSize, error) { 349 switch d.dir { 350 case tcpRMem: 351 return d.stack.TCPReceiveBufferSize() 352 case tcpWMem: 353 return d.stack.TCPSendBufferSize() 354 default: 355 panic(fmt.Sprintf("unknown tcpMemFile type: %v", d.dir)) 356 } 357 } 358 359 // Precondition: d.mu must be locked. 360 func (d *tcpMemData) writeSizeLocked(size inet.TCPBufferSize) error { 361 switch d.dir { 362 case tcpRMem: 363 return d.stack.SetTCPReceiveBufferSize(size) 364 case tcpWMem: 365 return d.stack.SetTCPSendBufferSize(size) 366 default: 367 panic(fmt.Sprintf("unknown tcpMemFile type: %v", d.dir)) 368 } 369 } 370 371 // ipForwarding implements vfs.WritableDynamicBytesSource for 372 // /proc/sys/net/ipv4/ip_forward. 373 // 374 // +stateify savable 375 type ipForwarding struct { 376 kernfs.DynamicBytesFile 377 378 stack inet.Stack `state:"wait"` 379 enabled bool 380 } 381 382 var _ vfs.WritableDynamicBytesSource = (*ipForwarding)(nil) 383 384 // Generate implements vfs.DynamicBytesSource.Generate. 385 func (ipf *ipForwarding) Generate(ctx context.Context, buf *bytes.Buffer) error { 386 val := "0\n" 387 if ipf.enabled { 388 // Technically, this is not quite compatible with Linux. Linux stores these 389 // as an integer, so if you write "2" into tcp_sack, you should get 2 back. 390 // Tough luck. 391 val = "1\n" 392 } 393 buf.WriteString(val) 394 395 return nil 396 } 397 398 // Write implements vfs.WritableDynamicBytesSource.Write. 399 func (ipf *ipForwarding) Write(ctx context.Context, _ *vfs.FileDescription, src usermem.IOSequence, offset int64) (int64, error) { 400 if offset != 0 { 401 // No need to handle partial writes thus far. 402 return 0, linuxerr.EINVAL 403 } 404 if src.NumBytes() == 0 { 405 return 0, nil 406 } 407 408 // Limit input size so as not to impact performance if input size is large. 409 src = src.TakeFirst(hostarch.PageSize - 1) 410 411 var v int32 412 n, err := usermem.CopyInt32StringInVec(ctx, src.IO, src.Addrs, &v, src.Opts) 413 if err != nil { 414 return 0, err 415 } 416 ipf.enabled = v != 0 417 if err := ipf.stack.SetForwarding(ipv4.ProtocolNumber, ipf.enabled); err != nil { 418 return 0, err 419 } 420 return n, nil 421 } 422 423 // portRange implements vfs.WritableDynamicBytesSource for 424 // /proc/sys/net/ipv4/ip_local_port_range. 425 // 426 // +stateify savable 427 type portRange struct { 428 kernfs.DynamicBytesFile 429 430 stack inet.Stack `state:"wait"` 431 432 // start and end store the port range. We must save/restore this here, 433 // since a netstack instance is created on restore. 434 start *uint16 435 end *uint16 436 } 437 438 var _ vfs.WritableDynamicBytesSource = (*portRange)(nil) 439 440 // Generate implements vfs.DynamicBytesSource.Generate. 441 func (pr *portRange) Generate(ctx context.Context, buf *bytes.Buffer) error { 442 if pr.start == nil { 443 start, end := pr.stack.PortRange() 444 pr.start = &start 445 pr.end = &end 446 } 447 _, err := fmt.Fprintf(buf, "%d %d\n", *pr.start, *pr.end) 448 return err 449 } 450 451 // Write implements vfs.WritableDynamicBytesSource.Write. 452 func (pr *portRange) Write(ctx context.Context, _ *vfs.FileDescription, src usermem.IOSequence, offset int64) (int64, error) { 453 if offset != 0 { 454 // No need to handle partial writes thus far. 455 return 0, linuxerr.EINVAL 456 } 457 if src.NumBytes() == 0 { 458 return 0, nil 459 } 460 461 // Limit input size so as not to impact performance if input size is 462 // large. 463 src = src.TakeFirst(hostarch.PageSize - 1) 464 465 ports := make([]int32, 2) 466 n, err := usermem.CopyInt32StringsInVec(ctx, src.IO, src.Addrs, ports, src.Opts) 467 if err != nil { 468 return 0, err 469 } 470 471 // Port numbers must be uint16s. 472 if ports[0] < 0 || ports[1] < 0 || ports[0] > math.MaxUint16 || ports[1] > math.MaxUint16 { 473 return 0, linuxerr.EINVAL 474 } 475 476 if err := pr.stack.SetPortRange(uint16(ports[0]), uint16(ports[1])); err != nil { 477 return 0, err 478 } 479 if pr.start == nil { 480 pr.start = new(uint16) 481 pr.end = new(uint16) 482 } 483 *pr.start = uint16(ports[0]) 484 *pr.end = uint16(ports[1]) 485 return n, nil 486 }