// github.com/m3db/m3@v1.5.0/src/x/mmap/mmap_linux.go
//
// Copyright (c) 2017 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
20 21 package mmap 22 23 import ( 24 "fmt" 25 "syscall" 26 ) 27 28 // Fd mmaps a file 29 func Fd(fd, offset, length int64, opts Options) (Descriptor, error) { 30 // MAP_PRIVATE because we only want to ever mmap immutable things and we don't 31 // ever want to propagate writes back to the underlying file 32 // Set HugeTLB to disabled because its not supported for files 33 opts.HugeTLB.Enabled = false 34 return mmap(fd, offset, length, syscall.MAP_PRIVATE, opts) 35 } 36 37 // Bytes requests a private (non-shared) region of anonymous (not backed by a file) memory from the O.S 38 func Bytes(length int64, opts Options) (Descriptor, error) { 39 // offset is 0 because we're not indexing into a file 40 // fd is -1 and MAP_ANON because we're asking for an anonymous region of memory not tied to a file 41 // MAP_PRIVATE because we don't plan on sharing this region of memory with other processes 42 return mmap(-1, 0, length, syscall.MAP_ANON|syscall.MAP_PRIVATE, opts) 43 } 44 45 func mmap(fd, offset, length int64, flags int, opts Options) (Descriptor, error) { 46 if length == 0 { 47 // Return an empty slice (but not nil so callers who 48 // use nil to mean something special like not initialized 49 // get back an actual ref) 50 return Descriptor{ 51 Bytes: make([]byte, 0), 52 }, nil 53 } 54 55 var prot int 56 if opts.Read { 57 prot = prot | syscall.PROT_READ 58 } 59 if opts.Write { 60 prot = prot | syscall.PROT_WRITE 61 } 62 63 flagsWithoutHugeTLB := flags 64 shouldUseHugeTLB := opts.HugeTLB.Enabled && length >= opts.HugeTLB.Threshold 65 if shouldUseHugeTLB { 66 // We use the MAP_HUGETLB flag instead of MADV_HUGEPAGE because transparent 67 // hugepages only work with anonymous, private pages. Please see the MADV_HUGEPAGE 68 // section of http://man7.org/linux/man-pages/man2/madvise.2.html and the MAP_HUGETLB 69 // section of http://man7.org/linux/man-pages/man2/mmap.2.html for more details. 
70 flags = flags | syscall.MAP_HUGETLB 71 } 72 73 var ( 74 b []byte 75 err error 76 warning error 77 ) 78 b, err = syscall.Mmap(int(fd), offset, int(length), prot, flags) 79 // Sometimes allocations that specify huge pages will fail because the O.S 80 // isn't configured properly or there are not enough available huge pages in 81 // the pool. You can try and allocate more by executing: 82 // echo 20 > /proc/sys/vm/nr_hugepages 83 // See this document for more details: https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt 84 // Regardless, we don't want to fail hard in that scenario. Instead, we try 85 // and mmap without the hugeTLB flag. 86 if err != nil && shouldUseHugeTLB { 87 // In case we succeed the second time, make sure we can propagate the previous 88 // error back to the caller as a warning 89 warning = fmt.Errorf( 90 "error while trying to mmap with hugeTLB flag: %s, hugeTLB disabled", err.Error()) 91 b, err = syscall.Mmap(int(fd), offset, int(length), prot, flagsWithoutHugeTLB) 92 } 93 if err != nil { 94 return Descriptor{}, fmt.Errorf("mmap error: %v", err) 95 } 96 97 if reporter := opts.ReporterOptions.Reporter; reporter != nil { 98 opts.ReporterOptions.Context.Size = length 99 if err := reporter.ReportMap(opts.ReporterOptions.Context); err != nil { 100 // Allow the reporter to deny an mmap to allow enforcement of proper 101 // reporting if it wants to. 
102 syscall.Munmap(b) 103 return Descriptor{}, err 104 } 105 } 106 107 return Descriptor{ 108 Bytes: b, 109 Warning: warning, 110 ReporterOptions: opts.ReporterOptions, 111 }, nil 112 } 113 114 // Munmap munmaps a byte slice that is backed by an mmap 115 func Munmap(desc Descriptor) error { 116 if len(desc.Bytes) == 0 { 117 // Never actually mmapd this, just returned empty slice 118 return nil 119 } 120 121 if err := syscall.Munmap(desc.Bytes); err != nil { 122 return fmt.Errorf("munmap error: %v", err) 123 } 124 125 if reporter := desc.ReporterOptions.Reporter; reporter != nil { 126 if err := reporter.ReportUnmap(desc.ReporterOptions.Context); err != nil { 127 // Allow the reporter to return an error from unmap to allow 128 // enforcement of proper reporting if it wants to. 129 return err 130 } 131 } 132 133 return nil 134 } 135 136 // MadviseDontNeed frees mmapped memory. 137 // `MADV_DONTNEED` informs the kernel to free the mmapped pages right away instead of waiting for memory pressure. 138 // NB(bodu): DO NOT FREE anonymously mapped memory or else it will null all of the underlying bytes as the 139 // memory is not file backed. 140 func MadviseDontNeed(desc Descriptor) error { 141 // Do nothing if there's no data. 142 if len(desc.Bytes) == 0 { 143 return nil 144 } 145 return syscall.Madvise(desc.Bytes, syscall.MADV_DONTNEED) 146 }