github.com/m3db/m3@v1.5.0/src/x/mmap/mmap_linux.go (about)

     1  // Copyright (c) 2017 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package mmap
    22  
    23  import (
    24  	"fmt"
    25  	"syscall"
    26  )
    27  
    28  // Fd mmaps a file
    29  func Fd(fd, offset, length int64, opts Options) (Descriptor, error) {
    30  	// MAP_PRIVATE because we only want to ever mmap immutable things and we don't
    31  	// ever want to propagate writes back to the underlying file
    32  	// Set HugeTLB to disabled because its not supported for files
    33  	opts.HugeTLB.Enabled = false
    34  	return mmap(fd, offset, length, syscall.MAP_PRIVATE, opts)
    35  }
    36  
    37  // Bytes requests a private (non-shared) region of anonymous (not backed by a file) memory from the O.S
    38  func Bytes(length int64, opts Options) (Descriptor, error) {
    39  	// offset is 0 because we're not indexing into a file
    40  	// fd is -1 and MAP_ANON because we're asking for an anonymous region of memory not tied to a file
    41  	// MAP_PRIVATE because we don't plan on sharing this region of memory with other processes
    42  	return mmap(-1, 0, length, syscall.MAP_ANON|syscall.MAP_PRIVATE, opts)
    43  }
    44  
    45  func mmap(fd, offset, length int64, flags int, opts Options) (Descriptor, error) {
    46  	if length == 0 {
    47  		// Return an empty slice (but not nil so callers who
    48  		// use nil to mean something special like not initialized
    49  		// get back an actual ref)
    50  		return Descriptor{
    51  			Bytes: make([]byte, 0),
    52  		}, nil
    53  	}
    54  
    55  	var prot int
    56  	if opts.Read {
    57  		prot = prot | syscall.PROT_READ
    58  	}
    59  	if opts.Write {
    60  		prot = prot | syscall.PROT_WRITE
    61  	}
    62  
    63  	flagsWithoutHugeTLB := flags
    64  	shouldUseHugeTLB := opts.HugeTLB.Enabled && length >= opts.HugeTLB.Threshold
    65  	if shouldUseHugeTLB {
    66  		// We use the MAP_HUGETLB flag instead of MADV_HUGEPAGE because transparent
    67  		// hugepages only work with anonymous, private pages. Please see the MADV_HUGEPAGE
    68  		// section of http://man7.org/linux/man-pages/man2/madvise.2.html and the MAP_HUGETLB
    69  		// section of http://man7.org/linux/man-pages/man2/mmap.2.html for more details.
    70  		flags = flags | syscall.MAP_HUGETLB
    71  	}
    72  
    73  	var (
    74  		b       []byte
    75  		err     error
    76  		warning error
    77  	)
    78  	b, err = syscall.Mmap(int(fd), offset, int(length), prot, flags)
    79  	// Sometimes allocations that specify huge pages will fail because the O.S
    80  	// isn't configured properly or there are not enough available huge pages in
    81  	// the pool. You can try and allocate more by executing:
    82  	// 		echo 20 > /proc/sys/vm/nr_hugepages
    83  	// See this document for more details: https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
    84  	// Regardless, we don't want to fail hard in that scenario. Instead, we try
    85  	// and mmap without the hugeTLB flag.
    86  	if err != nil && shouldUseHugeTLB {
    87  		// In case we succeed the second time, make sure we can propagate the previous
    88  		// error back to the caller as a warning
    89  		warning = fmt.Errorf(
    90  			"error while trying to mmap with hugeTLB flag: %s, hugeTLB disabled", err.Error())
    91  		b, err = syscall.Mmap(int(fd), offset, int(length), prot, flagsWithoutHugeTLB)
    92  	}
    93  	if err != nil {
    94  		return Descriptor{}, fmt.Errorf("mmap error: %v", err)
    95  	}
    96  
    97  	if reporter := opts.ReporterOptions.Reporter; reporter != nil {
    98  		opts.ReporterOptions.Context.Size = length
    99  		if err := reporter.ReportMap(opts.ReporterOptions.Context); err != nil {
   100  			// Allow the reporter to deny an mmap to allow enforcement of proper
   101  			// reporting if it wants to.
   102  			syscall.Munmap(b)
   103  			return Descriptor{}, err
   104  		}
   105  	}
   106  
   107  	return Descriptor{
   108  		Bytes:           b,
   109  		Warning:         warning,
   110  		ReporterOptions: opts.ReporterOptions,
   111  	}, nil
   112  }
   113  
   114  // Munmap munmaps a byte slice that is backed by an mmap
   115  func Munmap(desc Descriptor) error {
   116  	if len(desc.Bytes) == 0 {
   117  		// Never actually mmapd this, just returned empty slice
   118  		return nil
   119  	}
   120  
   121  	if err := syscall.Munmap(desc.Bytes); err != nil {
   122  		return fmt.Errorf("munmap error: %v", err)
   123  	}
   124  
   125  	if reporter := desc.ReporterOptions.Reporter; reporter != nil {
   126  		if err := reporter.ReportUnmap(desc.ReporterOptions.Context); err != nil {
   127  			// Allow the reporter to return an error from unmap to allow
   128  			// enforcement of proper reporting if it wants to.
   129  			return err
   130  		}
   131  	}
   132  
   133  	return nil
   134  }
   135  
   136  // MadviseDontNeed frees mmapped memory.
   137  // `MADV_DONTNEED` informs the kernel to free the mmapped pages right away instead of waiting for memory pressure.
   138  // NB(bodu): DO NOT FREE anonymously mapped memory or else it will null all of the underlying bytes as the
   139  // memory is not file backed.
   140  func MadviseDontNeed(desc Descriptor) error {
   141  	// Do nothing if there's no data.
   142  	if len(desc.Bytes) == 0 {
   143  		return nil
   144  	}
   145  	return syscall.Madvise(desc.Bytes, syscall.MADV_DONTNEED)
   146  }