-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathLargeBlockOfMemory.h
249 lines (218 loc) · 8.72 KB
/
LargeBlockOfMemory.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
/* Copyright (c) 2011 Stanford University
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR(S) DISCLAIM ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL AUTHORS BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef RAMCLOUD_LARGEBLOCKOFMEMORY_H
#define RAMCLOUD_LARGEBLOCKOFMEMORY_H
#include <cinttypes>
#include <string>
#include <cstring>
#include <limits.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <boost/type_traits.hpp>
#include <boost/utility/enable_if.hpp>
#include "Common.h"
/**
* We'd like to share class state across all instances of all templated
* versions of #LargeBlockOfMemory. However, due to templating, using
* static members won't work (LargeBlockOfMemory<x> is a different type
* from LargeBlockOfMemory<y> and doesn't share static members). Thus
* we hack around this with a simple namespace.
*/
namespace LargeBlockOfMemoryInternal {
/// Address at which LargeBlockOfMemory::mmapGigabyteAligned starts probing
/// for its next mapping. After a successful mapping it is advanced to the
/// first gigabyte boundary at or past the end of that mapping. This is only
/// a declaration; the definition must be provided elsewhere.
extern uint64_t nextProbeBase;
}
/**
* A wrapper for a large block of memory. Returned memory is guaranteed to be
* at least one gigabyte aligned (at least the first 30 address bits will be 0).
* \tparam T
* The type of object that will be stored in this block of memory.
*/
template<typename T = void>
struct LargeBlockOfMemory {
    /**
     * Allocates anonymous backing pages for a block of memory aligned to a
     * gigabyte boundary. Unless built with TESTING, every page is touched
     * (written with 0) so that it is faulted in, and the pages are pinned
     * if MLOCK_PAGES is defined.
     *
     * This constructor does not throw on failure: if the mapping cannot be
     * established an error is logged and #block is left equal to MAP_FAILED,
     * so callers must check #block before using it.
     *
     * \param length
     *      The number of bytes of memory to allocate. If 0, no mapping is
     *      attempted and #block is NULL.
     * \param extraFlags
     *      Additional flags OR'ed with MAP_ANONYMOUS and passed to mmap(2).
     */
    explicit LargeBlockOfMemory(size_t length, int extraFlags = 0)
        : length(length)
        , block(length == 0
                ? static_cast<T*>(NULL)
                : static_cast<T*>(mmapGigabyteAligned(
                                        length, MAP_ANONYMOUS | extraFlags)))
    {
        // mmap(2) rejects zero-length requests, so the empty case is handled
        // above without probing; only a genuine failure reaches this log.
        if (block == MAP_FAILED)
            LOG(ERROR, "Could not allocate log bytes");
    }

    /**
     * Creates a file of the desired length, mmaps pages from it aligned to a
     * gigabyte boundary, and touches every page so that it is faulted in.
     * This is intended to be used with hugetlbfs to get superpage-backed
     * memory. The file is unlinked again before the constructor returns;
     * the mapping stays valid.
     *
     * \param filePath
     *      Path to the file to create and mmap. This file must not already
     *      exist.
     * \param length
     *      The number of bytes of memory to allocate.
     * \throw FatalError
     *      If the memory could not be allocated, i.e. if the file already
     *      existed, could not be created, could not be truncated, or could
     *      not be mmapped.
     */
    LargeBlockOfMemory(std::string filePath, size_t length)
        : length(length),
          block(NULL)
    {
        const char* path = filePath.c_str();

        // Open the file, being careful that it did not previously
        // exist, as we don't want to stomp on other processes.
        int fd = open(path, O_CREAT | O_EXCL | O_RDWR, 0600);
        if (fd == -1) {
            DIE("Could not open file [%s]: %s", path, strerror(errno));
        }

        // This isn't strictly necessary for hugetlbfs, but it lets us
        // use this same code on any mmaped file.
        if (ftruncate(fd, length) != 0) {
            unlink(path);
            close(fd);
            DIE("Could not ftruncate file [%s] to %lu bytes", path, length);
        }

        block = reinterpret_cast<T*>(mmapGigabyteAligned(length, 0, fd));
        if (reinterpret_cast<void*>(block) == MAP_FAILED) {
            unlink(path);
            close(fd);
            DIE("Could not mmap file [%s]", path);
        }

        // Remove the file from the directory. Our memory will remain
        // allocated, however.
        unlink(path);
        close(fd);

        LOG(NOTICE,
            "Mmapped %lu-byte region from [%s] at %p\n",
            length, path, reinterpret_cast<void*>(block));

        // Fault in each mapping.
        uint64_t pageSize = sysconf(_SC_PAGESIZE);
        for (uint64_t i = 0; i < length; i += pageSize)
            reinterpret_cast<uint8_t*>(block)[i] = 0;
    }

    /**
     * Unmaps the block, if there is one. Nothing is unmapped when #block is
     * NULL (zero-length block) or MAP_FAILED (anonymous allocation failed).
     */
    ~LargeBlockOfMemory()
    {
        if (block == NULL || block == MAP_FAILED)
            return;
        if (munmap(block, length) != 0)
            LOG(WARNING, "munmap of large block failed with %d", errno);
    }

    /// Exchanges the mapping (pointer and length) with \a other.
    void swap(LargeBlockOfMemory<T>& other) {
        std::swap(this->length, other.length);
        std::swap(this->block, other.block);
    }

    /// Returns #block.
    T* operator*() { return block; }

    /// Returns #block.
    T* operator->() { return block; }

    /// Returns #block.
    T* get() { return block; }

    /// The number of bytes valid starting at #block.
    size_t length;

    /// Just for convenience.
    static const uint64_t GIGABYTE = static_cast<uint64_t>(1) << 30;

    /**
     * A gigabyte-aligned block of #length bytes of data.
     * NULL if length is 0. Equal to MAP_FAILED if the anonymous-memory
     * constructor could not establish the mapping.
     */
    T* block;

  private:
    /**
     * Mmap the desired amount of space with gigabyte alignment (lower 30
     * bits of the address are 0). Unless built with TESTING, also ensure
     * that all mappings are faulted in by touching each page (and pin them
     * if MLOCK_PAGES is defined). This is used to give the log memory that's
     * well-aligned, which makes things like computing base addresses of
     * Segments from random pointers really easy if Segments are aligned as
     * well (to a power-of-two less than or equal to 1GB).
     *
     * One gigabyte alignment should be enough for anybody. Come find me in 30
     * years and tell me how foolishly shortsighted I was.
     *
     * Candidate addresses are probed one gigabyte apart, starting at
     * LargeBlockOfMemoryInternal::nextProbeBase, which is advanced past the
     * new mapping on success.
     *
     * \param[in] length
     *      Length of the memory area to be mapped in bytes.
     * \param[in] extraFlags
     *      Extra flags to be passed to mmap(2) in addition to MAP_SHARED.
     * \param[in] fd
     *      Optional file descriptor (if mmaping a file, for instance).
     * \return
     *      The gigabyte-aligned base address of the new mapping, or
     *      MAP_FAILED if no aligned mapping was obtained within the probe
     *      limit, if an unwanted mapping could not be unmapped, or if the
     *      pages could not be pinned.
     */
    static void*
    mmapGigabyteAligned(size_t length, int extraFlags, int fd = -1)
    {
        const int maxTries = 10000;
        uint64_t tryBase = LargeBlockOfMemoryInternal::nextProbeBase;

        int tries;
        for (tries = 0; tries < maxTries; tries++) {
            // The address is only a hint (no MAP_FIXED), so the kernel may
            // hand back some other address; that mapping is discarded below.
            void* base = mmap(reinterpret_cast<void*>(tryBase),
                              length,
                              PROT_READ | PROT_WRITE,
                              MAP_SHARED | extraFlags,
                              fd,
                              0);
            if (base == reinterpret_cast<void*>(tryBase))
                break;
            if (base != MAP_FAILED) {
                if (munmap(base, length)) {
                    LOG(ERROR, "couldn't munmap undesirable mapping!");
                    return MAP_FAILED;
                }
            }
            tryBase += GIGABYTE;
        }

        if (tries == maxTries) {
            LOG(ERROR, "Couldn't mmap gigabyte-aligned region");
            return MAP_FAILED;
        }

        void* block = reinterpret_cast<void*>(tryBase);

        // Do not pin and fault in pages if we're testing, since that just
        // slows things down considerably (we usually don't touch anywhere near
        // all of the memory we allocate).
#if !TESTING
#ifdef MLOCK_PAGES
        // Pin the pages. Don't do this with the mmap() MAP_LOCKED flag since
        // that slows down probing considerably (Linux might be locking down
        // pages before it knows that it can actually give us the entire
        // range?).
        if (mlock(block, length)) {
            munmap(block, length);
            LOG(ERROR, "Couldn't pin down the memory!");
            return MAP_FAILED;
        }
#endif
        // Force the OS to populate backing pages. MAP_POPULATE doesn't seem
        // to do the trick and using it makes polling mmap for aligned base
        // addresses much slower.
        uint64_t pageSize = sysconf(_SC_PAGESIZE);
        for (uint64_t offset = 0; offset < length; offset += pageSize) {
            reinterpret_cast<uint8_t*>(block)[offset] = 0;
            // Report progress each time a gigabyte boundary is crossed.
            if (!(offset & (GIGABYTE - 1))) {
                LOG(NOTICE, "Populating pages; progress %lu of %lu MB",
                    offset / (1 << 20), length / (1 << 20));
            }
        }
#endif // !TESTING

        // Cache last mapped address to avoid re-probing same addresses later.
        LargeBlockOfMemoryInternal::nextProbeBase =
            (tryBase + length + GIGABYTE - 1) & ~(GIGABYTE - 1);

        return block;
    }

    LargeBlockOfMemory(const LargeBlockOfMemory&) = delete;
    LargeBlockOfMemory& operator=(const LargeBlockOfMemory&) = delete;
};
#endif // RAMCLOUD_LARGEBLOCKOFMEMORY_H