#include <linux/math64.h>
#include <linux/fault-inject.h>
#include <linux/stacktrace.h>
+#include <linux/prefetch.h>
#include <trace/events/kmem.h>
return *(void **)(object + s->offset);
}
+static void prefetch_freepointer(const struct kmem_cache *s, void *object)
+{
+ prefetch(object + s->offset);
+}
+
static inline void *get_freepointer_safe(struct kmem_cache *s, void *object)
{
void *p;
const char *n)
{
VM_BUG_ON(!irqs_disabled());
-#ifdef CONFIG_CMPXCHG_DOUBLE
+#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
+ defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
if (s->flags & __CMPXCHG_DOUBLE) {
- if (cmpxchg_double(&page->freelist,
+ if (cmpxchg_double(&page->freelist, &page->counters,
freelist_old, counters_old,
freelist_new, counters_new))
return 1;
void *freelist_new, unsigned long counters_new,
const char *n)
{
-#ifdef CONFIG_CMPXCHG_DOUBLE
+#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
+ defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
if (s->flags & __CMPXCHG_DOUBLE) {
- if (cmpxchg_double(&page->freelist,
+ if (cmpxchg_double(&page->freelist, &page->counters,
freelist_old, counters_old,
freelist_new, counters_new))
return 1;
*/
static void print_section(char *text, u8 *addr, unsigned int length)
{
- int i, offset;
- int newline = 1;
- char ascii[17];
-
- ascii[16] = 0;
-
- for (i = 0; i < length; i++) {
- if (newline) {
- printk(KERN_ERR "%8s 0x%p: ", text, addr + i);
- newline = 0;
- }
- printk(KERN_CONT " %02x", addr[i]);
- offset = i % 16;
- ascii[offset] = isgraph(addr[i]) ? addr[i] : '.';
- if (offset == 15) {
- printk(KERN_CONT " %s\n", ascii);
- newline = 1;
- }
- }
- if (!newline) {
- i %= 16;
- while (i < 16) {
- printk(KERN_CONT " ");
- ascii[i] = ' ';
- i++;
- }
- printk(KERN_CONT " %s\n", ascii);
- }
+ print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
+ length, 1);
}
static struct track *get_track(struct kmem_cache *s, void *object,
va_end(args);
printk(KERN_ERR "========================================"
"=====================================\n");
- printk(KERN_ERR "BUG %s: %s\n", s->name, buf);
+ printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf);
printk(KERN_ERR "----------------------------------------"
"-------------------------------------\n\n");
}
p, p - addr, get_freepointer(s, p));
if (p > addr + 16)
- print_section("Bytes b4", p - 16, 16);
-
- print_section("Object", p, min_t(unsigned long, s->objsize, PAGE_SIZE));
+ print_section("Bytes b4 ", p - 16, 16);
+ print_section("Object ", p, min_t(unsigned long, s->objsize,
+ PAGE_SIZE));
if (s->flags & SLAB_RED_ZONE)
- print_section("Redzone", p + s->objsize,
+ print_section("Redzone ", p + s->objsize,
s->inuse - s->objsize);
if (s->offset)
if (off != s->size)
/* Beginning of the filler is the free pointer */
- print_section("Padding", p + off, s->size - off);
+ print_section("Padding ", p + off, s->size - off);
dump_stack();
}
memset(p + s->objsize, val, s->inuse - s->objsize);
}
-static u8 *check_bytes8(u8 *start, u8 value, unsigned int bytes)
-{
- while (bytes) {
- if (*start != value)
- return start;
- start++;
- bytes--;
- }
- return NULL;
-}
-
-static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes)
-{
- u64 value64;
- unsigned int words, prefix;
-
- if (bytes <= 16)
- return check_bytes8(start, value, bytes);
-
- value64 = value | value << 8 | value << 16 | value << 24;
- value64 = (value64 & 0xffffffff) | value64 << 32;
- prefix = 8 - ((unsigned long)start) % 8;
-
- if (prefix) {
- u8 *r = check_bytes8(start, value, prefix);
- if (r)
- return r;
- start += prefix;
- bytes -= prefix;
- }
-
- words = bytes / 8;
-
- while (words) {
- if (*(u64 *)start != value64)
- return check_bytes8(start, value, 8);
- start += 8;
- words--;
- }
-
- return check_bytes8(start, value, bytes % 8);
-}
-
static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
void *from, void *to)
{
u8 *fault;
u8 *end;
- fault = check_bytes(start, value, bytes);
+ fault = memchr_inv(start, value, bytes);
if (!fault)
return 1;
if (!remainder)
return 1;
- fault = check_bytes(end - remainder, POISON_INUSE, remainder);
+ fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
if (!fault)
return 1;
while (end > fault && end[-1] == POISON_INUSE)
end--;
slab_err(s, page, "Padding overwritten. 0x%p-0x%p", fault, end - 1);
- print_section("Padding", end - remainder, remainder);
+ print_section("Padding ", end - remainder, remainder);
restore_bytes(s, "slab padding", POISON_INUSE, end - remainder, end);
return 0;
page->freelist);
if (!alloc)
- print_section("Object", (void *)object, s->objsize);
+ print_section("Object ", (void *)object, s->objsize);
dump_stack();
}
struct page *page, int tail)
{
n->nr_partial++;
- if (tail)
+ if (tail == DEACTIVATE_TO_TAIL)
list_add_tail(&page->lru, &n->partial);
else
list_add(&page->lru, &n->partial);
} else {
page->freelist = t;
available = put_cpu_partial(s, page, 0);
+ stat(s, CPU_PARTIAL_NODE);
}
if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
break;
struct zone *zone;
enum zone_type high_zoneidx = gfp_zone(flags);
void *object;
+ unsigned int cpuset_mems_cookie;
/*
* The defrag ratio allows a configuration of the tradeoffs between
get_cycles() % 1024 > s->remote_node_defrag_ratio)
return NULL;
- get_mems_allowed();
- zonelist = node_zonelist(slab_node(current->mempolicy), flags);
- for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
- struct kmem_cache_node *n;
-
- n = get_node(s, zone_to_nid(zone));
-
- if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
- n->nr_partial > s->min_partial) {
- object = get_partial_node(s, n, c);
- if (object) {
- put_mems_allowed();
- return object;
+ do {
+ cpuset_mems_cookie = get_mems_allowed();
+ zonelist = node_zonelist(slab_node(current->mempolicy), flags);
+ for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+ struct kmem_cache_node *n;
+
+ n = get_node(s, zone_to_nid(zone));
+
+ if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
+ n->nr_partial > s->min_partial) {
+ object = get_partial_node(s, n, c);
+ if (object) {
+ /*
+ * Return the object even if
+ * put_mems_allowed indicated that
+ * the cpuset mems_allowed was
+ * updated in parallel. It's a
+ * harmless race between the alloc
+ * and the cpuset update.
+ */
+ put_mems_allowed(cpuset_mems_cookie);
+ return object;
+ }
}
}
- }
- put_mems_allowed();
+ } while (!put_mems_allowed(cpuset_mems_cookie));
#endif
return NULL;
}
enum slab_modes l = M_NONE, m = M_NONE;
void *freelist;
void *nextfree;
- int tail = 0;
+ int tail = DEACTIVATE_TO_HEAD;
struct page new;
struct page old;
if (page->freelist) {
stat(s, DEACTIVATE_REMOTE_FREES);
- tail = 1;
+ tail = DEACTIVATE_TO_TAIL;
}
c->tid = next_tid(c->tid);
if (m == M_PARTIAL) {
add_partial(n, page, tail);
- stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
+ stat(s, tail);
} else if (m == M_FULL) {
{
struct kmem_cache_node *n = NULL;
struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
- struct page *page;
+ struct page *page, *discard_page = NULL;
while ((page = c->partial)) {
enum slab_modes { M_PARTIAL, M_FREE };
new.frozen = 0;
- if (!new.inuse && (!n || n->nr_partial < s->min_partial))
+ if (!new.inuse && (!n || n->nr_partial > s->min_partial))
m = M_FREE;
else {
struct kmem_cache_node *n2 = get_node(s,
}
if (l != m) {
- if (l == M_PARTIAL)
+ if (l == M_PARTIAL) {
remove_partial(n, page);
- else
- add_partial(n, page, 1);
+ stat(s, FREE_REMOVE_PARTIAL);
+ } else {
+ add_partial(n, page,
+ DEACTIVATE_TO_TAIL);
+ stat(s, FREE_ADD_PARTIAL);
+ }
l = m;
}
"unfreezing slab"));
if (m == M_FREE) {
- stat(s, DEACTIVATE_EMPTY);
- discard_slab(s, page);
- stat(s, FREE_SLAB);
+ page->next = discard_page;
+ discard_page = page;
}
}
if (n)
spin_unlock(&n->list_lock);
+
+ while (discard_page) {
+ page = discard_page;
+ discard_page = discard_page->next;
+
+ stat(s, DEACTIVATE_EMPTY);
+ discard_slab(s, page);
+ stat(s, FREE_SLAB);
+ }
}
/*
local_irq_restore(flags);
pobjects = 0;
pages = 0;
+ stat(s, CPU_PARTIAL_DRAIN);
}
}
page->next = oldpage;
} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
- stat(s, CPU_PARTIAL_FREE);
return pobjects;
}
__flush_cpu_slab(s, smp_processor_id());
}
+static bool has_cpu_slab(int cpu, void *info)
+{
+ struct kmem_cache *s = info;
+ struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+
+ return c->page || c->partial;
+}
+
static void flush_all(struct kmem_cache *s)
{
- on_each_cpu(flush_cpu_slab, s, 1);
+ on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1, GFP_ATOMIC);
}
/*
}
/*
+ * Check the page->freelist of a page and either transfer the freelist to the per cpu freelist
+ * or deactivate the page.
+ *
+ * The page is still frozen if the return value is not NULL.
+ *
+ * If this function returns NULL then the page has been unfrozen.
+ */
+static inline void *get_freelist(struct kmem_cache *s, struct page *page)
+{
+ struct page new;
+ unsigned long counters;
+ void *freelist;
+
+ do {
+ freelist = page->freelist;
+ counters = page->counters;
+ new.counters = counters;
+ VM_BUG_ON(!new.frozen);
+
+ new.inuse = page->objects;
+ new.frozen = freelist != NULL;
+
+ } while (!cmpxchg_double_slab(s, page,
+ freelist, counters,
+ NULL, new.counters,
+ "get_freelist"));
+
+ return freelist;
+}
+
+/*
* Slow path. The lockless freelist is empty or we need to perform
* debugging duties.
*
{
void **object;
unsigned long flags;
- struct page new;
- unsigned long counters;
local_irq_save(flags);
#ifdef CONFIG_PREEMPT
goto new_slab;
}
- stat(s, ALLOC_SLOWPATH);
-
- do {
- object = c->page->freelist;
- counters = c->page->counters;
- new.counters = counters;
- VM_BUG_ON(!new.frozen);
+ /* must check again c->freelist in case of cpu migration or IRQ */
+ object = c->freelist;
+ if (object)
+ goto load_freelist;
- /*
- * If there is no object left then we use this loop to
- * deactivate the slab which is simple since no objects
- * are left in the slab and therefore we do not need to
- * put the page back onto the partial list.
- *
- * If there are objects left then we retrieve them
- * and use them to refill the per cpu queue.
- */
+ stat(s, ALLOC_SLOWPATH);
- new.inuse = c->page->objects;
- new.frozen = object != NULL;
-
- } while (!__cmpxchg_double_slab(s, c->page,
- object, counters,
- NULL, new.counters,
- "__slab_alloc"));
+ object = get_freelist(s, c->page);
if (!object) {
c->page = NULL;
object = __slab_alloc(s, gfpflags, node, addr, c);
else {
+ void *next_object = get_freepointer_safe(s, object);
+
/*
* The cmpxchg will only match if there was no additional
* operation and if we are on the right processor.
* Since this is without lock semantics the protection is only against
* code executing on this cpu *not* from access by other cpus.
*/
- if (unlikely(!irqsafe_cpu_cmpxchg_double(
+ if (unlikely(!this_cpu_cmpxchg_double(
s->cpu_slab->freelist, s->cpu_slab->tid,
object, tid,
- get_freepointer_safe(s, object), next_tid(tid)))) {
+ next_object, next_tid(tid)))) {
note_cmpxchg_failure("slab_alloc", s, tid);
goto redo;
}
+ prefetch_freepointer(s, next_object);
stat(s, ALLOC_FASTPATH);
}
* If we just froze the page then put it onto the
* per cpu partial list.
*/
- if (new.frozen && !was_frozen)
+ if (new.frozen && !was_frozen) {
put_cpu_partial(s, page, 1);
-
+ stat(s, CPU_PARTIAL_FREE);
+ }
/*
* The list lock was not taken therefore no list
* activity can be necessary.
*/
if (unlikely(!prior)) {
remove_full(s, page);
- add_partial(n, page, 0);
+ add_partial(n, page, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
}
if (likely(page == c->page)) {
set_freepointer(s, object, c->freelist);
- if (unlikely(!irqsafe_cpu_cmpxchg_double(
+ if (unlikely(!this_cpu_cmpxchg_double(
s->cpu_slab->freelist, s->cpu_slab->tid,
c->freelist, tid,
object, next_tid(tid)))) {
init_kmem_cache_node(n, kmem_cache_node);
inc_slabs_node(kmem_cache_node, node, page->objects);
- add_partial(n, page, 0);
+ add_partial(n, page, DEACTIVATE_TO_HEAD);
}
static void free_kmem_cache_nodes(struct kmem_cache *s)
}
}
-#ifdef CONFIG_CMPXCHG_DOUBLE
+#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
+ defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
/* Enable fast mode */
s->flags |= __CMPXCHG_DOUBLE;
* per node list when we run out of per cpu objects. We only fetch 50%
* to keep some capacity around for frees.
*/
- if (s->size >= PAGE_SIZE)
+ if (kmem_cache_debug(s))
+ s->cpu_partial = 0;
+ else if (s->size >= PAGE_SIZE)
s->cpu_partial = 2;
else if (s->size >= 1024)
s->cpu_partial = 6;
struct kmem_cache *temp_kmem_cache_node;
unsigned long kmalloc_size;
+ if (debug_guardpage_minorder())
+ slub_max_order = 0;
+
kmem_size = offsetof(struct kmem_cache, node) +
nr_node_ids * sizeof(struct kmem_cache_node *);
if (kmem_cache_open(s, n,
size, align, flags, ctor)) {
list_add(&s->list, &slab_caches);
+ up_write(&slub_lock);
if (sysfs_slab_add(s)) {
+ down_write(&slub_lock);
list_del(&s->list);
kfree(n);
kfree(s);
goto err;
}
- up_write(&slub_lock);
return s;
}
kfree(n);
for_each_possible_cpu(cpu) {
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+ int node = ACCESS_ONCE(c->node);
struct page *page;
- if (!c || c->node < 0)
+ if (node < 0)
continue;
-
- if (c->page) {
- if (flags & SO_TOTAL)
- x = c->page->objects;
+ page = ACCESS_ONCE(c->page);
+ if (page) {
+ if (flags & SO_TOTAL)
+ x = page->objects;
else if (flags & SO_OBJECTS)
- x = c->page->inuse;
+ x = page->inuse;
else
x = 1;
total += x;
- nodes[c->node] += x;
+ nodes[node] += x;
}
page = c->partial;
if (page) {
x = page->pobjects;
- total += x;
- nodes[c->node] += x;
+ total += x;
+ nodes[node] += x;
}
- per_cpu[c->node]++;
+ per_cpu[node]++;
}
}
};
#define SLAB_ATTR_RO(_name) \
- static struct slab_attribute _name##_attr = __ATTR_RO(_name)
+ static struct slab_attribute _name##_attr = \
+ __ATTR(_name, 0400, _name##_show, NULL)
#define SLAB_ATTR(_name) \
static struct slab_attribute _name##_attr = \
- __ATTR(_name, 0644, _name##_show, _name##_store)
+ __ATTR(_name, 0600, _name##_show, _name##_store)
static ssize_t slab_size_show(struct kmem_cache *s, char *buf)
{
err = strict_strtoul(buf, 10, &objects);
if (err)
return err;
+ if (objects && kmem_cache_debug(s))
+ return -EINVAL;
s->cpu_partial = objects;
flush_all(s);
STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
STAT_ATTR(CPU_PARTIAL_ALLOC, cpu_partial_alloc);
STAT_ATTR(CPU_PARTIAL_FREE, cpu_partial_free);
+STAT_ATTR(CPU_PARTIAL_NODE, cpu_partial_node);
+STAT_ATTR(CPU_PARTIAL_DRAIN, cpu_partial_drain);
#endif
static struct attribute *slab_attrs[] = {
&cmpxchg_double_cpu_fail_attr.attr,
&cpu_partial_alloc_attr.attr,
&cpu_partial_free_attr.attr,
+ &cpu_partial_node_attr.attr,
+ &cpu_partial_drain_attr.attr,
#endif
#ifdef CONFIG_FAILSLAB
&failslab_attr.attr,
static int __init slab_proc_init(void)
{
- proc_create("slabinfo", S_IRUGO, NULL, &proc_slabinfo_operations);
+ proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations);
return 0;
}
module_init(slab_proc_init);