The previous articles analyzed cache creation and object allocation in the SLUB allocator; this one continues with object freeing.
In the SLUB allocator, the interface for freeing an object is kmem_cache_free():
【file:/mm/slub.c】
void kmem_cache_free(struct kmem_cache *s, void *x)
{
	s = cache_from_obj(s, x);
	if (!s)
		return;
	slab_free(s, virt_to_head_page(x), x, _RET_IP_);
	trace_kmem_cache_free(_RET_IP_, x);
}
In this function, cache_from_obj() obtains the kmem_cache that the object being freed belongs to, slab_free() performs the actual release of the object, and trace_kmem_cache_free() records a trace event for the free.
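Before diving into the internals, a minimal usage sketch of this API may help: an object is always returned to the same cache it was allocated from. The cache name and structure below are made up purely for illustration.

#include <linux/init.h>
#include <linux/slab.h>

/* Hypothetical example structure and cache, for illustration only. */
struct foo {
	int a;
	int b;
};

static struct kmem_cache *foo_cachep;

static int __init foo_cache_init(void)
{
	foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
				       0, SLAB_HWCACHE_ALIGN, NULL);
	return foo_cachep ? 0 : -ENOMEM;
}

static void foo_use(void)
{
	struct foo *p = kmem_cache_alloc(foo_cachep, GFP_KERNEL);

	if (!p)
		return;
	/* ... use the object ... */
	kmem_cache_free(foo_cachep, p);		/* the path analyzed below */
}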
Let's look at the implementation of cache_from_obj():
【file:/mm/slab.h】
static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
{
	struct kmem_cache *cachep;
	struct page *page;

	/*
	 * When kmemcg is not being used, both assignments should return the
	 * same value. but we don't want to pay the assignment price in that
	 * case. If it is not compiled in, the compiler should be smart enough
	 * to not do even the assignment. In that case, slab_equal_or_root
	 * will also be a constant.
	 */
	if (!memcg_kmem_enabled() && !unlikely(s->flags & SLAB_DEBUG_FREE))
		return s;

	page = virt_to_head_page(x);
	cachep = page->slab_cache;
	if (slab_equal_or_root(cachep, s))
		return cachep;

	pr_err("%s: Wrong slab cache. %s but object is from %s\n",
	       __FUNCTION__, cachep->name, s->name);
	WARN_ON_ONCE(1);
	return s;
}
Although the kmem_cache was already passed in as an argument to kmem_cache_free(), it is re-derived and validated here, because the page structure obtained from the object address via virt_to_head_page() is far more trustworthy than the value supplied by the caller. The function first tests if (!memcg_kmem_enabled() && !unlikely(s->flags & SLAB_DEBUG_FREE)): if memcg kmem accounting is not in use and the cache does not have SLAB_DEBUG_FREE set, the caller's kmem_cache is returned as-is. Otherwise, virt_to_head_page() converts the object address into its page management structure, and slab_equal_or_root() checks whether the caller's kmem_cache matches the cache the object actually belongs to. If it matches, the kmem_cache derived from the object is returned; if not, an error is logged, a warning is raised, and the caller's kmem_cache is returned anyway.
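For reference, virt_to_head_page() in include/linux/mm.h is roughly the following (the exact definition may vary between kernel versions): it converts the virtual address into its struct page and then into the head page of the compound page, whose slab_cache field identifies the owning cache.

/* Map an address inside a (possibly compound) slab page back to the head
 * struct page; page->slab_cache of that head page names the owning cache. */
static inline struct page *virt_to_head_page(const void *x)
{
	struct page *page = virt_to_page(x);

	return compound_head(page);
}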
Now let's analyze SLUB's object-freeing function slab_free() in detail:
【file:/mm/slub.c】
/*
 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
 * can perform fastpath freeing without additional function calls.
 *
 * The fastpath is only possible if we are freeing to the current cpu slab
 * of this processor. This typically the case if we have just allocated
 * the item before.
 *
 * If fastpath is not possible then fall back to __slab_free where we deal
 * with all sorts of special processing.
 */
static __always_inline void slab_free(struct kmem_cache *s,
			struct page *page, void *x, unsigned long addr)
{
	void **object = (void *)x;
	struct kmem_cache_cpu *c;
	unsigned long tid;

	slab_free_hook(s, x);

redo:
	/*
	 * Determine the currently cpus per cpu slab.
	 * The cpu may change afterward. However that does not matter since
	 * data is retrieved via this pointer. If we are on the same cpu
	 * during the cmpxchg then the free will succedd.
	 */
	preempt_disable();
	c = __this_cpu_ptr(s->cpu_slab);

	tid = c->tid;
	preempt_enable();

	if (likely(page == c->page)) {
		set_freepointer(s, object, c->freelist);

		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				c->freelist, tid,
				object, next_tid(tid)))) {

			note_cmpxchg_failure("slab_free", s, tid);
			goto redo;
		}
		stat(s, FREE_FASTPATH);
	} else
		__slab_free(s, page, x, addr);
}
The function first calls the free hook slab_free_hook(), whose main job is to unregister the object from kmemleak. Next comes the redo label, the point to jump back to and retry when the task migrates to another CPU because of preemption during the free. Under redo, preemption is first disabled with preempt_disable(), then __this_cpu_ptr() fetches the local CPU's kmem_cache_cpu management structure together with its transaction ID (tid), and preemption is re-enabled with preempt_enable(). The check if (likely(page == c->page)) tests whether the object being freed belongs to the local CPU's current slab: if so, set_freepointer() writes the free pointer stored inside the object so that it points at the current freelist head, and then, just as in allocation, this_cpu_cmpxchg_double() atomically pushes the object back onto the per-CPU freelist. If the object's slab does not match the local CPU's slab, the fast path is impossible and the object is released through the slow path __slab_free().
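For reference, set_freepointer() simply stores the given freelist pointer inside the object at the cache's free-pointer offset, roughly as below (a simplified sketch; the exact definition may vary between kernel versions). On the fast path above, the stored pointer is the current c->freelist head, so the freed object becomes the new head of the per-CPU freelist.

/* Write the freelist link into the object at offset s->offset. */
static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
	*(void **)(object + s->offset) = fp;
}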
Next, let's analyze the implementation of __slab_free():
【file:/mm/slub.c】
/*
 * Slow patch handling. This may still be called frequently since objects
 * have a longer lifetime than the cpu slabs in most processing loads.
 *
 * So we still attempt to reduce cache line usage. Just take the slab
 * lock and free the item. If there is no additional partial page
 * handling required then we can return immediately.
 */
static void __slab_free(struct kmem_cache *s, struct page *page,
			void *x, unsigned long addr)
{
	void *prior;
	void **object = (void *)x;
	int was_frozen;
	struct page new;
	unsigned long counters;
	struct kmem_cache_node *n = NULL;
	unsigned long uninitialized_var(flags);

	stat(s, FREE_SLOWPATH);

	if (kmem_cache_debug(s) &&
		!(n = free_debug_processing(s, page, x, addr, &flags)))
		return;

	do {
		if (unlikely(n)) {
			spin_unlock_irqrestore(&n->list_lock, flags);
			n = NULL;
		}
		prior = page->freelist;
		counters = page->counters;
		set_freepointer(s, object, prior);
		new.counters = counters;
		was_frozen = new.frozen;
		new.inuse--;
		if ((!new.inuse || !prior) && !was_frozen) {

			if (kmem_cache_has_cpu_partial(s) && !prior) {

				/*
				 * Slab was on no list before and will be
				 * partially empty
				 * We can defer the list move and instead
				 * freeze it.
				 */
				new.frozen = 1;

			} else { /* Needs to be taken off a list */

				n = get_node(s, page_to_nid(page));
				/*
				 * Speculatively acquire the list_lock.
				 * If the cmpxchg does not succeed then we may
				 * drop the list_lock without any processing.
				 *
				 * Otherwise the list_lock will synchronize with
				 * other processors updating the list of slabs.
				 */
				spin_lock_irqsave(&n->list_lock, flags);

			}
		}

	} while (!cmpxchg_double_slab(s, page,
		prior, counters,
		object, new.counters,
		"__slab_free"));

	if (likely(!n)) {

		/*
		 * If we just froze the page then put it onto the
		 * per cpu partial list.
		 */
		if (new.frozen && !was_frozen) {
			put_cpu_partial(s, page, 1);
			stat(s, CPU_PARTIAL_FREE);
		}
		/*
		 * The list lock was not taken therefore no list
		 * activity can be necessary.
		 */
		if (was_frozen)
			stat(s, FREE_FROZEN);
		return;
	}

	if (unlikely(!new.inuse && n->nr_partial > s->min_partial))
		goto slab_empty;

	/*
	 * Objects left in the slab. If it was not on the partial list before
	 * then add it.
	 */
	if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
		if (kmem_cache_debug(s))
			remove_full(s, n, page);
		add_partial(n, page, DEACTIVATE_TO_TAIL);
		stat(s, FREE_ADD_PARTIAL);
	}
	spin_unlock_irqrestore(&n->list_lock, flags);
	return;

slab_empty:
	if (prior) {
		/*
		 * Slab on the partial list.
		 */
		remove_partial(n, page);
		stat(s, FREE_REMOVE_PARTIAL);
	} else {
		/* Slab must be on the full list */
		remove_full(s, n, page);
	}

	spin_unlock_irqrestore(&n->list_lock, flags);
	stat(s, FREE_SLAB);
	discard_slab(s, page);
}
At the very beginning of the function, if (kmem_cache_debug(s) && !(n = free_debug_processing(s, page, x, addr, &flags))) uses kmem_cache_debug() to check whether debugging is enabled for this cache; if so, free_debug_processing() performs the debug checks and returns the validated kmem_cache_node management structure of the node the slab belongs to (returning NULL on failure, in which case the function simply returns). The code then enters the do-while loop. If n is non-NULL, because a kmem_cache_node was handed back by free_debug_processing() or because a previous iteration took it, the list lock acquired inside free_debug_processing() (or in the previous iteration) is released first and n is reset to NULL. After that, the loop snapshots the slab's freelist and counters, writes the freelist pointer into the object being freed, and decrements the slab's in-use object count in the local copy new.
Moving on, the check if ((!new.inuse || !prior) && !was_frozen) fires when, after this free, the slab either has no objects in use at all (!new.inuse) or previously had an empty freelist (!prior, i.e. it was full and has now become partially full), and the slab is not frozen (that is, it is not held in any CPU's per-CPU cache). In either case the slab's list placement has to change: a completely empty slab may be given back to the buddy allocator, while a previously full slab must be queued on a partial list or frozen. The nested check if (kmem_cache_has_cpu_partial(s) && !prior) means the cache has per-CPU partial lists and the slab's freelist was previously empty; in that case the slab is marked frozen so that it can later be placed onto the per-CPU partial list. Otherwise the slab will have to be moved on or off a node list, so get_node() fetches the node's kmem_cache_node management structure and spin_lock_irqsave() takes its list_lock. Finally, cmpxchg_double_slab() commits the free atomically; if it fails, the do-while loop retries.
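The new.inuse, new.frozen and counters manipulation relies on the slab-related fields of struct page overlaying one machine word, roughly as sketched below (a heavily simplified excerpt; in include/linux/mm_types.h these fields sit inside several nested unions). Packing them into a single word alongside freelist is what allows cmpxchg_double_slab() to update the freelist and the counters atomically as a pair.

struct page {
	/* ... */
	void *freelist;			/* first free object in this slab */
	union {
		unsigned long counters;	/* the bitfields below, as one word */
		struct {
			unsigned inuse:16;	/* objects currently allocated */
			unsigned objects:15;	/* total objects in the slab */
			unsigned frozen:1;	/* slab is cached on a CPU */
		};
	};
	/* ... */
};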
In the following if (likely(!n)) branch, the kmem_cache_node pointer is NULL, meaning the node's list lock was never taken and no list manipulation is required. Within it, if (new.frozen && !was_frozen) means the slab was frozen just now by this call rather than being frozen already, so put_cpu_partial() hangs it onto the per-CPU partial list and stat() updates the statistics; if (was_frozen) means the slab was already frozen, in which case only the FREE_FROZEN statistic needs updating. The function then returns.
Next, if (unlikely(!new.inuse && n->nr_partial > s->min_partial)): if no objects in the slab are in use any more and the node already holds more partial slabs than the s->min_partial threshold, the slab's pages should be released, so the code jumps to the slab_empty label to free the slab.
In addition, if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) handles the case where per-CPU partial lists are not available and the slab was previously full: because of this free it is now only partially full (objects are still in use), so remove_full() takes it off the full list (full-list membership is only tracked when debugging is enabled) and add_partial() adds it to the node's partial list, with the statistics updated accordingly.
Finally, spin_unlock_irqrestore() drops the node's list lock and restores the interrupt state.
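For reference, add_partial() used above essentially links the slab page into the node's partial list and bumps the partial count, roughly as below (a simplified sketch; the exact definition, including the lockdep assertion on list_lock, varies across kernel versions).

/* Simplified sketch: insert the slab page at the head or tail of the node's
 * partial list, depending on the tail argument. */
static inline void add_partial(struct kmem_cache_node *n,
				struct page *page, int tail)
{
	n->nr_partial++;
	if (tail == DEACTIVATE_TO_TAIL)
		list_add_tail(&page->lru, &n->partial);
	else
		list_add(&page->lru, &n->partial);
}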
As for the slab-freeing flow under the slab_empty label: depending on whether the slab's freelist was empty before this free, the page is removed either from the node's partial list with remove_partial() (prior non-NULL) or from the full list with remove_full() (prior NULL, i.e. the slab was full); then spin_unlock_irqrestore() releases the lock and discard_slab() finally frees the slab.
With that function covered, let's now take a closer look at the processing in free_debug_processing() and the implementation of discard_slab().
【file:/mm/slub.c】
static noinline struct kmem_cache_node *free_debug_processing(
	struct kmem_cache *s, struct page *page, void *object,
	unsigned long addr, unsigned long *flags)
{
	struct kmem_cache_node *n = get_node(s, page_to_nid(page));

	spin_lock_irqsave(&n->list_lock, *flags);
	slab_lock(page);

	if (!check_slab(s, page))
		goto fail;

	if (!check_valid_pointer(s, page, object)) {
		slab_err(s, page, "Invalid object pointer 0x%p", object);
		goto fail;
	}

	if (on_freelist(s, page, object)) {
		object_err(s, page, object, "Object already free");
		goto fail;
	}

	if (!check_object(s, page, object, SLUB_RED_ACTIVE))
		goto out;

	if (unlikely(s != page->slab_cache)) {
		if (!PageSlab(page)) {
			slab_err(s, page, "Attempt to free object(0x%p) "
				"outside of slab", object);
		} else if (!page->slab_cache) {
			printk(KERN_ERR
				"SLUB <none>: no slab for object 0x%p.\n",
						object);
			dump_stack();
		} else
			object_err(s, page, object,
					"page slab pointer corrupt.");
		goto fail;
	}

	if (s->flags & SLAB_STORE_USER)
		set_track(s, object, TRACK_FREE, addr);
	trace(s, page, object, 0);
	init_object(s, object, SLUB_RED_INACTIVE);
out:
	slab_unlock(page);
	/*
	 * Keep node_lock to preserve integrity
	 * until the object is actually freed
	 */
	return n;

fail:
	slab_unlock(page);
	spin_unlock_irqrestore(&n->list_lock, *flags);
	slab_fix(s, "Object at 0x%p not freed", object);
	return NULL;
}
The main checks performed by this debug-processing function are: check_slab() verifies that the slab information recorded in the page is consistent with the kmem_cache, a mismatch suggesting corruption or inconsistent data; check_valid_pointer() verifies that the object address is legal, i.e. it really is the start address of some object rather than pointing into the middle of one; on_freelist() detects whether the object has already been freed, guarding against a double free; check_object() performs integrity checks on the object's memory according to the SLAB_RED_ZONE and SLAB_POISON flags; and the if (unlikely(s != page->slab_cache)) test makes sure the caller-supplied kmem_cache matches the kmem_cache the page actually belongs to, logging an error otherwise. In addition, if (s->flags & SLAB_STORE_USER) records the free track information when the SLAB_STORE_USER flag is set. Finally, trace() records the object's trace information and init_object() re-initializes the object. The out and fail labels at the end handle the success and failure cases of the checks, respectively.
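As an example of these checks, check_valid_pointer() confirms that the pointer lies within the slab page and falls exactly on an object boundary, roughly as below (a simplified sketch; the exact definition may vary between kernel versions).

/* An object pointer is valid if it lies inside the slab's object area and
 * is a whole multiple of the object size away from the slab base. */
static inline int check_valid_pointer(struct kmem_cache *s,
				struct page *page, const void *object)
{
	void *base;

	if (!object)
		return 1;

	base = page_address(page);
	if (object < base || object >= base + page->objects * s->size ||
		(object - base) % s->size)
		return 0;

	return 1;
}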
As for the implementation of discard_slab():
【file:/mm/slub.c】
static void discard_slab(struct kmem_cache *s, struct page *page)
{
	dec_slabs_node(s, page_to_nid(page), page->objects);
	free_slab(s, page);
}
When discard_slab() releases a slab, it first updates the node statistics with dec_slabs_node() and then hands the page over to free_slab().
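dec_slabs_node() just decrements the node's slab counter and subtracts this slab's objects from the node's object total, roughly as below (a sketch of the CONFIG_SLUB_DEBUG variant; with that option disabled it reduces to a no-op).

static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
{
	struct kmem_cache_node *n = get_node(s, node);

	/* One fewer slab, and 'objects' fewer objects, on this node. */
	atomic_long_dec(&n->nr_slabs);
	atomic_long_sub(objects, &n->total_objects);
}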
【file:/mm/slub.c】
static void free_slab(struct kmem_cache *s, struct page *page)
{
	if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
		struct rcu_head *head;

		if (need_reserve_slab_rcu) {
			int order = compound_order(page);
			int offset = (PAGE_SIZE << order) - s->reserved;

			VM_BUG_ON(s->reserved != sizeof(*head));
			head = page_address(page) + offset;
		} else {
			/*
			 * RCU free overloads the RCU head over the LRU
			 */
			head = (void *)&page->lru;
		}

		call_rcu(head, rcu_free_slab);
	} else
		__free_slab(s, page);
}
If the SLAB_DESTROY_BY_RCU flag is set, the memory pages are released via RCU, deferring the actual free until a grace period has passed by way of call_rcu(); otherwise they are released directly through the ordinary path __free_slab().
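The RCU callback rcu_free_slab() also ends up in __free_slab() once the grace period has elapsed, roughly as below (a simplified sketch; the exact definition may vary between kernel versions): it recovers the slab page from wherever free_slab() placed the rcu_head and releases it through the normal path.

static void rcu_free_slab(struct rcu_head *h)
{
	struct page *page;

	/* Recover the page from the rcu_head location chosen in free_slab(). */
	if (need_reserve_slab_rcu)
		page = virt_to_head_page(h);
	else
		page = container_of((struct list_head *)h, struct page, lru);

	__free_slab(page->slab_cache, page);
}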
And the implementation of __free_slab():
【file:/mm/slub.c】
static void __free_slab(struct kmem_cache *s, struct page *page)
{
	int order = compound_order(page);
	int pages = 1 << order;

	if (kmem_cache_debug(s)) {
		void *p;

		slab_pad_check(s, page);
		for_each_object(p, s, page_address(page),
						page->objects)
			check_object(s, page, p, SLUB_RED_INACTIVE);
	}

	kmemcheck_free_shadow(page, compound_order(page));

	mod_zone_page_state(page_zone(page),
		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
		-pages);

	__ClearPageSlabPfmemalloc(page);
	__ClearPageSlab(page);

	memcg_release_pages(s, order);
	page_mapcount_reset(page);
	if (current->reclaim_state)
		current->reclaim_state->reclaimed_slab += pages;
	__free_memcg_kmem_pages(page, order);
}
It first obtains the page order via compound_order() and from that the number of pages being released. Then kmem_cache_debug() checks whether debugging is enabled for this slab; if so, the slab is checked one more time, mainly for memory corruption, so that relevant information can be recorded. Next, kmemcheck_free_shadow() releases the kmemcheck shadow memory; mod_zone_page_state() updates the zone's page-state counters, while __ClearPageSlabPfmemalloc() and __ClearPageSlab() clear the page's slab-related flags. Finally, memcg_release_pages() handles releasing the pages from memcg accounting, page_mapcount_reset() resets the page's map count, and __free_memcg_kmem_pages() releases the pages at last.
The implementation of __free_memcg_kmem_pages() simply uncharges the pages from memcg and then returns them to the buddy allocator via __free_pages().
【file:/mm/page_alloc.c】
/*
 * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
 * pages allocated with __GFP_KMEMCG.
 *
 * Those pages are accounted to a particular memcg, embedded in the
 * corresponding page_cgroup. To avoid adding a hit in the allocator to search
 * for that information only to find out that it is NULL for users who have no
 * interest in that whatsoever, we provide these functions.
 *
 * The caller knows better which flags it relies on.
 */
void __free_memcg_kmem_pages(struct page *page, unsigned int order)
{
	memcg_kmem_uncharge_pages(page, order);
	__free_pages(page, order);
}
This concludes the analysis of object freeing.