【Linux Memory Source Code Analysis】The SLUB Allocation Algorithm (5)

The previous articles analyzed cache creation and object allocation in the SLUB allocator; this one continues with object freeing (reclaim).

The interface for freeing an object in the SLUB allocator is kmem_cache_free():

【file:/mm/slub.c】
void kmem_cache_free(struct kmem_cache *s, void *x)
{
	s = cache_from_obj(s, x);
	if (!s)
		return;
	slab_free(s, virt_to_head_page(x), x, _RET_IP_);
	trace_kmem_cache_free(_RET_IP_, x);
}

In this function, cache_from_obj() looks up the kmem_cache that the object being freed belongs to, slab_free() performs the actual release, and trace_kmem_cache_free() records a trace event for the free operation.
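
For context, here is a minimal, hypothetical usage sketch of this API as seen from a kernel module; the cache name, object type and call sites are illustrative only and not taken from the kernel source analyzed below:

#include <linux/module.h>
#include <linux/slab.h>

/* Hypothetical object type and cache, purely for illustration. */
struct foo {
	int id;
	char name[32];
};

static struct kmem_cache *foo_cachep;

static int __init foo_init(void)
{
	struct foo *obj;

	/* Create a SLUB-backed cache of 'struct foo' objects. */
	foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
				       0, SLAB_HWCACHE_ALIGN, NULL);
	if (!foo_cachep)
		return -ENOMEM;

	obj = kmem_cache_alloc(foo_cachep, GFP_KERNEL);
	if (obj)
		kmem_cache_free(foo_cachep, obj);	/* the path analyzed here */

	return 0;
}

static void __exit foo_exit(void)
{
	kmem_cache_destroy(foo_cachep);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");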

Let's look at the implementation of cache_from_obj() in detail:

【file:/mm/slub.h】
static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
{
	struct kmem_cache *cachep;
	struct page *page;

	/*
	 * When kmemcg is not being used, both assignments should return the
	 * same value. but we don't want to pay the assignment price in that
	 * case. If it is not compiled in, the compiler should be smart enough
	 * to not do even the assignment. In that case, slab_equal_or_root
	 * will also be a constant.
	 */
	if (!memcg_kmem_enabled() && !unlikely(s->flags & SLAB_DEBUG_FREE))
		return s;

	page = virt_to_head_page(x); 
	cachep = page->slab_cache;
	if (slab_equal_or_root(cachep, s)) 
		return cachep;

	pr_err("%s: Wrong slab cache. %s but object is from %s\n",
		__FUNCTION__, cachep->name, s->name);
	WARN_ON_ONCE(1);
	return s;
}

Although the kmem_cache was already passed in to kmem_cache_free(), it is re-derived here, because the page structure obtained from the object address via virt_to_head_page() is far more trustworthy than the caller-supplied pointer. The function first evaluates if (!memcg_kmem_enabled() && !unlikely(s->flags & SLAB_DEBUG_FREE)): if kmemcg is not enabled and the cache does not have SLAB_DEBUG_FREE set, the caller's kmem_cache is returned immediately. Otherwise it obtains the page management structure from the object address via virt_to_head_page(), reads the cache recorded there, and uses slab_equal_or_root() to check whether the caller-supplied kmem_cache matches the cache the object actually belongs to. If they match, the kmem_cache derived from the object is returned; otherwise an error is logged, a one-off warning is raised, and the caller-supplied kmem_cache is returned as a last resort.
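
For reference, slab_equal_or_root() (defined in mm/slab.h around this kernel version) is roughly the following sketch, not a verbatim quote; when kmemcg is compiled in, a caller freeing through the root cache of a per-memcg child cache is also accepted as a match:

static inline bool slab_equal_or_root(struct kmem_cache *s,
				      struct kmem_cache *p)
{
	/* Match if the two caches are the same, or if one is the root
	 * cache that the other (per-memcg) cache was derived from. */
	return (p == s) ||
	       (s->memcg_params && (p == s->memcg_params->root_cache));
}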

Now let's analyze the SLUB object-freeing implementation, slab_free(), in detail:

【file:/mm/slub.c】
/*
 * Fastpath with forced inlining to produce a kfree and kmem_cache_free that
 * can perform fastpath freeing without additional function calls.
 *
 * The fastpath is only possible if we are freeing to the current cpu slab
 * of this processor. This typically the case if we have just allocated
 * the item before.
 *
 * If fastpath is not possible then fall back to __slab_free where we deal
 * with all sorts of special processing.
 */
static __always_inline void slab_free(struct kmem_cache *s,
			struct page *page, void *x, unsigned long addr)
{
	void **object = (void *)x;
	struct kmem_cache_cpu *c;
	unsigned long tid;

	slab_free_hook(s, x);

redo:
	/*
	 * Determine the currently cpus per cpu slab.
	 * The cpu may change afterward. However that does not matter since
	 * data is retrieved via this pointer. If we are on the same cpu
	 * during the cmpxchg then the free will succedd.
	 */
	preempt_disable();
	c = __this_cpu_ptr(s->cpu_slab);

	tid = c->tid;
	preempt_enable();

	if (likely(page == c->page)) {
		set_freepointer(s, object, c->freelist);

		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				c->freelist, tid,
				object, next_tid(tid)))) {

			note_cmpxchg_failure("slab_free", s, tid);
			goto redo;
		}
		stat(s, FREE_FASTPATH);
	} else
		__slab_free(s, page, x, addr);

}

The function first calls the release hook slab_free_hook(), which mainly unregisters the object from kmemleak. The redo label that follows is the point to jump back to when the freeing task migrates to another CPU due to preemption and the operation has to be retried. Inside the redo block, preemption is disabled with preempt_disable(), then __this_cpu_ptr() fetches the local CPU's kmem_cache_cpu management structure together with its transaction ID (tid), and preempt_enable() re-enables preemption. If if (likely(page == c->page)) shows that the object being freed belongs to the local CPU's current slab, set_freepointer() writes the free pointer embedded in the object so that it points at the current freelist head, and then, just as on the allocation path, this_cpu_cmpxchg_double() atomically pushes the object back onto the per-CPU freelist; if that fails, note_cmpxchg_failure() is recorded and the operation is retried from redo. If the object does not belong to the local CPU's current slab, the fastpath is not possible and the object is released through the slow path __slab_free().
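
The free pointer that set_freepointer() writes lives inside the object itself, at offset s->offset. These two helpers, roughly as they appear in mm/slub.c of this era (quoted here as a sketch), make that explicit:

/* Read the "next free object" pointer embedded in an object. */
static inline void *get_freepointer(struct kmem_cache *s, void *object)
{
	return *(void **)(object + s->offset);
}

/* Store fp as the "next free object" pointer inside the object, so the
 * object can become the new head of a freelist. */
static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
{
	*(void **)(object + s->offset) = fp;
}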

Next, let's analyze the implementation of __slab_free():

【file:/mm/slub.c】
/*
 * Slow patch handling. This may still be called frequently since objects
 * have a longer lifetime than the cpu slabs in most processing loads.
 *
 * So we still attempt to reduce cache line usage. Just take the slab
 * lock and free the item. If there is no additional partial page
 * handling required then we can return immediately.
 */
static void __slab_free(struct kmem_cache *s, struct page *page,
			void *x, unsigned long addr)
{
	void *prior;
	void **object = (void *)x;
	int was_frozen;
	struct page new;
	unsigned long counters;
	struct kmem_cache_node *n = NULL;
	unsigned long uninitialized_var(flags);

	stat(s, FREE_SLOWPATH);

	if (kmem_cache_debug(s) &&
		!(n = free_debug_processing(s, page, x, addr, &flags)))
		return;

	do {
		if (unlikely(n)) {
			spin_unlock_irqrestore(&n->list_lock, flags);
			n = NULL;
		}
		prior = page->freelist;
		counters = page->counters;
		set_freepointer(s, object, prior);
		new.counters = counters;
		was_frozen = new.frozen;
		new.inuse--;
		if ((!new.inuse || !prior) && !was_frozen) {

			if (kmem_cache_has_cpu_partial(s) && !prior) {

				/*
				 * Slab was on no list before and will be
				 * partially empty
				 * We can defer the list move and instead
				 * freeze it.
				 */
				new.frozen = 1;

			} else { /* Needs to be taken off a list */

	                        n = get_node(s, page_to_nid(page));
				/*
				 * Speculatively acquire the list_lock.
				 * If the cmpxchg does not succeed then we may
				 * drop the list_lock without any processing.
				 *
				 * Otherwise the list_lock will synchronize with
				 * other processors updating the list of slabs.
				 */
				spin_lock_irqsave(&n->list_lock, flags);

			}
		}

	} while (!cmpxchg_double_slab(s, page,
		prior, counters,
		object, new.counters,
		"__slab_free"));

	if (likely(!n)) {

		/*
		 * If we just froze the page then put it onto the
		 * per cpu partial list.
		 */
		if (new.frozen && !was_frozen) {
			put_cpu_partial(s, page, 1);
			stat(s, CPU_PARTIAL_FREE);
		}
		/*
		 * The list lock was not taken therefore no list
		 * activity can be necessary.
		 */
                if (was_frozen)
                        stat(s, FREE_FROZEN);
                return;
        }

	if (unlikely(!new.inuse && n->nr_partial > s->min_partial))
		goto slab_empty;

	/*
	 * Objects left in the slab. If it was not on the partial list before
	 * then add it.
	 */
	if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
		if (kmem_cache_debug(s))
			remove_full(s, n, page);
		add_partial(n, page, DEACTIVATE_TO_TAIL);
		stat(s, FREE_ADD_PARTIAL);
	}
	spin_unlock_irqrestore(&n->list_lock, flags);
	return;

slab_empty:
	if (prior) {
		/*
		 * Slab on the partial list.
		 */
		remove_partial(n, page);
		stat(s, FREE_REMOVE_PARTIAL);
	} else {
		/* Slab must be on the full list */
		remove_full(s, n, page);
	}

	spin_unlock_irqrestore(&n->list_lock, flags);
	stat(s, FREE_SLAB);
	discard_slab(s, page);
}

The function starts with if (kmem_cache_debug(s) && !(n = free_debug_processing(s, page, x, addr, &flags))): kmem_cache_debug() tests whether debugging is enabled for this cache and, if so, free_debug_processing() runs the debug checks and, on success, returns the validated kmem_cache_node management structure. The code then enters the do-while loop: if a kmem_cache_node was returned by free_debug_processing(), n is non-NULL, so the list_lock taken inside free_debug_processing() is dropped and n is reset to NULL (the same unlock also releases the speculatively acquired list_lock when the cmpxchg below fails and the loop retries). The slab's freelist and counters are then read, set_freepointer() stores the free pointer inside the object, and the slab's in-use object count is decremented in the local copy new.

Next comes the check if ((!new.inuse || !prior) && !was_frozen): if, after this free, the slab has no objects in use, or its freelist was previously empty, and the slab is not frozen (i.e. it is not held as a per-CPU slab), then either the freed object was the last one in use, so the slab can be returned to the buddy allocator, or a previously full slab has just become partially full and its list state must change. The inner check if (kmem_cache_has_cpu_partial(s) && !prior) means per-CPU partial lists are available and the slab's freelist was empty (the slab was full and on no list); in that case the slab is marked frozen so it can later be placed on the per-CPU partial list. Otherwise the slab has to be moved on or off a node list, so get_node() fetches the node management structure and spin_lock_irqsave() speculatively acquires its list_lock. Finally cmpxchg_double_slab() atomically installs the new freelist and counters; if it fails, the loop retries.
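
The inuse/objects/frozen fields manipulated through new.counters are bitfields packed into the slab page's struct page, overlapping counters so that cmpxchg_double_slab() can update them together with the freelist in one atomic operation. A simplified excerpt of the relevant fields (trimmed for illustration, not the full mm_types.h definition) looks roughly like this:

/* Simplified excerpt of the 'struct page' fields used by SLUB. */
struct page {
	/* ... */
	void *freelist;			/* first free object in this slab */
	union {
		unsigned long counters;	/* updated as one word by cmpxchg */
		struct {
			unsigned inuse:16;	/* objects currently allocated */
			unsigned objects:15;	/* total objects in the slab */
			unsigned frozen:1;	/* slab owned by a CPU (cpu slab/partial) */
		};
	};
	/* ... */
};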

The next check, if (likely(!n)), handles the case where n is NULL, i.e. the node's list_lock was never taken. Inside it, if (new.frozen && !was_frozen) means this free has just frozen the slab (it was not frozen before), so put_cpu_partial() hangs it on the per-CPU partial list and stat() updates the CPU_PARTIAL_FREE statistic; if the slab was already frozen (was_frozen), only the FREE_FROZEN statistic needs updating, and the function returns.

Then if (unlikely(!new.inuse && n->nr_partial > s->min_partial)): if the slab now has no objects in use and the node already holds more partial slabs than the min_partial threshold, the page should be released, so the code jumps to the slab_empty label to free the slab.

Otherwise, if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)): when per-CPU partial lists are not available and the slab previously had no free objects (it was full), this free leaves it partially full (some objects are still in use), so remove_full() takes it off the full list (which is only tracked when debugging is enabled) and add_partial() appends it to the tail of the node's partial list, with stat() updating FREE_ADD_PARTIAL.
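
add_partial() and remove_partial() are thin list helpers; stripped of lock assertions they boil down to roughly the following (a simplified sketch, not a verbatim quote), with the caller holding n->list_lock:

/* Link a slab page into the node's partial list, at head or tail
 * depending on the DEACTIVATE_TO_HEAD/DEACTIVATE_TO_TAIL hint. */
static inline void add_partial(struct kmem_cache_node *n,
			       struct page *page, int tail)
{
	n->nr_partial++;
	if (tail == DEACTIVATE_TO_TAIL)
		list_add_tail(&page->lru, &n->partial);
	else
		list_add(&page->lru, &n->partial);
}

/* Unlink a slab page from the node's partial list. */
static inline void remove_partial(struct kmem_cache_node *n,
				  struct page *page)
{
	list_del(&page->lru);
	n->nr_partial--;
}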

Finally spin_unlock_irqrestore() drops the list_lock and restores the interrupt state.

As for the slab-release flow under the slab_empty label: depending on whether the slab's freelist was empty, the page is taken off either the node's partial list via remove_partial() or the full list via remove_full(); spin_unlock_irqrestore() then drops the lock and discard_slab() finally releases the slab.

Looking back over this function, let's now focus on the handling in free_debug_processing() and the implementation of discard_slab().

【file:/mm/slub.c】
static noinline struct kmem_cache_node *free_debug_processing(
	struct kmem_cache *s, struct page *page, void *object,
	unsigned long addr, unsigned long *flags)
{
	struct kmem_cache_node *n = get_node(s, page_to_nid(page));

	spin_lock_irqsave(&n->list_lock, *flags);
	slab_lock(page);

	if (!check_slab(s, page))
		goto fail;

	if (!check_valid_pointer(s, page, object)) {
		slab_err(s, page, "Invalid object pointer 0x%p", object);
		goto fail;
	}

	if (on_freelist(s, page, object)) {
		object_err(s, page, object, "Object already free");
		goto fail;
	}

	if (!check_object(s, page, object, SLUB_RED_ACTIVE))
		goto out;

	if (unlikely(s != page->slab_cache)) {
		if (!PageSlab(page)) {
			slab_err(s, page, "Attempt to free object(0x%p) "
				"outside of slab", object);
		} else if (!page->slab_cache) {
			printk(KERN_ERR
				"SLUB <none>: no slab for object 0x%p.\n",
						object);
			dump_stack();
		} else
			object_err(s, page, object,
					"page slab pointer corrupt.");
		goto fail;
	}

	if (s->flags & SLAB_STORE_USER)
		set_track(s, object, TRACK_FREE, addr);
	trace(s, page, object, 0);
	init_object(s, object, SLUB_RED_INACTIVE);
out:
	slab_unlock(page);
	/*
	 * Keep node_lock to preserve integrity
	 * until the object is actually freed
	 */
	return n;

fail:
	slab_unlock(page);
	spin_unlock_irqrestore(&n->list_lock, *flags);
	slab_fix(s, "Object at 0x%p not freed", object);
	return NULL;
}

The checks performed by this debug routine are: check_slab() verifies that the kmem_cache and the slab information recorded in the page agree, a mismatch indicating corruption or inconsistent data; check_valid_pointer() verifies that the object address is legal, i.e. that it is exactly the start address of some object rather than a location in the middle of one; on_freelist() checks whether the object is already on the freelist, guarding against double frees; check_object() validates the object's memory layout according to the SLAB_RED_ZONE and SLAB_POISON flags; and the if (unlikely(s != page->slab_cache)) check makes sure the caller-supplied kmem_cache matches the kmem_cache the page belongs to, logging an error otherwise. In addition, if SLAB_STORE_USER is set (if (s->flags & SLAB_STORE_USER)), set_track() records who freed the object; finally trace() records the object's trace information and init_object() re-initializes the object. The out and fail labels at the end handle the post-processing of the successful and failed cases respectively.
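
As an example of these checks, check_valid_pointer() (roughly as in mm/slub.c of this era) confirms that the pointer lies inside the slab and falls exactly on an object boundary:

static inline int check_valid_pointer(struct kmem_cache *s,
				      struct page *page, const void *object)
{
	void *base;

	if (!object)
		return 1;

	base = page_address(page);
	/* The pointer must lie within the slab and be an exact multiple of
	 * the object size from its base, i.e. the start of an object. */
	if (object < base || object >= base + page->objects * s->size ||
	    (object - base) % s->size)
		return 0;

	return 1;
}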

As for the implementation of discard_slab():

【file:/mm/slub.c】
static void discard_slab(struct kmem_cache *s, struct page *page)
{
	dec_slabs_node(s, page_to_nid(page), page->objects); 
	free_slab(s, page);
}

When discard_slab() releases a slab, it first updates the node statistics via dec_slabs_node() and then hands the page off to free_slab().
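
dec_slabs_node() simply adjusts the per-node accounting; roughly (as in mm/slub.c of this era, where these counters are only maintained with SLUB debugging support compiled in):

static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
{
	struct kmem_cache_node *n = get_node(s, node);

	/* One fewer slab and 'objects' fewer total objects on this node. */
	atomic_long_dec(&n->nr_slabs);
	atomic_long_sub(objects, &n->total_objects);
}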

【file:/mm/slub.c】
static void free_slab(struct kmem_cache *s, struct page *page)
{
	if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) {
		struct rcu_head *head;

		if (need_reserve_slab_rcu) {
			int order = compound_order(page);
			int offset = (PAGE_SIZE << order) - s->reserved;

			VM_BUG_ON(s->reserved != sizeof(*head));
			head = page_address(page) + offset;
		} else {
			/*
			 * RCU free overloads the RCU head over the LRU
			 */
			head = (void *)&page->lru;
		}

		call_rcu(head, rcu_free_slab);
	} else
		__free_slab(s, page);
}

If the SLAB_DESTROY_BY_RCU flag is set, the memory pages are released via RCU; otherwise they are released directly through __free_slab().
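
The RCU callback rcu_free_slab() recovers the page from wherever the rcu_head was placed (the reserved area at the end of the slab, or the overloaded page->lru) and then also ends up in __free_slab(); roughly, as in mm/slub.c of this era:

static void rcu_free_slab(struct rcu_head *h)
{
	struct page *page;

	if (need_reserve_slab_rcu)
		/* rcu_head was placed in the reserved space at the slab's end. */
		page = virt_to_head_page(h);
	else
		/* rcu_head overloaded the page's lru list_head. */
		page = container_of((struct list_head *)h, struct page, lru);

	__free_slab(page->slab_cache, page);
}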

The implementation of __free_slab():

【file:/mm/slub.c】
static void __free_slab(struct kmem_cache *s, struct page *page)
{
	int order = compound_order(page);
	int pages = 1 << order;

	if (kmem_cache_debug(s)) {
		void *p;

		slab_pad_check(s, page);
		for_each_object(p, s, page_address(page),
						page->objects)
			check_object(s, page, p, SLUB_RED_INACTIVE);
	}

	kmemcheck_free_shadow(page, compound_order(page));

	mod_zone_page_state(page_zone(page),
		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
		-pages);

	__ClearPageSlabPfmemalloc(page);
	__ClearPageSlab(page);

	memcg_release_pages(s, order);
	page_mapcount_reset(page);
	if (current->reclaim_state)
		current->reclaim_state->reclaimed_slab += pages;
	__free_memcg_kmem_pages(page, order);
}

It first obtains the compound page order via compound_order() and from it the number of pages to release. kmem_cache_debug() then checks whether debugging is enabled for this slab and, if so, runs a check over the slab, mainly looking for memory corruption and recording the relevant information. kmemcheck_free_shadow() releases the kmemcheck shadow memory; mod_zone_page_state() updates the zone's page-state counters, while __ClearPageSlabPfmemalloc() and __ClearPageSlab() clear the page's slab-related flags. memcg_release_pages() handles the memcg side of the release, page_mapcount_reset() resets the page's map count, and finally __free_memcg_kmem_pages() frees the pages.
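
The debug loop above walks every object in the slab with for_each_object(); the macro is simply a strided loop over the slab's address range (roughly as defined in mm/slub.c of this era):

/* Iterate over every object in a slab: start at the slab's base address
 * and step forward by the (padded) object size, page->objects times. */
#define for_each_object(__p, __s, __addr, __objects)		\
	for (__p = (__addr);					\
	     __p < (__addr) + (__objects) * (__s)->size;	\
	     __p += (__s)->size)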

As for __free_memcg_kmem_pages(): it uncharges the pages from memcg and then returns them to the buddy allocator via __free_pages().

【file:/mm/page_alloc.c】
/*
 * __free_memcg_kmem_pages and free_memcg_kmem_pages will free
 * pages allocated with __GFP_KMEMCG.
 *
 * Those pages are accounted to a particular memcg, embedded in the
 * corresponding page_cgroup. To avoid adding a hit in the allocator to search
 * for that information only to find out that it is NULL for users who have no
 * interest in that whatsoever, we provide these functions.
 *
 * The caller knows better which flags it relies on.
 */
void __free_memcg_kmem_pages(struct page *page, unsigned int order)
{
	memcg_kmem_uncharge_pages(page, order);
	__free_pages(page, order);
}

This completes the analysis of object freeing.


