内存管理 · 2017-05-15 0

【Linux内存管理】vmalloc不连续内存管理(3)

前面分析了不连续页面管理的初始化以及申请的实现,最后以释放不连续页面空间vfree()收尾。

【file:/mm/vmalloc.c】
/**
 * vfree - release memory allocated by vmalloc()
 * @addr: base address of the area to release (may be NULL)
 *
 * Releases the virtually continuous memory area starting at @addr, as
 * obtained from vmalloc(), vmalloc_32() or __vmalloc(). A NULL @addr is
 * accepted and results in no unmapping work.
 *
 * Outside interrupt context the area is torn down immediately through
 * __vunmap(). In interrupt context the address is queued on this CPU's
 * vfree_deferred list instead, and the associated work item is scheduled
 * when llist_add() returns non-zero (presumably meaning the list was empty
 * before the insertion - confirm against the llist API).
 *
 * Must not be called in NMI context (strictly speaking, only if we don't
 * have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
 * conventions for vfree() arch-dependent would be a really bad idea).
 *
 * NOTE: the deferred path stores a struct llist_node at *addr, so the
 * object there must have a size >= sizeof(llist_node).
 */
void vfree(const void *addr)
{
    struct vfree_deferred *deferred;

    BUG_ON(in_nmi());

    /* Leak tracking is dropped before the NULL check, as in the original. */
    kmemleak_free(addr);

    if (!addr)
        return;

    /* Common case: not in interrupt context, release synchronously. */
    if (likely(!in_interrupt())) {
        __vunmap(addr, 1);
        return;
    }

    /* Interrupt context: reuse the freed memory itself as the list node. */
    deferred = &__get_cpu_var(vfree_deferred);
    if (llist_add((struct llist_node *)addr, &deferred->list))
        schedule_work(&deferred->wq);
}

 

    该函数首先对kmemleak内存泄漏跟踪进行解除,继而保证释放的地址空间不为空。如若当前释放操作在中断中,那么将释放的内存空间信息加入到当前CPU的vfree_deferred管理链表中,继而通过schedule_work()唤醒工作队列,对内存进行异步释放操作;但如果当前操作不在中断中,将直接通过__vunmap()进行内存释放。

对于该工作项,其处理函数是在vmalloc_init()中注册的free_work(),具体实现如下。

【file:/mm/vmalloc.c】
/*
 * Work handler for deferred vfree() requests.
 * @w: the work item embedded in a struct vfree_deferred
 *
 * Detaches every node currently queued on the owning vfree_deferred list
 * and releases each queued address through __vunmap(), pages included.
 */
static void free_work(struct work_struct *w)
{
    struct vfree_deferred *deferred = container_of(w, struct vfree_deferred, wq);
    struct llist_node *pending = llist_del_all(&deferred->list);
    struct llist_node *next;

    for (; pending; pending = next) {
        /*
         * Fetch the successor first: the node is the start of the area
         * that is about to be released, so it must not be read afterwards.
         */
        next = llist_next(pending);
        __vunmap(pending, 1);
    }
}

 

     可以看到该函数实际上也是调用__vunmap()对不连续内存页面进行释放的。

具体分析一下__vunmap()的实现,其实现也是非常简单的,实则是vmalloc()的倒序操作。

【file:/mm/vmalloc.c】
/*
 * Tear down the vm area that starts at @addr.
 * @addr:             base address of the area; NULL is silently ignored
 * @deallocate_pages: non-zero to also give back every page recorded in the
 *                    area and the page-pointer array itself; zero to only
 *                    remove the area and free its vm_struct
 *
 * Warns and returns without freeing anything when @addr is not page
 * aligned or when remove_vm_area() finds no area for it.
 */
static void __vunmap(const void *addr, int deallocate_pages)
{
    struct vm_struct *area;
    int idx;

    if (!addr)
        return;

    if (WARN(!PAGE_ALIGNED(addr), "Trying to vfree() bad address (%p)\n",
            addr))
        return;

    area = remove_vm_area(addr);
    if (unlikely(!area)) {
        WARN(1, KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
                addr);
        return;
    }

    debug_check_no_locks_freed(addr, area->size);
    debug_check_no_obj_freed(addr, area->size);

    if (!deallocate_pages)
        goto free_area;

    /* Hand back each page; a hole in the array is a fatal error. */
    for (idx = 0; idx < area->nr_pages; idx++) {
        struct page *pg = area->pages[idx];

        BUG_ON(!pg);
        __free_page(pg);
    }

    /* The pointer array is released by whichever allocator VM_VPAGES indicates. */
    if (area->flags & VM_VPAGES)
        vfree(area->pages);
    else
        kfree(area->pages);

free_area:
    kfree(area);
}

 

    可以看到该函数先是通过remove_vm_area()将虚拟地址空间从管理结构中移除,继而是相关调测。最后判断入参deallocate_pages,如果该值为0,则表示它是vunmap()调用释放,否则是vfree()释放内存,其需要将申请的不连续物理内存页面释放掉。释放不连续物理内存页面的操作也很简单,通过循环遍历物理页面数组,将各页面逐一归还。页面释放完之后,最后将页面指针数组空间和vm_struct结构体空间释放。

深入看一下remove_vm_area()的实现。

【file:/mm/vmalloc.c】
/**
 * remove_vm_area - find and remove a continuous kernel virtual area
 * @addr: base address
 *
 * Looks up the vmap_area covering @addr and, if it carries VM_VM_AREA,
 * detaches its vm_struct, clears the flag, and unmaps and frees the
 * address range via free_unmap_vmap_area().
 *
 * Returns the detached VM area, or NULL when nothing suitable was found.
 * Using the returned area is NOT safe on SMP machines, except for its
 * size or flags.
 */
struct vm_struct *remove_vm_area(const void *addr)
{
    struct vmap_area *va = find_vmap_area((unsigned long)addr);
    struct vm_struct *detached;

    if (!va || !(va->flags & VM_VM_AREA))
        return NULL;

    detached = va->vm;

    /* Break the vmap_area -> vm_struct link under the area lock. */
    spin_lock(&vmap_area_lock);
    va->vm = NULL;
    va->flags &= ~VM_VM_AREA;
    spin_unlock(&vmap_area_lock);

    vmap_debug_free_range(va->va_start, va->va_end);
    free_unmap_vmap_area(va);
    /* NOTE(review): presumably removes a guard page from the reported size - confirm. */
    detached->size -= PAGE_SIZE;

    return detached;
}

 

该函数先是通过find_vmap_area()查找前面注册到红黑树的不连续内存的vmap_area管理信息,继而在vmap_area_lock保护下解除其与vm_struct的关联并清除VM_VM_AREA标记,最后调用free_unmap_vmap_area()解除该虚拟地址区间的页表映射并释放该区间;物理页面的释放则由调用者__vunmap()完成。

具体看一下find_vmap_area()实现。

【file:/mm/vmalloc.c】
/*
 * Locked lookup of the vmap_area covering @addr.
 * @addr: address to search for
 *
 * Takes vmap_area_lock around __find_vmap_area() and returns its result
 * (NULL when no area matches). The lock is dropped before returning.
 */
static struct vmap_area *find_vmap_area(unsigned long addr)
{
    struct vmap_area *found;

    spin_lock(&vmap_area_lock);
    found = __find_vmap_area(addr);
    spin_unlock(&vmap_area_lock);

    return found;
}

 

该函数主要是加保护锁后调用__find_vmap_area()进行查找。

【file:/mm/vmalloc.c】
/*
 * Search the vmap_area_root tree for the area whose range
 * [va_start, va_end) contains @addr.
 * @addr: address to search for
 *
 * Returns the matching vmap_area, or NULL when the descent runs off the
 * tree. No locking is done here; find_vmap_area() calls this with
 * vmap_area_lock held.
 */
static struct vmap_area *__find_vmap_area(unsigned long addr)
{
    struct rb_node *node = vmap_area_root.rb_node;

    while (node) {
        struct vmap_area *va = rb_entry(node, struct vmap_area, rb_node);

        if (addr >= va->va_start && addr < va->va_end)
            return va;

        /* Below this area: go left; otherwise (at or past its end): go right. */
        node = (addr < va->va_start) ? node->rb_left : node->rb_right;
    }

    return NULL;
}

 

可以看到很熟悉的代码,类似前面分配虚拟内存空间一样,遍历vmap_area_root红黑树进行查找。

    现在回到free_unmap_vmap_area()进行收尾。

【file:/mm/vmalloc.c】
/*
 * Free and unmap a vmap area
 * @va: the area to release
 *
 * Calls flush_cache_vunmap() on the area's va_start..va_end range first,
 * then hands the area to free_unmap_vmap_area_noflush(), whose contract
 * expects that flush to have happened already.
 */
static void free_unmap_vmap_area(struct vmap_area *va)
{
    flush_cache_vunmap(va->va_start, va->va_end);
    free_unmap_vmap_area_noflush(va);
}

 

该函数主要用于刷新指定虚拟地址空间对应的高速缓存,当然这取决于硬件体系结构。其中flush_cache_vunmap()在x86环境中是空函数。而free_unmap_vmap_area_noflush()函数的实现如下。

【file:/mm/vmalloc.c】
/*
 * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
 * called for the correct range previously.
 * @va: the area to release
 *
 * Unmaps the area with unmap_vmap_area() first and only then passes it to
 * free_vmap_area_noflush(), which requires the area to be unmapped.
 */
static void free_unmap_vmap_area_noflush(struct vmap_area *va)
{
    unmap_vmap_area(va);
    free_vmap_area_noflush(va);
}

 

unmap_vmap_area()以vmap_area的起止地址作为范围调用vunmap_page_range(),最终由vunmap_page_range()清除该虚拟地址空间在内核页表中的映射。其中unmap_vmap_area()实现如下。

【file:/mm/vmalloc.c】
/*
 * Clear the pagetable entries of a given vmap_area
 * @va: area whose va_start..va_end range is passed to vunmap_page_range()
 */
static void unmap_vmap_area(struct vmap_area *va)
{
    unsigned long start = va->va_start;
    unsigned long end = va->va_end;

    vunmap_page_range(start, end);
}

 

    而vunmap_page_range()实现了内存页面映射的解除。

【file:/mm/vmalloc.c】
/*
 * Walk the top-level (pgd) page-table slots covering [addr, end) and pass
 * each slot that is not skipped to vunmap_pud_range().
 * @addr: first address of the range; must be strictly below @end
 * @end:  address at which the walk stops
 *
 * NOTE(review): pgd_offset_k() presumably selects the kernel page tables -
 * confirm against the architecture headers.
 */
static void vunmap_page_range(unsigned long addr, unsigned long end)
{
    pgd_t *pgd;
    unsigned long next;
 
    /* An empty or inverted range is treated as a fatal caller bug. */
    BUG_ON(addr >= end);
    pgd = pgd_offset_k(addr);
    do {
        /* next bounds the part of the range handled in this iteration. */
        next = pgd_addr_end(addr, end);
        /*
         * `continue` inside do/while jumps to the loop condition, so pgd
         * and addr are still advanced when this slot is skipped.
         */
        if (pgd_none_or_clear_bad(pgd))
            continue;
        vunmap_pud_range(pgd, addr, next);
    } while (pgd++, addr = next, addr != end);
}

 

至于free_vmap_area_noflush()的实现如下。

【file:/mm/vmalloc.c】
/*
 * Free a vmap area, caller ensuring that the area has been unmapped
 * and flush_cache_vunmap had been called for the correct range
 * previously.
 * @va: the area to release
 *
 * The area is only marked VM_LAZY_FREE here and its page count is added to
 * vmap_lazy_nr; try_purge_vmap_area_lazy() is invoked once that counter
 * exceeds lazy_max_pages().
 */
static void free_vmap_area_noflush(struct vmap_area *va)
{
    unsigned long nr_pages;

    va->flags |= VM_LAZY_FREE;

    nr_pages = (va->va_end - va->va_start) >> PAGE_SHIFT;
    atomic_add(nr_pages, &vmap_lazy_nr);

    if (unlikely(atomic_read(&vmap_lazy_nr) > lazy_max_pages()))
        try_purge_vmap_area_lazy();
}

 

则是给vmap_area打上VM_LAZY_FREE标记,并把它占用的页面数累加到vmap_lazy_nr中;当累计的待释放页面数超过lazy_max_pages()之后,才会通过try_purge_vmap_area_lazy()将这些vmap_area真正释放掉。这是一个延迟的释放过程,最终会把vmap_area及相关的虚拟地址空间全部释放掉。