本文承接上一篇《Linux migrate_type初步探索》。
1、物理页面添加到buddy系统
我们都知道物理内存一开始是由memblock进行分配管理,后面会切换到buddy系统管理。那么接下来我们看一下,memblock管理的物理页面是怎么添加到buddy系统中的。
start_kernel()
-> mm_init()
--> mem_init()
---> memblock_free_all()
----> free_low_memory_core_early()
1.1 free_low_memory_core_early()
/*
 * Walk the memblock ranges at the end of early boot: every reserved range is
 * passed to reserve_bootmem_region(), and every free range is released via
 * __free_memory_core().
 *
 * Returns the accumulated page-frame count reported by __free_memory_core().
 */
static unsigned long __init free_low_memory_core_early(void)
{
	unsigned long nr_freed = 0;
	phys_addr_t range_start, range_end;
	u64 idx;

	memblock_clear_hotplug(0, -1);

	/* Handle the reserved memory ranges first. */
	for_each_reserved_mem_range(idx, &range_start, &range_end) {
		reserve_bootmem_region(range_start, range_end);
	}

	/*
	 * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id
	 * because in some case like Node0 doesn't have RAM installed
	 * low ram will be on Node1
	 */
	/* Then iterate over the free ranges and release each of them. */
	for_each_free_mem_range(idx, NUMA_NO_NODE, MEMBLOCK_NONE,
				&range_start, &range_end, NULL) {
		nr_freed += __free_memory_core(range_start, range_end);
	}

	return nr_freed;
}
/*
 * Release the page frames covered by the physical range [start, end).
 *
 * The bounds are converted to page frame numbers with PFN_UP()/PFN_DOWN(),
 * and the upper bound is additionally capped at max_low_pfn.
 *
 * Returns the number of page frames handed to __free_pages_memory(), or 0
 * when the converted range is empty.
 */
static unsigned long __init __free_memory_core(phys_addr_t start,
					       phys_addr_t end)
{
	unsigned long first_pfn = PFN_UP(start);
	unsigned long limit_pfn = min_t(unsigned long,
					PFN_DOWN(end), max_low_pfn);

	if (first_pfn < limit_pfn) {
		/* Non-empty range: release it and report its size. */
		__free_pages_memory(first_pfn, limit_pfn);
		return limit_pfn - first_pfn;
	}

	return 0;
}
1.2 __free_pages_memory()
/*
 * Release the page frames [start, end) through memblock_free_pages(), one
 * power-of-two sized chunk at a time.
 *
 * @start: first page frame number to release
 * @end:   page frame number one past the last frame to release
 */
static void __init __free_pages_memory(unsigned long start, unsigned long end)
{
	int order;

	while (start < end) {
		/*
		 * Pick the largest order the alignment of start allows, capped
		 * at MAX_ORDER - 1 (the largest order the buddy system holds).
		 * __ffs() yields the index of the lowest set bit, e.g.
		 * start = 0x63300 is 0x100-aligned, so __ffs() returns 8.
		 *
		 * __ffs() is undefined for 0. Page frame 0 is aligned to every
		 * order, so use the maximum order directly in that case.
		 */
		if (start)
			order = min(MAX_ORDER - 1UL, __ffs(start));
		else
			order = MAX_ORDER - 1;

		/* Shrink the chunk until it no longer runs past end. */
		while (start + (1UL << order) > end)
			order--;

		/* Release the chunk to the buddy system. */
		memblock_free_pages(pfn_to_page(start), start, order);

		start += (1UL << order);
	}
}
1.3 memblock_free_pages()
/*
 * Release a block of 2^order pages starting at @page (page frame @pfn),
 * unless early_page_uninitialised() reports the pfn, in which case nothing
 * is done here.
 */
void __init memblock_free_pages(struct page *page, unsigned long pfn,
				unsigned int order)
{
	/* Only pfns that pass the check go to the internal release helper. */
	if (!early_page_uninitialised(pfn))
		__free_pages_core(page, order);
}
/*
 * Prepare a block of 2^order pages starting at @page and release it to the
 * buddy system: clear the reserved flag and zero the refcount of every page,
 * account the pages in the zone's managed_pages, then free the block.
 */
void __free_pages_core(struct page *page, unsigned int order)
{
	const unsigned int nr_pages = 1 << order;
	struct page *last = page + (nr_pages - 1);
	struct page *cur;

	/*
	 * When initializing the memmap, __init_single_page() sets the refcount
	 * of all pages to 1 ("allocated"/"not free"). We have to set the
	 * refcount of all involved pages to 0.
	 */
	prefetchw(page);

	/* Every page but the last: prefetch the successor, then reset. */
	for (cur = page; cur != last; cur++) {
		prefetchw(cur + 1);
		/* Clear the page's reserved flag. */
		__ClearPageReserved(cur);
		/* Drop the page's reference count to 0. */
		set_page_count(cur, 0);
	}

	/* The last page has no successor to prefetch. */
	__ClearPageReserved(last);
	set_page_count(last, 0);

	atomic_long_add(nr_pages, &page_zone(page)->managed_pages);

	/*
	 * Bypass PCP and place fresh pages right to the tail, primarily
	 * relevant for memory onlining.
	 */
	__free_pages_ok(page, order, FPI_TO_TAIL);
}
/*
 * Free a block of 2^order pages starting at @page: if free_pages_prepare()
 * accepts the block, account it under PGFREE and hand it to free_one_page()
 * with local interrupts disabled.
 */
static void __free_pages_ok(struct page *page, unsigned int order,
			    fpi_t fpi_flags)
{
	unsigned long pfn = page_to_pfn(page);
	unsigned long irq_flags;
	int mt;

	if (free_pages_prepare(page, order, true)) {
		/* Migratetype of the pageblock that contains the page. */
		mt = get_pfnblock_migratetype(page, pfn);

		local_irq_save(irq_flags);
		__count_vm_events(PGFREE, 1 << order);
		/*
		 * Put the block on the free list that matches its migratetype
		 * and order.
		 */
		free_one_page(page_zone(page), page, pfn, order, mt, fpi_flags);
		local_irq_restore(irq_flags);
	}
}
这里就是物理内存从memblock转移到buddy系统的流程。
2、迁移类型fallback处理逻辑
接下来我们再来看一个新问题:一开始页块的迁移类型都是MIGRATE_MOVABLE,那么对于MIGRATE_UNMOVABLE迁移类型的内存分配,应该怎么处理呢?
2.1 原理图
我们都知道Linux内核的内存分配接口是alloc_pages(),那么我们就跟踪这个接口,看看它是如何分配出MIGRATE_UNMOVABLE迁移类型的内存的。
alloc_pages()
-> alloc_pages_node()
--> __alloc_pages_node()
---> __alloc_pages()
----> __alloc_pages_nodemask()
我们接下来仔细研究一下__alloc_pages_nodemask()的实现:
2.2 __alloc_pages_nodemask()
/*
* This is the 'heart' of the zoned buddy allocator.
*/
struct page *