
Memory-Management Data Structures: The Memory Zone (zone)

Author: vincent_0425 | Posted 2019-03-17 22:13

    Following on from the article on memory-management concepts, this one walks through the related data structures. The main ones are:

    • pg_data_t: represents a (NUMA) node;
    • zone: a memory zone;
    • page: a page frame.

    This article focuses on struct zone. The listing below comes from a 4.x-era kernel (before per-zone LRU state was moved to the node level in 4.8):

    struct zone {
        /* Read-mostly fields */
        unsigned long watermark[NR_WMARK];
        unsigned long nr_reserved_highatomic;
        /*
         * We don't know if the memory that we're going to allocate will be
         * freeable and/or it will be released eventually, so to avoid totally
         * wasting several GB of ram we must reserve some of the lower zone
         * memory (otherwise we risk running OOM on the lower zones despite
         * there being tons of freeable ram on the higher zones).  This array is
         * recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl
         * changes.
         */
        long lowmem_reserve[MAX_NR_ZONES];
    #ifdef CONFIG_NUMA
        int node;
    #endif
        /*
         * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
         * this zone's LRU.  Maintained by the pageout code.
         */
        unsigned int inactive_ratio;
        struct pglist_data  *zone_pgdat;
        struct per_cpu_pageset __percpu *pageset;
        /*
         * This is a per-zone reserve of pages that should not be
         * considered dirtyable memory.
         */
        unsigned long       dirty_balance_reserve;
    #ifndef CONFIG_SPARSEMEM
        /*
         * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
         * In SPARSEMEM, this map is stored in struct mem_section
         */
        unsigned long       *pageblock_flags;
    #endif /* CONFIG_SPARSEMEM */
    #ifdef CONFIG_NUMA
        /*
         * zone reclaim becomes active if more unmapped pages exist.
         */
        unsigned long       min_unmapped_pages;
        unsigned long       min_slab_pages;
    #endif /* CONFIG_NUMA */
        /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
        unsigned long       zone_start_pfn;
        /*
         * spanned_pages is the total pages spanned by the zone, including
         * holes, which is calculated as:
         *  spanned_pages = zone_end_pfn - zone_start_pfn;
         * present_pages is physical pages existing within the zone, which
         * is calculated as:
         *  present_pages = spanned_pages - absent_pages(pages in holes);
         *
         * managed_pages is present pages managed by the buddy system, which
         * is calculated as (reserved_pages includes pages allocated by the
         * bootmem allocator):
         *  managed_pages = present_pages - reserved_pages;
         *
         * So present_pages may be used by memory hotplug or memory power
         * management logic to figure out unmanaged pages by checking
         * (present_pages - managed_pages). And managed_pages should be used
         * by page allocator and vm scanner to calculate all kinds of watermarks
         * and thresholds.
         *
         * Locking rules:
         *
         * zone_start_pfn and spanned_pages are protected by span_seqlock.
         * It is a seqlock because it has to be read outside of zone->lock,
         * and it is done in the main allocator path.  But, it is written
         * quite infrequently.
         *
         * The span_seq lock is declared along with zone->lock because it is
         * frequently read in proximity to zone->lock.  It's good to
         * give them a chance of being in the same cacheline.
         *
         * Write access to present_pages at runtime should be protected by
         * mem_hotplug_begin/end(). Any reader who can't tolerate drift of
         * present_pages should use get_online_mems() to get a stable value.
         *
         * Read access to managed_pages should be safe because it's unsigned
         * long. Write access to zone->managed_pages and totalram_pages is
         * protected by managed_page_count_lock at runtime. Ideally only
         * adjust_managed_page_count() should be used instead of directly
         * touching zone->managed_pages and totalram_pages.
         */
        unsigned long       managed_pages;
        unsigned long       spanned_pages;
        unsigned long       present_pages;
        const char      *name;
    #ifdef CONFIG_MEMORY_ISOLATION
        /*
         * Number of isolated pageblock. It is used to solve incorrect
         * freepage counting problem due to racy retrieving migratetype
         * of pageblock. Protected by zone->lock.
         */
        unsigned long       nr_isolate_pageblock;
    #endif
    #ifdef CONFIG_MEMORY_HOTPLUG
        /* see spanned/present_pages for more description */
        seqlock_t       span_seqlock;
    #endif
        /*
         * wait_table       -- the array holding the hash table
         * wait_table_hash_nr_entries   -- the size of the hash table array
         * wait_table_bits  -- wait_table_size == (1 << wait_table_bits)
         *
         * The purpose of all these is to keep track of the people
         * waiting for a page to become available and make them
         * runnable again when possible. The trouble is that this
         * consumes a lot of space, especially when so few things
         * wait on pages at a given time. So instead of using
         * per-page waitqueues, we use a waitqueue hash table.
         *
         * The bucket discipline is to sleep on the same queue when
         * colliding and wake all in that wait queue when removing.
         * When something wakes, it must check to be sure its page is
         * truly available, a la thundering herd. The cost of a
         * collision is great, but given the expected load of the
         * table, they should be so rare as to be outweighed by the
         * benefits from the saved space.
         *
         * __wait_on_page_locked() and unlock_page() in mm/filemap.c, are the
         * primary users of these fields, and in mm/page_alloc.c
         * free_area_init_core() performs the initialization of them.
         */
        wait_queue_head_t   *wait_table;
        unsigned long       wait_table_hash_nr_entries;
        unsigned long       wait_table_bits;
        ZONE_PADDING(_pad1_)
        /* free areas of different sizes */
        struct free_area    free_area[MAX_ORDER];
        /* zone flags, see below */
        unsigned long       flags;
        /* Write-intensive fields used from the page allocator */
        spinlock_t      lock;
        ZONE_PADDING(_pad2_)
        /* Write-intensive fields used by page reclaim */
        /* Fields commonly accessed by the page reclaim scanner */
        spinlock_t      lru_lock;
        struct lruvec       lruvec;
        /* Evictions & activations on the inactive file list */
        atomic_long_t       inactive_age;
        /*
         * When free pages are below this point, additional steps are taken
         * when reading the number of free pages to avoid per-cpu counter
         * drift allowing watermarks to be breached
         */
        unsigned long percpu_drift_mark;
    #if defined CONFIG_COMPACTION || defined CONFIG_CMA
        /* pfn where compaction free scanner should start */
        unsigned long       compact_cached_free_pfn;
        /* pfn where async and sync compaction migration scanner should start */
        unsigned long       compact_cached_migrate_pfn[2];
    #endif
    #ifdef CONFIG_COMPACTION
        /*
         * On compaction failure, 1<<compact_defer_shift compactions
         * are skipped before trying again. The number attempted since
         * last failure is tracked with compact_considered.
         */
        unsigned int        compact_considered;
        unsigned int        compact_defer_shift;
        int         compact_order_failed;
    #endif
    #if defined CONFIG_COMPACTION || defined CONFIG_CMA
        /* Set to true when the PG_migrate_skip bits should be cleared */
        bool            compact_blockskip_flush;
    #endif
        ZONE_PADDING(_pad3_)
        /* Zone statistics */
        atomic_long_t       vm_stat[NR_VM_ZONE_STAT_ITEMS];
    } ____cacheline_internodealigned_in_smp;
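
    To make the relationship between spanned_pages, present_pages and managed_pages concrete, here is a small worked example with purely hypothetical numbers, following the formulas in the comment block above:

    /* Hypothetical numbers, only to illustrate the formulas above. */
    unsigned long zone_start_pfn = 0x100000;          /* first PFN of the zone */
    unsigned long zone_end_pfn   = 0x180000;          /* one past the last PFN */
    unsigned long absent_pages   = 4096;              /* PFNs lost to holes    */
    unsigned long reserved_pages = 1024;              /* e.g. bootmem pages    */

    unsigned long spanned_pages = zone_end_pfn - zone_start_pfn;   /* 524288 */
    unsigned long present_pages = spanned_pages - absent_pages;    /* 520192 */
    unsigned long managed_pages = present_pages - reserved_pages;  /* 519168 */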
    

    The structure is split into four sections by ZONE_PADDING. On multi-CPU systems, several CPUs typically access structure members at the same time, so the fields are grouped around the locks that protect them (span_seqlock, lock, lru_lock) to improve performance. ZONE_PADDING pads each section out to a cache-line boundary so that the sections land in different cache lines and do not interfere with one another; in particular, each spinlock ends up in its own cache line.
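
    For reference, ZONE_PADDING is defined in include/linux/mmzone.h roughly as follows in kernels of this era (the exact form varies slightly between versions):

    #if defined(CONFIG_SMP)
    struct zone_padding {
        char x[0];                 /* zero-size member: occupies no space itself */
    } ____cacheline_internodealigned_in_smp;   /* forces cache-line alignment */
    #define ZONE_PADDING(name)  struct zone_padding name;
    #else
    #define ZONE_PADDING(name)     /* no padding needed on UP builds */
    #endif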

    • watermark holds the watermark values consulted when pages are reclaimed; they steer the behavior of the swap daemon (kswapd). There are three of them, defined as follows:
    enum zone_watermarks {
        WMARK_MIN,
        WMARK_LOW,
        WMARK_HIGH,
        NR_WMARK
    };
    // If the number of free pages falls below this value, page reclaim is under heavy pressure.
    #define min_wmark_pages(z) (z->watermark[WMARK_MIN])
    // If the number of free pages falls below this value, the kernel starts swapping pages out to disk.
    #define low_wmark_pages(z) (z->watermark[WMARK_LOW])
    // If the number of free pages is above high_wmark_pages, the zone's state is ideal.
    #define high_wmark_pages(z) (z->watermark[WMARK_HIGH])
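
    A minimal sketch of how the allocator combines these watermarks with lowmem_reserve when deciding whether a zone may satisfy an allocation. This is only an illustration; the real check, __zone_watermark_ok() in mm/page_alloc.c, additionally accounts for the allocation order and ALLOC_* flags:

    /* Simplified sketch only; the real __zone_watermark_ok() in
     * mm/page_alloc.c also handles higher-order allocations and
     * ALLOC_HIGH/ALLOC_HARDER adjustments. */
    static bool zone_watermark_ok_sketch(struct zone *z, unsigned long mark,
                                         int classzone_idx)
    {
        long free_pages = zone_page_state(z, NR_FREE_PAGES);

        /* The zone may be used only if, beyond the requested watermark,
         * it can also keep back the pages reserved against fallback
         * from higher zones (lowmem_reserve). */
        return free_pages > (long)mark + z->lowmem_reserve[classzone_idx];
    }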
    
    • lowmem_reserve sets aside, per memory zone, a number of pages for critical allocations that must not fail under any circumstances; the sketch above shows where this reserve enters the allocator's watermark check;

    • pageset implements the per-CPU lists of hot and cold page frames (a page frame counts as hot when it is presumably still in the CPU cache, and cold otherwise); see the sketch below;
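
    For reference, the per-CPU structure behind pageset looks roughly like this in kernels of this era (include/linux/mmzone.h); struct per_cpu_pageset wraps one of these together with per-CPU vm statistics. There is one list per migrate type, and the hot/cold distinction is realized by adding hot pages at the head of a list and cold pages at its tail:

    struct per_cpu_pages {
        int count;      /* number of pages currently on the lists       */
        int high;       /* high watermark: drain back to buddy above it */
        int batch;      /* chunk size for adding/removing pages         */

        /* Lists of pages, one per migrate type kept on the pcp lists;
         * hot pages sit at the head, cold pages at the tail. */
        struct list_head lists[MIGRATE_PCPTYPES];
    };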

    • free_area implements the buddy system; each array element represents a set of contiguous memory blocks of one fixed size, with free_area[n] tracking free blocks of 2^n page frames (see the sketch below).
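
    For reference, struct free_area in include/linux/mmzone.h of this era is just a set of free lists plus a counter:

    struct free_area {
        /* One free list per migrate type (UNMOVABLE, MOVABLE, ...);
         * every entry on the list at index n of zone->free_area is a
         * block of 2^n contiguous page frames. */
        struct list_head    free_list[MIGRATE_TYPES];
        unsigned long       nr_free;    /* number of free blocks of this order */
    };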
