/*
 * Copyright (c) 2013, 2021, Oracle and/or its affiliates.
 */

#ifndef _VM_MO_IMPL_H
#define _VM_MO_IMPL_H

#include <sys/avl.h>
#include <sys/condvar.h>
#include <sys/list.h>
#include <sys/lgrp.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/sysmacros.h>
#include <vm/types.h>
#include <vm/interface.h>
#include <vm/freelist_impl.h>
#include <vm/util_impl.h>
#include <vm/mw.h>
#include <vm/mo.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * If psf_pfn is PSF_PFN_INVALID, or PSF_EXCL is set:
 *
 *      All state transitions only require mon_lock
 *
 * Once psf_pfn is valid, and PSF_EXCL is clear:
 *
 *      Setting/clearing PSF_CAPTURING only requires the mon_lock
 *      Setting PSF_EXCL (invalidation) requires setting PSF_CAPTURING,
 *      calling all mw callbacks, waiting for any MWCB_LATERs to drain,
 *      and then holding mon_lock.
 *
 * If PSF_CHILDNODE is set, the entire interpretation of the field is
 * different.  The szc is the stride szc of the child node, and everything
 * above the PSF_CHILDNODE bit is actually the pointer to the child monode.
 */
typedef enum psf_flags {
        PSF_CHILDNODE = (1 << 0), /* this slot is in another MON */
        PSF_EXCL = (1 << 1), /* EXCL setter has exclusive PFN */
        PSF_CAPTURING = (1 << 2), /* capture in progress */
        PSF_REF = (1 << 3), /* referenced */
        PSF_MOD = (1 << 4), /* modified */
        PSF_ZERO = (1 << 5), /* present, zero */
        PSF_RESIDENT = (1 << 6), /* locked in memory */
        PSF_VALID = (1 << 7), /* psf_pfn represents memory */
        PSF_TEMP_INVALID = (1 << 8), /* PSF_VALID clear, will be re-asserted
                                        before PSF_EXCL cleared */
        PSF_RANGELOCK = (1 << 9), /* setter owns range */
        PSF_RANGELOCK_EXCL = (1 << 10) /* PSF_RANGELOCK owner also owns
                                          PSF_EXCL */
} psf_flags_t;

/*
 * PFN, SZC, and Flags entry (PSF).  The bulk of the contents of a
 * Memory Object Node is in mon_psf[], an array of PSF structures.
 */
typedef union psf {
        struct psf_s {
#ifdef __x86    /* need to ensure szc and flags are the low bits */
                uint64_t psfs_szc : 5;
                uint64_t psfs_flags : 19;
                uint64_t psfs_pfn : 40;
#else
                uint64_t psfs_pfn : 40;
                uint64_t psfs_flags : 19;
                uint64_t psfs_szc : 5;
#endif
        } psf_s;
        uint64_t psf_bits;
} psf_t;

#define psf_pfn         psf_s.psfs_pfn
#define psf_flags       psf_s.psfs_flags
#define psf_szc         psf_s.psfs_szc

/* We encode the MONODE pointer for PSF_CHILDNODE slots in the PSF entry */
#define MONODE_CHILD_ALIGN      ((size_t)PSF_CHILDNODE << (5 + 1))
#define PSF_CHILDNODE_MONODE(psfp) \
        ((monode_t *)(P2ALIGN((psfp)->psf_bits, MONODE_CHILD_ALIGN)))

#define PSF_PFN_INVALID         0xfffffffffful
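
/*
 * Illustrative sketch only: the hypothetical predicate below restates the
 * locking rule from the comment above.  A slot whose PFN is invalid, or
 * which has PSF_EXCL set, may change state under mon_lock alone; a
 * PSF_CHILDNODE slot is excluded because its bits are reinterpreted as a
 * child monode pointer.
 */
static inline boolean_t
psf_example_monlock_only(psf_t psf)
{
        if (psf.psf_flags & PSF_CHILDNODE)
                return (B_FALSE);
        return (psf.psf_pfn == PSF_PFN_INVALID ||
            (psf.psf_flags & PSF_EXCL) != 0);
}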

typedef void psfw_cb_f(psfw_t *, void *);

/*
 * PSF Wait structure, used to wait for a psf_flags change in a particular
 * offset range of an monode.
 */
struct psfw {
        psfw_t *psfw_next; /* waiters list linkage */
        psfw_t *psfw_prev; /* waiters list linkage */
        u_offset_t psfw_off; /* offset of interest */
        struct monode *psfw_mon; /* monode installed in */
        psfw_cb_f *psfw_cb;
        void *psfw_cbarg;
        psf_flags_t psfw_flags; /* flags whose changes we are interested in */
        uint8_t psfw_fired;
};

typedef enum mon_flags {
        MON_BUILTIN = (1 << 0), /* mon is part of mo_t */
        MON_TEARDOWN = (1 << 1), /* MON is no longer visible */
        MON_PREALLOC = (1 << 2) /* This node has prealloc'ed children */
} mon_flags_t;

/*
 * Must match up with the beginning of "struct monode"; used for avl_find()ing
 * a particular node in one of the monode AVL trees.
 */
typedef struct monode_header {
        u_offset_t mon_base;
        u_offset_t mon_bound;
        uint8_t mon_level;
} monode_header_t;

#define MON_MAX_LEVELS 8 /* mon_level < this */

/*
 * A Memory Object Node, which manages a subset of a Memory Object's offset
 * space.
 */
#define MON_DEFAULT 8

typedef struct monode {
        u_offset_t mon_base;
        u_offset_t mon_bound;
        uint8_t mon_level;
        kcondvar_t mon_cv;
        mon_flags_t mon_flags; /* MON_* flags */
        mo_t *mon_mo;
        union {
                avl_node_t monl_alink; /* AVL linkage */
                list_node_t monl_llink; /* list linkage */
        } mon_link;
        kmutex_t mon_lock; /* protects following fields */
        uint32_t mon_nslots; /* mon_psf[] count */
        uint32_t mon_nfull;
        uint32_t mon_ncap; /* # PSF_CAPTURING bits set */
        uint32_t mon_nchildnode; /* # PSF_CHILDNODE bits set */
        uint8_t mon_strideszc;
        uint8_t mon_maxszc;
        psfw_t *mon_waiters; /* state change watchers */
        psf_t mon_psf[MON_DEFAULT];
} monode_t;

#define mon_alink mon_link.monl_alink
#define mon_llink mon_link.monl_llink

#define MON_SIZE(npsf) offsetof(monode_t, mon_psf[(npsf)])

/* allocated MO nodes must be aligned to FALSE_SHARING_ALIGN */
#define MON_ALLOC_SIZE(npsf) P2ROUNDUP(MON_SIZE(npsf), FALSE_SHARING_ALIGN)

/* Figure out the slot number for <off>, given <base> offset and stride SZC */
#define MON_SLOT_OFF_BS(off, base, strideszc) \
        (((off) - (base)) >> SZC_SHIFT((strideszc)))

/* Figure out the slot number for <off> for MON <mon> */
#define MON_SLOT_OFF(mon, off) \
        MON_SLOT_OFF_BS(off, (mon)->mon_base, (mon)->mon_strideszc)

/* Compute the PSF pointer in mon for offset off, given base & strideszc */
#define MON_PSFP_OFF_BS(mon, off, base, strideszc) \
        (&(mon)->mon_psf[MON_SLOT_OFF_BS((off), (base), (strideszc))])

/* Compute the PSF slot pointer for a given off in an monode. */
#define MON_PSFP_OFF(mon, off) \
        (&(mon)->mon_psf[MON_SLOT_OFF((mon), (off))])

/* Compute the slot offset to get the base PSF for a given offset */
#define PSFP_BASE_ADJUST(off, szc, strideszc) \
        (P2PHASE(off, SZC_BYTES(szc)) >> SZC_SHIFT(strideszc))
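
/*
 * Illustrative sketch: how the slot macros above are typically combined.
 * SZC_SHIFT() is assumed (from the vm headers included above) to be the byte
 * shift of a size code; the helper name is hypothetical, and the caller is
 * assumed to have checked that mon_base <= off < mon_bound.
 */
static inline psf_t *
mon_example_psfp(monode_t *mon, u_offset_t off)
{
        /* slot index is ((off - mon_base) >> SZC_SHIFT(mon_strideszc)) */
        return (MON_PSFP_OFF(mon, off));
}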

/* this structure must match the beginning of monode_prealloc_t */
typedef struct monode_prealloc_lookup {
        monode_t *monp_mon;
        uint8_t monp_level;
} monode_prealloc_lookup_t;

/*
 * Data structure tracking the pre-allocated monodes which will be children
 * of a level-0 monode.
 */
typedef struct monode_prealloc {
        monode_t *monp_mon;
        uint8_t monp_level; /* 1..n */
        uint8_t monp_top_szc; /* SZC of slot in parent */
        uint8_t monp_bottom_szc; /* SZC of slots in child */
        uint8_t monp_slotshift; /* lg_2 [# slots] */
        uint32_t monp_nslots; /* # slots in monodes */
        uint64_t monp_max; /* maximum # monodes */
        uint64_t monp_total; /* total # monodes */
        uint64_t monp_used; /* # monodes currently in use */
        uint64_t monp_dirty; /* # free nodes prev. used */
        list_t monp_list;
        union {
                avl_node_t monpl_alink;
                list_node_t monpl_llink;
        } monp_link;
} monode_prealloc_t;

#define monp_alink monp_link.monpl_alink
#define monp_llink monp_link.monpl_llink

/*
 * The various types of MO policy.  Must be kept in sync with
 * mo_policy_data_t.
 */
typedef enum {
        MOPT_POLICY, /* lgrp policy */
        MOPT_LGRPSET, /* for LGRP_MEM_POLICY_NEXT_SEG */
        MOPT_MEMTYPE, /* memory type */
        MOPT_SZC, /* preferred szc */
        MOPT_COUNT /* MUST BE LAST */
} mo_policy_type_t;

/*
 * The mo_policy_data_t members must be kept in sync with mo_policy_type_t.
 */
typedef union {
        lgrp_mem_policy_t mopd_policy; /* lgrp policy */
        klgrpset_t mopd_lgrpset; /* for NEXT_SEG */
        kmemtype_t mopd_memtype; /* memory type */
        szc_t mopd_szc; /* preferred szc */
} mo_policy_data_t;

/*
 * An entry in the mox_policy_* trees, containing the NUMA placement,
 * memory type, or size code policy for the offset range [base, bound).
 */
typedef struct mo_policy {
        u_offset_t mop_base;
        u_offset_t mop_bound;
        mo_policy_data_t mop_d;
        avl_node_t mop_link;
} mo_policy_t;

/*
 * moresident_t tracks a portion of the range mapped by a particular
 * MW_RESIDENT mapping window.
 *
 * For now, we don't support overlapping resident windows; when we do,
 * there will be a "mor_next" pointer here.
 */
typedef struct moresident {
        u_offset_t mor_base;
        u_offset_t mor_bound;
        volatile uint64_t mor_allocated; /* outstanding allocations */
        const mw_resident_ops_t *mor_ops;
        void *mor_cbarg;
        mw_t *mor_mw;
        avl_node_t mor_link;
} moresident_t;

/*
 * Memory Object eXtension structure: holds data which isn't needed for
 * small, simple memory objects.
 */
typedef struct mox {
        mo_t *mox_mo;

        /* protected by mo_lock */
        avl_tree_t mox_nodes; /* level-zero nodes */
        avl_tree_t mox_subnodes; /* all children, grouped by level */
        avl_tree_t mox_prealloc; /* monode_prealloc_t's, range & level */

        /* resident tree is protected by mo_residentlock */
        avl_tree_t *mox_resident; /* moresident_t's */

        /* policy trees are protected by mox_policylock */
        krwlock_t mox_policylock;
        avl_tree_t *mox_policy_trees[MOPT_COUNT];
        klgrpset_t mox_lgrpset; /* for LGRP_MEM_POLICY_NEXT_SEG */
} mox_t;

/*
 * Tracks the state for a single active MO capture walking the MO's mo_mws
 * list, including its initial and current location in the two-level mo_mws
 * list.
 *
 * If mwi_pmw is non-NULL, it is mwi_mw's parent.
 *
 * If mwi_mw_seen is set, the capture may be referencing mwi_mw, and it must
 * go on a zombie list instead of being freed if it is destroyed.
 */
typedef struct mw_iter {
        mw_t *mwi_fmw; /* first MW seen */
        mw_t *mwi_mw; /* current/next targeted MW */
        mw_t *mwi_pmw; /* if set, pmw is mw's parent */
        uint8_t mwi_mw_seen; /* is mwi_mw the current targeted MW? */
        struct mw_iter *mwi_next; /* Linkage on mo_mw_iters list */
        struct mw_iter *mwi_prev; /* Linkage on mo_mw_iters list */
} mw_iter_t;

/*
 * Memory Object Anchors track active memory objects so that they can be
 * found by debuggers and pageout-like activity.
 */
struct mo_anchor {
        kmutex_t moa_lock;
        list_t moa_list;
        size_t moa_count;
        enum mo_anchor_type moa_anchortype; /* which Mo_anchor[] index? */
        memory_usage_type_t moa_usagetype; /* what type of memory? */
        uint8_t moa_pad[P2NPHASE(
            sizeof (kmutex_t) + sizeof (list_t) + sizeof (size_t) +
            sizeof (enum mo_anchor_type) + sizeof (memory_usage_type_t),
            _FALSE_SHARING_ALIGN)];
};
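
/*
 * Illustrative sketch: the moa_pad[] idiom above (also used by the hash
 * buckets later in this file) rounds the payload up to a false-sharing
 * boundary so adjacent array elements do not share cache lines.  The
 * structure below is hypothetical and only demonstrates the P2NPHASE()
 * arithmetic; FALSE_SHARING_ALIGN is assumed to come from the vm headers
 * included above.
 */
typedef struct example_padded {
        kmutex_t ep_lock;
        uint64_t ep_count;
        char ep_pad[P2NPHASE(
            sizeof (kmutex_t) + sizeof (uint64_t),
            FALSE_SHARING_ALIGN)];
} example_padded_t;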

typedef enum mo_flags {
        /* never changed after init */
        MO_FULLMO = (1 << 0), /* this MO is "real", with off->pfn */
        MO_NORESIZE = (1 << 1), /* PSF slots are never resized */
        MO_NOPREALLOC = (1 << 2), /* do not prealloc all necessary MONs */

        /* normal state */
        MO_SINGLE = (1 << 8), /* only has built-in MON */
        MO_CAPTURE = (1 << 9), /* capture head installed */
        MO_DRAINING = (1 << 10) /* in process of being destroyed */
} mo_flags_t;

/*
 * The main Memory Object structure; always aligned to an MO_ALIGN boundary.
 *
 * mo_cnmws is a cumulative count of MWs ever added to the mo_mws list,
 * clamped at UINT32_MAX.
 */
struct mo {
        list_node_t mo_listnode; /* global mo linkage */
        mo_anchor_type_t mo_anchortype; /* which list we are on */

        /* constant or filled-once */
        mox_t *mo_mox; /* mo-extension */
        flr_t *mo_flr; /* in mox? */
        hrtime_t mo_ctime;
        const mo_ops_t *mo_ops; /* callbacks */
        void *mo_ops_arg; /* callback argument */

        /* read-mostly */
        uint64_t mo_geom_gen; /* generation # for geometry */

        /* lock-protected */
        kmutex_t mo_lock;
        mo_flags_t mo_flags; /* MO_* flags */
        uint8_t mo_prefszc; /* or SZC_MAX if multiple answers */
        uint8_t mo_policy; /* or MO_POLICY_INVALID if mult. ans */
        uint8_t mo_memtype; /* requested memory type */
        mw_t *mo_mws; /* mapping windows */
        uint32_t mo_nmws; /* count of mapping windows */
        uint32_t mo_cnmws; /* # MWs ever added to mo_mws */
        mw_iter_t *mo_iters; /* active mo_mws iterations */
        mw_t *mo_zombiemw; /* zombie mapping windows */
        krwlock_t mo_resident;
        monode_t mo_node; /* built-in node (variable size) */
};

#define MO_SIZE(npsf) \
        P2ROUNDUP((offsetof(mo_t, mo_node) + MON_SIZE(npsf)), MO_ALIGN)

/*
 * Anchored if mo_anchortype is non-zero and less than MOA_NTYPES.
 */
#define MO_ANCHORED(mo) ((uint8_t)((mo)->mo_anchortype - 1) < (MOA_NTYPES - 1))
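
/*
 * Illustrative note: MO_ANCHORED() uses the unsigned subtract-one trick to
 * do that range test in a single comparison.  The hypothetical helper below
 * spells out the equivalent check.
 */
static inline boolean_t
mo_example_anchored(const mo_t *mo)
{
        return (mo->mo_anchortype != 0 && mo->mo_anchortype < MOA_NTYPES);
}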

/*
 * MO Capture state transition diagram (starts in CREATED; if this changes,
 * update the copy in the vm_mo.c Big Theory Statement)
 *
 * ,-- CANCELED <---------.
 * |       ^              |
 * |       |              |
 * |       |              |
 * | WAIT_{RM,PSF} WAIT_{MW,LATER}
 * |      ^  ^           |  ^
 * |      |  |           |  |
 * |      v  v           v  |
 * |    CREATED -----> VACATING -----> ACTIVE -----> DONE ----> DEAD
 * |      |              |                                       ^
 * |      |              |                                       |
 * `------`--------------`--------> FAILED ----------------------'
 */
typedef enum mo_capstate {
        MOCS_INVALID = 0, /* known-invalid state */
        MOCS_CREATED, /* created, or conflict possibly resolved */
        MOCS_WAIT_RM, /* conflict with rm_capturing bit */
        MOCS_WAIT_PSF, /* conflict with PSF_CAPTURING or _EXCL bit */
        MOCS_VACATING, /* PSF_CAPTURING set, vacating MWs */
        MOCS_WAIT_MW, /* waiting for mw_meta lock */
        MOCS_WAIT_LATER, /* MWs called back, MWCB_LATERs pending */
        MOCS_ACTIVE, /* MWs vacated */
        MOCS_DONE, /* Notifying MWs of new state */
        MOCS_CANCELED, /* operation canceled */
        MOCS_FAILED, /* Capture failed, clearing PSF_CAPTURING */
        MOCS_DEAD, /* ready to be reaped */
        MOCS_NSTATES /* must be last; count of queues */
} mo_capstate_t;

/*
 * Vacate states for mo_capture structures.  A capture moves through these
 * states in order.
 */
typedef enum moc_vacate_state {
        MOCV_IDLE = 0, /* not yet entered vacate */
        MOCV_MW_CANFAIL, /* calling callbacks which can fail/later */
        MOCV_LWAIT, /* waiting for later count to hit zero */
        MOCV_MW_NOFAIL, /* calling callbacks which cannot fail */
        MOCV_DONE, /* vacate complete */
        MOCV_NSTATES /* last: count of states */
} moc_vacate_state_t;

typedef struct mo_capbucket mo_capbucket_t;
typedef struct mo_caphead mo_caphead_t;

/*
 * Small indirection structure which can out-live the associated mo_capture_t.
 *
 * Freed when both mocl_cap == NULL and mocl_lcount == 0, by whoever
 * transitioned it into that state.
 */
typedef struct mo_caplater {
        mo_capbucket_t *mocl_bucket; /* lock protects state */
        mo_capture_t *mocl_cap; /* NULLed if capture is canceled */
        uint64_t mocl_lcount; /* # mw_later_ts which point at me */
} mo_caplater_t;

#define MW_LATER_COUNT 12 /* enough to make mw_later_t fill a cache line */

/*
 * Tracks MWCB_LATER returns from a Mapping Window callback.  Links (in a
 * many-to-many fashion) Mapping Windows with mo_caplater structures.
 */
typedef struct mw_later {
        struct mw_later *mwl_next; /* hash linkage */
        mw_t *mwl_mw; /* the tag */
        struct mw_later *mwl_overflow; /* if more are needed */
        uint_t mwl_count; /* # slots used */
        mo_caplater_t *mwl_cap[MW_LATER_COUNT];
} mw_later_t;

/*
 * Mw_laterhash hash bucket; tracks active mw_later_ts hashing to that bucket.
 */
typedef struct mw_laterbucket {
        kmutex_t mwlb_lock;
        mw_later_t *mwlb_head;
        char mwlb_pad[P2NPHASE(
            sizeof (kmutex_t) + sizeof (mw_later_t *),
            FALSE_SHARING_ALIGN)];
} mw_laterbucket_t;

/*
 * Optional callbacks for MO capture operations.
 */
struct moc_callbacks {
        boolean_t (*mocc_will_succeed)(void *);
        rm_t *(*mocc_get_rml)(void *);
        void (*mocc_ready)(void *);
};

/*
 * The Memory Object Capture tracking structure.
 */
struct mo_capture {
        list_node_t moc_listnode; /* linkage for caphead list */

        /* Constant after creation */
        mo_caphead_t *moc_head; /* head for our MO */
        mo_t *moc_mo; /* MO targeted */
        kthread_t *moc_thread; /* thread driving capture */
        offiter_t moc_offi; /* range we are capturing */
        const moc_callbacks_t *moc_cbs; /* callbacks (optional) */
        void *moc_cbarg;
        mw_t *moc_mw; /* MW for operation */
        mo_capture_flags_t moc_flags; /* const: operation flags */

        /* protected by moc_head->moch_bucket->mocb_lock */
        mw_t *moc_vacate_mw; /* we own its mw_meta */
        mo_caplater_t *moc_caplater; /* holds later count */
        mo_capstate_t moc_state; /* mo_capstate index */
        mo_capstate_t moc_wstate; /* set up wait state */
        kcondvar_t moc_cv;
        uint8_t moc_cancel; /* cancel requested */
        uint8_t moc_bits_set; /* PSF_CAPTURING set */

        /* Other state */
        moc_vacate_state_t moc_vstate; /* current vacate sub-state */
        mw_iter_t moc_mw_iter; /* MW iterator */
        mw_later_t *moc_later; /* preallocated later_t */
        union {
                const rm_t *mocw_rm; /* mocw_rmw target */
                monode_t *mocw_mon; /* mocw_psfw target */
                mw_t *mocw_mw; /* mocw_rma target */
        } moc_wtarget;
        union {
                rmw_t mocw_rmw; /* wait_rm: RMW state */
                psfw_t mocw_psfw; /* wait_psf: PSF wait */
                krw_async_t mocw_rwa; /* wait_mw: RW async state */
        } moc_wait;

        /* statistics and debugging info */
        hrtime_t moc_qtime[MOCS_NSTATES];
};

/*
 * The mo_caphead structure tracks all captures for a given MO.  If one
 * exists for a MO, the MO_CAPTURE bit will be set in mo_flags.
 *
 * These are held in a global Mo_capture_hash table, hashed by MO pointer.
 */
struct mo_caphead {
        /* static for the life of the caphead */
        mo_capbucket_t *moch_bucket;
        mo_t *moch_mo; /* our MO */

        /* rest protected by moch_bucket->mocb_lock */
        mo_caphead_t *moch_hash; /* hash link */
        list_t moch_list; /* list of non-dead captures */
        size_t moch_ncap; /* # captures on list */
};

/*
 * A single entry in the Mo_capture_hash array; contains the mocb_lock, which
 * protects all mo_capture state for MOs which hash to its entry.
 */
struct mo_capbucket {
        kmutex_t mocb_lock; /* main mo_capture state lock */
        mo_caphead_t *mocb_head; /* list of mo_capheads */
        size_t mocb_inserts;
        size_t mocb_removes;
        uint64_t mocb_holds; /* # RM_TAKEME_OFF RMs */
        kcondvar_t mocb_holdcv; /* signaled: holds->0 */
        kcondvar_t mocb_removecv; /* signaled: caphead removal */
        char mocb_pad[P2NPHASE(
            sizeof (kmutex_t) + sizeof (mo_caphead_t *) +
            2 * sizeof (size_t) + sizeof (uint64_t) +
            2 * sizeof (kcondvar_t),
            FALSE_SHARING_ALIGN)];
};
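
/*
 * Illustrative sketch: finding the mo_caphead for a given MO means hashing
 * the MO pointer to a bucket and walking that bucket's moch_hash chain under
 * mocb_lock.  The helper below is hypothetical; the actual hash function and
 * the Mo_capture_hash table are presumed to live in vm_mo.c.
 */
static inline mo_caphead_t *
mocb_example_find(mo_capbucket_t *mocb, mo_t *mo)
{
        mo_caphead_t *moch;

        /* caller holds mocb->mocb_lock */
        for (moch = mocb->mocb_head; moch != NULL; moch = moch->moch_hash) {
                if (moch->moch_mo == mo)
                        break;
        }
        return (moch);
}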

/*
 * An aligned Memory Object, suitable for allocating arrays of mo_ts.
 */
typedef struct mo_aligned {
        mo_t moa_mo;
        char moa_pad[P2NPHASE(sizeof (mo_t), MO_ALIGN)];
} mo_aligned_t;

#ifdef _KERNEL

/*
 * Private interfaces used by vm_mw.c.
 */
extern void mw_destroy_rangelock_cb(mw_t *, void *);
extern monode_t *mo_find_mon_psf(mo_t *, monode_t *, u_offset_t, psf_t *);
extern monode_t *mon_find(mo_t *, u_offset_t, uint_t);
extern uint16_t mo_gen_modata(mo_t *);
extern void mo_destroy(mo_t *);

typedef void mon_wait_lockdrop_cb(void *);
extern void mon_wait_psf_clearflags(monode_t *, mw_t *, u_offset_t,
    psf_flags_t, mon_wait_lockdrop_cb *, void *);

extern boolean_t moresident_install(mo_t *, mw_t *, const mw_resident_ops_t *,
    void *);
extern void moresident_uninstall(mo_t *mo, mw_t *mw);

/*
 * Implemented in vm_loops.c for efficiency.
 */
extern void mon_slot_write_loop(monode_t *, size_t, size_t, uint64_t);
extern void mon_slot_expand_loop(monode_t *, size_t, uint_t);

#endif /* _KERNEL */

#ifdef __cplusplus
}
#endif

#endif /* _VM_MO_IMPL_H */