Hallo, dies ist ein Test.
PWD: /www/data-lst1/unixsoft/unixsoft/kaempfer/.public_html
Running in File Mode
Relative path: ./../.././../../../../usr/include/vm/predict_impl.h
Real path: /usr/include/vm/predict_impl.h
Zurück
/* * Copyright (c) 2012, 2021, Oracle and/or its affiliates. */ #ifndef _VM_PREDICT_IMPL_H #define _VM_PREDICT_IMPL_H #include <vm/system.h> #include <vm/predict.h> #ifdef __cplusplus extern "C" { #endif #define BP_SAMPLES 1800 /* How many samples we keep */ #define BP_NUM_EMA 8 /* Matches up below */ typedef enum { BP_EMA_5_SEC, BP_EMA_10_SEC, BP_EMA_30_SEC, BP_EMA_1_MIN, BP_EMA_2_MIN, BP_EMA_5_MIN, BP_EMA_10_MIN, BP_EMA_30_MIN, BP_EMA_MAX_IDX = BP_NUM_EMA } bp_ema_idx_t; static const uint_t bp_ema_samples[] = { 5, /* nominally, 5 sec */ 10, /* 10 sec */ 30, /* 30 sec */ 60, /* 1 min */ 120, /* 2 min */ 300, /* 5 min */ 600, /* 10 min */ BP_SAMPLES /* 30 min */ }; #ifdef GENUNIX_MDB_MODULE /* For ::predict -E */ static const char *bp_ema_names[] = { "5s", "10s", "30s", "1m", "2m", "5m", "10m", "30m" }; #endif /* GENUNIX_MDB_MODULE */ typedef enum { BP_SAMPLE_FED, BP_SAMPLE_PAGES, BP_SAMPLE_BORROW, BP_SAMPLE_CACHED, BP_SAMPLE_TYPES /* must be last */ } bp_sample_type_t; /* ------------------------------------------------------------------- */ struct bp; struct bp_data; struct taskq; typedef enum { BPP_ACTION_PENDING = 0x2 /* there is work to do */ } bpp_flags_t; #define BP_MAX_SUBPART 8 /* maximum sub-mnode division of labor */ /* * All counts are in units of base pages (e.g. rm counts). 
*/

/*
 * One partition of a bp_t's work.  A bp_t holds up to BP_MAX_SUBPART of
 * these in bp_part[].
 */
typedef struct bp_part {
	/*
	 * Invariant fields.
	 */
	struct bp	*bpp_bp;
	struct bp_data	*bpp_bpdata;
	mnodeid_t	bpp_mnodeid;		/* Who am I */
	uint8_t		bpp_partid;
	ku_type_t	bpp_ku;
	uint8_t		bpp_szc;

	/*
	 * Written by the mnode main thread; read by the taskq threads when
	 * they first begin running.
	 */
	spgcnt_t	bpp_disp_quota;		/* How much to coalesce or */
						/* encage */
	spgcnt_t	bpp_disp_surplus;	/* How much to NOSZC */

	/*
	 * Protected by bpp_bpdata->bpd_mutex.
	 */
	bpp_flags_t	bpp_flags;		/* see bpp_flags_t */
	hrtime_t	bpp_action_enqueue_time;
	void		(*bpp_action_callback)(void *);	/* What to do */
	void		*bpp_action_arg;
	list_node_t	bpp_link;		/* For work list and */
						/* partition free */

	/*
	 * Private to the taskq thread.
	 */
	spgcnt_t	bpp_quota;		/* How much left to coalesce */
						/* or encage */
	spgcnt_t	bpp_surplus;		/* How much left to NOSZC */
	pgcnt_t		bpp_mincage;		/* Min. credits to encage a */
						/* tilelet */
	hrtime_t	bpp_last_cageout;	/* Last time of cageout for */
						/* demand */
} bp_part_t;

/*
 * Some definitions:
 *
 * Surplus means there is more of this ku/szc than long term demand
 * dictates we need.
 *
 * Inbounds means there is enough supply of this ku/szc to meet the
 * demands of the current system load.
 *
 * Deficit means the current system load is consuming this ku/szc faster
 * than supply is being produced.
 *
 * Shortage means that threads are waiting in breadlines for this ku/szc.
 *
 * The Disabled state is used when things are hopeless to prevent us from
 * doing unnecessary work, or blocking threads in breadlines for an
 * unbounded amount of time.
*/ typedef enum { BP_STATE_INVALID, BP_STATE_SURPLUS, BP_STATE_INBOUNDS, BP_STATE_DEFICIT, BP_STATE_SHORTAGE, BP_STATE_DISABLED } bp_state_t; typedef enum { BP_REBUILD_CANDIDATE_LIST = 0x1, /* rebuild tilelet list */ BP_REBUILDING = 0x2, /* someone rebuilding list */ BP_BREADLINE_ACTIVE = 0x4, /* breadline is active */ BP_BREADLINE_DISABLED = 0x8, /* breadline is disabled */ BP_CANDIDATE_LIST_NEED_GROW = 0x10 /* need to grow cand list */ } bp_flags_t; /* * All counts are in units of base pages (e.g. rm counts). * * bp_f is the forcing value. The forcing value determines how much liquidity * we wish to inject into the system at the next step based on predicted future * demand. For example if we predict that we will need 5gb in the short term, * we will coalesce 5gb on the next step by setting bp_f to btop(5gb). If a * shortage is not predicted to occur in the near-term, it will be zero. * * bp_liquidity is the minimum amount of supply we try to keep around. It * should be >= hysteresis if we are seeing demand for this memory type. If * there is no demand, this will be dropped to zero after awhile (eg. for giant * pages). Initially, we set liquidity to zero for all large pages, and wait to * see if there is demand for them before committing to keeping them around. * On the user side, liquidity is the minimum amount we try to create in * deficit or shortage states; on the kernel side, it is only used as the * borrowing limit, since we can always grow the kernel cage later if we need * to create more supply. * * Locking: * * - bp_bpdata, bp_mnodeid, bp_ku, bp_szc are invariant. * - bp_state, bp_flags, bp_net_quota, and bp_dispatch_count are protected * by the bp_bpdata->bpd_mutex. * - bp_num_part and bp_part[] are protected by the bp_bpdata->bpd_part_mutex. * * All other fields are written only from the per-mnode main thread. */ typedef struct bp { struct bp_data *bp_bpdata; uint8_t bp_mnodeid; /* which mnodeid is bp_bpdata for? */ uint8_t bp_szc; /* who am I, anyway? 
*/ kcondvar_t bp_cv; /* waiting for BP_REBUILDING to clear */ ku_type_t bp_ku; /* KU_KCAGE or KU_USER */ bp_state_t bp_state; /* current state of this ku/szc */ hrtime_t bp_state_time; /* hrtime of last state transition */ uint_t bp_state_age; /* number of loops in this state */ bp_flags_t bp_flags; /* see above */ hrtime_t bp_last_juice; /* Last time we juiced the cachelist */ /* Candidate tilelet array */ tileletid_t *bp_cands; /* Candidate tilelet array */ size_t bp_cands_bytes; /* Size of candidate array */ uint_t bp_cand_idx; /* Next valid entry in cands array */ uint_t bp_cand_eidx; /* Number of valid tilelets in cands */ hrtime_t bp_cand_last_rebuild; /* Time we setup cands array */ /* Per-mnode/ku/szc counts */ pgcnt_t bp_supply; /* current supply available */ pgcnt_t bp_cached; /* current supply on cache lists */ spgcnt_t bp_demand; /* short-term demand */ spgcnt_t bp_demandlt; /* long-term demand */ spgcnt_t bp_deficit; /* short-term deficit */ spgcnt_t bp_surplus; /* long-term surplus */ spgcnt_t bp_shortage; /* immediate shortage */ pgcnt_t bp_amin; /* recent minimum alloc'ed */ pgcnt_t bp_amax; /* recent maximum alloc'ed */ pgcnt_t bp_disabled_freecnt; /* mnode free at last disable */ uint64_t bp_fail_backoff; /* backoff counter */ uint64_t bp_progress; /* made progress since last enable */ hrtime_t bp_progress_time; /* time of last forward prog */ /* See comment above */ spgcnt_t bp_f; /* forcing value */ pgcnt_t bp_liquidity; /* minimum supply to keep on hand */ /* Sample data */ pgcnt_t bp_samples[BP_SAMPLE_TYPES][BP_SAMPLES]; hrtime_t bp_sample_timestamp[BP_SAMPLES]; uint64_t bp_nsamples; /* These are really EMA * N, where N is the number of samples */ spgcnt_t bp_ema[BP_SAMPLE_TYPES][BP_NUM_EMA]; /* Partition data */ spgcnt_t bp_net_quota; /* sum of all partition work done */ uint_t bp_dispatch_count; /* # of dispatched taskq threads */ bp_part_t *bp_part[BP_MAX_SUBPART]; uint_t bp_num_part; /* <= BP_MAX_SUBPART */ } bp_t; /* * Event logging. 
Essentially, this is for ::predict -l. In the future * it may also be useful for kstats. */ typedef enum { BP_EVENT_STATE_TRANSITION, BP_EVENT_ENABLE_BORROW, BP_EVENT_LIMIT_BORROW, BP_EVENT_COALESCE_TILELET, BP_EVENT_COALESCE_CHUNK, BP_EVENT_DISOWN_TILELET, BP_EVENT_DISOWN_CHUNK, BP_EVENT_ENCAGE_TILELET, BP_EVENT_ENCAGE_CHUNK, BP_EVENT_UNCAGE_TILELET, BP_EVENT_JUICE_CACHELIST, BP_EVENT_KCAGE_FCCB, BP_EVENT_KCAGE_ADD_TILE, BP_EVENT_MAX_SUCCESS = BP_EVENT_KCAGE_ADD_TILE, BP_EVENT_COALESCE_TILELET_FAILED, BP_EVENT_COALESCE_CHUNK_FAILED, BP_EVENT_ENCAGE_TILELET_FAILED, BP_EVENT_ENCAGE_CHUNK_FAILED, BP_EVENT_UNCAGE_TILELET_FAILED, BP_EVENT_MAXID } bp_event_t; typedef enum { BP_LOG_SUCCESS, BP_LOG_FAILURE, BP_LOG_NTYPES } bplog_type_t; /* * Audit record for ::predict -l */ typedef struct { hrtime_t bpa_event_time; bp_event_t bpa_event_id; uint8_t bpa_mnodeid; uint8_t bpa_ku; uint8_t bpa_szc; uint8_t bpa_partid; bp_state_t bpa_prev_state; /* set for transition only */ bp_state_t bpa_state; spgcnt_t bpa_val; /* quota/surplus */ spgcnt_t bpa_delta; /* amount removed */ hrtime_t bpa_prev_timestamp; /* time of prev event */ tileletid_t bpa_tilelet; tileletid_t bpa_etilelet; } bp_audit_t; #define BP_NAUDIT 2048 typedef enum { BP_QUIESCE = 0x1, /* bp_pause() wants to stop thread */ BP_RUNNING = 0x2, /* Predictor thread is running */ BP_PAUSED = 0x4, /* Predictor thread is stopped */ BP_NOSLEEP = 0x8, /* Do not block on cv */ BP_SAMPLE_INTERVAL = 0x10, /* Run the sample engine */ BP_GO_AWAY = 0x20 /* This mnode is going away */ } bpm_flags_t; /* Per-mnode state data for predictor */ typedef struct bp_data { /* These fields are invariant */ mnodeid_t bpd_mnodeid; mnode_t *bpd_mnode; kthread_t *bpd_thread; /* main thread for mnode */ bp_t *bpd_data[KU_NTYPES][SZC_MAX]; kmutex_t bpd_mutex; kcondvar_t bpd_cv; /* main thread waits on this cv */ kcondvar_t bpd_pause_cv; /* for bp_pause() and bp_resume() */ hrtime_t bpd_wakeup_time; /* for observability only */ bpm_flags_t bpd_flags; 
/* * These bitmaps provide a quick way of telling what ku/szcs are * disabled or have threads waiting in the breadlines. */ uint32_t bpd_breadline_active_map[KU_NTYPES]; uint32_t bpd_disabled_map[KU_NTYPES]; uint32_t bpd_ignore_szc_map[KU_NTYPES]; /* These are written by the main thread */ pgcnt_t bpd_sample[BP_SAMPLE_TYPES][KU_NTYPES][SZC_MAX]; /* For ::predict -l logging */ kmutex_t bpd_audit_mutex; bp_audit_t *bpd_audit[KU_NTYPES][BP_LOG_NTYPES]; uint_t bpd_audit_index[KU_NTYPES][BP_LOG_NTYPES]; /* This lock is used by bp_pause() and bp_resume() mechanism */ krwlock_t bpd_kcage_lock; /* * bpd_part_mutex protects the next two fields as well as bp_num_part * and bp_part[] of each bp_t hanging off of this structure. */ kmutex_t bpd_part_mutex; uint_t bpd_part_count[KU_NTYPES][BP_MAX_SUBPART]; struct taskq *bpd_part_taskq[KU_NTYPES][BP_MAX_SUBPART]; /* These are protected by the bpd_mutex */ list_t bpd_part_worklist[KU_NTYPES][BP_MAX_SUBPART]; /* Uncage data */ hrtime_t bpd_last_uncage; } bp_data_t; /* System-wide predictor state, stored in Bp_global */ typedef struct bp_global { kmutex_t bpg_mutex; kcondvar_t bpg_ready_cv; /* ready == alloced */ kcondvar_t bpg_active_cv; /* bpg_active set */ kcondvar_t bpg_boot_active_cv; /* bpg_boot_active cleared */ uint8_t bpg_alloced; /* bpds alloced */ uint8_t bpg_ready; /* bpd threads */ mnodeset_t bpg_predictors; /* predictors are active */ /* Power management state */ mnodeset_t bpg_pm_kcage_needed; /* unfullfillable kcage need */ /* Pre-bpg_active state */ uint32_t bpg_boot_failed[MAXMNODES][KU_NTYPES]; uint32_t bpg_boot_failed_bl_active[MAXMNODES][KU_NTYPES]; /* rarely-changed stuff, at the end to avoid false sharing */ pac_pool_t *bpg_pool; /* Pool for BP captures */ kthread_t *bpg_boot_active[KU_NTYPES]; /* boot sync predictor */ cyclic_id_t bpg_schedpaging_cyclic; uint8_t bpg_active; /* bounds_predictor_init done */ } bp_global_t; #ifdef __cplusplus } #endif #endif /* _VM_PREDICT_IMPL_H */