diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h --- a/sys/amd64/include/atomic.h +++ b/sys/amd64/include/atomic.h @@ -64,7 +64,7 @@ * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers. * An assertion in amd64/vm_machdep.c ensures that the value is correct. */ -#define OFFSETOF_MONITORBUF 0x100 +#define OFFSETOF_MONITORBUF 0x200 #endif #if defined(SAN_NEEDS_INTERCEPTORS) && !defined(SAN_RUNTIME) diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -101,7 +101,8 @@ u_int pc_small_core; \ u_int pc_pcid_invlpg_workaround; \ struct pmap_pcid pc_kpmap_store; \ - char __pad[2900] /* pad to UMA_PCPU_ALLOC_SIZE */ + uint8_t pc_hfi_index; /* HFI table index */ \ + char __pad[2644] /* pad to UMA_PCPU_ALLOC_SIZE */ #define PC_DBREG_CMD_NONE 0 #define PC_DBREG_CMD_LOAD 1 diff --git a/sys/arm/include/pcpu.h b/sys/arm/include/pcpu.h --- a/sys/arm/include/pcpu.h +++ b/sys/arm/include/pcpu.h @@ -63,7 +63,7 @@ uint32_t pc_original_actlr; \ uint64_t pc_clock; \ uint32_t pc_mpidr; \ - char __pad[135] + char __pad[391] #ifdef _KERNEL diff --git a/sys/arm64/include/pcpu.h b/sys/arm64/include/pcpu.h --- a/sys/arm64/include/pcpu.h +++ b/sys/arm64/include/pcpu.h @@ -51,7 +51,7 @@ uint64_t pc_mpidr; \ u_int pc_bcast_tlbi_workaround; \ uint64_t pc_release_addr; \ - char __pad[189] + char __pad[384] #ifdef _KERNEL diff --git a/sys/conf/files b/sys/conf/files --- a/sys/conf/files +++ b/sys/conf/files @@ -1270,7 +1270,7 @@ dev/bhnd/cores/chipc/bhnd_sprom_chipc.c optional bhnd dev/bhnd/cores/chipc/bhnd_pmu_chipc.c optional bhnd dev/bhnd/cores/chipc/chipc.c optional bhnd -dev/bhnd/cores/chipc/chipc_cfi.c optional bhnd cfi +dev/bhnd/cores/chipc/chipc_cfi.c optional bhnd cfi dev/bhnd/cores/chipc/chipc_gpio.c optional bhnd gpio dev/bhnd/cores/chipc/chipc_slicer.c optional bhnd cfi | bhnd spibus dev/bhnd/cores/chipc/chipc_spi.c optional bhnd spibus @@ -3955,6 +3955,7 @@ 
kern/subr_gtaskqueue.c standard kern/subr_hash.c standard kern/subr_hints.c standard +kern/subr_hmp.c standard kern/subr_kdb.c standard kern/subr_kobj.c standard kern/subr_lock.c standard diff --git a/sys/conf/options b/sys/conf/options --- a/sys/conf/options +++ b/sys/conf/options @@ -639,6 +639,7 @@ EARLY_AP_STARTUP opt_global.h SMP opt_global.h NUMA opt_global.h +HMP opt_global.h # Size of the kernel message buffer MSGBUF_SIZE opt_msgbuf.h diff --git a/sys/kern/subr_hmp.c b/sys/kern/subr_hmp.c new file mode 100644 --- /dev/null +++ b/sys/kern/subr_hmp.c @@ -0,0 +1,227 @@ +/* + * Copyright (c) 2026 The FreeBSD Foundation + * + * This software was developed by Minsoo Choo under sponsorship from the + * FreeBSD Foundation. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include + +#include "opt_global.h" + +#ifdef HMP + +/* System-wide CPU capability information */ +struct cpucap_t cpucap; + +/* + * Detect if static capacity varies across CPUs + */ +static void +cpucap_detect_capacity_varies(void) +{ + struct pcpu *pc; + int first; + cpucap_score_t first_cap; + + first = 1; + first_cap = 0; + + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + if (!(pc->pc_cap_flags & CPUCAP_FLAG_VALID)) + continue; + + if (first) { + first_cap = pc->pc_cap_capacity; + first = 0; + } else if (pc->pc_cap_capacity != first_cap) { + cpucap.capacity_varies = 1; + return; + } + } + + cpucap.capacity_varies = 0; +} + +/* + * Detect if any CPU has dynamic scores + */ +static void +cpucap_detect_dynamic_scores(void) +{ + struct pcpu *pc; + + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + if (pc->pc_cap_flags & CPUCAP_FLAG_DYNAMIC) { + cpucap.has_dynamic_scores = 1; + return; + } + } + + cpucap.has_dynamic_scores = 0; +} + +/* + * Initialize machine-independent cpucap state + */ +static void +cpucap_init(void *arg __unused) +{ + struct pcpu *pc; + + /* Default system state */ + cpucap.class_count = 1; + cpucap.cap_count = 0; + cpucap.cap_bitmap = 0; + 
cpucap.capacity_varies = 0; + cpucap.has_dynamic_scores = 0; + + /* Initialize pcpu fields with defaults */ + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + pc->pc_cap_capacity = CPUCAP_SCORE_DEFAULT; + pc->pc_cap_flags = 0; + } + + /* MD init populates actual values */ + cpucap_md_init(); + + /* Detect features */ + cpucap_detect_capacity_varies(); + cpucap_detect_dynamic_scores(); + + cpucap.initialized = 1; + + /* Log what we found */ + if (cpucap.capacity_varies || cpucap.has_dynamic_scores) { + printf("cpucap: enabled (capacity_varies=%d, dynamic_scores=%d, " + "classes=%d, caps=%d)\n", + cpucap.capacity_varies, cpucap.has_dynamic_scores, + cpucap.class_count, cpucap.cap_count); + } +} +SYSINIT(cpucap, SI_SUB_SMP + 1, SI_ORDER_ANY, cpucap_init, NULL); + +/* + * MI functions called by machine-dependent code to set values + */ +void +cpucap_set_class_count(int count) +{ + if (count < 1) + count = 1; + if (count > CPUCAP_CLASS_MAX) + count = CPUCAP_CLASS_MAX; + cpucap.class_count = count; +} + +void +cpucap_set_cap_count(int count, uint8_t bitmap) +{ + if (count > CPUCAP_CAP_MAX) + count = CPUCAP_CAP_MAX; + cpucap.cap_count = count; + cpucap.cap_bitmap = bitmap; +} + +void +cpucap_set_capacity_varies(int varies) +{ + cpucap.capacity_varies = varies; +} + +void +cpucap_set_dynamic_scores(int dynamic) +{ + cpucap.has_dynamic_scores = dynamic; +} + +/* + * Machine-independent functions for scheduler + */ +int +cpucap_highest_capacity_cpu(const cpuset_t *mask) +{ + struct pcpu *pc; + int best_cpu; + cpucap_score_t best_cap; + + if (!cpucap.initialized) + return -1; + + best_cpu = -1; + best_cap = 0; + + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + if (mask != NULL && !CPU_ISSET(pc->pc_cpuid, mask)) + continue; + + if (pc->pc_cap_capacity > best_cap) { + best_cap = pc->pc_cap_capacity; + best_cpu = pc->pc_cpuid; + } + } + + return best_cpu; +} + +/* + * Find CPU with best score for given class and capability + */ +int +cpucap_best_cpu(const cpuset_t *mask, int class, int 
cap) +{ + struct pcpu *pc; + int best_cpu; + cpucap_score_t best_score; + + if (!cpucap.initialized) + return -1; + + /* + * If no dynamic scores, fall back to static capacity + */ + if (!cpucap.has_dynamic_scores) + return cpucap_highest_capacity_cpu(mask); + + if (class >= cpucap.class_count) + class = 0; + if (cap >= cpucap.cap_count) + return cpucap_highest_capacity_cpu(mask); + + best_cpu = -1; + best_score = 0; + + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + if (mask != NULL && !CPU_ISSET(pc->pc_cpuid, mask)) + continue; + + if (pc->pc_cap_scores[class][cap] > best_score) { + best_score = pc->pc_cap_scores[class][cap]; + best_cpu = pc->pc_cpuid; + } + } + + return best_cpu; +} + +/* + * Weak MD functions - overridden by architecture + */ +__weak_symbol void +cpucap_md_init(void) +{ +} + +__weak_symbol int +cpucap_md_get_class(struct thread *td __unused) +{ + return 0; +} + +#endif /* HMP */ diff --git a/sys/powerpc/include/pcpu.h b/sys/powerpc/include/pcpu.h --- a/sys/powerpc/include/pcpu.h +++ b/sys/powerpc/include/pcpu.h @@ -68,7 +68,7 @@ struct pvo_entry *qmap_pvo; \ struct mtx qmap_lock; \ uint64_t opal_hmi_flags; \ - char __pad[1337]; + char __pad[1081]; #ifdef __powerpc64__ #define PCPU_MD_AIM_FIELDS PCPU_MD_AIM64_FIELDS diff --git a/sys/riscv/include/pcpu.h b/sys/riscv/include/pcpu.h --- a/sys/riscv/include/pcpu.h +++ b/sys/riscv/include/pcpu.h @@ -47,7 +47,7 @@ uint32_t pc_pending_ipis; /* IPIs pending to this CPU */ \ uint32_t pc_hart; /* Hart ID */ \ uint64_t pc_clock; \ - char __pad[48] /* Pad to factor of PAGE_SIZE */ + char __pad[552] /* Pad to factor of PAGE_SIZE */ #ifdef _KERNEL diff --git a/sys/sys/_hmp.h b/sys/sys/_hmp.h new file mode 100644 --- /dev/null +++ b/sys/sys/_hmp.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2026 The FreeBSD Foundation + * + * This software was developed by Minsoo Choo under sponsorship from the + * FreeBSD Foundation. 
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#ifndef _SYS__HMP_H_
+#define _SYS__HMP_H_
+
+#include
+
+/*
+ * CPU capability score type.
+ * This value should be normalized to 0-1024.
+ *
+ * ACPI CPPC : 0-255
+ * Intel HFI : 0-255
+ * Arm capacity : 0-1024
+ *
+ * Whenever there is a new capability score scheme where highest score exceeds
+ * 1024, CPUCAP_SCORE_SCALE should be bumped to the new highest score for
+ * fine-grained score management on a new architecture.
+ */
+typedef uint16_t cpucap_score_t;
+
+/*
+ * Maximum supported classes and capabilities
+ *
+ * These are compile-time maximums for array sizing to avoid dynamic
+ * allocation. Actual counts are runtime values read from
+ * cpucap.class_count and cpucap.cap_count.
+ *
+ * Intel SDM reserves ECX[15:8] for class count (max 256)
+ * Intel SDM reserves EDX[7:0] for capability bitmap (max 8 capabilities)
+ *
+ * We use conservative maximums to avoid excessive memory usage.
+ */
+#define CPUCAP_CLASS_MAX 16
+#define CPUCAP_CAP_MAX 8
+
+#endif /* _SYS__HMP_H_ */
diff --git a/sys/sys/hmp.h b/sys/sys/hmp.h
new file mode 100644
--- /dev/null
+++ b/sys/sys/hmp.h
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2026 The FreeBSD Foundation
+ *
+ * This software was developed by Minsoo Choo under sponsorship from the
+ * FreeBSD Foundation.
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#ifndef _SYS_HMP_H_
+#define _SYS_HMP_H_
+
+#ifdef _KERNEL
+#ifndef LOCORE
+
+#include
+#include
+#include
+
+#include
+
+#include "opt_global.h"
+
+#ifdef HMP
+
+#define CPUCAP_SCORE_SCALE 1024
+#define CPUCAP_SCORE_MAX CPUCAP_SCORE_SCALE
+#define CPUCAP_SCORE_DEFAULT (CPUCAP_SCORE_SCALE / 2)
+
+/* Score normalization macro functions */
+#define CPUCAP_SCORE_NORMAL_FROM(x, y) \
+ (((x) * CPUCAP_SCORE_SCALE) / (y))
+#define CPUCAP_SCORE_NORMAL_FROM_255(x) CPUCAP_SCORE_NORMAL_FROM((x), 255)
+#define CPUCAP_SCORE_NORMAL_FROM_1024(x) CPUCAP_SCORE_NORMAL_FROM((x), 1024)
+
+#define CPUCAP_SCORE_NORMAL_TO(x, y) \
+ (((x) * (y)) / CPUCAP_SCORE_SCALE)
+/*
+ * Users should be able to see their CPUCAP_SCORE through sysctl. CPUCAP_SCORE
+ * is displayed as percentage so they don't need to know CPUCAP_SCORE_MAX.
+ */
+#define CPUCAP_SCORE_NORMAL_TO_PERCENT(x) CPUCAP_SCORE_NORMAL_TO((x), 100)
+
+/*
+ * Capability indices from Intel SDM
+ *
+ * These are bit positions in CPUID.06H:EDX[7:0]
+ * Bits 0 and 1 are always set together (either 00 or 11)
+ * Only valid if corresponding bit is set in capability bitmap.
+ *
+ * If not provided, CPUCAP_SCORE_DEFAULT will be used.
+ * + */ +#define CPUCAP_CAP_PERF 0 /* Performance capability */ +#define CPUCAP_CAP_EFF 1 /* Efficiency capability */ +/* Bit 2-7 are reserved */ + +/* + * Flags (stored in pc_cap_flags) + */ +#define CPUCAP_FLAG_VALID (1u << 0) /* Capability data is valid */ +#define CPUCAP_FLAG_DYNAMIC (1u << 1) /* Scores update at runtime */ +#define CPUCAP_FLAG_THROTTLED (1u << 2) /* CPU currently throttled */ + +/* + * System-wide CPU capability state + * + * Do not modify directly — use accessors and setters below + */ +struct cpucap_t { + int initialized; /* Subsystem ready */ + int capacity_varies; /* CPUs have different capacity */ + int has_dynamic_scores; /* Runtime updates available */ + int class_count; /* cl: class count */ + int cap_count; /* cp: capability count */ + uint8_t cap_bitmap; /* enabled capabilities */ +}; + +extern struct cpucap_t cpucap; + +/* + * Feature checks for scheduler + */ +static inline int +cpucap_initialized(void) +{ + return (cpucap.initialized); +} + +static inline int +cpucap_has_capacity(void) +{ + return (cpucap.initialized && cpucap.capacity_varies); +} + +static inline int +cpucap_has_scores(void) +{ + return (cpucap.initialized && cpucap.has_dynamic_scores); +} + +static inline int +cpucap_enabled(void) +{ + return (cpucap.initialized && (cpucap.capacity_varies || cpucap.has_dynamic_scores)); +} + +/* + * Accessors - use atomic loads for runtime-updated fields + */ +static inline cpucap_score_t +cpucap_capacity(struct pcpu *pc) +{ + return (pc->pc_cap_capacity); +} + +static inline cpucap_score_t +cpucap_score(struct pcpu *pc, int class, int cap) +{ + if (__predict_false(!cpucap.has_dynamic_scores)) + return pc->pc_cap_capacity; /* Fall back to capacity */ + if (__predict_false(class >= cpucap.class_count)) + class = 0; + if (__predict_false(cap >= cpucap.cap_count)) + return CPUCAP_SCORE_DEFAULT; + + return atomic_load_acq_16(&pc->pc_cap_scores[class][cap]); +} + +static inline cpucap_score_t +cpucap_perf_score(struct pcpu *pc, int 
class) +{ + return cpucap_score(pc, class, CPUCAP_CAP_PERF); +} + +static inline cpucap_score_t +cpucap_eff_score(struct pcpu *pc, int class) +{ + return cpucap_score(pc, class, CPUCAP_CAP_EFF); +} + +static inline int +cpucap_throttled(struct pcpu *pc) +{ + return ((atomic_load_acq_8(&pc->pc_cap_flags) & CPUCAP_FLAG_THROTTLED) != 0); +} + +static inline int +cpucap_is_dynamic(struct pcpu *pc) +{ + return ((pc->pc_cap_flags & CPUCAP_FLAG_DYNAMIC) != 0); +} + +/* + * Setters - called by MD initializer and drivers + * Use atomic stores for fields that may be read concurrently + */ +static inline void +cpucap_cpu_set_capacity(struct pcpu *pc, cpucap_score_t cap) +{ + pc->pc_cap_capacity = cap; + atomic_set_8(&pc->pc_cap_flags, CPUCAP_FLAG_VALID); +} + +static inline void +cpucap_cpu_set_score(struct pcpu *pc, int class, int cap, cpucap_score_t score) +{ + if (class < CPUCAP_CLASS_MAX && cap < CPUCAP_CAP_MAX) + atomic_store_rel_16(&pc->pc_cap_scores[class][cap], score); +} + +static inline void +cpucap_cpu_set_flags(struct pcpu *pc, uint8_t flags) +{ + atomic_set_8(&pc->pc_cap_flags, flags); +} + +static inline void +cpucap_cpu_clear_flags(struct pcpu *pc, uint8_t flags) +{ + atomic_clear_8(&pc->pc_cap_flags, flags); +} + +/* + * Called by MD initializer to set values + */ +void cpucap_set_class_count(int count); +void cpucap_set_cap_count(int count, uint8_t bitmap); +void cpucap_set_capacity_varies(int varies); +void cpucap_set_dynamic_scores(int dynamic); + +/* + * MI thread classification - wrapper that calls MD implementation + * Returns thread class (0 to class_count-1) + * + * On Intel: May use hardware ITD classification + * On others: Uses software heuristic (priority-based) + */ +int cpucap_get_class(struct thread *td); + +/* + * Called by scheduler + */ +int cpucap_highest_capacity_cpu(const cpuset_t *mask); +int cpucap_best_cpu(const cpuset_t *mask, int class, int cap); + +/* + * Machine-dependent implementation + * If not exist, weak symbol will be used + 
*/ +void cpucap_md_init(void); +int cpucap_md_get_class(struct thread *td); + +/* + * ACPI CPPC interface - MI, implemented in kern/subr_cpucap_acpi.c + * Called during ACPI CPU device attach + */ +void cpucap_acpi_init(void); +void cpucap_acpi_cpu_attach(device_t dev); + +#endif /* HMP */ +#endif /* !LOCORE */ +#endif /* _KERNEL */ +#endif /* _SYS_HMP_H_ */ diff --git a/sys/sys/pcpu.h b/sys/sys/pcpu.h --- a/sys/sys/pcpu.h +++ b/sys/sys/pcpu.h @@ -39,6 +39,7 @@ #include #include +#include #include #include #include @@ -194,6 +195,10 @@ uintptr_t pc_dynamic; /* Dynamic per-cpu data area */ uint64_t pc_early_dummy_counter; /* Startup time counter(9) */ uintptr_t pc_zpcpu_offset; /* Offset into zpcpu allocs */ + cpucap_score_t pc_cap_capacity; /* Capacity for load balancing */ + cpucap_score_t pc_cap_scores[CPUCAP_CLASS_MAX][CPUCAP_CAP_MAX]; + /* Dynamic scores for thread placement */ + uint8_t pc_cap_flags; /* capability flags, CPUCAP_FLAG_* */ /* * Keep MD fields last, so that CPU-specific variations on a