diff --git a/sys/amd64/include/atomic.h b/sys/amd64/include/atomic.h --- a/sys/amd64/include/atomic.h +++ b/sys/amd64/include/atomic.h @@ -64,7 +64,7 @@ * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers. * An assertion in amd64/vm_machdep.c ensures that the value is correct. */ -#define OFFSETOF_MONITORBUF 0x100 +#define OFFSETOF_MONITORBUF 0x200 #endif #if defined(SAN_NEEDS_INTERCEPTORS) && !defined(SAN_RUNTIME) diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -101,7 +101,8 @@ u_int pc_small_core; \ u_int pc_pcid_invlpg_workaround; \ struct pmap_pcid pc_kpmap_store; \ - char __pad[2900] /* pad to UMA_PCPU_ALLOC_SIZE */ + uint8_t pc_hfi_index; /* HFI table index */ \ + char __pad[2644] /* pad to UMA_PCPU_ALLOC_SIZE */ #define PC_DBREG_CMD_NONE 0 #define PC_DBREG_CMD_LOAD 1 diff --git a/sys/arm/include/pcpu.h b/sys/arm/include/pcpu.h --- a/sys/arm/include/pcpu.h +++ b/sys/arm/include/pcpu.h @@ -63,7 +63,7 @@ uint32_t pc_original_actlr; \ uint64_t pc_clock; \ uint32_t pc_mpidr; \ - char __pad[135] + char __pad[391] #ifdef _KERNEL diff --git a/sys/arm64/include/pcpu.h b/sys/arm64/include/pcpu.h --- a/sys/arm64/include/pcpu.h +++ b/sys/arm64/include/pcpu.h @@ -51,7 +51,7 @@ uint64_t pc_mpidr; \ u_int pc_bcast_tlbi_workaround; \ uint64_t pc_release_addr; \ - char __pad[189] + char __pad[384] #ifdef _KERNEL diff --git a/sys/conf/NOTES b/sys/conf/NOTES --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -211,7 +211,7 @@ options SCHED_4BSD options SCHED_STATS #options SCHED_ULE - + ##################################################################### # SMP OPTIONS: # @@ -220,6 +220,15 @@ # Mandatory: options SMP # Symmetric MultiProcessor Kernel +##################################################################### +# HMP OPTIONS: +# +# HMP enables building of Heterogeneous MultiProcessor Kernel. +# This depends on SMP and SCHED_ULE. 
+ +# Mandatory: +options HMP # Heterogeneous MultiProcessor Kernel + # EARLY_AP_STARTUP releases the Application Processors earlier in the # kernel startup process (before devices are probed) rather than at the # end. This is a temporary option for use during the transition from diff --git a/sys/conf/files b/sys/conf/files --- a/sys/conf/files +++ b/sys/conf/files @@ -1270,7 +1270,7 @@ dev/bhnd/cores/chipc/bhnd_sprom_chipc.c optional bhnd dev/bhnd/cores/chipc/bhnd_pmu_chipc.c optional bhnd dev/bhnd/cores/chipc/chipc.c optional bhnd -dev/bhnd/cores/chipc/chipc_cfi.c optional bhnd cfi +dev/bhnd/cores/chipc/chipc_cfi.c optional bhnd cfi dev/bhnd/cores/chipc/chipc_gpio.c optional bhnd gpio dev/bhnd/cores/chipc/chipc_slicer.c optional bhnd cfi | bhnd spibus dev/bhnd/cores/chipc/chipc_spi.c optional bhnd spibus @@ -3955,6 +3955,7 @@ kern/subr_gtaskqueue.c standard kern/subr_hash.c standard kern/subr_hints.c standard +kern/subr_hmp.c standard kern/subr_kdb.c standard kern/subr_kobj.c standard kern/subr_lock.c standard diff --git a/sys/conf/options b/sys/conf/options --- a/sys/conf/options +++ b/sys/conf/options @@ -639,6 +639,7 @@ EARLY_AP_STARTUP opt_global.h SMP opt_global.h NUMA opt_global.h +HMP opt_global.h # Size of the kernel message buffer MSGBUF_SIZE opt_msgbuf.h diff --git a/sys/kern/subr_hmp.c b/sys/kern/subr_hmp.c new file mode 100644 --- /dev/null +++ b/sys/kern/subr_hmp.c @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2026 The FreeBSD Foundation + * + * This software was developed by Minsoo Choo under sponsorship from the + * FreeBSD Foundation. 
+ * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include + +#ifdef HMP + +/* System-wide CPU capability information */ +struct hmp_t hmp; + +/* + * Detect if any CPU has dynamic scores + */ +static void +hmp_detect_scores(void) +{ + struct pcpu *pc; + + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + if (pc->pc_hmp_flags & HMP_FLAG_DYNAMIC) { + hmp.has_scores = true; + return; + } + } + + hmp.has_scores = false; +} + +/* + * Initialize machine-independent hmp state + */ +static void +hmp_init(void *arg __unused) +{ + struct pcpu *pc; + + /* Default system state */ + hmp.total_capacity = 0; + hmp.class_count = 1; + hmp.cap_count = 0; + hmp.cap_bitmap = 0; + hmp.has_scores = false; + + /* Initialize pcpu fields with defaults */ + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + pc->pc_hmp_capacity = HMP_SCORE_DEFAULT; + pc->pc_hmp_flags = 0; + } + + /* MD init populates actual values */ + hmp_md_init(); + + hmp_detect_scores(); + + /* Log what we found */ + if (hmp.has_scores) { + printf("hmp: scores=%d, classes=%d, caps=%d\n", + hmp.has_scores, hmp.class_count, + hmp.cap_count); + } +} +SYSINIT(hmp, SI_SUB_SMP + 1, SI_ORDER_ANY, hmp_init, NULL); + +/* + * MI functions called by machine-dependent code to set values during boot + */ +void +hmp_set_class_count(uint8_t count) +{ + if (count < 1) + count = 1; + if (count > HMP_CLASS_MAX) + count = HMP_CLASS_MAX; + hmp.class_count = count; +} + +void +hmp_set_cap_count(uint8_t count, uint8_t bitmap) +{ + if (count > HMP_CAP_MAX) + count = HMP_CAP_MAX; + hmp.cap_count = count; + hmp.cap_bitmap = bitmap; +} + +/* + * Machine-independent functions for scheduler + */ + +/* + * Fall back for hmp_best_cpu in case processor doesn't support dynamic + * score update. Takes O(n). + * + * TODO: Precalculate cpu with highest capacity on boot after initialization. 
+ */ +int +hmp_highest_capacity_cpu(const cpuset_t *mask) +{ + struct pcpu *pc; + int best_cpu; + hmp_score_t best_cap; + + best_cpu = -1; + best_cap = 0; + + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + if (mask != NULL && !CPU_ISSET(pc->pc_cpuid, mask)) + continue; + + if (pc->pc_hmp_capacity > best_cap) { + best_cap = pc->pc_hmp_capacity; + best_cpu = pc->pc_cpuid; + } + } + + return best_cpu; +} + +/* + * Find CPU with best score for given class and capability for thread + * placement. Falls back to capacity if scores are not provided. Takes O(n). + * + * TODO: If this brings severe performance degradation, score providers should + * maintain and update index every time new information is fed and + * the scheduler should use the index which takes O(1). + */ +int +hmp_best_cpu(const cpuset_t *mask, int class, int cap) +{ + struct pcpu *pc; + int best_cpu; + hmp_score_t best_score; + + /* + * If no dynamic scores, fall back to static capacity + */ + if (!hmp.has_scores) + return hmp_highest_capacity_cpu(mask); + + if (class >= hmp.class_count) + class = 0; + if (cap >= hmp.cap_count) + return hmp_highest_capacity_cpu(mask); + + best_cpu = -1; + best_score = 0; + + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + if (mask != NULL && !CPU_ISSET(pc->pc_cpuid, mask)) + continue; + + if (pc->pc_hmp_scores[class][cap] > best_score) { + best_score = pc->pc_hmp_scores[class][cap]; + best_cpu = pc->pc_cpuid; + } + } + + return best_cpu; +} + +void +hmp_default_init(void) +{ + struct pcpu *pc; + + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + hmp_set_capacity(pc, HMP_SCORE_DEFAULT); + } +} + +/* + * Weak MD functions - overridden by architecture + */ +__weak_symbol void +hmp_md_init(void) +{ + hmp_default_init(); +} + +__weak_symbol int +hmp_md_get_class(struct thread *td __unused) +{ + return 0; +} + +#endif /* HMP */ diff --git a/sys/powerpc/include/pcpu.h b/sys/powerpc/include/pcpu.h --- a/sys/powerpc/include/pcpu.h +++ b/sys/powerpc/include/pcpu.h @@ -68,7 +68,7 @@ struct 
pvo_entry *qmap_pvo; \ struct mtx qmap_lock; \ uint64_t opal_hmi_flags; \ - char __pad[1337]; + char __pad[1081]; #ifdef __powerpc64__ #define PCPU_MD_AIM_FIELDS PCPU_MD_AIM64_FIELDS diff --git a/sys/riscv/include/pcpu.h b/sys/riscv/include/pcpu.h --- a/sys/riscv/include/pcpu.h +++ b/sys/riscv/include/pcpu.h @@ -47,7 +47,7 @@ uint32_t pc_pending_ipis; /* IPIs pending to this CPU */ \ uint32_t pc_hart; /* Hart ID */ \ uint64_t pc_clock; \ - char __pad[48] /* Pad to factor of PAGE_SIZE */ + char __pad[552] /* Pad to factor of PAGE_SIZE */ #ifdef _KERNEL diff --git a/sys/sys/_hmp.h b/sys/sys/_hmp.h new file mode 100644 --- /dev/null +++ b/sys/sys/_hmp.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2026 The FreeBSD Foundation + * + * This software was developed by Minsoo Choo under sponsorship from the + * FreeBSD Foundation. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef _SYS__HMP_H_ +#define _SYS__HMP_H_ + +#include + +/* + * CPU capability score type. + * This value should be normalized to 0-1024. + * + * ACPI CPPC : 0-255 + * Intel HFI : 0-255 + * Arm capacity : 0-1024 + * + * Whenever there is a new capability score scheme where highest score exceeds + * 1024, HMP_SCORE_SCALE should be bumped to the new highest score for + * fine-grained score management on a new architecture. + */ +typedef uint16_t hmp_score_t; + +/* + * Maximum supported classes and capabilities + * + * These are compile-time maximums for array sizing to avoid dynamic + * allocation. Actual counts are runtime values from + * hmp.class_count and hmp.cap_count. + * + * Intel SDM reserves ECX[15:8] for class count (max 256) + * Intel SDM reserves EDX[7:0] for capability bitmap (max 8 capabilities) + * + * We use conservative maximums to avoid excessive memory usage. 
+ */ +#define HMP_CLASS_MAX 16 +#define HMP_CAP_MAX 8 + +#endif /* _SYS__HMP_H_ */ diff --git a/sys/sys/hmp.h b/sys/sys/hmp.h new file mode 100644 --- /dev/null +++ b/sys/sys/hmp.h @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2026 The FreeBSD Foundation + * + * This software was developed by Minsoo Choo under sponsorship from the + * FreeBSD Foundation. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#ifndef _SYS_HMP_H_ +#define _SYS_HMP_H_ + +#ifdef _KERNEL +#ifndef LOCORE + +#include +#include +#include + +#include + +#ifdef HMP + +#define HMP_SCORE_SCALE 1024 +#define HMP_SCORE_MAX HMP_SCORE_SCALE +#define HMP_SCORE_DEFAULT HMP_SCORE_MAX + +/* Score normalization macro functions */ +#define HMP_SCORE_NORMAL_FROM(x, y) (((x) * HMP_SCORE_SCALE) / (y)) +#define HMP_SCORE_NORMAL_FROM_255(x) HMP_SCORE_NORMAL_FROM((x), 255) +#define HMP_SCORE_NORMAL_FROM_1024(x) HMP_SCORE_NORMAL_FROM((x), 1024) + +#define HMP_SCORE_NORMAL_TO(x, y) (((x) * (y)) / HMP_SCORE_SCALE) +/* + * Users should be able to see their HMP_SCORE through sysctl. HMP_SCORE + * is displayed as percentage so they don't need to know HMP_SCORE_MAX. + */ +#define HMP_SCORE_NORMAL_TO_PERCENT(x) HMP_SCORE_NORMAL_TO((x), 100) + +/* + * Capability indices from Intel SDM + * + * These are bit positions in CPUID.06H:EDX[7:0] + * Bit 0 and 1 are always set together (either 00 or 11) + * Only valid if corresponding bit is set in capability bitmap. + * + * If not provided, HMP_SCORE_DEFAULT will be used. 
+ * + */ +#define HMP_CAP_PERF 0 /* Performance capability */ +#define HMP_CAP_EFF 1 /* Efficiency capability */ +/* Bit 2-7 are reserved */ + +/* + * Flags (stored in pc_hmp_flags) + */ +#define HMP_FLAG_VALID (1u << 0) /* Capability data is valid */ +#define HMP_FLAG_DYNAMIC (1u << 1) /* Scores update at runtime */ +#define HMP_FLAG_THROTTLED (1u << 2) /* CPU currently throttled */ + +/* + * System-wide CPU capability state + * + * Do not modify directly — use accessors and setters below + * + * capacity : processor's heterogeneity. Static and initialized at boot time. + * score : processor's real-time status. Dynamic and updated by drivers. + * + * Both use hmp_score_t type and normalized to 0-1024. + */ +struct hmp_t { + uint32_t total_capacity; /* Sum over all CPUs; wider than hmp_score_t to avoid overflow */ + uint8_t class_count; /* cl: class count */ + uint8_t cap_count; /* cp: capability count */ + uint8_t cap_bitmap; /* Enabled capabilities */ + bool has_scores; /* Runtime updates available */ +}; + +extern struct hmp_t hmp; + +/* + * Accessors - use atomic loads for runtime-updated fields + */ +static inline hmp_score_t +hmp_capacity(struct pcpu *pc) +{ + return (pc->pc_hmp_capacity); +} + +static inline hmp_score_t +hmp_score(struct pcpu *pc, int class, int cap) +{ + if (__predict_false(!hmp.has_scores)) + return pc->pc_hmp_capacity; /* Fall back to capacity */ + if (__predict_false(class >= hmp.class_count)) + class = 0; + if (__predict_false(cap >= hmp.cap_count)) + return HMP_SCORE_DEFAULT; + + return atomic_load_acq_16(&pc->pc_hmp_scores[class][cap]); +} + +static inline hmp_score_t +hmp_perf_score(struct pcpu *pc, int class) +{ + return hmp_score(pc, class, HMP_CAP_PERF); +} + +static inline hmp_score_t +hmp_eff_score(struct pcpu *pc, int class) +{ + return hmp_score(pc, class, HMP_CAP_EFF); +} + +static inline int +hmp_throttled(struct pcpu *pc) +{ + return ((atomic_load_acq_8(&pc->pc_hmp_flags) & HMP_FLAG_THROTTLED) != 0); +} + +static inline int +hmp_is_dynamic(struct pcpu 
*pc) +{ + return ((pc->pc_hmp_flags & HMP_FLAG_DYNAMIC) != 0); +} + +/* + * Setters - called by MD initializer and drivers + * Use atomic stores for fields that may be read concurrently + */ +static inline void +hmp_set_capacity(struct pcpu *pc, hmp_score_t cap) +{ + pc->pc_hmp_capacity = cap; + hmp.total_capacity += cap; + atomic_set_8(&pc->pc_hmp_flags, HMP_FLAG_VALID); +} + +static inline void +hmp_set_score(struct pcpu *pc, int class, int cap, hmp_score_t score) +{ + if (class < HMP_CLASS_MAX && cap < HMP_CAP_MAX) + atomic_store_rel_16(&pc->pc_hmp_scores[class][cap], score); +} + +static inline void +hmp_set_flags(struct pcpu *pc, uint8_t flags) +{ + atomic_set_8(&pc->pc_hmp_flags, flags); +} + +static inline void +hmp_clear_flags(struct pcpu *pc, uint8_t flags) +{ + atomic_clear_8(&pc->pc_hmp_flags, flags); +} + +/* + * Called by MD initializer to set values + */ +void hmp_set_class_count(uint8_t count); +void hmp_set_cap_count(uint8_t count, uint8_t bitmap); + +/* + * TODO: + * MI thread classification - wrapper that calls MD implementation + * Returns thread class (0 to class_count-1) + * + * On Intel: May use hardware ITD classification + * On others: Uses software heuristic (priority-based) + * + * int hmp_get_class(struct thread *td); + */ + +/* + * Called by scheduler + */ +int hmp_highest_capacity_cpu(const cpuset_t *mask); +int hmp_best_cpu(const cpuset_t *mask, int class, int cap); + +/* + * Default implementation for hmp_md_init() + * + * This can be used in architecture-specific implementation of cpu_md_init() + * if the machine doesn't provide any information about capacity and score. 
+ */ +void hmp_default_init(void); + +/* + * Machine-dependent implementation + * If it does not exist, the weak symbol will be used + */ +void hmp_md_init(void); +int hmp_md_get_class(struct thread *td); + +/* + * TODO: + * ACPI CPPC interface - MI, implemented in kern/subr_hmp_acpi.c + * Called during ACPI CPU device attach + * + * void hmp_acpi_init(void); + * void hmp_acpi_cpu_attach(device_t dev); + */ + +#endif /* HMP */ +#endif /* !LOCORE */ +#endif /* _KERNEL */ +#endif /* _SYS_HMP_H_ */ diff --git a/sys/sys/pcpu.h b/sys/sys/pcpu.h --- a/sys/sys/pcpu.h +++ b/sys/sys/pcpu.h @@ -39,6 +39,7 @@ #include #include +#include #include #include #include @@ -194,6 +195,10 @@ uintptr_t pc_dynamic; /* Dynamic per-cpu data area */ uint64_t pc_early_dummy_counter; /* Startup time counter(9) */ uintptr_t pc_zpcpu_offset; /* Offset into zpcpu allocs */ + hmp_score_t pc_hmp_capacity; /* Capacity for load balancing */ + hmp_score_t pc_hmp_scores[HMP_CLASS_MAX][HMP_CAP_MAX]; + /* Dynamic scores for thread placement */ + uint8_t pc_hmp_flags; /* capability flags, HMP_FLAG_* */ /* * Keep MD fields last, so that CPU-specific variations on a