Index: head/lib/libc/x86/sys/__vdso_gettc.c =================================================================== --- head/lib/libc/x86/sys/__vdso_gettc.c (revision 310238) +++ head/lib/libc/x86/sys/__vdso_gettc.c (revision 310239) @@ -1,179 +1,252 @@ /*- * Copyright (c) 2012 Konstantin Belousov * Copyright (c) 2016 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include "namespace.h" #include #include #include #include #include #include #include #include #include "un-namespace.h" #include #include #include +#ifdef __amd64__ +#include +#include +#endif #include "libc_private.h" /* lfence_mb(): execute LFENCE when the CPU is known to support it. On i386 the first call checks whether bit 0x200000 of EFLAGS can be toggled (i.e. whether CPUID exists), then runs CPUID leaf 1 and tests CPUID_SSE2 in the %edx output; the verdict is cached in the static lfence_works and no fence is issued when it is 0. On amd64 lfence() is called unconditionally. Any other architecture is a compile-time error. */ static void lfence_mb(void) { #if defined(__i386__) static int lfence_works = -1; u_int cpuid_supported, p[4]; if (lfence_works == -1) { __asm __volatile( " pushfl\n" " popl %%eax\n" " movl %%eax,%%ecx\n" " xorl $0x200000,%%eax\n" " pushl %%eax\n" " popfl\n" " pushfl\n" " popl %%eax\n" " xorl %%eax,%%ecx\n" " je 1f\n" " movl $1,%0\n" " jmp 2f\n" "1: movl $0,%0\n" "2:\n" : "=r" (cpuid_supported) : : "eax", "ecx", "cc"); if (cpuid_supported) { __asm __volatile( " pushl %%ebx\n" " cpuid\n" " movl %%ebx,%1\n" " popl %%ebx\n" : "=a" (p[0]), "=r" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (0x1)); lfence_works = (p[3] & CPUID_SSE2) != 0; } else lfence_works = 0; } if (lfence_works == 1) lfence(); #elif defined(__amd64__) lfence(); #else #error "arch" #endif } /* __vdso_gettc_rdtsc_low(): fence, then RDTSC followed by SHRD, which shifts %eax right by th->th_x86_shift bits while shifting in bits from %edx. The resulting %eax is returned. */ static u_int __vdso_gettc_rdtsc_low(const struct vdso_timehands *th) { u_int rv; lfence_mb(); __asm __volatile("rdtsc; shrd %%cl, %%edx, %0" : "=a" (rv) : "c" (th->th_x86_shift) : "edx"); return (rv); } /* __vdso_rdtsc32(): fence, then return the value of rdtsc32(). */ static u_int __vdso_rdtsc32(void) { lfence_mb(); return (rdtsc32()); } /* hpet_dev_map caches the HPET mapping: NULL means no mapping has been attempted yet, MAP_FAILED means the device could not be opened (or the mmap() call failed). hpet_idx records the index used to build the device name. */ static char *hpet_dev_map = NULL; static uint32_t hpet_idx = 0xffffffff; /* __vdso_init_hpet(): format hpet_idx in decimal after the /dev/hpet prefix (digits are written least-significant first, then reversed in place), open that path read-only, drop any earlier valid mapping and map PAGE_SIZE bytes read-only and shared into hpet_dev_map. The descriptor is closed afterwards. NOTE(review): the parameter u is overwritten with hpet_idx before it is read, so the argument value itself is never used. NOTE(review): when _open() fails hpet_dev_map is set to MAP_FAILED without unmapping an earlier mapping -- confirm whether that leak matters. */ static void __vdso_init_hpet(uint32_t u) { static const char devprefix[] = "/dev/hpet"; char devname[64], *c, *c1, t; int fd; c1 = c = stpcpy(devname, devprefix); u = hpet_idx; do { *c++ = u % 10 + '0'; u /= 10; } while (u != 0); *c = '\0'; for (c--; c1 != c; c1++, c--) { t = *c1; *c1 = *c; *c = t; } fd = _open(devname, O_RDONLY); if (fd == -1) { hpet_dev_map = MAP_FAILED; return; } if (hpet_dev_map != NULL && hpet_dev_map != MAP_FAILED) munmap(hpet_dev_map, PAGE_SIZE); hpet_dev_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0); _close(fd); } +#ifdef __amd64__ + +#define HYPERV_REFTSC_DEVPATH "/dev/" 
HYPERV_REFTSC_DEVNAME + +/* + * NOTE: + * We use 'NULL' for this variable to indicate that initialization + * is required. And if this variable is 'MAP_FAILED', then Hyper-V + * reference TSC can not be used, e.g. in misconfigured jail. + */ +static struct hyperv_reftsc *hyperv_ref_tsc; + /* __vdso_init_hyperv_tsc(): open HYPERV_REFTSC_DEVPATH read-only and map sizeof(*hyperv_ref_tsc) bytes of it, read-only and shared, storing the mmap() result in hyperv_ref_tsc; the descriptor is closed afterwards. When _open() fails hyperv_ref_tsc is set to MAP_FAILED instead. NOTE(review): nothing in this function serialises concurrent first callers -- confirm whether two threads mapping the page at once is acceptable. */ +static void +__vdso_init_hyperv_tsc(void) +{ + int fd; + + fd = _open(HYPERV_REFTSC_DEVPATH, O_RDONLY); + if (fd < 0) { + /* Prevent the caller from re-entering. */ + hyperv_ref_tsc = MAP_FAILED; + return; + } + hyperv_ref_tsc = mmap(NULL, sizeof(*hyperv_ref_tsc), PROT_READ, + MAP_SHARED, fd, 0); + _close(fd); +} + /* __vdso_hyperv_tsc(): compute the counter value from the reference TSC page. Each pass loads tsc_seq with acquire semantics, reads tsc_scale and tsc_ofs, fences and reads the TSC, then uses MULQ to form ((tsc * scale) >> 64) + ofs (the high half of the product lands in ret, the low half in disc and is unused). The result is accepted only if tsc_seq still equals the value loaded at the top of the pass; it is then stored in *tc, narrowed to u_int, and 0 is returned. The loop ends with ENOSYS as soon as tsc_seq is read as 0. */ +static int +__vdso_hyperv_tsc(struct hyperv_reftsc *tsc_ref, u_int *tc) +{ + uint64_t disc, ret, tsc, scale; + uint32_t seq; + int64_t ofs; + + while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) { + scale = tsc_ref->tsc_scale; + ofs = tsc_ref->tsc_ofs; + + lfence_mb(); + tsc = rdtsc(); + + /* ret = ((tsc * scale) >> 64) + ofs */ + __asm__ __volatile__ ("mulq %3" : + "=d" (ret), "=a" (disc) : + "a" (tsc), "r" (scale)); + ret += ofs; + + atomic_thread_fence_acq(); + if (tsc_ref->tsc_seq == seq) { + *tc = ret; + return (0); + } + + /* Sequence changed; re-sync. */ + } + return (ENOSYS); +} + +#endif /* __amd64__ */ + /* __vdso_gettc(): weak symbol; store the current counter value for the algorithm named by th->th_algo in *tc. Returns 0 on success and ENOSYS for an unknown algorithm or an unusable backing device. X86_TSC uses the shifted read when th_x86_shift is greater than 0 and rdtsc32 otherwise. X86_HPET re-initialises the HPET mapping when none has been attempted or th_x86_hpet_idx differs from the cached hpet_idx, then reads a 32-bit value at offset HPET_MAIN_COUNTER through a volatile pointer. X86_HVTSC (amd64 only) maps the Hyper-V reference TSC page on first use and returns the result of __vdso_hyperv_tsc(), which may itself be ENOSYS. */ #pragma weak __vdso_gettc int __vdso_gettc(const struct vdso_timehands *th, u_int *tc) { uint32_t tmp; switch (th->th_algo) { case VDSO_TH_ALGO_X86_TSC: *tc = th->th_x86_shift > 0 ? 
__vdso_gettc_rdtsc_low(th) : __vdso_rdtsc32(); return (0); case VDSO_TH_ALGO_X86_HPET: tmp = th->th_x86_hpet_idx; if (hpet_dev_map == NULL || tmp != hpet_idx) { hpet_idx = tmp; __vdso_init_hpet(hpet_idx); } if (hpet_dev_map == MAP_FAILED) return (ENOSYS); *tc = *(volatile uint32_t *)(hpet_dev_map + HPET_MAIN_COUNTER); return (0); +#ifdef __amd64__ + case VDSO_TH_ALGO_X86_HVTSC: + if (hyperv_ref_tsc == NULL) + __vdso_init_hyperv_tsc(); + if (hyperv_ref_tsc == MAP_FAILED) + return (ENOSYS); + return (__vdso_hyperv_tsc(hyperv_ref_tsc, tc)); +#endif default: return (ENOSYS); } } /* __vdso_gettimekeep(): weak symbol; returns the result of _elf_aux_info() asked for the AT_TIMEKEEP entry, with tk as the output buffer of size sizeof(*tk). */ #pragma weak __vdso_gettimekeep int __vdso_gettimekeep(struct vdso_timekeep **tk) { return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk))); } Index: head/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c =================================================================== --- head/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c (revision 310238) +++ head/sys/dev/hyperv/vmbus/amd64/hyperv_machdep.c (revision 310239) @@ -1,204 +1,219 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include /* State of the reference TSC page: tsc_ref is the pointer returned by hyperv_dmamem_alloc() and tsc_ref_dma the descriptor whose hv_paddr is written to MSR_HV_REFERENCE_TSC and returned by the d_mmap handler. */ struct hyperv_reftsc_ctx { struct hyperv_reftsc *tsc_ref; struct hyperv_dma tsc_ref_dma; }; +static uint32_t hyperv_tsc_vdso_timehands( + struct vdso_timehands *, + struct timecounter *); + static d_open_t hyperv_tsc_open; static d_mmap_t hyperv_tsc_mmap; /* Timecounter passed to tc_init() by hyperv_tsc_tcinit(), which also selects tc_get_timecount. */ static struct timecounter hyperv_tsc_timecounter = { .tc_get_timecount = NULL, /* based on CPU vendor. 
*/ - .tc_poll_pps = NULL, .tc_counter_mask = 0xffffffff, .tc_frequency = HYPERV_TIMER_FREQ, .tc_name = "Hyper-V-TSC", .tc_quality = 3000, - .tc_flags = 0, - .tc_priv = NULL + .tc_fill_vdso_timehands = hyperv_tsc_vdso_timehands, }; /* Switch table for the device created by hyperv_tsc_tcinit(); only open and mmap handlers are provided. */ static struct cdevsw hyperv_tsc_cdevsw = { .d_version = D_VERSION, .d_open = hyperv_tsc_open, .d_mmap = hyperv_tsc_mmap, .d_name = HYPERV_REFTSC_DEVNAME }; static struct hyperv_reftsc_ctx hyperv_ref_tsc; /* hypercall_md(): move out_paddr into %r8, then CALL indirectly through the hc_addr operand with in_val bound to the c register and in_paddr to the d register; the value of the a register after the call is returned. NOTE(review): %r8 is loaded by a separate asm statement and is not listed as an input of the CALL statement -- confirm the compiler cannot reuse %r8 in between. */ uint64_t hypercall_md(volatile void *hc_addr, uint64_t in_val, uint64_t in_paddr, uint64_t out_paddr) { uint64_t status; __asm__ __volatile__ ("mov %0, %%r8" : : "r" (out_paddr): "r8"); __asm__ __volatile__ ("call *%3" : "=a" (status) : "c" (in_val), "d" (in_paddr), "m" (hc_addr)); return (status); } /* hyperv_tsc_open(): d_open handler; returns EPERM when oflags contains FWRITE and 0 otherwise. */ static int hyperv_tsc_open(struct cdev *dev __unused, int oflags, int devtype __unused, struct thread *td __unused) { if (oflags & FWRITE) return (EPERM); return (0); } /* hyperv_tsc_mmap(): d_mmap handler; asserts that the page has been set up, rejects any non-zero offset with EOPNOTSUPP and otherwise stores the physical address of the page in *paddr and returns 0. */ static int hyperv_tsc_mmap(struct cdev *dev __unused, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot __unused, vm_memattr_t *memattr __unused) { KASSERT(hyperv_ref_tsc.tsc_ref != NULL, ("reftsc has not been setup")); /* * NOTE: * 'nprot' does not contain information of interest to us; * WR-open is blocked by d_open. 
*/ if (offset != 0) return (EOPNOTSUPP); *paddr = hyperv_ref_tsc.tsc_ref_dma.hv_paddr; return (0); +} + /* hyperv_tsc_vdso_timehands(): tc_fill_vdso_timehands callback of hyperv_tsc_timecounter; sets th_algo to VDSO_TH_ALGO_X86_HVTSC, clears th_x86_shift, th_x86_hpet_idx and th_res, and returns 1. */ +static uint32_t +hyperv_tsc_vdso_timehands(struct vdso_timehands *vdso_th, + struct timecounter *tc __unused) +{ + + vdso_th->th_algo = VDSO_TH_ALGO_X86_HVTSC; + vdso_th->th_x86_shift = 0; + vdso_th->th_x86_hpet_idx = 0; + bzero(vdso_th->th_res, sizeof(vdso_th->th_res)); + return (1); } /* HYPERV_TSC_TIMECOUNT(fence): defines a tc_get_timecount implementation named hyperv_tsc_timecount_ followed by the fence name. While tsc_seq is non-zero it reads tsc_scale and tsc_ofs, calls fence(), reads the TSC and forms ((tsc * scale) >> 64) + ofs, returning it if tsc_seq is unchanged; once tsc_seq reads as 0 it returns rdmsr(MSR_HV_TIME_REF_COUNT) instead. The trailing struct __hack turns the semicolon written after each expansion into a harmless declaration. */ #define HYPERV_TSC_TIMECOUNT(fence) \ static u_int \ hyperv_tsc_timecount_##fence(struct timecounter *tc) \ { \ struct hyperv_reftsc *tsc_ref = hyperv_ref_tsc.tsc_ref; \ uint32_t seq; \ \ while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) { \ uint64_t disc, ret, tsc; \ uint64_t scale = tsc_ref->tsc_scale; \ int64_t ofs = tsc_ref->tsc_ofs; \ \ fence(); \ tsc = rdtsc(); \ \ /* ret = ((tsc * scale) >> 64) + ofs */ \ __asm__ __volatile__ ("mulq %3" : \ "=d" (ret), "=a" (disc) : \ "a" (tsc), "r" (scale)); \ ret += ofs; \ \ atomic_thread_fence_acq(); \ if (tsc_ref->tsc_seq == seq) \ return (ret); \ \ /* Sequence changed; re-sync. */ \ } \ /* Fallback to the generic timecounter, i.e. rdmsr. */ \ return (rdmsr(MSR_HV_TIME_REF_COUNT)); \ } \ struct __hack HYPERV_TSC_TIMECOUNT(lfence); HYPERV_TSC_TIMECOUNT(mfence); /* hyperv_tsc_tcinit(): SYSINIT hook. Returns early unless hyperv_features has both CPUID_HV_MSR_TIME_REFCNT and CPUID_HV_MSR_REFERENCE_TSC, cpu_feature has CPUID_SSE2, and the vendor is AMD (mfence reader) or Intel (lfence reader). It then allocates the reference TSC page with BUS_DMA_ZERO, writes its page frame number plus MSR_HV_REFTSC_ENABLE to MSR_HV_REFERENCE_TSC while keeping the bits under MSR_HV_REFTSC_RSVD_MASK, registers the timecounter and creates the HYPERV_REFTSC_DEVNAME device with mode 0444. */ static void hyperv_tsc_tcinit(void *dummy __unused) { uint64_t val, orig; if ((hyperv_features & (CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_REFERENCE_TSC)) != (CPUID_HV_MSR_TIME_REFCNT | CPUID_HV_MSR_REFERENCE_TSC) || (cpu_feature & CPUID_SSE2) == 0) /* SSE2 for mfence/lfence */ return; switch (cpu_vendor_id) { case CPU_VENDOR_AMD: hyperv_tsc_timecounter.tc_get_timecount = hyperv_tsc_timecount_mfence; break; case CPU_VENDOR_INTEL: hyperv_tsc_timecounter.tc_get_timecount = hyperv_tsc_timecount_lfence; break; default: /* Unsupported CPU vendors. 
*/ return; } hyperv_ref_tsc.tsc_ref = hyperv_dmamem_alloc(NULL, PAGE_SIZE, 0, sizeof(struct hyperv_reftsc), &hyperv_ref_tsc.tsc_ref_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (hyperv_ref_tsc.tsc_ref == NULL) { printf("hyperv: reftsc page allocation failed\n"); return; } orig = rdmsr(MSR_HV_REFERENCE_TSC); val = MSR_HV_REFTSC_ENABLE | (orig & MSR_HV_REFTSC_RSVD_MASK) | ((hyperv_ref_tsc.tsc_ref_dma.hv_paddr >> PAGE_SHIFT) << MSR_HV_REFTSC_PGSHIFT); wrmsr(MSR_HV_REFERENCE_TSC, val); /* Register "enlightened" timecounter. */ tc_init(&hyperv_tsc_timecounter); /* Add device for mmap(2). */ make_dev(&hyperv_tsc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0444, HYPERV_REFTSC_DEVNAME); } SYSINIT(hyperv_tsc_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, hyperv_tsc_tcinit, NULL); Index: head/sys/sys/vdso.h =================================================================== --- head/sys/sys/vdso.h (revision 310238) +++ head/sys/sys/vdso.h (revision 310239) @@ -1,132 +1,134 @@ /*- * Copyright 2012 Konstantin Belousov . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SYS_VDSO_H #define _SYS_VDSO_H #include #include /* Timehands record taken by __vdso_gettc(): th_algo selects the read method, and VDSO_TIMEHANDS_MD supplies the machine-dependent members. */ struct vdso_timehands { uint32_t th_algo; uint32_t th_gen; uint64_t th_scale; uint32_t th_offset_count; uint32_t th_counter_mask; struct bintime th_offset; struct bintime th_boottime; VDSO_TIMEHANDS_MD }; /* Fixed header followed by a flexible array of struct vdso_timehands. */ struct vdso_timekeep { uint32_t tk_ver; uint32_t tk_enabled; uint32_t tk_current; struct vdso_timehands tk_th[]; }; #define VDSO_TK_CURRENT_BUSY 0xffffffff #define VDSO_TK_VER_1 0x1 #define VDSO_TK_VER_CURR VDSO_TK_VER_1 /* Machine-independent algorithm numbers; machine-dependent headers alias them to named algorithms. */ #define VDSO_TH_ALGO_1 0x1 #define VDSO_TH_ALGO_2 0x2 +#define VDSO_TH_ALGO_3 0x3 +#define VDSO_TH_ALGO_4 0x4 /* Prototypes visible only when _KERNEL is not defined. */ #ifndef _KERNEL struct timespec; struct timeval; struct timezone; int __vdso_clock_gettime(clockid_t clock_id, struct timespec *ts); int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); int __vdso_gettc(const struct vdso_timehands *vdso_th, u_int *tc); int __vdso_gettimekeep(struct vdso_timekeep **tk); #endif /* Kernel-only declarations. */ #ifdef _KERNEL struct timecounter; struct vdso_sv_tk { int sv_timekeep_off; int sv_timekeep_curr; uint32_t sv_timekeep_gen; }; void timekeep_push_vdso(void); uint32_t tc_fill_vdso_timehands(struct vdso_timehands *vdso_th); /* * The cpu_fill_vdso_timehands() function should fill MD-part of the * struct vdso_timehands, which is both machine- and * timecounter-dependent. The return value should be 1 if fast * userspace timecounter is enabled by hardware, and 0 otherwise. 
The * global sysctl enable override is handled by machine-independent code * after cpu_fill_vdso_timehands() call is made. */ uint32_t cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc); struct vdso_sv_tk *alloc_sv_tk(void); #define VDSO_TH_NUM 4 /* COMPAT_FREEBSD32 counterparts in which th_scale and the bintime fraction are stored as pairs of uint32_t. */ #ifdef COMPAT_FREEBSD32 struct bintime32 { uint32_t sec; uint32_t frac[2]; }; struct vdso_timehands32 { uint32_t th_algo; uint32_t th_gen; uint32_t th_scale[2]; uint32_t th_offset_count; uint32_t th_counter_mask; struct bintime32 th_offset; struct bintime32 th_boottime; VDSO_TIMEHANDS_MD32 }; struct vdso_timekeep32 { uint32_t tk_ver; uint32_t tk_enabled; uint32_t tk_current; struct vdso_timehands32 tk_th[]; }; uint32_t tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32); uint32_t cpu_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32, struct timecounter *tc); struct vdso_sv_tk *alloc_sv_tk_compat32(void); #endif #endif #endif Index: head/sys/x86/include/vdso.h =================================================================== --- head/sys/x86/include/vdso.h (revision 310238) +++ head/sys/x86/include/vdso.h (revision 310239) @@ -1,50 +1,51 @@ /*- * Copyright 2012 Konstantin Belousov . * Copyright 2016 The FreeBSD Foundation. * All rights reserved. * * Portions of this software were developed by Konstantin Belousov * under sponsorship from the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _X86_VDSO_H #define _X86_VDSO_H /* x86 machine-dependent members appended to struct vdso_timehands: th_x86_shift, th_x86_hpet_idx and six reserved 32-bit words. */ #define VDSO_TIMEHANDS_MD \ uint32_t th_x86_shift; \ uint32_t th_x86_hpet_idx; \ uint32_t th_res[6]; /* x86 names for the generic VDSO_TH_ALGO_n values stored in th_algo. */ #define VDSO_TH_ALGO_X86_TSC VDSO_TH_ALGO_1 #define VDSO_TH_ALGO_X86_HPET VDSO_TH_ALGO_2 +#define VDSO_TH_ALGO_X86_HVTSC VDSO_TH_ALGO_3 /* Hyper-V ref. TSC */ #ifdef _KERNEL #ifdef COMPAT_FREEBSD32 #define VDSO_TIMEHANDS_MD32 VDSO_TIMEHANDS_MD #endif #endif #endif