Index: projects/hps_head/share/man/man9/timeout.9 =================================================================== --- projects/hps_head/share/man/man9/timeout.9 (revision 302743) +++ projects/hps_head/share/man/man9/timeout.9 (revision 302744) @@ -1,889 +1,889 @@ .\" $NetBSD: timeout.9,v 1.2 1996/06/23 22:32:34 pk Exp $ .\" .\" Copyright (c) 1996 The NetBSD Foundation, Inc. .\" All rights reserved. .\" .\" This code is derived from software contributed to The NetBSD Foundation .\" by Paul Kranenburg. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" .\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS .\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED .\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR .\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE .\" LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR .\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF .\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS .\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN .\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. 
.\" .\" $FreeBSD$ .\" -.Dd July 8, 2015 +.Dd July 13, 2015 .Dt TIMEOUT 9 .Os .Sh NAME .Nm callout_active , .Nm callout_deactivate , .Nm callout_async_drain , .Nm callout_drain , .Nm callout_handle_init , .Nm callout_init , .Nm callout_init_mtx , .Nm callout_init_rm , .Nm callout_init_rw , .Nm callout_pending , .Nm callout_reset , .Nm callout_reset_curcpu , .Nm callout_reset_on , .Nm callout_reset_sbt , .Nm callout_reset_sbt_curcpu , .Nm callout_reset_sbt_on , .Nm callout_schedule , .Nm callout_schedule_curcpu , .Nm callout_schedule_on , .Nm callout_schedule_sbt , .Nm callout_schedule_sbt_curcpu , .Nm callout_schedule_sbt_on , .Nm callout_stop , .Nm timeout , .Nm untimeout .Nd execute a function after a specified length of time .Sh SYNOPSIS .In sys/types.h .In sys/systm.h .Bd -literal typedef void timeout_t (void *); typedef void callout_func_t (void *); .Ed .Ft int .Fn callout_active "struct callout *c" .Ft void .Fn callout_deactivate "struct callout *c" .Ft int .Fn callout_async_drain "struct callout *c" "callout_func_t *drain" .Ft int .Fn callout_drain "struct callout *c" .Ft void .Fn callout_handle_init "struct callout_handle *handle" .Bd -literal struct callout_handle handle = CALLOUT_HANDLE_INITIALIZER(&handle); .Ed .Ft void .Fn callout_init "struct callout *c" "int mpsafe" .Ft void .Fn callout_init_mtx "struct callout *c" "struct mtx *mtx" "int flags" .Ft void .Fn callout_init_rm "struct callout *c" "struct rmlock *rm" "int flags" .Ft void .Fn callout_init_rw "struct callout *c" "struct rwlock *rw" "int flags" .Ft int .Fn callout_pending "struct callout *c" .Ft int .Fn callout_reset "struct callout *c" "int ticks" "timeout_t *func" "void *arg" .Ft int .Fn callout_reset_curcpu "struct callout *c" "int ticks" "timeout_t *func" \ "void *arg" .Ft int .Fn callout_reset_on "struct callout *c" "int ticks" "timeout_t *func" \ "void *arg" "int cpu" .Ft int .Fn callout_reset_sbt "struct callout *c" "sbintime_t sbt" \ "sbintime_t pr" "timeout_t *func" "void *arg" "int flags" .Ft int .Fn callout_reset_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ "sbintime_t pr" "timeout_t *func" "void *arg" "int flags" .Ft int .Fn callout_reset_sbt_on "struct callout *c" "sbintime_t sbt" \ "sbintime_t pr" "timeout_t *func" "void *arg" "int cpu" "int flags" .Ft int .Fn callout_schedule "struct callout *c" "int ticks" .Ft int .Fn callout_schedule_curcpu "struct callout *c" "int ticks" .Ft int .Fn callout_schedule_on "struct callout *c" "int ticks" "int cpu" .Ft int .Fn callout_schedule_sbt "struct callout *c" "sbintime_t sbt" \ "sbintime_t pr" "int flags" .Ft int .Fn callout_schedule_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ "sbintime_t pr" "int flags" .Ft int .Fn callout_schedule_sbt_on "struct callout *c" "sbintime_t sbt" \ "sbintime_t pr" "int cpu" "int flags" .Ft int .Fn callout_stop "struct callout *c" .Ft struct callout_handle .Fn timeout "timeout_t *func" "void *arg" "int ticks" .Ft void .Fn untimeout "timeout_t *func" "void *arg" "struct callout_handle handle" .Sh DESCRIPTION The .Nm callout API is used to schedule a one-time call to an arbitrary function at a specific time in the future. Consumers of this API are required to allocate a .Ft struct callout for each pending function invocation. The .Ft struct callout stores the full state about any pending function call and must be drained by a call to .Fn callout_drain or .Fn callout_async_drain before freeing. .Sh INITIALIZATION .Ft void .Fn callout_handle_init "struct callout_handle *handle" This function is deprecated. 
Please use .Fn callout_init instead. This function is used to prepare a .Ft struct callout_handle before it is used for the first time. If this function is called on a pending timeout, the pending timeout cannot be cancelled and the .Fn untimeout function will return as if no timeout was pending. .Pp .Fn CALLOUT_HANDLE_INITIALIZER "&handle" This macro is deprecated. This macro is used to statically initialize a .Ft struct callout_handle . Please use .Fn callout_init instead. .Pp .Ft void .Fn callout_init "struct callout *c" "int mpsafe" This function prepares a .Ft struct callout before it can be used. This function must not be used while the callout has a pending timeout. If the .Fa mpsafe argument is non-zero, the callback function will run unlocked and is considered multiprocessor safe, or "mpsafe". .Bf Sy It is entirely the application's responsibility not to call any .Fn callout_xxx functions, including the .Fn callout_drain function, simultaneously on the same callout when the .Fa mpsafe argument is non-zero. Otherwise, the behavior is undefined. Avoid simultaneous calls by obtaining an exclusive lock before calling any .Fn callout_xxx functions other than the .Fn callout_drain function. .Ef If the .Fa mpsafe argument is zero, the Giant mutex will be locked before the callback function is called. If the .Fa mpsafe argument is zero, the Giant mutex is expected to be locked when calling any .Fn callout_xxx functions which start and stop a callout other than the .Fn callout_drain function. .Pp .Ft void .Fn callout_init_mtx "struct callout *c" "struct mtx *mtx" "int flags" This function prepares a .Ft struct callout before it can be used. This function must not be used while the callout has a pending timeout. The .Fa mtx argument is a pointer to a valid spinlock type of mutex or a valid regular non-sleepable mutex which the callout subsystem will lock before calling the callback function. The specified mutex is expected to be locked when calling any .Fn callout_xxx functions which start and stop a callout other than the .Fn callout_drain function. Valid .Fa flags are: .Bl -tag -width ".Dv CALLOUT_RETURNUNLOCKED" .It Dv CALLOUT_RETURNUNLOCKED The callout function is assumed to have released the specified mutex before returning. .It Dv 0 The callout subsystem will release the specified mutex after the callout function has returned. .El .Pp .Ft void .Fn callout_init_rm "struct callout *c" "struct rmlock *rm" "int flags" This function is similar to .Fn callout_init_mtx , but it accepts a read-mostly type of lock. The read-mostly lock must not be initialized with the .Dv RM_SLEEPABLE flag. .Pp .Ft void .Fn callout_init_rw "struct callout *c" "struct rwlock *rw" "int flags" This function is similar to .Fn callout_init_mtx , but it accepts a read/write type of lock. .Sh SCHEDULING CALLOUTS .Ft struct callout_handle .Fn timeout "timeout_t *func" "void *arg" "int ticks" This function is deprecated. Please use .Fn callout_reset instead. This function schedules a call to .Fa func to take place after .Fa ticks Ns No /hz seconds. Non-positive values of .Fa ticks are silently converted to the value .Sq 1 . The .Fa func argument is a valid pointer to a function that takes a single .Fa void * argument. Upon invocation, the .Fa func function will receive .Fa arg as its only argument. The Giant lock is locked when the .Fa func function is invoked and should not be unlocked by this function.
The value returned by .Fn timeout is a .Ft struct callout_handle structure which can be used in conjunction with the .Fn untimeout function to request that a scheduled timeout be cancelled. As handles are recycled by the system, it is possible, although unlikely, that a handle from one invocation of .Fn timeout may match the handle of another invocation of .Fn timeout if both calls used the same function pointer and argument, and the first timeout has expired or been cancelled before the second call. Please ensure that the function and argument pointers are unique when using this function. .Pp .Ft int .Fn callout_reset "struct callout *c" "int ticks" "callout_func_t *func" "void *arg" This function is used to schedule or re-schedule a callout. This function first stops the callout given by the .Fa c argument, if it is pending, and then restarts it. The relative time until the timeout callback happens is given by the .Fa ticks argument. The number of ticks in a second is defined by .Dv hz and can vary from system to system. This function returns either .Dv CALLOUT_RET_STOPPED , .Dv CALLOUT_RET_CANCELLED or .Dv CALLOUT_RET_DRAINING . If the callout was scheduled regardless of being serviced or not, .Dv CALLOUT_RET_CANCELLED is returned. If the callout was stopped and is still being serviced .Dv CALLOUT_RET_DRAINING is returned. If the callout was stopped and is no longer being serviced .Dv CALLOUT_RET_STOPPED is returned. If a lock is associated with the callout given by the .Fa c argument and it is exclusively locked when this function is called, this function will always ensure that the previous callback function, if any, is never reached. In other words, the callout will be atomically restarted. Otherwise, there is no such guarantee. The callback function is given by .Fa func and its function argument is given by .Fa arg . .Pp .Ft int .Fn callout_reset_curcpu "struct callout *c" "int ticks" "callout_func_t *func" \ "void *arg" This function works like the .Fn callout_reset function, except that the callback function given by the .Fa func argument will be executed on the CPU which called this function. .Pp .Ft int .Fn callout_reset_on "struct callout *c" "int ticks" "callout_func_t *func" \ "void *arg" "int cpu" This function works like the .Fn callout_reset function, except that the callback function given by the .Fa func argument will be executed on the CPU given by the .Fa cpu argument. .Pp .Ft int .Fn callout_reset_sbt "struct callout *c" "sbintime_t sbt" \ "sbintime_t pr" "callout_func_t *func" "void *arg" "int flags" This function works like the .Fn callout_reset function, except that the relative or absolute time after which the timeout callback should happen is given by the .Fa sbt argument, with an acceptable precision window given by the .Fa pr argument. This function is used when high precision timeouts are needed. If .Fa sbt specifies a time in the past, the window is adjusted to start at the current time. A non-zero value for .Fa pr allows the callout subsystem to coalesce callouts scheduled close to each other into fewer timer interrupts, reducing processing overhead and power consumption. The .Fa flags argument may be non-zero to adjust the interpretation of the .Fa sbt and .Fa pr arguments: .Bl -tag -width ".Dv C_DIRECT_EXEC" .It Dv C_ABSOLUTE Handle the .Fa sbt argument as an absolute time since boot. By default, .Fa sbt is treated as a relative amount of time, similar to .Fa ticks .
.It Dv C_DIRECT_EXEC Run the handler directly from hardware interrupt context instead of from the softclock thread. This reduces latency and overhead, but puts more constraints on the callout function. Callout functions run in this context may use only spin mutexes for locking and should be as small as possible because they run with absolute priority. .It Fn C_PREL Specifies the relative event time precision as the binary logarithm of the time interval divided by the acceptable time deviation: a value of 1 means 1/2 of the interval, 2 means 1/4, and so on. Note that the larger of .Fa pr or this value is used as the length of the time window. Smaller values .Pq which result in larger time intervals allow the callout subsystem to aggregate more events in one timer interrupt. .It Dv C_HARDCLOCK Align the timeouts to .Fn hardclock calls if possible. .El .Pp .Ft int .Fn callout_reset_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ "sbintime_t pr" "callout_func_t *func" "void *arg" "int flags" This function works like .Fn callout_reset_sbt , except that the callback function given by the .Fa func argument will be executed on the CPU which called this function. .Pp .Ft int .Fn callout_reset_sbt_on "struct callout *c" "sbintime_t sbt" \ "sbintime_t pr" "callout_func_t *func" "void *arg" "int cpu" "int flags" This function works like .Fn callout_reset_sbt , except that the callback function given by .Fa func will be executed on the CPU given by .Fa cpu . .Pp .Ft int .Fn callout_schedule "struct callout *c" "int ticks" This function works like the .Fn callout_reset function, except that it re-uses the callback function and the callback argument already stored in the .Pq struct callout structure. .Pp .Ft int .Fn callout_schedule_curcpu "struct callout *c" "int ticks" This function works like the .Fn callout_reset_curcpu function, except that it re-uses the callback function and the callback argument already stored in the .Pq struct callout structure. .Pp .Ft int .Fn callout_schedule_on "struct callout *c" "int ticks" "int cpu" This function works like the .Fn callout_reset_on function, except that it re-uses the callback function and the callback argument already stored in the .Pq struct callout structure. .Pp .Ft int .Fn callout_schedule_sbt "struct callout *c" "sbintime_t sbt" \ "sbintime_t pr" "int flags" This function works like the .Fn callout_reset_sbt function, except that it re-uses the callback function and the callback argument already stored in the .Pq struct callout structure. .Pp .Ft int .Fn callout_schedule_sbt_curcpu "struct callout *c" "sbintime_t sbt" \ "sbintime_t pr" "int flags" This function works like the .Fn callout_reset_sbt_curcpu function, except that it re-uses the callback function and the callback argument already stored in the .Pq struct callout structure. .Pp .Ft int .Fn callout_schedule_sbt_on "struct callout *c" "sbintime_t sbt" \ "sbintime_t pr" "int cpu" "int flags" This function works like the .Fn callout_reset_sbt_on function, except that it re-uses the callback function and the callback argument already stored in the .Pq struct callout structure. .Sh CHECKING THE STATE OF CALLOUTS .Ft int .Fn callout_pending "struct callout *c" This function returns non-zero if the callout pointed to by the .Fa c argument is pending for callback. Otherwise this function returns zero. This function returns zero when inside the callout function if the callout is not re-scheduled.
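.Pp The following is a minimal sketch showing how the scheduling functions and the .Fn callout_pending macro described above might be combined with a lock-associated callout to implement a self re-arming one second poll routine. The .Vt mydev_softc structure and the .Fn mydev_poll and .Fn mydev_start functions are hypothetical and only serve as an illustration; error handling is omitted: .Bd -literal -offset indent
struct mydev_softc {
	struct mtx sc_mtx;
	struct callout sc_callout;
};

/*
 * Hypothetical periodic poll routine; the associated mutex is
 * held by the callout subsystem when this function is invoked.
 */
static void
mydev_poll(void *arg)
{
	struct mydev_softc *sc = arg;

	/* ... inspect hardware state here ... */

	/* Re-arm one second later, allowing 1/8 s of coalescing. */
	callout_reset_sbt(&sc->sc_callout, SBT_1S, 0,
	    mydev_poll, sc, C_PREL(3));
}

static void
mydev_start(struct mydev_softc *sc)
{
	mtx_init(&sc->sc_mtx, "mydev", NULL, MTX_DEF);
	callout_init_mtx(&sc->sc_callout, &sc->sc_mtx, 0);

	mtx_lock(&sc->sc_mtx);
	/* Only arm the callout if it is not already pending. */
	if (callout_pending(&sc->sc_callout) == 0)
		callout_reset_sbt(&sc->sc_callout, SBT_1S, 0,
		    mydev_poll, sc, C_PREL(3));
	mtx_unlock(&sc->sc_mtx);
}
.Ed .Pp Because the callout was associated with the mutex by .Fn callout_init_mtx , the .Fn mydev_poll function always runs with that mutex held and can safely re-arm itself.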
.Pp .Ft int .Fn callout_active "struct callout *c" This function is deprecated and returns non-zero if the callout pointed to by the .Fa c argument was scheduled in the past. Otherwise this function returns zero. This function also returns zero after the .Fn callout_deactivate or the .Fn callout_stop or the .Fn callout_drain or the .Fn callout_async_drain function is called on the same callout as given by the .Fa c argument. .Pp .Ft void .Fn callout_deactivate "struct callout *c" This function is deprecated and ensures that subsequent calls to the .Fn callout_active function return zero until the callout is scheduled again. .Sh STOPPING CALLOUTS .Ft void .Fn untimeout "timeout_t *func" "void *arg" "struct callout_handle handle" This function is deprecated and cancels the timeout associated with the .Fa handle argument, using the .Fa func and .Fa arg arguments to validate the handle. If the handle does not correspond to a timeout with the function .Fa func taking the argument .Fa arg , no action is taken. The .Fa handle must be initialized by a previous call to .Fn timeout , .Fn callout_handle_init or assigned the value of .Fn CALLOUT_HANDLE_INITIALIZER "&handle" before being passed to .Fn untimeout . The behavior of calling .Fn untimeout with an uninitialized handle is undefined. .Pp .Ft int .Fn callout_stop "struct callout *c" This function is used to stop a timeout function invocation associated with the callout pointed to by the .Fa c argument, in a non-blocking fashion. This function can be called multiple times in a row with no side effects, even if the callout is already stopped. This function, however, should not be called before the callout has been initialized. This function returns either .Dv CALLOUT_RET_STOPPED , .Dv CALLOUT_RET_CANCELLED or .Dv CALLOUT_RET_DRAINING . -If the callout is being serviced regardless of being scheduled or not, +If the callout was scheduled regardless of being serviced or not, +.Dv CALLOUT_RET_CANCELLED +is returned. +If the callout was stopped and is still being serviced .Dv CALLOUT_RET_DRAINING is returned. -If the callout is not being serviced and was already stopped +If the callout was stopped and is no longer being serviced .Dv CALLOUT_RET_STOPPED -is returned. -If the callout is not being serviced and was scheduled -.Dv CALLOUT_RET_CANCELLED is returned. If a lock is associated with the callout given by the .Fa c argument and it is exclusively locked when this function is called, the .Fn callout_stop function will always ensure that the callback function is never reached. In other words, the callout will be atomically stopped. Otherwise there is no such guarantee. .Sh DRAINING CALLOUTS .Ft int .Fn callout_drain "struct callout *c" This function works like the .Fn callout_stop function, except that it ensures that all callback functions have returned and there are no more references to the callout pointed to by the .Fa c argument inside the callout subsystem before it returns. This function also ensures that the lock, if any, associated with the callout is no longer being used. When this function returns, it is safe to free the callout structure pointed to by the .Fa c argument.
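.Pp Continuing the hypothetical .Fn mydev sketch from above, a typical teardown path first stops the callout while holding its associated lock and then drains it before the lock is destroyed or the storage holding the callout is freed: .Bd -literal -offset indent
static void
mydev_stop(struct mydev_softc *sc)
{
	/*
	 * Holding the associated lock ensures that a pending
	 * callback cannot be reached after callout_stop() returns.
	 */
	mtx_lock(&sc->sc_mtx);
	callout_stop(&sc->sc_callout);
	mtx_unlock(&sc->sc_mtx);

	/*
	 * Wait until no callback is running and the callout
	 * subsystem holds no more references to the callout.
	 */
	callout_drain(&sc->sc_callout);

	/* It is now safe to destroy the lock. */
	mtx_destroy(&sc->sc_mtx);
}
.Ed .Pp If blocking is not an option, the .Fn callout_async_drain function described below can be used instead.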
.Pp .Ft int .Fn callout_async_drain "struct callout *c" "callout_func_t *drain" This function is non-blocking and works like the .Fn callout_stop function, except that if it returns .Dv CALLOUT_RET_DRAINING the callback function pointed to by the .Fa drain argument will be called with the .Fa arg argument when all references to the callout pointed to by the .Fa c argument are gone. If this function returns .Dv CALLOUT_RET_DRAINING , it should not be called again until the drain function has been called. Note that when stopping multiple callouts that use the same lock, it is possible to get multiple return values of .Dv CALLOUT_RET_DRAINING and multiple calls to the .Fa drain function, depending upon which CPUs the callouts are running on. The .Fa drain function itself is called unlocked from the context of the completing callout, either softclock or hardclock, just like a callout itself. If the .Fn callout_drain or .Fn callout_async_drain functions are called while an asynchronous drain is pending, previously pending asynchronous drains might get cancelled. If this function returns a value different from .Dv CALLOUT_RET_DRAINING , it is safe to free the callout structure pointed to by the .Fa c argument right away. .Sh CALLOUT FUNCTION RETURN VALUES .Bl -tag -width ".Dv CALLOUT_RET_CANCELLED" .It CALLOUT_RET_DRAINING The callout cannot be stopped and needs to be drained. .It CALLOUT_RET_CANCELLED The callout was successfully stopped. .It CALLOUT_RET_STOPPED The callout was already stopped. .El .Sh CALLOUT FUNCTION RESTRICTIONS Callout functions must not sleep. They may not acquire sleepable locks, wait on condition variables, perform blocking allocation requests, or invoke any other action that might sleep. .Sh CALLOUT SUBSYSTEM INTERNALS The callout subsystem has its own set of spinlocks to protect its internal state. The callout subsystem provides a softclock thread for each CPU in the system. Callouts are assigned to a single CPU and are executed by the softclock thread for that CPU. Initially, callouts are assigned to CPU 0. Softclock threads are not pinned to their respective CPUs by default. The softclock thread for CPU 0 can be pinned to CPU 0 by setting the .Va kern.pin_default_swi loader tunable to a non-zero value. Softclock threads for CPUs other than zero can be pinned to their respective CPUs by setting the .Va kern.pin_pcpu_swi loader tunable to a non-zero value. .Sh AVOIDING RACE CONDITIONS The callout subsystem invokes callout functions from its own thread context. Without some kind of synchronization, it is possible that a callout function will be invoked concurrently with an attempt to stop or reset the callout by another thread. In particular, since callout functions typically acquire a lock as their first action, the callout function may have already been invoked, but is blocked waiting for that lock at the time that another thread tries to reset or stop the callout. .Pp There are three main techniques for addressing these synchronization concerns. The first approach is preferred as it is the simplest: .Bl -enum -offset indent .It Callouts can be associated with a specific lock when they are initialized by .Fn callout_init_mtx , .Fn callout_init_rm , or .Fn callout_init_rw . When a callout is associated with a lock, the callout subsystem acquires the lock before the callout function is invoked. This allows the callout subsystem to transparently handle races between callout cancellation, scheduling, and execution.
Note that the associated lock must be acquired before calling .Fn callout_stop or one of the .Fn callout_reset or .Fn callout_schedule functions to provide this safety. .Pp A callout initialized via .Fn callout_init with .Fa mpsafe set to zero is implicitly associated with the .Va Giant mutex. If .Va Giant is held when cancelling or rescheduling the callout, then its use will prevent races with the callout function. .It The return value from .Fn callout_stop .Po or the .Fn callout_reset and .Fn callout_schedule function families .Pc indicates whether or not the callout was removed. If it is known that the callout was set and the callout function has not yet executed, then a return value of zero indicates that the callout function is about to be called. For example: .Bd -literal -offset indent if (sc->sc_flags & SCFLG_CALLOUT_RUNNING) { if (callout_stop(&sc->sc_callout) == CALLOUT_RET_CANCELLED) { sc->sc_flags &= ~SCFLG_CALLOUT_RUNNING; /* successfully stopped */ } else { /* * callout has expired and callout * function is about to be executed */ } } .Ed .It The .Fn callout_pending , .Fn callout_active and .Fn callout_deactivate macros can be used together to work around the race conditions. When a callout's timeout is set, the callout subsystem marks the callout as both .Em active and .Em pending . When the timeout time arrives, the callout subsystem begins processing the callout by first clearing the .Em pending flag. It then invokes the callout function without changing the .Em active flag, and does not clear the .Em active flag even after the callout function returns. The mechanism described here requires the callout function itself to clear the .Em active flag using the .Fn callout_deactivate macro. The .Fn callout_stop and .Fn callout_drain functions always clear both the .Em active and .Em pending flags before returning. .Pp The callout function should first check the .Em pending flag and return without action if .Fn callout_pending returns non-zero. This indicates that the callout was rescheduled using .Fn callout_reset just before the callout function was invoked. If .Fn callout_active returns zero then the callout function should also return without action. This indicates that the callout has been stopped. Finally, the callout function should call .Fn callout_deactivate to clear the .Em active flag. For example: .Bd -literal -offset indent mtx_lock(&sc->sc_mtx); if (callout_pending(&sc->sc_callout)) { /* callout was reset */ mtx_unlock(&sc->sc_mtx); return; } if (!callout_active(&sc->sc_callout)) { /* callout was stopped */ mtx_unlock(&sc->sc_mtx); return; } callout_deactivate(&sc->sc_callout); /* rest of callout function */ .Ed .Pp Together with appropriate synchronization, such as the mutex used above, this approach permits the .Fn callout_stop and .Fn callout_reset functions to be used at any time without races. For example: .Bd -literal -offset indent mtx_lock(&sc->sc_mtx); callout_stop(&sc->sc_callout); /* The callout is effectively stopped now. */ .Ed .Pp If the callout is still pending then these functions operate normally, but if processing of the callout has already begun then the tests in the callout function cause it to return without further action. Synchronization between the callout function and other code ensures that stopping or resetting the callout will never be attempted while the callout function is past the .Fn callout_deactivate call. 
.Pp The above technique additionally ensures that the .Em active flag always reflects whether the callout is effectively enabled or disabled. If .Fn callout_active returns false, then the callout is effectively disabled, since even if the callout subsystem is actually just about to invoke the callout function, the callout function will return without action. .El .Pp There is one final race condition that must be considered when a callout is being stopped for the last time. In this case it may not be safe to let the callout function itself detect that the callout was stopped, since it may need to access data objects that have already been destroyed or recycled. To ensure that the callout is completely finished, a call to .Fn callout_drain should be used. In particular, a callout should always be drained prior to destroying its associated lock or releasing the storage for the callout structure. .Sh LEGACY API .Bf Sy The .Fn timeout and .Fn untimeout functions are a legacy API that will be removed in a future release. New code should not use these routines. .Ef .Sh HISTORY The current timeout and untimeout routines are based on the work of .An Adam M. Costello and .An George Varghese , published in a technical report entitled .%T "Redesigning the BSD Callout and Timer Facilities" and modified slightly for inclusion in .Fx by .An Justin T. Gibbs . The original work on the data structures used in this implementation was published by .An G. Varghese and .An A. Lauck in the paper .%T "Hashed and Hierarchical Timing Wheels: Data Structures for the Efficient Implementation of a Timer Facility" in the .%B "Proceedings of the 11th ACM Annual Symposium on Operating Systems Principles" . The current implementation replaces the long standing .Bx linked list callout mechanism which offered O(n) insertion and removal running time and did not generate or require handles for untimeout operations. Index: projects/hps_head/sys/kern/kern_timeout.c =================================================================== --- projects/hps_head/sys/kern/kern_timeout.c (revision 302743) +++ projects/hps_head/sys/kern/kern_timeout.c (revision 302744) @@ -1,1534 +1,1531 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_callout_profiling.h" #include "opt_ddb.h" #if defined(__arm__) #include "opt_timer.h" #endif #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #ifdef SMP #include #endif #ifndef NO_EVENTTIMERS DPCPU_DECLARE(sbintime_t, hardclocktime); #endif SDT_PROVIDER_DEFINE(callout_execute); SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *"); SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *"); #ifdef CALLOUT_PROFILING static int avg_depth[2]; SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth[0], 0, "Average number of items examined per softclock call. Units = 1/1000"); static int avg_gcalls[2]; SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls[0], 0, "Average number of Giant callouts made per softclock call. Units = 1/1000"); static int avg_lockcalls[2]; SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls[0], 0, "Average number of lock callouts made per softclock call. Units = 1/1000"); static int avg_mpcalls[2]; SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls[0], 0, "Average number of MP callouts made per softclock call. Units = 1/1000"); SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth[1], 0, "Average number of direct callouts examined per callout_process call. " "Units = 1/1000"); SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD, &avg_lockcalls[1], 0, "Average number of lock direct callouts made per " "callout_process call. Units = 1/1000"); SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls[1], 0, "Average number of MP direct callouts made per callout_process call. " "Units = 1/1000"); #endif static int ncallout; SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout, 0, "Number of entries in callwheel and size of timeout() preallocation"); #ifdef RSS static int pin_default_swi = 1; static int pin_pcpu_swi = 1; #else static int pin_default_swi = 0; static int pin_pcpu_swi = 0; #endif SYSCTL_INT(_kern, OID_AUTO, pin_default_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_default_swi, 0, "Pin the default (non-per-cpu) swi (shared with PCPU 0 swi)"); SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_pcpu_swi, 0, "Pin the per-CPU swis (except PCPU 0, which is also default"); /* * TODO: * allocate more timeout table slots when table overflows. 
*/ u_int callwheelsize, callwheelmask; struct callout_args { sbintime_t time; /* absolute time for the event */ sbintime_t precision; /* delta allowed wrt opt */ void *arg; /* function argument */ callout_func_t *func; /* function to call */ int flags; /* flags passed to callout_reset() */ int cpu; /* CPU we're scheduled on */ }; typedef void callout_mutex_op_t(struct lock_object *); struct callout_mutex_ops { callout_mutex_op_t *lock; callout_mutex_op_t *unlock; }; enum { CALLOUT_LC_UNUSED_0, CALLOUT_LC_UNUSED_1, CALLOUT_LC_UNUSED_2, CALLOUT_LC_UNUSED_3, CALLOUT_LC_SPIN, CALLOUT_LC_MUTEX, CALLOUT_LC_RW, CALLOUT_LC_RM, }; static void callout_mutex_op_none(struct lock_object *lock) { } static void callout_mutex_lock(struct lock_object *lock) { mtx_lock((struct mtx *)lock); } static void callout_mutex_unlock(struct lock_object *lock) { mtx_unlock((struct mtx *)lock); } static void callout_mutex_lock_spin(struct lock_object *lock) { mtx_lock_spin((struct mtx *)lock); } static void callout_mutex_unlock_spin(struct lock_object *lock) { mtx_unlock_spin((struct mtx *)lock); } static void callout_rm_wlock(struct lock_object *lock) { rm_wlock((struct rmlock *)lock); } static void callout_rm_wunlock(struct lock_object *lock) { rm_wunlock((struct rmlock *)lock); } static void callout_rw_wlock(struct lock_object *lock) { rw_wlock((struct rwlock *)lock); } static void callout_rw_wunlock(struct lock_object *lock) { rw_wunlock((struct rwlock *)lock); } static const struct callout_mutex_ops callout_mutex_ops[8] = { [CALLOUT_LC_UNUSED_0] = { .lock = callout_mutex_op_none, .unlock = callout_mutex_op_none, }, [CALLOUT_LC_UNUSED_1] = { .lock = callout_mutex_op_none, .unlock = callout_mutex_op_none, }, [CALLOUT_LC_UNUSED_2] = { .lock = callout_mutex_op_none, .unlock = callout_mutex_op_none, }, [CALLOUT_LC_UNUSED_3] = { .lock = callout_mutex_op_none, .unlock = callout_mutex_op_none, }, [CALLOUT_LC_SPIN] = { .lock = callout_mutex_lock_spin, .unlock = callout_mutex_unlock_spin, }, [CALLOUT_LC_MUTEX] = { .lock = callout_mutex_lock, .unlock = callout_mutex_unlock, }, [CALLOUT_LC_RW] = { .lock = callout_rw_wlock, .unlock = callout_rw_wunlock, }, [CALLOUT_LC_RM] = { .lock = callout_rm_wlock, .unlock = callout_rm_wunlock, }, }; static inline void callout_lock_client(int c_flags, struct lock_object *c_lock) { callout_mutex_ops[CALLOUT_GET_LC(c_flags)].lock(c_lock); } static inline void callout_unlock_client(int c_flags, struct lock_object *c_lock) { callout_mutex_ops[CALLOUT_GET_LC(c_flags)].unlock(c_lock); } /* * The callout CPU exec structure represent information necessary for * describing the state of callouts currently running on the CPU and * for handling deferred callout restarts. * * In particular, the first entry of the array cc_exec_entity holds * information for callouts running from the SWI thread context, while * the second one holds information for callouts running directly from * the hardware interrupt context. */ struct cc_exec { /* * The "cc_curr" points to the currently executing callout and * is protected by the "cc_lock" spinlock. If no callback is * currently executing it is equal to "NULL". */ struct callout *cc_curr; /* * The "cc_restart_args" structure holds the argument for a * deferred callback restart and is protected by the "cc_lock" * spinlock. The structure is only valid if "cc_restart" is * "true". If "cc_restart" is "false" the information in the * "cc_restart_args" structure shall be ignored. 
*/ struct callout_args cc_restart_args; bool cc_restart; /* * The "cc_cancel" variable allows the currently pending * callback to be atomically cancelled. This field is write * protected by the "cc_lock" spinlock. */ bool cc_cancel; /* * The "cc_drain_fn" points to a function which shall be * called when an asynchronous drain is performed. This field * is write protected by the "cc_lock" spinlock. */ callout_func_t *cc_drain_fn; /* * The following fields are used for callout profiling only: */ #ifdef CALLOUT_PROFILING int cc_depth; int cc_mpcalls; int cc_lockcalls; int cc_gcalls; #endif }; /* * There is one "struct callout_cpu" per CPU, holding all relevant * state for the callout processing thread on the individual CPU. */ struct callout_cpu { struct mtx_padalign cc_lock; struct cc_exec cc_exec_entity[2]; struct callout *cc_callout; struct callout_list *cc_callwheel; struct callout_list cc_directlist; struct callout_tailq cc_expireq; struct callout_slist cc_callfree; sbintime_t cc_firstevent; sbintime_t cc_lastscan; void *cc_cookie; char cc_ktr_event_name[20]; }; #define cc_exec_curr(cc, dir) (cc)->cc_exec_entity[(dir)].cc_curr #define cc_exec_restart_args(cc, dir) (cc)->cc_exec_entity[(dir)].cc_restart_args #define cc_exec_restart(cc, dir) (cc)->cc_exec_entity[(dir)].cc_restart #define cc_exec_cancel(cc, dir) (cc)->cc_exec_entity[(dir)].cc_cancel #define cc_exec_drain_fn(cc, dir) (cc)->cc_exec_entity[(dir)].cc_drain_fn #define cc_exec_depth(cc, dir) (cc)->cc_exec_entity[(dir)].cc_depth #define cc_exec_mpcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_mpcalls #define cc_exec_lockcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_lockcalls #define cc_exec_gcalls(cc, dir) (cc)->cc_exec_entity[(dir)].cc_gcalls #ifdef SMP struct callout_cpu cc_cpu[MAXCPU]; #define CPUBLOCK -1 #define CC_CPU(cpu) (&cc_cpu[(cpu)]) #define CC_SELF() CC_CPU(PCPU_GET(cpuid)) #else struct callout_cpu cc_cpu; #define CC_CPU(cpu) &cc_cpu #define CC_SELF() &cc_cpu #endif #define CC_LOCK(cc) mtx_lock_spin(&(cc)->cc_lock) #define CC_UNLOCK(cc) mtx_unlock_spin(&(cc)->cc_lock) #define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED) static int timeout_cpu; static void callout_cpu_init(struct callout_cpu *cc, int cpu); static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, const int direct); static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); /* * Kernel low level callwheel initialization called from cpu0 during * kernel startup: */ static void callout_callwheel_init(void *dummy) { struct callout_cpu *cc; /* * Calculate the size of the callout wheel and the preallocated * timeout() structures. * XXX: Clip callout to result of previous function of maxusers * maximum 384. This is still huge, but acceptable. */ memset(CC_CPU(0), 0, sizeof(cc_cpu)); ncallout = imin(16 + maxproc + maxfiles, 18508); TUNABLE_INT_FETCH("kern.ncallout", &ncallout); /* * Calculate callout wheel size, should be next power of two higher * than 'ncallout'. */ callwheelsize = 1 << fls(ncallout); callwheelmask = callwheelsize - 1; /* * Fetch whether we're pinning the swi's or not. */ TUNABLE_INT_FETCH("kern.pin_default_swi", &pin_default_swi); TUNABLE_INT_FETCH("kern.pin_pcpu_swi", &pin_pcpu_swi); /* * Only cpu0 handles timeout(9) and receives a preallocation. * * XXX: Once all timeout(9) consumers are converted this can * be removed. 
*/ timeout_cpu = PCPU_GET(cpuid); cc = CC_CPU(timeout_cpu); cc->cc_callout = malloc(ncallout * sizeof(struct callout), M_CALLOUT, M_WAITOK); callout_cpu_init(cc, timeout_cpu); } SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL); /* * Initialize the per-cpu callout structures. */ static void callout_cpu_init(struct callout_cpu *cc, int cpu) { struct callout *c; int i; mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE); SLIST_INIT(&cc->cc_callfree); cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize, M_CALLOUT, M_WAITOK); for (i = 0; i < callwheelsize; i++) LIST_INIT(&cc->cc_callwheel[i]); TAILQ_INIT(&cc->cc_expireq); LIST_INIT(&cc->cc_directlist); cc->cc_firstevent = SBT_MAX; snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name), "callwheel cpu %d", cpu); if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */ return; for (i = 0; i < ncallout; i++) { c = &cc->cc_callout[i]; callout_init(c, 0); c->c_flags |= CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } } #ifdef CALLOUT_PROFILING static inline void callout_clear_stats(struct callout_cpu *cc, const int direct) { cc_exec_depth(cc, direct) = 0; cc_exec_mpcalls(cc, direct) = 0; cc_exec_lockcalls(cc, direct) = 0; cc_exec_gcalls(cc, direct) = 0; } #endif #ifdef CALLOUT_PROFILING static inline void callout_update_stats(struct callout_cpu *cc, const int direct) { avg_depth[direct] += (cc_exec_depth(cc, direct) * 1000 - avg_depth[direct]) >> 8; avg_mpcalls[direct] += (cc_exec_mpcalls(cc, direct) * 1000 - avg_mpcalls[direct]) >> 8; avg_lockcalls[direct] += (cc_exec_lockcalls(cc, direct) * 1000 - avg_lockcalls[direct]) >> 8; avg_gcalls[direct] += (cc_exec_gcalls(cc, direct) * 1000 - avg_gcalls[direct]) >> 8; } #endif /* * Start standard softclock thread. */ static void start_softclock(void *dummy) { struct callout_cpu *cc; char name[MAXCOMLEN]; #ifdef SMP int cpu; struct intr_event *ie; #endif cc = CC_CPU(timeout_cpu); snprintf(name, sizeof(name), "clock (%d)", timeout_cpu); if (swi_add(&clk_intr_event, name, softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); if (pin_default_swi && (intr_event_bind(clk_intr_event, timeout_cpu) != 0)) { printf("%s: timeout clock couldn't be pinned to cpu %d\n", __func__, timeout_cpu); } #ifdef SMP CPU_FOREACH(cpu) { if (cpu == timeout_cpu) continue; cc = CC_CPU(cpu); cc->cc_callout = NULL; /* Only cpu0 handles timeout(9). */ callout_cpu_init(cc, cpu); snprintf(name, sizeof(name), "clock (%d)", cpu); ie = NULL; if (swi_add(&ie, name, softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); if (pin_pcpu_swi && (intr_event_bind(ie, cpu) != 0)) { printf("%s: per-cpu clock couldn't be pinned to " "cpu %d\n", __func__, cpu); } } #endif } SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL); #define CC_HASH_SHIFT 8 static inline u_int callout_hash(sbintime_t sbt) { return (sbt >> (32 - CC_HASH_SHIFT)); } static inline u_int callout_get_bucket(sbintime_t sbt) { return (callout_hash(sbt) & callwheelmask); } void callout_process(sbintime_t now) { struct callout *tmp; struct callout *next; struct callout_cpu *cc; struct callout_list *sc; sbintime_t first, last, max, tmp_max; uint32_t lookahead; u_int firstb, lastb, nowb; cc = CC_SELF(); CC_LOCK(cc); #ifdef CALLOUT_PROFILING callout_clear_stats(cc, 1); #endif /* Compute the buckets of the last scan and present times. 
*/ firstb = callout_hash(cc->cc_lastscan); cc->cc_lastscan = now; nowb = callout_hash(now); /* Compute the last bucket and minimum time of the bucket after it. */ if (nowb == firstb) lookahead = (SBT_1S / 16); else if (nowb - firstb == 1) lookahead = (SBT_1S / 8); else lookahead = (SBT_1S / 2); first = last = now; first += (lookahead / 2); last += lookahead; last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT)); lastb = callout_hash(last) - 1; max = last; /* * Check if we wrapped around the entire wheel from the last scan. * In case, we need to scan entirely the wheel for pending callouts. */ if (lastb - firstb >= callwheelsize) { lastb = firstb + callwheelsize - 1; if (nowb - firstb >= callwheelsize) nowb = lastb; } /* Iterate callwheel from firstb to nowb and then up to lastb. */ do { sc = &cc->cc_callwheel[firstb & callwheelmask]; /* Iterate all callouts in the current bucket */ LIST_FOREACH_SAFE(tmp, sc, c_links.le, next) { /* Run the callout if present time within allowed. */ if (tmp->c_time <= now) { /* Remove callout from bucket */ LIST_REMOVE(tmp, c_links.le); if (tmp->c_flags & CALLOUT_DIRECT) { /* Insert callout into direct list */ LIST_INSERT_HEAD(&cc->cc_directlist, tmp, c_links.le); } else { /* Insert callout into expired list */ TAILQ_INSERT_TAIL(&cc->cc_expireq, tmp, c_links.tqe); tmp->c_flags |= CALLOUT_PROCESSED; } continue; } /* Skip events from distant future. */ if (tmp->c_time >= max) continue; /* * Event minimal time is bigger than present maximal * time, so it cannot be aggregated. */ if (tmp->c_time > last) { lastb = nowb; continue; } /* Update first and last time, respecting this event. */ if (tmp->c_time < first) first = tmp->c_time; tmp_max = tmp->c_time + tmp->c_precision; if (tmp_max < last) last = tmp_max; } /* Proceed with the next bucket. */ firstb++; /* * Stop if we looked after present time and found * some event we can't execute at now. * Stop if we looked far enough into the future. */ } while (((int)(firstb - lastb)) <= 0); cc->cc_firstevent = last; #ifndef NO_EVENTTIMERS cpu_new_callout(curcpu, last, first); #endif /* * Check for expired direct callouts, if any: */ while ((tmp = LIST_FIRST(&cc->cc_directlist)) != NULL) { LIST_REMOVE(tmp, c_links.le); softclock_call_cc(tmp, cc, 1); } #ifdef CALLOUT_PROFILING callout_update_stats(cc, 1); #endif CC_UNLOCK(cc); /* * "swi_sched()" acquires the thread lock and we don't want to * call it having cc_lock held because it leads to a locking * order reversal issue. 
*/ if (!TAILQ_EMPTY(&cc->cc_expireq)) swi_sched(cc->cc_cookie, 0); } static struct callout_cpu * callout_lock(struct callout *c) { struct callout_cpu *cc; int cpu; for (;;) { cpu = c->c_cpu; #ifdef SMP if (cpu == CPUBLOCK) { cpu_spinwait(); continue; } #endif cc = CC_CPU(cpu); CC_LOCK(cc); if (cpu == c->c_cpu) break; CC_UNLOCK(cc); } return (cc); } static struct callout_cpu * callout_cc_add_locked(struct callout *c, struct callout_cpu *cc, struct callout_args *coa) { #ifndef NO_EVENTTIMERS sbintime_t sbt; #endif u_int bucket; CC_LOCK_ASSERT(cc); /* update flags before swapping locks, if any */ c->c_flags &= ~(CALLOUT_PROCESSED | CALLOUT_DIRECT); if (coa->flags & C_DIRECT_EXEC) c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | CALLOUT_DIRECT); else c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); #ifdef SMP /* only set the "c_cpu" if the CPU number changed and is valid */ if (c->c_cpu != coa->cpu && coa->cpu > CPUBLOCK && coa->cpu <= mp_maxid && !CPU_ABSENT(coa->cpu)) { /* * Avoid interrupts and preemption firing after the * callout CPU is blocked in order to avoid deadlocks * as the new thread may be willing to acquire the * callout CPU lock: */ c->c_cpu = CPUBLOCK; spinlock_enter(); CC_UNLOCK(cc); cc = CC_CPU(coa->cpu); CC_LOCK(cc); spinlock_exit(); c->c_cpu = coa->cpu; } #endif if (coa->time < cc->cc_lastscan) coa->time = cc->cc_lastscan; c->c_arg = coa->arg; c->c_func = coa->func; c->c_time = coa->time; c->c_precision = coa->precision; bucket = callout_get_bucket(c->c_time); CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", c, (int)(c->c_precision >> 32), (u_int)(c->c_precision & 0xffffffff)); LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); #ifndef NO_EVENTTIMERS /* * Inform the eventtimers(4) subsystem there's a new callout * that has been inserted, but only if really required. */ if (SBT_MAX - c->c_time < c->c_precision) c->c_precision = SBT_MAX - c->c_time; sbt = c->c_time + c->c_precision; if (sbt < cc->cc_firstevent) { cc->cc_firstevent = sbt; cpu_new_callout(c->c_cpu, sbt, c->c_time); } #endif return (cc); } static inline void callout_cc_del(struct callout *c, struct callout_cpu *cc) { c->c_func = NULL; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } static inline void softclock_call_cc(struct callout *c, struct callout_cpu *cc, const int direct) { callout_func_t *c_func; void *c_arg; struct lock_object *c_lock; int c_flags; #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbintime_t sbt1, sbt2; struct timespec ts2; static sbintime_t maxdt = 2 * SBT_1MS; /* 2 msec */ static timeout_t *lastfunc; #endif KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) == (CALLOUT_PENDING | CALLOUT_ACTIVE), ("softclock_call_cc: pend|act %p %x", c, c->c_flags)); c_lock = c->c_lock; c_func = c->c_func; c_arg = c->c_arg; c_flags = c->c_flags; /* remove pending bit */ c->c_flags &= ~CALLOUT_PENDING; /* reset our local state */ cc_exec_curr(cc, direct) = c; cc_exec_restart(cc, direct) = false; cc_exec_drain_fn(cc, direct) = NULL; if (c_lock != NULL) { cc_exec_cancel(cc, direct) = false; CC_UNLOCK(cc); /* unlocked region for switching locks */ callout_lock_client(c_flags, c_lock); /* * Check if the callout may have been cancelled while * we were switching locks. Even though the callout is * specifying a lock, it might not be certain this * lock is locked when starting and stopping callouts. 
*/ CC_LOCK(cc); if (cc_exec_cancel(cc, direct)) { callout_unlock_client(c_flags, c_lock); goto skip_cc_locked; } if (c_lock == &Giant.lock_object) { #ifdef CALLOUT_PROFILING cc_exec_gcalls(cc, direct)++; #endif CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p", c, c_func, c_arg); } else { #ifdef CALLOUT_PROFILING cc_exec_lockcalls(cc, direct)++; #endif CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p", c, c_func, c_arg); } } else { #ifdef CALLOUT_PROFILING cc_exec_mpcalls(cc, direct)++; #endif CTR3(KTR_CALLOUT, "callout %p func %p arg %p", c, c_func, c_arg); } /* The callout cannot be stopped now! */ cc_exec_cancel(cc, direct) = true; CC_UNLOCK(cc); /* unlocked region */ KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running", "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct); #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbt1 = sbinuptime(); #endif THREAD_NO_SLEEPING(); SDT_PROBE1(callout_execute, , , callout__start, c); c_func(c_arg); SDT_PROBE1(callout_execute, , , callout__end, c); THREAD_SLEEPING_OK(); #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbt2 = sbinuptime(); sbt2 -= sbt1; if (sbt2 > maxdt) { if (lastfunc != c_func || sbt2 > maxdt * 2) { ts2 = sbttots(sbt2); printf( "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n", c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec); } maxdt = sbt2; lastfunc = c_func; } #endif KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle"); CTR1(KTR_CALLOUT, "callout %p finished", c); /* * At this point the callback structure might have been freed, * so we need to check the previously copied value of * "c->c_flags": */ if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) callout_unlock_client(c_flags, c_lock); CC_LOCK(cc); skip_cc_locked: KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr")); cc_exec_curr(cc, direct) = NULL; /* Check if there is anything which needs draining */ if (cc_exec_drain_fn(cc, direct) != NULL) { /* * Unlock the CPU callout last, so that any use of * structures belonging to the callout are complete: */ CC_UNLOCK(cc); /* call drain function unlocked */ cc_exec_drain_fn(cc, direct)(c_arg); CC_LOCK(cc); } else if (c_flags & CALLOUT_LOCAL_ALLOC) { /* return callout back to freelist */ callout_cc_del(c, cc); } else if (cc_exec_restart(cc, direct)) { struct callout_cpu *new_cc; /* [re-]schedule callout, if any */ new_cc = callout_cc_add_locked(c, cc, &cc_exec_restart_args(cc, direct)); if (new_cc != cc) { /* switch locks back again */ CC_UNLOCK(new_cc); CC_LOCK(cc); } } } /* * The callout mechanism is based on the work of Adam M. Costello and * George Varghese, published in a technical report entitled "Redesigning * the BSD Callout and Timer Facilities" and modified slightly for inclusion * in FreeBSD by Justin T. Gibbs. The original work on the data structures * used in this implementation was published by G. Varghese and T. Lauck in * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for * the Efficient Implementation of a Timer Facility" in the Proceedings of * the 11th ACM Annual Symposium on Operating Systems Principles, * Austin, Texas Nov 1987. */ /* * Software (low priority) clock interrupt. * Run periodic events from timeout queue. 
*/ void softclock(void *arg) { struct callout_cpu *cc; struct callout *c; cc = (struct callout_cpu *)arg; CC_LOCK(cc); #ifdef CALLOUT_PROFILING callout_clear_stats(cc, 0); #endif while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); softclock_call_cc(c, cc, 0); } #ifdef CALLOUT_PROFILING callout_update_stats(cc, 0); #endif CC_UNLOCK(cc); } /* * timeout -- * Execute a function after a specified length of time. * * untimeout -- * Cancel previous timeout function call. * * callout_handle_init -- * Initialize a handle so that using it with untimeout is benign. * * See AT&T BCI Driver Reference Manual for specification. This * implementation differs from that one in that although an * identification value is returned from timeout, the original * arguments to timeout as well as the identifier are used to * identify entries for untimeout. */ struct callout_handle timeout(timeout_t *ftn, void *arg, int to_ticks) { struct callout_cpu *cc; struct callout *new; struct callout_handle handle; cc = CC_CPU(timeout_cpu); CC_LOCK(cc); /* Fill in the next free callout structure. */ new = SLIST_FIRST(&cc->cc_callfree); if (new == NULL) /* XXX Attempt to malloc first */ panic("timeout table full"); SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle); handle.callout = new; CC_UNLOCK(cc); callout_reset(new, to_ticks, ftn, arg); return (handle); } void untimeout(timeout_t *ftn, void *arg, struct callout_handle handle) { struct callout_cpu *cc; bool match; /* * Check for a handle that was initialized * by callout_handle_init, but never used * for a real timeout. */ if (handle.callout == NULL) return; cc = callout_lock(handle.callout); match = (handle.callout->c_func == ftn && handle.callout->c_arg == arg); CC_UNLOCK(cc); if (match) callout_stop(handle.callout); } void callout_handle_init(struct callout_handle *handle) { handle->callout = NULL; } #ifdef KTR static const char * callout_retvalstring(int retval) { switch (retval) { case CALLOUT_RET_DRAINING: return ("callout cannot be stopped and needs drain"); case CALLOUT_RET_CANCELLED: return ("callout was successfully stopped"); default: return ("callout was already stopped"); } } #endif static int callout_restart_async(struct callout *c, struct callout_args *coa, callout_func_t *drain_fn) { struct callout_cpu *cc; int retval; int direct; cc = callout_lock(c); /* Figure out if the callout is direct or not */ direct = ((c->c_flags & CALLOUT_DIRECT) != 0); /* * Check if the callback is currently scheduled for * completion: */ if (cc_exec_curr(cc, direct) == c) { /* * Try to prevent the callback from running by setting * the "cc_cancel" variable to "true". 
*/ if (drain_fn != NULL) { /* set drain function, if any */ cc_exec_drain_fn(cc, direct) = drain_fn; cc_exec_cancel(cc, direct) = true; retval = CALLOUT_RET_DRAINING; } else if (cc_exec_cancel(cc, direct) == false || cc_exec_restart(cc, direct) == true) { cc_exec_cancel(cc, direct) = true; - if (coa != NULL || c->c_lock != NULL) - retval = CALLOUT_RET_CANCELLED; - else - retval = CALLOUT_RET_DRAINING; + retval = CALLOUT_RET_CANCELLED; } else { retval = CALLOUT_RET_DRAINING; } /* * Prevent callback restart if "callout_drain_xxx()" * is being called or we are stopping the callout or * the callback was preallocated by us: */ if (cc_exec_drain_fn(cc, direct) != NULL || coa == NULL || (c->c_flags & CALLOUT_LOCAL_ALLOC) != 0) { CTR4(KTR_CALLOUT, "%s: %p func %p arg %p", callout_retvalstring(retval), c, c->c_func, c->c_arg); /* clear old flags, if any */ c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING | CALLOUT_PROCESSED); /* clear restart flag, if any */ cc_exec_restart(cc, direct) = false; } else { CTR4(KTR_CALLOUT, "%s: %p func %p arg %p", callout_retvalstring(retval), c, c->c_func, c->c_arg); /* get us back into the game */ c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); c->c_flags &= ~CALLOUT_PROCESSED; /* enable deferred restart */ cc_exec_restart(cc, direct) = true; /* store arguments for the deferred restart, if any */ cc_exec_restart_args(cc, direct) = *coa; } } else { /* stop callout */ if (c->c_flags & CALLOUT_PENDING) { /* * The callback has not yet been executed, and * we simply just need to unlink it: */ if ((c->c_flags & CALLOUT_PROCESSED) == 0) { LIST_REMOVE(c, c_links.le); } else { TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); } retval = CALLOUT_RET_CANCELLED; } else { retval = CALLOUT_RET_STOPPED; } CTR4(KTR_CALLOUT, "%s: %p func %p arg %p", callout_retvalstring(retval), c, c->c_func, c->c_arg); /* [re-]schedule callout, if any */ if (coa != NULL) { cc = callout_cc_add_locked(c, cc, coa); } else { /* clear old flags, if any */ c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING | CALLOUT_PROCESSED); /* return callback to pre-allocated list, if any */ if ((c->c_flags & CALLOUT_LOCAL_ALLOC) && retval != CALLOUT_RET_STOPPED) { callout_cc_del(c, cc); } } } CC_UNLOCK(cc); return (retval); } /* * New interface; clients allocate their own callout structures. * * callout_reset() - establish or change a timeout * callout_stop() - disestablish a timeout * callout_init() - initialize a callout structure so that it can * safely be passed to callout_reset() and callout_stop() * * defines three convenience macros: * * callout_active() - returns truth if callout has not been stopped, * drained, or deactivated since the last time the callout was * reset. * callout_pending() - returns truth if callout is still waiting for timeout * callout_deactivate() - marks the callout as having been serviced */ int callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, callout_func_t *ftn, void *arg, int cpu, int flags) { struct callout_args coa; /* store arguments for callout add function */ coa.func = ftn; coa.arg = arg; coa.precision = precision; coa.flags = flags; coa.cpu = cpu; /* compute the rest of the arguments needed */ if (coa.flags & C_ABSOLUTE) { coa.time = sbt; } else { sbintime_t pr; if ((coa.flags & C_HARDCLOCK) && (sbt < tick_sbt)) sbt = tick_sbt; if ((coa.flags & C_HARDCLOCK) || #ifdef NO_EVENTTIMERS sbt >= sbt_timethreshold) { coa.time = getsbinuptime(); /* Add safety belt for the case of hz > 1000. 
int
callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
    callout_func_t *ftn, void *arg, int cpu, int flags)
{
	struct callout_args coa;

	/* store arguments for callout add function */
	coa.func = ftn;
	coa.arg = arg;
	coa.precision = precision;
	coa.flags = flags;
	coa.cpu = cpu;

	/* compute the rest of the arguments needed */
	if (coa.flags & C_ABSOLUTE) {
		coa.time = sbt;
	} else {
		sbintime_t pr;

		if ((coa.flags & C_HARDCLOCK) && (sbt < tick_sbt))
			sbt = tick_sbt;
		if ((coa.flags & C_HARDCLOCK) ||
#ifdef NO_EVENTTIMERS
		    sbt >= sbt_timethreshold) {
			coa.time = getsbinuptime();

			/* Add safety belt for the case of hz > 1000. */
			coa.time += tc_tick_sbt - tick_sbt;
#else
		    sbt >= sbt_tickthreshold) {
			/*
			 * Obtain the time of the last hardclock() call on
			 * this CPU directly from the kern_clocksource.c.
			 * This value is per-CPU, but it is equal for all
			 * active ones.
			 */
#ifdef __LP64__
			coa.time = DPCPU_GET(hardclocktime);
#else
			spinlock_enter();
			coa.time = DPCPU_GET(hardclocktime);
			spinlock_exit();
#endif
#endif
			if ((coa.flags & C_HARDCLOCK) == 0)
				coa.time += tick_sbt;
		} else
			coa.time = sbinuptime();
		if (SBT_MAX - coa.time < sbt)
			coa.time = SBT_MAX;
		else
			coa.time += sbt;
		pr = ((C_PRELGET(coa.flags) < 0) ? sbt >> tc_precexp :
		    sbt >> C_PRELGET(coa.flags));
		if (pr > coa.precision)
			coa.precision = pr;
	}

	/* get callback started, if any */
	return (callout_restart_async(c, &coa, NULL));
}

/*
 * Common idioms that can be optimized in the future.
 */
int
callout_schedule_on(struct callout *c, int to_ticks, int cpu)
{
	return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu);
}

int
callout_schedule(struct callout *c, int to_ticks)
{
	return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu);
}

int
callout_stop(struct callout *c)
{
	/* get callback stopped, if any */
	return (callout_restart_async(c, NULL, NULL));
}

static void
callout_drain_function(void *arg)
{
	wakeup(&callout_drain_function);
}

int
callout_async_drain(struct callout *c, callout_func_t *fn)
{
	/* get callback stopped, if any */
	return (callout_restart_async(c, NULL, fn));
}

int
callout_drain(struct callout *c)
{
	int retval;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
	    "Draining callout");

	callout_lock_client(c->c_flags, c->c_lock);

	/* at this point the "c->c_cpu" field is not changing */

	retval = callout_async_drain(c, &callout_drain_function);

	if (retval == CALLOUT_RET_DRAINING) {
		struct callout_cpu *cc;
		int direct;

		CTR3(KTR_CALLOUT, "need to drain %p func %p arg %p",
		    c, c->c_func, c->c_arg);

		cc = callout_lock(c);
		direct = ((c->c_flags & CALLOUT_DIRECT) != 0);

		/*
		 * We've gotten our callout CPU lock, it is safe to
		 * drop the initial lock:
		 */
		callout_unlock_client(c->c_flags, c->c_lock);

		/* Wait for drain to complete */
		while (cc_exec_curr(cc, direct) == c) {
			msleep_spin(&callout_drain_function,
			    (struct mtx *)&cc->cc_lock, "codrain", 0);
		}

		CC_UNLOCK(cc);
	} else {
		callout_unlock_client(c->c_flags, c->c_lock);
	}

	CTR4(KTR_CALLOUT, "%s: %p func %p arg %p",
	    callout_retvalstring(retval),
	    c, c->c_func, c->c_arg);

	return (retval);
}
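/*
 * Illustrative teardown sketch (hypothetical "foo" names, continuing the
 * example further above): before the callout storage or its associated
 * lock may be freed, the callout has to be drained.  callout_drain() may
 * sleep until a running handler has finished; callout_async_drain()
 * instead registers a drain function that is called once the handler is
 * done, which suits contexts that must not sleep.
 *
 *	static void
 *	foo_detach(struct foo_softc *sc)
 *	{
 *		callout_drain(&sc->foo_timer);
 *		mtx_destroy(&sc->foo_mtx);
 *		free(sc, M_DEVBUF);
 *	}
 */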
void
callout_init(struct callout *c, int mpsafe)
{
	if (mpsafe) {
		_callout_init_lock(c, NULL, CALLOUT_RETURNUNLOCKED);
	} else {
		_callout_init_lock(c, &Giant.lock_object, 0);
	}
}

void
_callout_init_lock(struct callout *c, struct lock_object *lock, int flags)
{
	bzero(c, sizeof *c);
	KASSERT((flags & ~CALLOUT_RETURNUNLOCKED) == 0,
	    ("callout_init_lock: bad flags 0x%08x", flags));
	flags &= CALLOUT_RETURNUNLOCKED;
	if (lock != NULL) {
		struct lock_class *class = LOCK_CLASS(lock);
		if (class == &lock_class_mtx_sleep)
			flags |= CALLOUT_SET_LC(CALLOUT_LC_MUTEX);
		else if (class == &lock_class_mtx_spin)
			flags |= CALLOUT_SET_LC(CALLOUT_LC_SPIN);
		else if (class == &lock_class_rm)
			flags |= CALLOUT_SET_LC(CALLOUT_LC_RM);
		else if (class == &lock_class_rw)
			flags |= CALLOUT_SET_LC(CALLOUT_LC_RW);
		else
			panic("callout_init_lock: Unsupported lock class '%s'\n",
			    class->lc_name);
	} else {
		flags |= CALLOUT_SET_LC(CALLOUT_LC_UNUSED_0);
	}
	c->c_lock = lock;
	c->c_flags = flags;
	c->c_cpu = timeout_cpu;
}

#ifdef APM_FIXUP_CALLTODO
/*
 * Adjust the kernel calltodo timeout list.  This routine is used after
 * an APM resume to recalculate the calltodo timer list values with the
 * number of ticks we have been sleeping.  The next hardclock() will detect
 * that there are fired timers and run softclock() to execute them.
 *
 * Please note, I have not done an exhaustive analysis of what code this
 * might break.  I am motivated to have my select()'s and alarm()'s that
 * have expired during suspend firing upon resume so that the applications
 * which set the timer can do the maintenance the timer was for as close
 * as possible to the originally intended time.  Testing this code for a
 * week showed that resuming from a suspend resulted in 22 to 25 timers
 * firing, which seemed independent of whether the suspend was 2 hours or
 * 2 days.  Your mileage may vary.   - Ken Key
 */
void
adjust_timeout_calltodo(struct timeval *time_change)
{
	register struct callout *p;
	unsigned long delta_ticks;

	/*
	 * How many ticks were we asleep?
	 * (stolen from tvtohz()).
	 */

	/* Don't do anything */
	if (time_change->tv_sec < 0)
		return;
	else if (time_change->tv_sec <= LONG_MAX / 1000000)
		delta_ticks = howmany(time_change->tv_sec * 1000000 +
		    time_change->tv_usec, tick) + 1;
	else if (time_change->tv_sec <= LONG_MAX / hz)
		delta_ticks = time_change->tv_sec * hz +
		    howmany(time_change->tv_usec, tick) + 1;
	else
		delta_ticks = LONG_MAX;

	if (delta_ticks > INT_MAX)
		delta_ticks = INT_MAX;

	/*
	 * Now rip through the timer calltodo list looking for timers
	 * to expire.
	 */

	/* don't collide with softclock() */
	CC_LOCK(cc);
	for (p = calltodo.c_next; p != NULL; p = p->c_next) {
		p->c_time -= delta_ticks;

		/* Break if the timer had more time on it than delta_ticks */
		if (p->c_time > 0)
			break;

		/* take back the ticks the timer didn't use (p->c_time <= 0) */
		delta_ticks = -p->c_time;
	}
	CC_UNLOCK(cc);

	return;
}
#endif /* APM_FIXUP_CALLTODO */

static int
flssbt(sbintime_t sbt)
{

	sbt += (uint64_t)sbt >> 1;
	if (sizeof(long) >= sizeof(sbintime_t))
		return (flsl(sbt));
	if (sbt >= SBT_1S)
		return (flsl(((uint64_t)sbt) >> 32) + 32);
	return (flsl(sbt));
}
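/*
 * Worked example of the bucketing performed by flssbt() above: for an
 * sbintime_t of exactly one second (SBT_1S == 1 << 32), adding half of
 * the value gives 1.5 * 2^32, for which flsl() returns 33, so one-second
 * values are counted in histogram slot 33.  The reporting loop below
 * labels slot i with (sbintime_t)1 << (i - 1), so that slot prints as
 * "1.000000s"; each slot collects values from roughly 2/3 to 4/3 of its
 * printed label.
 */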
/*
 * Dump immediate statistic snapshot of the scheduled callouts.
 */
static int
sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS)
{
	struct callout *tmp;
	struct callout_cpu *cc;
	struct callout_list *sc;
	sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t;
	int ct[64], cpr[64], ccpbk[32];
	int error, val, i, count, tcum, pcum, maxc, c, medc;
#ifdef SMP
	int cpu;
#endif

	val = 0;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	count = maxc = 0;
	st = spr = maxt = maxpr = 0;
	bzero(ccpbk, sizeof(ccpbk));
	bzero(ct, sizeof(ct));
	bzero(cpr, sizeof(cpr));
	now = sbinuptime();
#ifdef SMP
	CPU_FOREACH(cpu) {
		cc = CC_CPU(cpu);
#else
		cc = CC_CPU(timeout_cpu);
#endif
		CC_LOCK(cc);
		for (i = 0; i < callwheelsize; i++) {
			sc = &cc->cc_callwheel[i];
			c = 0;
			LIST_FOREACH(tmp, sc, c_links.le) {
				c++;
				t = tmp->c_time - now;
				if (t < 0)
					t = 0;
				st += t / SBT_1US;
				spr += tmp->c_precision / SBT_1US;
				if (t > maxt)
					maxt = t;
				if (tmp->c_precision > maxpr)
					maxpr = tmp->c_precision;
				ct[flssbt(t)]++;
				cpr[flssbt(tmp->c_precision)]++;
			}
			if (c > maxc)
				maxc = c;
			ccpbk[fls(c + c / 2)]++;
			count += c;
		}
		CC_UNLOCK(cc);
#ifdef SMP
	}
#endif

	for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++)
		tcum += ct[i];
	medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
	for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++)
		pcum += cpr[i];
	medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
	for (i = 0, c = 0; i < 32 && c < count / 2; i++)
		c += ccpbk[i];
	medc = (i >= 2) ? (1 << (i - 2)) : 0;

	printf("Scheduled callouts statistic snapshot:\n");
	printf("  Callouts: %6d  Buckets: %6d*%-3d  Bucket size: 0.%06ds\n",
	    count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT);
	printf("  C/Bk: med %5d         avg %6d.%06jd  max %6d\n",
	    medc,
	    count / callwheelsize / mp_ncpus,
	    (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000,
	    maxc);
	printf("  Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
	    medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32,
	    (st / count) / 1000000, (st / count) % 1000000,
	    maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32);
	printf("  Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
	    medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32,
	    (spr / count) / 1000000, (spr / count) % 1000000,
	    maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32);
	printf("  Distribution:       \tbuckets\t   time\t   tcum\t"
	    "   prec\t   pcum\n");
	for (i = 0, tcum = pcum = 0; i < 64; i++) {
		if (ct[i] == 0 && cpr[i] == 0)
			continue;
		t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0;
		tcum += ct[i];
		pcum += cpr[i];
		printf("  %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n",
		    t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32,
		    i - 1 - (32 - CC_HASH_SHIFT),
		    ct[i], tcum, cpr[i], pcum);
	}
	return (error);
}

SYSCTL_PROC(_kern, OID_AUTO, callout_stat,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    0, 0, sysctl_kern_callout_stat, "I",
    "Dump immediate statistic snapshot of the scheduled callouts");

#ifdef DDB
static void
_show_callout(struct callout *c)
{

	db_printf("callout %p\n", c);
#define	C_DB_PRINTF(f, e)	db_printf("   %s = " f "\n", #e, c->e);
	db_printf("   &c_links = %p\n", &(c->c_links));
	C_DB_PRINTF("%" PRId64,	c_time);
	C_DB_PRINTF("%" PRId64,	c_precision);
	C_DB_PRINTF("%p",	c_arg);
	C_DB_PRINTF("%p",	c_func);
	C_DB_PRINTF("%p",	c_lock);
	C_DB_PRINTF("%#x",	c_flags);
	C_DB_PRINTF("%d",	c_cpu);
#undef	C_DB_PRINTF
}

DB_SHOW_COMMAND(callout, db_show_callout)
{

	if (!have_addr) {
		db_printf("usage: show callout <struct callout *>\n");
		return;
	}

	_show_callout((struct callout *)addr);
}
#endif /* DDB */
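/*
 * Usage sketch: the snapshot above is produced on the console when an
 * integer is written to the read/write sysctl node, for example:
 *
 *	sysctl kern.callout_stat=1
 *
 * From DDB, an individual callout structure can be inspected by address
 * with the command registered above:
 *
 *	db> show callout <address of struct callout>
 */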