Page MenuHomeFreeBSD

D30859.1778512672.diff
No OneTemporary

Size
17 KB
Referenced Files
None
Subscribers
None

D30859.1778512672.diff

diff --git a/sys/kern/kern_racct.c b/sys/kern/kern_racct.c
--- a/sys/kern/kern_racct.c
+++ b/sys/kern/kern_racct.c
@@ -113,6 +113,10 @@
"struct proc *", "const struct buf *", "int");
SDT_PROBE_DEFINE3(racct, , rusage, add__cred,
"struct ucred *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, add__cred__checked,
+ "struct ucred *", "int", "uint64_t");
+SDT_PROBE_DEFINE3(racct, , rusage, add__cred__checked__failure,
+ "struct ucred *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, add__force,
"struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, , rusage, set,
@@ -628,7 +632,6 @@
void
racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
{
-
if (!racct_enable)
return;
@@ -639,6 +642,46 @@
RACCT_UNLOCK();
}
+/*
+ * Charge 'amount' of 'resource' to credential 'cred' unless a limit
+ * forbids it.  Invoked by racct_add_cred_checked() between RACCT_LOCK()
+ * and RACCT_UNLOCK().
+ *
+ * With RCTL compiled in, rctl_enforce_cred() is consulted first; if it
+ * objects and the resource is deniable, nothing is charged, the
+ * add__cred__checked__failure probe fires and the error is returned.
+ * In every other case the usage is recorded through
+ * racct_add_cred_locked() and 0 is returned.
+ */
+static int
+racct_add_cred_checked_locked(struct ucred *cred, int resource, uint64_t amount)
+{
+#ifdef RCTL
+	int denied;
+#endif
+
+	ASSERT_RACCT_ENABLED();
+
+#ifdef RCTL
+	denied = rctl_enforce_cred(cred, resource, amount);
+	if (denied != 0 && RACCT_IS_DENIABLE(resource)) {
+		SDT_PROBE3(racct, , rusage, add__cred__checked__failure,
+		    cred, resource, amount);
+		return (denied);
+	}
+#endif
+
+	/* Either allowed, or over the limit for an undeniable resource. */
+	racct_add_cred_locked(cred, resource, amount);
+	return (0);
+}
+
+/*
+ * Increase allocation of 'resource' by 'amount' for credential 'cred',
+ * honouring resource limits.  Returns 0 when accounting is disabled or
+ * the charge was accepted; otherwise returns the non-zero value produced
+ * by racct_add_cred_checked_locked(), in which case nothing was charged.
+ */
+int
+racct_add_cred_checked(struct ucred *cred, int resource, uint64_t amount)
+{
+	int rv = 0;
+
+	if (racct_enable) {
+		SDT_PROBE3(racct, , rusage, add__cred__checked, cred,
+		    resource, amount);
+		RACCT_LOCK();
+		rv = racct_add_cred_checked_locked(cred, resource, amount);
+		RACCT_UNLOCK();
+	}
+
+	return (rv);
+}
+
+
/*
* Account for disk IO resource consumption. Checks for limits,
* but never fails, due to disk limits being undeniable.
diff --git a/sys/kern/kern_rctl.c b/sys/kern/kern_rctl.c
--- a/sys/kern/kern_rctl.c
+++ b/sys/kern/kern_rctl.c
@@ -221,6 +221,8 @@
static int rctl_rule_fully_specified(const struct rctl_rule *rule);
static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
+static int rctl_enforce_racct(struct racct *racct, int resource, uint64_t amount,
+ struct ucred *cred);
static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
@@ -332,16 +334,13 @@
}
static struct racct *
-rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
+rctl_proc_rule_to_racct_cred(const struct ucred *cred,
+ const struct rctl_rule *rule)
{
- struct ucred *cred = p->p_ucred;
-
- ASSERT_RACCT_ENABLED();
- RACCT_LOCK_ASSERT();
+ KASSERT(rule->rr_per != RCTL_SUBJECT_TYPE_PROCESS,
+ ("rctl_proc_rule_to_racct_cred: cannot get process racct"));
switch (rule->rr_per) {
- case RCTL_SUBJECT_TYPE_PROCESS:
- return (p->p_racct);
case RCTL_SUBJECT_TYPE_USER:
return (cred->cr_ruidinfo->ui_racct);
case RCTL_SUBJECT_TYPE_LOGINCLASS:
@@ -353,6 +352,22 @@
}
}
+/*
+ * Select the racct container that 'rule' is accounted against for
+ * process 'p'.  Per-process rules use the process' own racct; every
+ * other subject type is resolved from the process credential by
+ * rctl_proc_rule_to_racct_cred().
+ */
+static struct racct *
+rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
+{
+
+	ASSERT_RACCT_ENABLED();
+	RACCT_LOCK_ASSERT();
+
+	if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS)
+		return (p->p_racct);
+
+	return (rctl_proc_rule_to_racct_cred(p->p_ucred, rule));
+}
+
+
/*
* Return the amount of resource that can be allocated by 'p' before
* hitting 'rule'.
@@ -372,6 +387,26 @@
return (available);
}
+/*
+ * Compute the headroom left under 'rule' for credential 'cred': the
+ * rule's limit minus the current usage of the rule's resource in the
+ * container picked by rctl_proc_rule_to_racct_cred().  The result is
+ * negative when usage is already above the limit.
+ */
+static int64_t
+rctl_available_resource_cred(const struct ucred *cred,
+    const struct rctl_rule *rule)
+{
+	const struct racct *container;
+
+	ASSERT_RACCT_ENABLED();
+	RACCT_LOCK_ASSERT();
+
+	container = rctl_proc_rule_to_racct_cred(cred, rule);
+
+	return (rule->rr_amount - container->r_resources[rule->rr_resource]);
+}
+
+
/*
* Called every second for proc, uidinfo, loginclass, and jail containers.
* If the limit isn't exceeded, it decreases the usage amount to zero.
@@ -489,6 +524,258 @@
return (a * b);
}
+/*
+ * Check whether the credential 'cred' can allocate 'amount' of 'resource' in
+ * addition to what it keeps allocated now.  Returns non-zero if the allocation
+ * should be denied, 0 otherwise.  Does not enforce rules whose actions require
+ * a process, i.e., throttle and sig*.
+ *
+ * The real-uid, login class and jail containers of the credential are all
+ * evaluated even after one of them has denied the request, so that every
+ * matching rule still gets to run its log/devctl action.  The first
+ * non-zero result is the one returned; results are deliberately not OR-ed
+ * together, because a bitwise OR of two different error codes is not a
+ * meaningful error code.
+ */
+int
+rctl_enforce_cred(struct ucred *cred, int resource, uint64_t amount)
+{
+	int error, rv;
+
+	ASSERT_RACCT_ENABLED();
+	RACCT_LOCK_ASSERT();
+
+	error = rctl_enforce_racct(cred->cr_ruidinfo->ui_racct,
+	    resource, amount, cred);
+
+	rv = rctl_enforce_racct(cred->cr_loginclass->lc_racct,
+	    resource, amount, cred);
+	if (error == 0)
+		error = rv;
+
+	rv = rctl_enforce_racct(cred->cr_prison->pr_prison_racct->prr_racct,
+	    resource, amount, cred);
+	if (error == 0)
+		error = rv;
+
+	return (error);
+}
+
+
+/*
+ * Print a console message reporting that 'rule' has been matched.
+ *
+ * Either a process 'p' or a credential 'cred' must be supplied.  When 'p'
+ * is given its credential is the one reported and the message also
+ * carries the pid and command name; otherwise only the uid and jail name
+ * taken from 'cred' are printed.
+ *
+ * Output is rate limited through ppsratecheck() using rctl_log_rate_limit,
+ * and the message is silently dropped (apart from an out-of-memory note)
+ * if the M_NOWAIT buffer allocation fails.
+ */
+static void
+rctl_log_handler(struct rctl_rule *rule, struct proc *p, struct ucred *cred)
+{
+	static struct timeval log_lasttime;
+	static int log_curtime = 0;
+	struct sbuf sb;
+	char *buf;
+
+	KASSERT(p != NULL || cred != NULL,
+	    ("%s: neither process nor credential given", __func__));
+
+	if (!ppsratecheck(&log_lasttime, &log_curtime,
+	    rctl_log_rate_limit))
+		return;
+
+	/* A process, when present, decides which credential is reported. */
+	if (p != NULL)
+		cred = p->p_ucred;
+
+	buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
+	if (buf == NULL) {
+		/* Name this helper; it is reached from more than one caller. */
+		printf("%s: out of memory\n", __func__);
+		return;
+	}
+	sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
+	rctl_rule_to_sbuf(&sb, rule);
+	sbuf_finish(&sb);
+	if (p != NULL) {
+		printf("rctl: rule \"%s\" matched by pid %d "
+		    "(%s), uid %d, jail %s\n", sbuf_data(&sb),
+		    p->p_pid, p->p_comm, cred->cr_uid,
+		    cred->cr_prison->pr_prison_racct->prr_name);
+	} else {
+		printf("rctl: rule \"%s\" matched by uid %d, jail %s\n",
+		    sbuf_data(&sb), cred->cr_uid,
+		    cred->cr_prison->pr_prison_racct->prr_name);
+	}
+	sbuf_delete(&sb);
+	free(buf, M_RCTL);
+}
+
+
+/*
+ * Send a devctl notification ("RCTL", "rule", "matched") reporting that
+ * 'rule' has been matched.
+ *
+ * Either a process 'p' or a credential 'cred' must be supplied.  When 'p'
+ * is given its credential is the one reported and the payload also
+ * carries the pid; otherwise only the real uid and jail name taken from
+ * 'cred' are included.
+ *
+ * Notifications are rate limited through ppsratecheck() using
+ * rctl_devctl_rate_limit, and are dropped (apart from an out-of-memory
+ * note on the console) if the M_NOWAIT buffer allocation fails.
+ */
+static void
+rctl_devctl_handler(struct rctl_rule *rule, struct proc *p, struct ucred *cred)
+{
+	static struct timeval devctl_lasttime;
+	static int devctl_curtime = 0;
+	struct sbuf sb;
+	char *buf;
+
+	KASSERT(p != NULL || cred != NULL,
+	    ("%s: neither process nor credential given", __func__));
+
+	if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
+	    rctl_devctl_rate_limit))
+		return;
+
+	/* A process, when present, decides which credential is reported. */
+	if (p != NULL)
+		cred = p->p_ucred;
+
+	buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
+	if (buf == NULL) {
+		/* Name this helper; it is reached from more than one caller. */
+		printf("%s: out of memory\n", __func__);
+		return;
+	}
+	sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
+	sbuf_printf(&sb, "rule=");
+	rctl_rule_to_sbuf(&sb, rule);
+	if (p != NULL) {
+		sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
+		    p->p_pid, cred->cr_ruid,
+		    cred->cr_prison->pr_prison_racct->prr_name);
+	} else {
+		sbuf_printf(&sb, " ruid=%d jail=%s", cred->cr_ruid,
+		    cred->cr_prison->pr_prison_racct->prr_name);
+	}
+	sbuf_finish(&sb);
+	devctl_notify("RCTL", "rule", "matched",
+	    sbuf_data(&sb));
+	sbuf_delete(&sb);
+	free(buf, M_RCTL);
+}
+
+
+/*
+ * Apply the throttle action of 'rule' to process 'p' for 'resource': work
+ * out a sleep duration and hand it to racct_proc_throttle().  A rule
+ * amount of zero throttles for rctl_throttle_max outright; otherwise the
+ * computed duration is capped at rctl_throttle_max.
+ */
+static void
+rctl_throttle_handler(struct rctl_rule *rule, int resource, struct proc * p)
+{
+ uint64_t sleep_ms, sleep_ratio;
+ int64_t available;
+
+ if (rule->rr_amount == 0) {
+ racct_proc_throttle(p, rctl_throttle_max);
+ return;
+ }
+
+ /*
+ * Make the process sleep for a fraction of second
+ * proportional to the ratio of process' resource
+ * utilization compared to the limit. The point is
+ * to penalize resource hogs: processes that consume
+ * more of the available resources sleep for longer.
+ *
+ * We're trying to defer division until the very end,
+ * to minimize the rounding effects. The following
+ * calculation could have been written in a clearer
+ * way like this:
+ *
+ * sleep_ms = hz * p->p_racct->r_resources[resource] /
+ * rule->rr_amount;
+ * sleep_ms *= rctl_throttle_pct / 100;
+ * if (sleep_ms < rctl_throttle_min)
+ * sleep_ms = rctl_throttle_min;
+ *
+ */
+ sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
+ sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100;
+ if (sleep_ms < rctl_throttle_min * rule->rr_amount)
+ sleep_ms = rctl_throttle_min * rule->rr_amount;
+
+ /*
+ * Multiply that by the ratio of the resource
+ * consumption for the container compared to the limit,
+ * squared. In other words, a process in a container
+ * that is two times over the limit will be throttled
+ * four times as much for hitting the same rule. The
+ * point is to penalize processes more if the container
+ * itself (eg certain UID or jail) is above the limit.
+ */
+ /* Headroom is looked up here rather than passed in by the caller. */
+ available = rctl_available_resource(p, rule);
+ if (available < 0)
+ sleep_ratio = -available / rule->rr_amount;
+ else
+ sleep_ratio = 0;
+ sleep_ratio = xmul(sleep_ratio, sleep_ratio);
+ sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
+ sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));
+
+ /*
+ * Finally the division.
+ */
+ sleep_ms /= rule->rr_amount;
+
+ if (sleep_ms > rctl_throttle_max)
+ sleep_ms = rctl_throttle_max;
+#if 0
+ printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n",
+ __func__, p->p_pid, p->p_comm,
+ p->p_racct->r_resources[resource],
+ rule->rr_amount, (uintmax_t)sleep_ms,
+ (uintmax_t)sleep_ratio, (intmax_t)available);
+#endif
+
+ KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
+ __func__, (uintmax_t)sleep_ms, rctl_throttle_min));
+ racct_proc_throttle(p, sleep_ms);
+}
+
+/*
+ * Deliver to process 'p' the signal selected by the action of 'rule'.
+ */
+static void
+rctl_sig_handler(struct rctl_rule *rule, struct proc *p)
+{
+	int signo;
+
+	/*
+	 * RCTL_ACTION_SIG* values are equal to their counterparts from
+	 * sys/signal.h, so the action itself serves as the signal number.
+	 */
+	signo = rule->rr_action;
+	KASSERT(signo > 0 && signo <= RCTL_ACTION_SIGNAL_MAX,
+	    ("rctl_sig: unknown action %d", signo));
+
+	kern_psignal(p, signo);
+}
+
+/*
+ * Walk the rules linked to container 'racct' and decide whether credential
+ * 'cred' may take 'amount' more of 'resource'.
+ *
+ * Rules for other resources and per-process rules are skipped.  A rule
+ * with enough headroom has its "exceeded" flag cleared.  For a rule that
+ * would be exceeded: deny rules cause a non-zero return, log and devctl
+ * rules fire once until the flag is cleared again, and every other action
+ * (throttle, signals) is ignored because no process is available here.
+ *
+ * Returns 0 if the allocation is allowed, or EDOOFUS as a placeholder
+ * that the caller is expected to turn into an error suited to the
+ * situation.
+ */
+static int
+rctl_enforce_racct(struct racct *racct, int resource, uint64_t amount,
+    struct ucred *cred)
+{
+	struct rctl_rule_link *lnk;
+	struct rctl_rule *r;
+	int deny;
+
+	ASSERT_RACCT_ENABLED();
+	RACCT_LOCK_ASSERT();
+
+	/*
+	 * Several rules may match; all of them are visited so that logging
+	 * and notifications happen before the denial is reported.
+	 */
+	deny = 0;
+	LIST_FOREACH(lnk, &racct->r_rule_links, rrl_next) {
+		r = lnk->rrl_rule;
+		if (r->rr_resource != resource ||
+		    r->rr_per == RCTL_SUBJECT_TYPE_PROCESS)
+			continue;
+
+		if (rctl_available_resource_cred(cred, r) >= (int64_t)amount) {
+			lnk->rrl_exceeded = 0;
+			continue;
+		}
+
+		switch (r->rr_action) {
+		case RCTL_ACTION_DENY:
+			deny = 1;
+			break;
+		case RCTL_ACTION_LOG:
+			/* A set flag means this rule was reported already. */
+			if (lnk->rrl_exceeded == 0) {
+				rctl_log_handler(r, NULL, cred);
+				lnk->rrl_exceeded = 1;
+			}
+			break;
+		case RCTL_ACTION_DEVCTL:
+			if (lnk->rrl_exceeded == 0) {
+				rctl_devctl_handler(r, NULL, cred);
+				lnk->rrl_exceeded = 1;
+			}
+			break;
+		default:
+			/* Throttle and signal actions need a process. */
+			break;
+		}
+	}
+
+	return (deny ? EDOOFUS : 0);
+}
+
+
/*
* Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
* to what it keeps allocated now. Returns non-zero if the allocation should
@@ -497,14 +784,9 @@
int
rctl_enforce(struct proc *p, int resource, uint64_t amount)
{
- static struct timeval log_lasttime, devctl_lasttime;
- static int log_curtime = 0, devctl_curtime = 0;
struct rctl_rule *rule;
struct rctl_rule_link *link;
- struct sbuf sb;
- char *buf;
int64_t available;
- uint64_t sleep_ms, sleep_ratio;
int should_deny = 0;
ASSERT_RACCT_ENABLED();
@@ -547,24 +829,7 @@
if (p->p_state != PRS_NORMAL)
continue;
- if (!ppsratecheck(&log_lasttime, &log_curtime,
- rctl_log_rate_limit))
- continue;
-
- buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
- if (buf == NULL) {
- printf("rctl_enforce: out of memory\n");
- continue;
- }
- sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
- rctl_rule_to_sbuf(&sb, rule);
- sbuf_finish(&sb);
- printf("rctl: rule \"%s\" matched by pid %d "
- "(%s), uid %d, jail %s\n", sbuf_data(&sb),
- p->p_pid, p->p_comm, p->p_ucred->cr_uid,
- p->p_ucred->cr_prison->pr_prison_racct->prr_name);
- sbuf_delete(&sb);
- free(buf, M_RCTL);
+ rctl_log_handler(rule, p, NULL);
link->rrl_exceeded = 1;
continue;
case RCTL_ACTION_DEVCTL:
@@ -574,96 +839,14 @@
if (p->p_state != PRS_NORMAL)
continue;
- if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
- rctl_devctl_rate_limit))
- continue;
-
- buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
- if (buf == NULL) {
- printf("rctl_enforce: out of memory\n");
- continue;
- }
- sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
- sbuf_printf(&sb, "rule=");
- rctl_rule_to_sbuf(&sb, rule);
- sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
- p->p_pid, p->p_ucred->cr_ruid,
- p->p_ucred->cr_prison->pr_prison_racct->prr_name);
- sbuf_finish(&sb);
- devctl_notify("RCTL", "rule", "matched",
- sbuf_data(&sb));
- sbuf_delete(&sb);
- free(buf, M_RCTL);
+ rctl_devctl_handler(rule, p, NULL);
link->rrl_exceeded = 1;
continue;
case RCTL_ACTION_THROTTLE:
if (p->p_state != PRS_NORMAL)
continue;
- if (rule->rr_amount == 0) {
- racct_proc_throttle(p, rctl_throttle_max);
- continue;
- }
-
- /*
- * Make the process sleep for a fraction of second
- * proportional to the ratio of process' resource
- * utilization compared to the limit. The point is
- * to penalize resource hogs: processes that consume
- * more of the available resources sleep for longer.
- *
- * We're trying to defer division until the very end,
- * to minimize the rounding effects. The following
- * calculation could have been written in a clearer
- * way like this:
- *
- * sleep_ms = hz * p->p_racct->r_resources[resource] /
- * rule->rr_amount;
- * sleep_ms *= rctl_throttle_pct / 100;
- * if (sleep_ms < rctl_throttle_min)
- * sleep_ms = rctl_throttle_min;
- *
- */
- sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
- sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100;
- if (sleep_ms < rctl_throttle_min * rule->rr_amount)
- sleep_ms = rctl_throttle_min * rule->rr_amount;
-
- /*
- * Multiply that by the ratio of the resource
- * consumption for the container compared to the limit,
- * squared. In other words, a process in a container
- * that is two times over the limit will be throttled
- * four times as much for hitting the same rule. The
- * point is to penalize processes more if the container
- * itself (eg certain UID or jail) is above the limit.
- */
- if (available < 0)
- sleep_ratio = -available / rule->rr_amount;
- else
- sleep_ratio = 0;
- sleep_ratio = xmul(sleep_ratio, sleep_ratio);
- sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
- sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));
-
- /*
- * Finally the division.
- */
- sleep_ms /= rule->rr_amount;
-
- if (sleep_ms > rctl_throttle_max)
- sleep_ms = rctl_throttle_max;
-#if 0
- printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n",
- __func__, p->p_pid, p->p_comm,
- p->p_racct->r_resources[resource],
- rule->rr_amount, (uintmax_t)sleep_ms,
- (uintmax_t)sleep_ratio, (intmax_t)available);
-#endif
-
- KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
- __func__, (uintmax_t)sleep_ms, rctl_throttle_min));
- racct_proc_throttle(p, sleep_ms);
+ rctl_throttle_handler(rule, resource, p);
continue;
default:
if (link->rrl_exceeded != 0)
@@ -672,16 +855,7 @@
if (p->p_state != PRS_NORMAL)
continue;
- KASSERT(rule->rr_action > 0 &&
- rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
- ("rctl_enforce: unknown action %d",
- rule->rr_action));
-
- /*
- * We're using the fact that RCTL_ACTION_SIG* values
- * are equal to their counterparts from sys/signal.h.
- */
- kern_psignal(p, rule->rr_action);
+ rctl_sig_handler(rule, p);
link->rrl_exceeded = 1;
continue;
}
diff --git a/sys/sys/racct.h b/sys/sys/racct.h
--- a/sys/sys/racct.h
+++ b/sys/sys/racct.h
@@ -176,6 +176,8 @@
int racct_add(struct proc *p, int resource, uint64_t amount);
void racct_add_cred(struct ucred *cred, int resource, uint64_t amount);
+int racct_add_cred_checked(struct ucred *cred, int resource,
+ uint64_t amount);
void racct_add_force(struct proc *p, int resource, uint64_t amount);
void racct_add_buf(struct proc *p, const struct buf *bufp, int is_write);
int racct_set(struct proc *p, int resource, uint64_t amount);
diff --git a/sys/sys/rctl.h b/sys/sys/rctl.h
--- a/sys/sys/rctl.h
+++ b/sys/sys/rctl.h
@@ -142,10 +142,12 @@
int rctl_rule_add(struct rctl_rule *rule);
int rctl_rule_remove(struct rctl_rule *filter);
int rctl_enforce(struct proc *p, int resource, uint64_t amount);
+int rctl_enforce_cred(struct ucred *cred, int resource, uint64_t amount);
void rctl_throttle_decay(struct racct *racct, int resource);
int64_t rctl_pcpu_available(const struct proc *p);
uint64_t rctl_get_limit(struct proc *p, int resource);
uint64_t rctl_get_available(struct proc *p, int resource);
+uint64_t rctl_get_available_cred(struct ucred *cred, int resource);
const char *rctl_resource_name(int resource);
void rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred);
int rctl_proc_fork(struct proc *parent, struct proc *child);

File Metadata

Mime Type
text/plain
Expires
Mon, May 11, 3:17 PM (10 h, 30 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
28685091
Default Alt Text
D30859.1778512672.diff (17 KB)

Event Timeline