patch setfrontbackdq
patch cpu-pause-func-deglobalize

--- old/usr/src/uts/common/disp/disp.c
+++ new/usr/src/uts/common/disp/disp.c
(332 lines elided)
 333  333           */
 334  334          i = 0;
 335  335          cpup = cpu_list;
 336  336          do {
 337  337                  disp_dq_alloc(&disp_mem[i], numpris, cpup->cpu_disp);
 338  338                  i++;
 339  339                  cpup = cpup->cpu_next;
 340  340          } while (cpup != cpu_list);
 341  341          num = i;
 342  342  
 343      -        pause_cpus(NULL);
      343 +        pause_cpus(NULL, NULL);
 344  344          for (i = 0; i < num; i++)
 345  345                  disp_dq_assign(&disp_mem[i], numpris);
 346  346          start_cpus();
 347  347  
 348  348          /*
 349  349           * I must free all of the memory after starting the cpus because
 350  350           * I can not risk sleeping in kmem_free while the cpus are stopped.
 351  351           */
 352  352          for (i = 0; i < num; i++)
 353  353                  disp_dq_free(&disp_mem[i]);
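
Reviewer note: the pause_cpus(NULL, NULL) call above comes from the cpu-pause-func-deglobalize patch, which passes the per-pause callback as an explicit argument instead of routing it through a global, so callers with no extra work to do simply pass NULL. The sketch below is a minimal, self-contained illustration of that refactoring pattern only; the names (pause_hook_t, pause_all_old, pause_all_new, flush_something) are hypothetical and it does not reproduce the actual pause_cpus() signature.

#include <stddef.h>
#include <stdio.h>

/* Hypothetical callback type; not the kernel's. */
typedef void (*pause_hook_t)(void *);

/* Before: the hook lives in a global that callers must set and clear. */
static pause_hook_t pause_hook_global;

static void
pause_all_old(void *arg)
{
        if (pause_hook_global != NULL)
                pause_hook_global(arg);
        /* ... stop the other CPUs ... */
}

/* After: the hook is a parameter; NULL means "no per-pause work". */
static void
pause_all_new(void *arg, pause_hook_t hook)
{
        if (hook != NULL)
                hook(arg);
        /* ... stop the other CPUs ... */
}

static void
flush_something(void *arg)
{
        (void) arg;
        (void) printf("per-pause hook ran\n");
}

int
main(void)
{
        pause_hook_global = NULL;
        pause_all_old(NULL);                    /* old style: global hook */
        pause_all_new(NULL, NULL);              /* new style, no hook */
        pause_all_new(NULL, flush_something);   /* new style, with hook */
        return (0);
}

Passing NULL for the hook, as in the hunk above, is simply the no-callback case of the new interface.
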
(808 lines elided)
1162 1162  /*
1163 1163   * Macro that evaluates to true if it is likely that the thread has cache
1164 1164   * warmth. This is based on the amount of time that has elapsed since the
1165 1165   * thread last ran. If that amount of time is less than "rechoose_interval"
1166 1166   * ticks, then we decide that the thread has enough cache warmth to warrant
1167 1167   * some affinity for t->t_cpu.
1168 1168   */
1169 1169  #define THREAD_HAS_CACHE_WARMTH(thread) \
1170 1170          ((thread == curthread) ||       \
1171 1171          ((ddi_get_lbolt() - thread->t_disp_time) <= rechoose_interval))
     1172 +
1172 1173  /*
1173      - * Put the specified thread on the back of the dispatcher
1174      - * queue corresponding to its current priority.
     1174 + * Put the specified thread on the front/back of the dispatcher queue
     1175 + * corresponding to its current priority.
1175 1176   *
1176      - * Called with the thread in transition, onproc or stopped state
1177      - * and locked (transition implies locked) and at high spl.
1178      - * Returns with the thread in TS_RUN state and still locked.
     1177 + * Called with the thread in transition, onproc or stopped state and locked
     1178 + * (transition implies locked) and at high spl.  Returns with the thread in
     1179 + * TS_RUN state and still locked.
1179 1180   */
1180      -void
1181      -setbackdq(kthread_t *tp)
     1181 +static void
     1182 +setfrontbackdq(kthread_t *tp, boolean_t front)
1182 1183  {
1183      -        dispq_t *dq;
     1184 +        dispq_t         *dq;
1184 1185          disp_t          *dp;
1185 1186          cpu_t           *cp;
1186 1187          pri_t           tpri;
1187      -        int             bound;
     1188 +        boolean_t       bound;
1188 1189          boolean_t       self;
1189 1190  
1190 1191          ASSERT(THREAD_LOCK_HELD(tp));
1191 1192          ASSERT((tp->t_schedflag & TS_ALLSTART) == 0);
1192 1193          ASSERT(!thread_on_queue(tp));   /* make sure tp isn't on a runq */
1193 1194  
1194 1195          /*
1195 1196           * If thread is "swapped" or on the swap queue don't
1196 1197           * queue it, but wake sched.
1197 1198           */
1198 1199          if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) {
1199 1200                  disp_swapped_setrun(tp);
1200 1201                  return;
1201 1202          }
1202 1203  
1203      -        self = (tp == curthread);
1204      -
1205      -        if (tp->t_bound_cpu || tp->t_weakbound_cpu)
1206      -                bound = 1;
1207      -        else
1208      -                bound = 0;
     1204 +        self  = (tp == curthread);
     1205 +        bound = (tp->t_bound_cpu || tp->t_weakbound_cpu);
1209 1206  
1210 1207          tpri = DISP_PRIO(tp);
1211 1208          if (ncpus == 1)
1212 1209                  cp = tp->t_cpu;
1213 1210          else if (!bound) {
1214 1211                  if (tpri >= kpqpri) {
1215      -                        setkpdq(tp, SETKP_BACK);
     1212 +                        setkpdq(tp, front ? SETKP_FRONT : SETKP_BACK);
1216 1213                          return;
1217 1214                  }
1218 1215  
1219      -                /*
1220      -                 * We'll generally let this thread continue to run where
1221      -                 * it last ran...but will consider migration if:
1222      -                 * - We thread probably doesn't have much cache warmth.
1223      -                 * - The CPU where it last ran is the target of an offline
1224      -                 *   request.
1225      -                 * - The thread last ran outside it's home lgroup.
1226      -                 */
1227      -                if ((!THREAD_HAS_CACHE_WARMTH(tp)) ||
1228      -                    (tp->t_cpu == cpu_inmotion)) {
1229      -                        cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, NULL);
1230      -                } else if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) {
1231      -                        cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1232      -                            self ? tp->t_cpu : NULL);
1233      -                } else {
1234      -                        cp = tp->t_cpu;
1235      -                }
1236      -
1237      -                if (tp->t_cpupart == cp->cpu_part) {
1238      -                        int     qlen;
     1216 +                cp = tp->t_cpu;
1239 1217  
     1218 +                if (!front) {
1240 1219                          /*
1241      -                         * Perform any CMT load balancing
     1220 +                         * We'll generally let this thread continue to run where
     1221 +                         * it last ran...but will consider migration if:
     1222 +                 * - The thread probably doesn't have much cache warmth.
     1223 +                         * - The CPU where it last ran is the target of an offline
     1224 +                         *   request.
     1225 +                 * - The thread last ran outside its home lgroup.
1242 1226                           */
1243      -                        cp = cmt_balance(tp, cp);
     1227 +                        if ((!THREAD_HAS_CACHE_WARMTH(tp)) || (cp == cpu_inmotion)) {
     1228 +                                cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, NULL);
     1229 +                        } else if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) {
     1230 +                                cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
     1231 +                                    self ? tp->t_cpu : NULL);
     1232 +                        }
1244 1233  
1245      -                        /*
1246      -                         * Balance across the run queues
1247      -                         */
1248      -                        qlen = RUNQ_LEN(cp, tpri);
1249      -                        if (tpri >= RUNQ_MATCH_PRI &&
1250      -                            !(tp->t_schedflag & TS_RUNQMATCH))
1251      -                                qlen -= RUNQ_MAX_DIFF;
1252      -                        if (qlen > 0) {
1253      -                                cpu_t *newcp;
1254      -
1255      -                                if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID) {
1256      -                                        newcp = cp->cpu_next_part;
1257      -                                } else if ((newcp = cp->cpu_next_lpl) == cp) {
1258      -                                        newcp = cp->cpu_next_part;
     1234 +                }
     1235 +
     1236 +                if (tp->t_cpupart == cp->cpu_part) {
     1237 +                        if (front) {
     1238 +                                /*
     1239 +                                 * We'll generally let this thread continue to run
     1240 +                                 * where it last ran, but will consider migration if:
     1241 +                                 * - The thread last ran outside its home lgroup.
     1242 +                                 * - The CPU where it last ran is the target of an
     1243 +                                 *   offline request (a thread_nomigrate() on the in
     1244 +                                 *   motion CPU relies on this when forcing a preempt).
     1245 +                                 * - The thread isn't the highest priority thread where
     1246 +                                 *   it last ran, and it is considered not likely to
     1247 +                                 *   have significant cache warmth.
     1248 +                                 */
     1249 +                                if ((!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp)) ||
     1250 +                                    (cp == cpu_inmotion)) {
     1251 +                                        cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
     1252 +                                            self ? cp : NULL);
     1253 +                                } else if ((tpri < cp->cpu_disp->disp_maxrunpri) &&
     1254 +                                    (!THREAD_HAS_CACHE_WARMTH(tp))) {
     1255 +                                        cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
     1256 +                                            NULL);
1259 1257                                  }
     1258 +                        } else {
     1259 +                                int     qlen;
1260 1260  
1261      -                                if (RUNQ_LEN(newcp, tpri) < qlen) {
1262      -                                        DTRACE_PROBE3(runq__balance,
1263      -                                            kthread_t *, tp,
1264      -                                            cpu_t *, cp, cpu_t *, newcp);
1265      -                                        cp = newcp;
     1261 +                                /*
     1262 +                                 * Perform any CMT load balancing
     1263 +                                 */
     1264 +                                cp = cmt_balance(tp, cp);
     1265 +
     1266 +                                /*
     1267 +                                 * Balance across the run queues
     1268 +                                 */
     1269 +                                qlen = RUNQ_LEN(cp, tpri);
     1270 +                                if (tpri >= RUNQ_MATCH_PRI &&
     1271 +                                    !(tp->t_schedflag & TS_RUNQMATCH))
     1272 +                                        qlen -= RUNQ_MAX_DIFF;
     1273 +                                if (qlen > 0) {
     1274 +                                        cpu_t *newcp;
     1275 +
     1276 +                                        if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID) {
     1277 +                                                newcp = cp->cpu_next_part;
     1278 +                                        } else if ((newcp = cp->cpu_next_lpl) == cp) {
     1279 +                                                newcp = cp->cpu_next_part;
     1280 +                                        }
     1281 +
     1282 +                                        if (RUNQ_LEN(newcp, tpri) < qlen) {
     1283 +                                                DTRACE_PROBE3(runq__balance,
     1284 +                                                    kthread_t *, tp,
     1285 +                                                    cpu_t *, cp, cpu_t *, newcp);
     1286 +                                                cp = newcp;
     1287 +                                        }
1266 1288                                  }
1267 1289                          }
1268 1290                  } else {
1269 1291                          /*
1270 1292                           * Migrate to a cpu in the new partition.
1271 1293                           */
1272 1294                          cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
1273 1295                              tp->t_lpl, tp->t_pri, NULL);
1274 1296                  }
     1297 +
1275 1298                  ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
1276 1299          } else {
1277 1300                  /*
1278 1301                   * It is possible that t_weakbound_cpu != t_bound_cpu (for
1279 1302                   * a short time until weak binding that existed when the
1280 1303                   * strong binding was established has dropped) so we must
1281 1304                   * favour weak binding over strong.
1282 1305                   */
1283 1306                  cp = tp->t_weakbound_cpu ?
1284 1307                      tp->t_weakbound_cpu : tp->t_bound_cpu;
1285 1308          }
     1309 +
1286 1310          /*
1287 1311           * A thread that is ONPROC may be temporarily placed on the run queue
1288 1312           * but then chosen to run again by disp.  If the thread we're placing on
1289 1313           * the queue is in TS_ONPROC state, don't set its t_waitrq until a
1290 1314           * replacement process is actually scheduled in swtch().  In this
1291 1315           * situation, curthread is the only thread that could be in the ONPROC
1292 1316           * state.
1293 1317           */
1294 1318          if ((!self) && (tp->t_waitrq == 0)) {
1295 1319                  hrtime_t curtime;
(1 line elided)
1297 1321                  curtime = gethrtime_unscaled();
1298 1322                  (void) cpu_update_pct(tp, curtime);
1299 1323                  tp->t_waitrq = curtime;
1300 1324          } else {
1301 1325                  (void) cpu_update_pct(tp, gethrtime_unscaled());
1302 1326          }
1303 1327  
1304 1328          dp = cp->cpu_disp;
1305 1329          disp_lock_enter_high(&dp->disp_lock);
1306 1330  
1307      -        DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 0);
1308      -        TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p",
1309      -            tpri, cp, tp);
     1331 +        DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, front);
     1332 +        if (front) {
     1333 +                TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri,
     1334 +                        tp);
     1335 +        } else {
     1336 +                TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p",
     1337 +                        tpri, cp, tp);
     1338 +        }
1310 1339  
1311 1340  #ifndef NPROBE
1312 1341          /* Kernel probe */
1313 1342          if (tnf_tracing_active)
1314 1343                  tnf_thread_queue(tp, cp, tpri);
1315 1344  #endif /* NPROBE */
1316 1345  
1317 1346          ASSERT(tpri >= 0 && tpri < dp->disp_npri);
1318 1347  
1319 1348          THREAD_RUN(tp, &dp->disp_lock);         /* set t_state to TS_RUN */
1320 1349          tp->t_disp_queue = dp;
1321 1350          tp->t_link = NULL;
1322 1351  
1323 1352          dq = &dp->disp_q[tpri];
1324 1353          dp->disp_nrunnable++;
1325 1354          if (!bound)
1326 1355                  dp->disp_steal = 0;
1327 1356          membar_enter();
1328 1357  
1329 1358          if (dq->dq_sruncnt++ != 0) {
1330      -                ASSERT(dq->dq_first != NULL);
1331      -                dq->dq_last->t_link = tp;
1332      -                dq->dq_last = tp;
     1359 +                if (front) {
     1360 +                        ASSERT(dq->dq_last != NULL);
     1361 +                        tp->t_link = dq->dq_first;
     1362 +                        dq->dq_first = tp;
     1363 +                } else {
     1364 +                        ASSERT(dq->dq_first != NULL);
     1365 +                        dq->dq_last->t_link = tp;
     1366 +                        dq->dq_last = tp;
     1367 +                }
1333 1368          } else {
1334 1369                  ASSERT(dq->dq_first == NULL);
1335 1370                  ASSERT(dq->dq_last == NULL);
1336 1371                  dq->dq_first = dq->dq_last = tp;
1337 1372                  BT_SET(dp->disp_qactmap, tpri);
1338 1373                  if (tpri > dp->disp_maxrunpri) {
1339 1374                          dp->disp_maxrunpri = tpri;
1340 1375                          membar_enter();
1341 1376                          cpu_resched(cp, tpri);
1342 1377                  }
(6 lines elided)
1349 1384                           * run queue, don't allow other CPUs to steal
1350 1385                           * this thread while we are in the middle of a
1351 1386                           * context switch. We may just switch to it
1352 1387                           * again right away. CPU_DISP_DONTSTEAL is cleared
1353 1388                           * in swtch and swtch_to.
1354 1389                           */
1355 1390                          cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL;
1356 1391                  }
1357 1392                  dp->disp_max_unbound_pri = tpri;
1358 1393          }
     1394 +
1359 1395          (*disp_enq_thread)(cp, bound);
1360 1396  }
1361 1397  
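
Reviewer note: in the merged function above, the only structural difference between the two enqueue flavours is the splice into the per-priority queue, at the head for front and at the tail for back, with the empty-queue case shared. The stand-alone sketch below models just that splice; toy_dispq_t and toy_thread_t are hypothetical stand-ins for dispq_t and kthread_t, and bookkeeping such as disp_qactmap, disp_maxrunpri and resched handling is deliberately omitted.

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct toy_thread {
        struct toy_thread *t_link;      /* next thread on this queue */
        int             t_id;
} toy_thread_t;

typedef struct toy_dispq {
        toy_thread_t    *dq_first;      /* head: next to be dispatched */
        toy_thread_t    *dq_last;       /* tail: last to be dispatched */
        int             dq_sruncnt;     /* number of queued threads */
} toy_dispq_t;

static void
toy_enqueue(toy_dispq_t *dq, toy_thread_t *tp, bool front)
{
        tp->t_link = NULL;

        if (dq->dq_sruncnt++ != 0) {
                if (front) {
                        /* splice at the head: runs before queued threads */
                        tp->t_link = dq->dq_first;
                        dq->dq_first = tp;
                } else {
                        /* splice at the tail: runs after queued threads */
                        dq->dq_last->t_link = tp;
                        dq->dq_last = tp;
                }
        } else {
                /* empty queue: head and tail both point at the new thread */
                assert(dq->dq_first == NULL && dq->dq_last == NULL);
                dq->dq_first = dq->dq_last = tp;
        }
}

int
main(void)
{
        toy_dispq_t dq = { NULL, NULL, 0 };
        toy_thread_t a = { NULL, 1 }, b = { NULL, 2 }, c = { NULL, 3 };
        toy_thread_t *tp;

        toy_enqueue(&dq, &a, false);    /* back:  1       */
        toy_enqueue(&dq, &b, false);    /* back:  1 2     */
        toy_enqueue(&dq, &c, true);     /* front: 3 1 2   */

        for (tp = dq.dq_first; tp != NULL; tp = tp->t_link)
                (void) printf("%d ", tp->t_id);
        (void) printf("\n");            /* prints "3 1 2" */
        return (0);
}

Keeping the empty-queue branch shared is what lets the patch fold setfrontdq() and setbackdq() together without changing either path's queue ordering.
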
1362 1398  /*
     1399 + * Put the specified thread on the back of the dispatcher
     1400 + * queue corresponding to its current priority.
     1401 + *
     1402 + * Called with the thread in transition, onproc or stopped state
     1403 + * and locked (transition implies locked) and at high spl.
     1404 + * Returns with the thread in TS_RUN state and still locked.
     1405 + */
     1406 +void
     1407 +setbackdq(kthread_t *tp)
     1408 +{
     1409 +        setfrontbackdq(tp, B_FALSE);
     1410 +}
     1411 +
     1412 +/*
1363 1413   * Put the specified thread on the front of the dispatcher
1364 1414   * queue corresponding to its current priority.
1365 1415   *
1366 1416   * Called with the thread in transition, onproc or stopped state
1367 1417   * and locked (transition implies locked) and at high spl.
1368 1418   * Returns with the thread in TS_RUN state and still locked.
1369 1419   */
1370 1420  void
1371 1421  setfrontdq(kthread_t *tp)
1372 1422  {
1373      -        disp_t          *dp;
1374      -        dispq_t         *dq;
1375      -        cpu_t           *cp;
1376      -        pri_t           tpri;
1377      -        int             bound;
1378      -
1379      -        ASSERT(THREAD_LOCK_HELD(tp));
1380      -        ASSERT((tp->t_schedflag & TS_ALLSTART) == 0);
1381      -        ASSERT(!thread_on_queue(tp));   /* make sure tp isn't on a runq */
1382      -
1383      -        /*
1384      -         * If thread is "swapped" or on the swap queue don't
1385      -         * queue it, but wake sched.
1386      -         */
1387      -        if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) {
1388      -                disp_swapped_setrun(tp);
1389      -                return;
1390      -        }
1391      -
1392      -        if (tp->t_bound_cpu || tp->t_weakbound_cpu)
1393      -                bound = 1;
1394      -        else
1395      -                bound = 0;
1396      -
1397      -        tpri = DISP_PRIO(tp);
1398      -        if (ncpus == 1)
1399      -                cp = tp->t_cpu;
1400      -        else if (!bound) {
1401      -                if (tpri >= kpqpri) {
1402      -                        setkpdq(tp, SETKP_FRONT);
1403      -                        return;
1404      -                }
1405      -                cp = tp->t_cpu;
1406      -                if (tp->t_cpupart == cp->cpu_part) {
1407      -                        /*
1408      -                         * We'll generally let this thread continue to run
1409      -                         * where it last ran, but will consider migration if:
1410      -                         * - The thread last ran outside it's home lgroup.
1411      -                         * - The CPU where it last ran is the target of an
1412      -                         *   offline request (a thread_nomigrate() on the in
1413      -                         *   motion CPU relies on this when forcing a preempt).
1414      -                         * - The thread isn't the highest priority thread where
1415      -                         *   it last ran, and it is considered not likely to
1416      -                         *   have significant cache warmth.
1417      -                         */
1418      -                        if ((!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp)) ||
1419      -                            (cp == cpu_inmotion)) {
1420      -                                cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1421      -                                    (tp == curthread) ? cp : NULL);
1422      -                        } else if ((tpri < cp->cpu_disp->disp_maxrunpri) &&
1423      -                            (!THREAD_HAS_CACHE_WARMTH(tp))) {
1424      -                                cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
1425      -                                    NULL);
1426      -                        }
1427      -                } else {
1428      -                        /*
1429      -                         * Migrate to a cpu in the new partition.
1430      -                         */
1431      -                        cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
1432      -                            tp->t_lpl, tp->t_pri, NULL);
1433      -                }
1434      -                ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
1435      -        } else {
1436      -                /*
1437      -                 * It is possible that t_weakbound_cpu != t_bound_cpu (for
1438      -                 * a short time until weak binding that existed when the
1439      -                 * strong binding was established has dropped) so we must
1440      -                 * favour weak binding over strong.
1441      -                 */
1442      -                cp = tp->t_weakbound_cpu ?
1443      -                    tp->t_weakbound_cpu : tp->t_bound_cpu;
1444      -        }
1445      -
1446      -        /*
1447      -         * A thread that is ONPROC may be temporarily placed on the run queue
1448      -         * but then chosen to run again by disp.  If the thread we're placing on
1449      -         * the queue is in TS_ONPROC state, don't set its t_waitrq until a
1450      -         * replacement process is actually scheduled in swtch().  In this
1451      -         * situation, curthread is the only thread that could be in the ONPROC
1452      -         * state.
1453      -         */
1454      -        if ((tp != curthread) && (tp->t_waitrq == 0)) {
1455      -                hrtime_t curtime;
1456      -
1457      -                curtime = gethrtime_unscaled();
1458      -                (void) cpu_update_pct(tp, curtime);
1459      -                tp->t_waitrq = curtime;
1460      -        } else {
1461      -                (void) cpu_update_pct(tp, gethrtime_unscaled());
1462      -        }
1463      -
1464      -        dp = cp->cpu_disp;
1465      -        disp_lock_enter_high(&dp->disp_lock);
1466      -
1467      -        TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp);
1468      -        DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 1);
1469      -
1470      -#ifndef NPROBE
1471      -        /* Kernel probe */
1472      -        if (tnf_tracing_active)
1473      -                tnf_thread_queue(tp, cp, tpri);
1474      -#endif /* NPROBE */
1475      -
1476      -        ASSERT(tpri >= 0 && tpri < dp->disp_npri);
1477      -
1478      -        THREAD_RUN(tp, &dp->disp_lock);         /* set TS_RUN state and lock */
1479      -        tp->t_disp_queue = dp;
1480      -
1481      -        dq = &dp->disp_q[tpri];
1482      -        dp->disp_nrunnable++;
1483      -        if (!bound)
1484      -                dp->disp_steal = 0;
1485      -        membar_enter();
1486      -
1487      -        if (dq->dq_sruncnt++ != 0) {
1488      -                ASSERT(dq->dq_last != NULL);
1489      -                tp->t_link = dq->dq_first;
1490      -                dq->dq_first = tp;
1491      -        } else {
1492      -                ASSERT(dq->dq_last == NULL);
1493      -                ASSERT(dq->dq_first == NULL);
1494      -                tp->t_link = NULL;
1495      -                dq->dq_first = dq->dq_last = tp;
1496      -                BT_SET(dp->disp_qactmap, tpri);
1497      -                if (tpri > dp->disp_maxrunpri) {
1498      -                        dp->disp_maxrunpri = tpri;
1499      -                        membar_enter();
1500      -                        cpu_resched(cp, tpri);
1501      -                }
1502      -        }
1503      -
1504      -        if (!bound && tpri > dp->disp_max_unbound_pri) {
1505      -                if (tp == curthread && dp->disp_max_unbound_pri == -1 &&
1506      -                    cp == CPU) {
1507      -                        /*
1508      -                         * If there are no other unbound threads on the
1509      -                         * run queue, don't allow other CPUs to steal
1510      -                         * this thread while we are in the middle of a
1511      -                         * context switch. We may just switch to it
1512      -                         * again right away. CPU_DISP_DONTSTEAL is cleared
1513      -                         * in swtch and swtch_to.
1514      -                         */
1515      -                        cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL;
1516      -                }
1517      -                dp->disp_max_unbound_pri = tpri;
1518      -        }
1519      -        (*disp_enq_thread)(cp, bound);
     1423 +        setfrontbackdq(tp, B_TRUE);
1520 1424  }
1521 1425  
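
Reviewer note on the overall shape of the change: two nearly identical functions collapse into one static worker keyed on a boolean, and the public setbackdq()/setfrontdq() symbols survive as one-line wrappers, so no caller has to change. A generic sketch of that idiom follows, with purely illustrative names; the diff above is the authoritative version.

#include <stdbool.h>
#include <stdio.h>

/* Single static worker: all shared logic lives here. */
static void
enqueue_common(int item, bool front)
{
        (void) printf("enqueue %d at the %s\n", item,
            front ? "front" : "back");
}

/* Thin wrappers keep the existing public names and signatures. */
void
enqueue_back(int item)
{
        enqueue_common(item, false);
}

void
enqueue_front(int item)
{
        enqueue_common(item, true);
}

int
main(void)
{
        enqueue_back(1);
        enqueue_front(2);
        return (0);
}

Confining the behavioural differences to explicit front checks in one body keeps the remaining divergence (queue splice, migration policy, trace points) easy to audit in the merged function above.
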
1522 1426  /*
1523 1427   * Put a high-priority unbound thread on the kp queue
1524 1428   */
1525 1429  static void
1526 1430  setkpdq(kthread_t *tp, int borf)
1527 1431  {
1528 1432          dispq_t *dq;
1529 1433          disp_t  *dp;
(1172 lines elided)