1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/systm.h>
  27 #include <sys/sysmacros.h>
  28 #include <sys/bootconf.h>
  29 #include <sys/atomic.h>
  30 #include <sys/lgrp.h>
  31 #include <sys/memlist.h>
  32 #include <sys/memnode.h>
  33 #include <sys/platform_module.h>
  34 #include <vm/vm_dep.h>
  35 
/*
 * Upper bound on the memnode ids handled by this file: computed mnode
 * values are asserted to lie in [0, max_mem_nodes) and mem_node_alloc()
 * scans that many slots of mem_node_config[].
 */
int     max_mem_nodes = 1;

/*
 * Per-memnode configuration, indexed by memnode id.  Each entry records
 * whether the node exists and the lowest (physbase) and highest (physmax)
 * PFN it currently spans.
 */
struct mem_node_conf mem_node_config[MAX_MEM_NODES];
/*
 * Reset to 0 by startup_build_mem_nodes() when no platform hook builds the
 * memnodes.  NOTE(review): presumably consumed by PFN_2_MEM_NODE(); confirm
 * against the macro definition.
 */
int mem_node_pfn_shift;
/*
 * num_memnodes should be updated atomically and always be >=
 * the number of bits set in memnodes_mask or the algorithm may fail.
 * The code in this file therefore increments the count before setting
 * a bit, and clears the bit before decrementing the count.
 */
uint16_t num_memnodes;
mnodeset_t memnodes_mask; /* assumes 8*(sizeof(mnodeset_t)) >= MAX_MEM_NODES */

/*
 * If set, mem_node_physalign should be a power of two, and
 * should reflect the minimum address alignment of each node.
 * The value is converted with btop() before it is applied to PFNs.
 */
uint64_t mem_node_physalign;

/*
 * Platform hooks we will need.
 *
 * They are weak symbols: the code in this file tests the address of
 * each hook before calling it and falls back to its own implementation
 * when the address is null.
 */

#pragma weak plat_build_mem_nodes
#pragma weak plat_slice_add
#pragma weak plat_slice_del
  60 
  61 /*
  62  * Adjust the memnode config after a DR operation.
  63  *
  64  * It is rather tricky to do these updates since we can't
  65  * protect the memnode structures with locks, so we must
  66  * be mindful of the order in which updates and reads to
  67  * these values can occur.
  68  */
  69 
  70 void
  71 mem_node_add_slice(pfn_t start, pfn_t end)
  72 {
  73         int mnode;
  74         mnodeset_t newmask, oldmask;
  75 
  76         /*
  77          * DR will pass us the first pfn that is allocatable.
  78          * We need to round down to get the real start of
  79          * the slice.
  80          */
  81         if (mem_node_physalign) {
  82                 start &= ~(btop(mem_node_physalign) - 1);
  83                 end = roundup(end, btop(mem_node_physalign)) - 1;
  84         }
  85 
  86         mnode = PFN_2_MEM_NODE(start);
  87         ASSERT(mnode >= 0 && mnode < max_mem_nodes);
  88 
  89         if (cas32((uint32_t *)&mem_node_config[mnode].exists, 0, 1)) {
  90                 /*
  91                  * Add slice to existing node.
  92                  */
  93                 if (start < mem_node_config[mnode].physbase)
  94                         mem_node_config[mnode].physbase = start;
  95                 if (end > mem_node_config[mnode].physmax)
  96                         mem_node_config[mnode].physmax = end;
  97         } else {
  98                 mem_node_config[mnode].physbase = start;
  99                 mem_node_config[mnode].physmax = end;
 100                 atomic_add_16(&num_memnodes, 1);
 101                 do {
 102                         oldmask = memnodes_mask;
 103                         newmask = memnodes_mask | (1ull << mnode);
 104                 } while (cas64(&memnodes_mask, oldmask, newmask) != oldmask);
 105         }
 106 
 107         /*
 108          * Inform the common lgrp framework about the new memory
 109          */
 110         lgrp_config(LGRP_CONFIG_MEM_ADD, mnode, MEM_NODE_2_LGRPHAND(mnode));
 111 }
 112 
 113 /*
 114  * Remove a PFN range from a memnode.  On some platforms,
 115  * the memnode will be created with physbase at the first
 116  * allocatable PFN, but later deleted with the MC slice
 117  * base address converted to a PFN, in which case we need
 118  * to assume physbase and up.
 119  */
 120 void
 121 mem_node_del_slice(pfn_t start, pfn_t end)
 122 {
 123         int mnode;
 124         pgcnt_t delta_pgcnt, node_size;
 125         mnodeset_t omask, nmask;
 126 
 127         if (mem_node_physalign) {
 128                 start &= ~(btop(mem_node_physalign) - 1);
 129                 end = roundup(end, btop(mem_node_physalign)) - 1;
 130         }
 131         mnode = PFN_2_MEM_NODE(start);
 132 
 133         ASSERT(mnode >= 0 && mnode < max_mem_nodes);
 134         ASSERT(mem_node_config[mnode].exists == 1);
 135 
 136         delta_pgcnt = end - start;
 137         node_size = mem_node_config[mnode].physmax -
 138             mem_node_config[mnode].physbase;
 139 
 140         if (node_size > delta_pgcnt) {
 141                 /*
 142                  * Subtract the slice from the memnode.
 143                  */
 144                 if (start <= mem_node_config[mnode].physbase)
 145                         mem_node_config[mnode].physbase = end + 1;
 146                 ASSERT(end <= mem_node_config[mnode].physmax);
 147                 if (end == mem_node_config[mnode].physmax)
 148                         mem_node_config[mnode].physmax = start - 1;
 149         } else {
 150                 /*
 151                  * Let the common lgrp framework know this mnode is
 152                  * leaving
 153                  */
 154                 lgrp_config(LGRP_CONFIG_MEM_DEL,
 155                     mnode, MEM_NODE_2_LGRPHAND(mnode));
 156 
 157                 /*
 158                  * Delete the whole node.
 159                  */
 160                 ASSERT(MNODE_PGCNT(mnode) == 0);
 161                 do {
 162                         omask = memnodes_mask;
 163                         nmask = omask & ~(1ull << mnode);
 164                 } while (cas64(&memnodes_mask, omask, nmask) != omask);
 165                 atomic_add_16(&num_memnodes, -1);
 166                 mem_node_config[mnode].exists = 0;
 167         }
 168 }
 169 
 170 void
 171 mem_node_add_range(pfn_t start, pfn_t end)
 172 {
 173         if (&plat_slice_add)
 174                 plat_slice_add(start, end);
 175         else
 176                 mem_node_add_slice(start, end);
 177 }
 178 
 179 void
 180 mem_node_del_range(pfn_t start, pfn_t end)
 181 {
 182         if (&plat_slice_del)
 183                 plat_slice_del(start, end);
 184         else
 185                 mem_node_del_slice(start, end);
 186 }
 187 
 188 void
 189 startup_build_mem_nodes(struct memlist *list)
 190 {
 191         pfn_t   start, end;
 192 
 193         /* LINTED: ASSERT will always true or false */
 194         ASSERT(NBBY * sizeof (mnodeset_t) >= max_mem_nodes);
 195 
 196         if (&plat_build_mem_nodes) {
 197                 plat_build_mem_nodes(list);
 198         } else {
 199                 /*
 200                  * Boot install lists are arranged <addr, len>, ...
 201                  */
 202                 while (list) {
 203                         start = list->ml_address >> PAGESHIFT;
 204                         if (start > physmax)
 205                                 continue;
 206                         end =
 207                             (list->ml_address + list->ml_size - 1) >> PAGESHIFT;
 208                         if (end > physmax)
 209                                 end = physmax;
 210                         mem_node_add_range(start, end);
 211                         list = list->ml_next;
 212                 }
 213                 mem_node_physalign = 0;
 214                 mem_node_pfn_shift = 0;
 215         }
 216 }
 217 
 218 /*
 219  * Allocate an unassigned memnode.
 220  */
 221 int
 222 mem_node_alloc()
 223 {
 224         int mnode;
 225         mnodeset_t newmask, oldmask;
 226 
 227         /*
 228          * Find an unused memnode.  Update it atomically to prevent
 229          * a first time memnode creation race.
 230          */
 231         for (mnode = 0; mnode < max_mem_nodes; mnode++)
 232                 if (cas32((uint32_t *)&mem_node_config[mnode].exists,
 233                     0, 1) == 0)
 234                         break;
 235 
 236         if (mnode >= max_mem_nodes)
 237                 panic("Out of free memnodes\n");
 238 
 239         mem_node_config[mnode].physbase = (pfn_t)-1l;
 240         mem_node_config[mnode].physmax = 0;
 241         atomic_add_16(&num_memnodes, 1);
 242         do {
 243                 oldmask = memnodes_mask;
 244                 newmask = memnodes_mask | (1ull << mnode);
 245         } while (cas64(&memnodes_mask, oldmask, newmask) != oldmask);
 246 
 247         return (mnode);
 248 }
 249 
 250 /*
 251  * Find the intersection between a memnode and a memlist
 252  * and returns the number of pages that overlap.
 253  *
 254  * Assumes the list is protected from DR operations by
 255  * the memlist lock.
 256  */
 257 pgcnt_t
 258 mem_node_memlist_pages(int mnode, struct memlist *mlist)
 259 {
 260         pfn_t           base, end;
 261         pfn_t           cur_base, cur_end;
 262         pgcnt_t         npgs;
 263         struct memlist  *pmem;
 264 
 265         base = mem_node_config[mnode].physbase;
 266         end = mem_node_config[mnode].physmax;
 267         npgs = 0;
 268 
 269         memlist_read_lock();
 270 
 271         for (pmem = mlist; pmem; pmem = pmem->ml_next) {
 272                 cur_base = btop(pmem->ml_address);
 273                 cur_end = cur_base + btop(pmem->ml_size) - 1;
 274                 if (end < cur_base || base > cur_end)
 275                         continue;
 276                 npgs = npgs + (MIN(cur_end, end) -
 277                     MAX(cur_base, base)) + 1;
 278         }
 279 
 280         memlist_read_unlock();
 281 
 282         return (npgs);
 283 }