1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/systm.h>
  27 #include <sys/sysmacros.h>
  28 #include <sys/bootconf.h>
  29 #include <sys/atomic.h>
  30 #include <sys/lgrp.h>
  31 #include <sys/memlist.h>
  32 #include <sys/memnode.h>
  33 #include <sys/platform_module.h>
  34 #include <vm/vm_dep.h>
  35 
/*
 * Exclusive upper bound on the memnode indices used by this file: the
 * routines below assert or iterate over mnode values in
 * [0, max_mem_nodes).
 */
int     max_mem_nodes = 1;

/*
 * Per-memnode configuration, indexed by mnode.  This file maintains the
 * exists, physbase and physmax members of each entry.
 */
struct mem_node_conf mem_node_config[MAX_MEM_NODES];

/*
 * Reset to 0 by startup_build_mem_nodes() when the platform supplies no
 * plat_build_mem_nodes() hook.
 * NOTE(review): presumably consumed by PFN_2_MEM_NODE() -- confirm
 * against the macro definition.
 */
int mem_node_pfn_shift;
/*
 * num_memnodes should be updated atomically and always >=
 * the number of bits in memnodes_mask or the algorithm may fail.
 */
uint16_t num_memnodes;
mnodeset_t memnodes_mask; /* assumes 8*(sizeof(mnodeset_t)) >= MAX_MEM_NODES */

/*
 * If set, mem_node_physalign should be a power of two, and
 * should reflect the minimum address alignment of each node.
 * It is a byte value: the slice routines convert it to pages with
 * btop() before aligning PFNs.
 */
uint64_t mem_node_physalign;

/*
 * Platform hooks we will need.
 *
 * They are declared weak so that a platform may omit them; callers in
 * this file test the symbol's address before calling and fall back to
 * the generic implementation when the hook is absent.
 */

#pragma weak plat_build_mem_nodes
#pragma weak plat_slice_add
#pragma weak plat_slice_del
  60 
  61 /*
  62  * Adjust the memnode config after a DR operation.
  63  *
  64  * It is rather tricky to do these updates since we can't
  65  * protect the memnode structures with locks, so we must
  66  * be mindful of the order in which updates and reads to
  67  * these values can occur.
  68  */
  69 
  70 void
  71 mem_node_add_slice(pfn_t start, pfn_t end)
  72 {
  73         int mnode;
  74         mnodeset_t newmask, oldmask;
  75 
  76         /*
  77          * DR will pass us the first pfn that is allocatable.
  78          * We need to round down to get the real start of
  79          * the slice.
  80          */
  81         if (mem_node_physalign) {
  82                 start &= ~(btop(mem_node_physalign) - 1);
  83                 end = roundup(end, btop(mem_node_physalign)) - 1;
  84         }
  85 
  86         mnode = PFN_2_MEM_NODE(start);
  87         ASSERT(mnode >= 0 && mnode < max_mem_nodes);
  88 
  89         if (atomic_cas_32((uint32_t *)&mem_node_config[mnode].exists, 0, 1)) {
  90                 /*
  91                  * Add slice to existing node.
  92                  */
  93                 if (start < mem_node_config[mnode].physbase)
  94                         mem_node_config[mnode].physbase = start;
  95                 if (end > mem_node_config[mnode].physmax)
  96                         mem_node_config[mnode].physmax = end;
  97         } else {
  98                 mem_node_config[mnode].physbase = start;
  99                 mem_node_config[mnode].physmax = end;
 100                 atomic_inc_16(&num_memnodes);
 101                 do {
 102                         oldmask = memnodes_mask;
 103                         newmask = memnodes_mask | (1ull << mnode);
 104                 } while (atomic_cas_64(&memnodes_mask, oldmask, newmask) !=
 105                     oldmask);
 106         }
 107 
 108         /*
 109          * Inform the common lgrp framework about the new memory
 110          */
 111         lgrp_config(LGRP_CONFIG_MEM_ADD, mnode, MEM_NODE_2_LGRPHAND(mnode));
 112 }
 113 
 114 /*
 115  * Remove a PFN range from a memnode.  On some platforms,
 116  * the memnode will be created with physbase at the first
 117  * allocatable PFN, but later deleted with the MC slice
 118  * base address converted to a PFN, in which case we need
 119  * to assume physbase and up.
 120  */
 121 void
 122 mem_node_del_slice(pfn_t start, pfn_t end)
 123 {
 124         int mnode;
 125         pgcnt_t delta_pgcnt, node_size;
 126         mnodeset_t omask, nmask;
 127 
 128         if (mem_node_physalign) {
 129                 start &= ~(btop(mem_node_physalign) - 1);
 130                 end = roundup(end, btop(mem_node_physalign)) - 1;
 131         }
 132         mnode = PFN_2_MEM_NODE(start);
 133 
 134         ASSERT(mnode >= 0 && mnode < max_mem_nodes);
 135         ASSERT(mem_node_config[mnode].exists == 1);
 136 
 137         delta_pgcnt = end - start;
 138         node_size = mem_node_config[mnode].physmax -
 139             mem_node_config[mnode].physbase;
 140 
 141         if (node_size > delta_pgcnt) {
 142                 /*
 143                  * Subtract the slice from the memnode.
 144                  */
 145                 if (start <= mem_node_config[mnode].physbase)
 146                         mem_node_config[mnode].physbase = end + 1;
 147                 ASSERT(end <= mem_node_config[mnode].physmax);
 148                 if (end == mem_node_config[mnode].physmax)
 149                         mem_node_config[mnode].physmax = start - 1;
 150         } else {
 151                 /*
 152                  * Let the common lgrp framework know this mnode is
 153                  * leaving
 154                  */
 155                 lgrp_config(LGRP_CONFIG_MEM_DEL,
 156                     mnode, MEM_NODE_2_LGRPHAND(mnode));
 157 
 158                 /*
 159                  * Delete the whole node.
 160                  */
 161                 ASSERT(MNODE_PGCNT(mnode) == 0);
 162                 do {
 163                         omask = memnodes_mask;
 164                         nmask = omask & ~(1ull << mnode);
 165                 } while (atomic_cas_64(&memnodes_mask, omask, nmask) != omask);
 166                 atomic_dec_16(&num_memnodes);
 167                 mem_node_config[mnode].exists = 0;
 168         }
 169 }
 170 
 171 void
 172 mem_node_add_range(pfn_t start, pfn_t end)
 173 {
 174         if (&plat_slice_add)
 175                 plat_slice_add(start, end);
 176         else
 177                 mem_node_add_slice(start, end);
 178 }
 179 
 180 void
 181 mem_node_del_range(pfn_t start, pfn_t end)
 182 {
 183         if (&plat_slice_del)
 184                 plat_slice_del(start, end);
 185         else
 186                 mem_node_del_slice(start, end);
 187 }
 188 
 189 void
 190 startup_build_mem_nodes(struct memlist *list)
 191 {
 192         pfn_t   start, end;
 193 
 194         /* LINTED: ASSERT will always true or false */
 195         ASSERT(NBBY * sizeof (mnodeset_t) >= max_mem_nodes);
 196 
 197         if (&plat_build_mem_nodes) {
 198                 plat_build_mem_nodes(list);
 199         } else {
 200                 /*
 201                  * Boot install lists are arranged <addr, len>, ...
 202                  */
 203                 while (list) {
 204                         start = list->ml_address >> PAGESHIFT;
 205                         if (start > physmax)
 206                                 continue;
 207                         end =
 208                             (list->ml_address + list->ml_size - 1) >> PAGESHIFT;
 209                         if (end > physmax)
 210                                 end = physmax;
 211                         mem_node_add_range(start, end);
 212                         list = list->ml_next;
 213                 }
 214                 mem_node_physalign = 0;
 215                 mem_node_pfn_shift = 0;
 216         }
 217 }
 218 
 219 /*
 220  * Allocate an unassigned memnode.
 221  */
 222 int
 223 mem_node_alloc()
 224 {
 225         int mnode;
 226         mnodeset_t newmask, oldmask;
 227 
 228         /*
 229          * Find an unused memnode.  Update it atomically to prevent
 230          * a first time memnode creation race.
 231          */
 232         for (mnode = 0; mnode < max_mem_nodes; mnode++)
 233                 if (atomic_cas_32((uint32_t *)&mem_node_config[mnode].exists,
 234                     0, 1) == 0)
 235                         break;
 236 
 237         if (mnode >= max_mem_nodes)
 238                 panic("Out of free memnodes\n");
 239 
 240         mem_node_config[mnode].physbase = (pfn_t)-1l;
 241         mem_node_config[mnode].physmax = 0;
 242         atomic_inc_16(&num_memnodes);
 243         do {
 244                 oldmask = memnodes_mask;
 245                 newmask = memnodes_mask | (1ull << mnode);
 246         } while (atomic_cas_64(&memnodes_mask, oldmask, newmask) != oldmask);
 247 
 248         return (mnode);
 249 }
 250 
 251 /*
 252  * Find the intersection between a memnode and a memlist
 253  * and returns the number of pages that overlap.
 254  *
 255  * Assumes the list is protected from DR operations by
 256  * the memlist lock.
 257  */
 258 pgcnt_t
 259 mem_node_memlist_pages(int mnode, struct memlist *mlist)
 260 {
 261         pfn_t           base, end;
 262         pfn_t           cur_base, cur_end;
 263         pgcnt_t         npgs;
 264         struct memlist  *pmem;
 265 
 266         base = mem_node_config[mnode].physbase;
 267         end = mem_node_config[mnode].physmax;
 268         npgs = 0;
 269 
 270         memlist_read_lock();
 271 
 272         for (pmem = mlist; pmem; pmem = pmem->ml_next) {
 273                 cur_base = btop(pmem->ml_address);
 274                 cur_end = cur_base + btop(pmem->ml_size) - 1;
 275                 if (end < cur_base || base > cur_end)
 276                         continue;
 277                 npgs = npgs + (MIN(cur_end, end) -
 278                     MAX(cur_base, base)) + 1;
 279         }
 280 
 281         memlist_read_unlock();
 282 
 283         return (npgs);
 284 }