Print this page
5291 x86 {high,low}bit rely on undefined behavior
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/intel/amd64/ml/amd64.il
+++ new/usr/src/uts/intel/amd64/ml/amd64.il
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /
27 27 / In-line functions for amd64 kernels.
28 28 /
29 29
30 30 /
31 31 / return current thread pointer
32 32 /
33 33 / NOTE: the "0x18" should be replaced by the computed value of the
34 34 / offset of "cpu_thread" from the beginning of the struct cpu.
35 35 / Including "assym.h" does not work, however, since that stuff
36 36 / is PSM-specific and is only visible to the 'unix' build anyway.
37 37 / Same with current cpu pointer, where "0x10" should be replaced
38 38 / by the computed value of the offset of "cpu_self".
39 39 / Ugh -- what a disaster.
40 40 /
41 41 .inline threadp,0
42 42 movq %gs:0x18, %rax /* %rax = quadword at %gs:0x18; 0x18 is the hard-coded stand-in for the offset of cpu_thread in struct cpu (see the NOTE above) */
43 43 .end
44 44
45 45 /
46 46 / return current cpu pointer
47 47 /
48 48 .inline curcpup,0
49 49 movq %gs:0x10, %rax /* %rax = quadword at %gs:0x10; hard-coded offset, presumably that of cpu_self in struct cpu -- confirm against the struct layout */
50 50 .end
51 51
52 52 /
53 53 / return caller
54 54 /
55 55 .inline caller,0
56 56 movq 8(%rbp), %rax /* %rax = quadword stored 8 bytes above %rbp; assumes %rbp is a valid frame pointer, in which case this slot holds the current frame's return address */
↓ open down ↓ |
56 lines elided |
↑ open up ↑ |
57 57 .end
58 58
59 59 /
60 60 / convert ipl to spl. This is the identity function for i86
61 61 /
62 62 .inline ipltospl,0
63 63 movq %rdi, %rax /* return the first argument unchanged (identity mapping) */
64 64 .end
65 65
66 66 /
67 -/ find the low order bit in a word
68 -/
69 - .inline lowbit,4
70 - movq $-1, %rax
71 - bsfq %rdi, %rax
72 - incq %rax
73 - .end
74 -
75 -/
76 67 / Networking byte order functions (too bad, Intel has the wrong byte order)
77 68 /
78 69
79 70 .inline htonll,4
80 71 movq %rdi, %rax /* copy the 64-bit argument into the return register */
81 72 bswapq %rax /* reverse the order of all eight bytes */
82 73 .end
83 74
84 75 .inline ntohll,4
85 76 movq %rdi, %rax /* copy the 64-bit argument into the return register */
86 77 bswapq %rax /* reverse the order of all eight bytes; byte reversal is its own inverse, so this matches htonll */
87 78 .end
88 79
89 80 .inline htonl,4
90 81 movl %edi, %eax /* copy the low 32 bits of the argument; a 32-bit write also zeroes the upper half of %rax */
91 82 bswap %eax /* reverse the order of the four bytes */
92 83 .end
93 84
94 85 .inline ntohl,4
95 86 movl %edi, %eax /* copy the low 32 bits of the argument; a 32-bit write also zeroes the upper half of %rax */
96 87 bswap %eax /* reverse the order of the four bytes; same operation as htonl */
97 88 .end
98 89
99 90 .inline htons,4
100 91 movl %edi, %eax /* copy the low 32 bits of the argument */
101 92 bswap %eax /* swap all four bytes: the byte-swapped low 16 bits now sit in bits 31..16 */
102 93 shrl $16, %eax /* shift them down to bits 15..0; bits 31..16 of the result are zero */
103 94 .end
104 95
105 96 .inline ntohs,4
106 97 movl %edi, %eax /* copy the low 32 bits of the argument */
107 98 bswap %eax /* swap all four bytes: the byte-swapped low 16 bits now sit in bits 31..16 */
108 99 shrl $16, %eax /* shift them down to bits 15..0; bits 31..16 of the result are zero (same sequence as htons) */
109 100 .end
110 101
111 102 /*
112 103 * multiply two long numbers and yield a u_longlong_t result
113 104 * Provided to manipulate hrtime_t values.
114 105 */
115 106 /* XX64 These don't work correctly with SOS9 build 13.0 yet
116 107 .inline mul32, 8
117 108 xorl %edx, %edx
118 109 movl %edi, %eax
119 110 mull %esi
120 111 shlq $32, %rdx
121 112 orq %rdx, %rax
122 113 ret
123 114 .end
124 115 */
125 116 /*
126 117 * Unlock hres_lock and increment the count value. (See clock.h)
127 118 */
128 119 .inline unlock_hres_lock, 0
129 120 lock /* prefix: make the following read-modify-write atomic */
130 121 incl hres_lock /* add 1 to the 32-bit word at symbol hres_lock; NOTE(review): the symbol is addressed absolutely, not %rip-relative -- relies on the code model this is built with */
131 122 .end
132 123
133 124 .inline atomic_orb,8
134 125 movl %esi, %eax /* copy the second argument to %eax (overwriting it); only its low byte, %al, is used below */
135 126 lock /* prefix: make the following read-modify-write atomic */
136 127 orb %al,(%rdi) /* OR %al into the byte at the address given by the first argument */
137 128 .end
138 129
139 130 .inline atomic_andb,8
140 131 movl %esi, %eax /* copy the second argument to %eax (overwriting it); only its low byte, %al, is used below */
141 132 lock /* prefix: make the following read-modify-write atomic */
142 133 andb %al,(%rdi) /* AND %al into the byte at the address given by the first argument */
143 134 .end
144 135
145 136 /*
146 137 * atomic inc/dec operations.
147 138 * void atomic_inc16(uint16_t *addr) { ++*addr; }
148 139 * void atomic_dec16(uint16_t *addr) { --*addr; }
149 140 */
150 141 .inline atomic_inc16,4
151 142 lock /* prefix: make the following read-modify-write atomic */
152 143 incw (%rdi) /* add 1 to the 16-bit word at the address in the first argument */
153 144 .end
154 145
155 146 .inline atomic_dec16,4
156 147 lock /* prefix: make the following read-modify-write atomic */
157 148 decw (%rdi) /* subtract 1 from the 16-bit word at the address in the first argument */
158 149 .end
159 150
160 151 /*
161 152 * atomic bit clear
162 153 */
163 154 .inline atomic_btr32,8
164 155 lock /* prefix: make the following test-and-clear atomic */
165 156 btrl %esi, (%rdi) /* CF = previous value of bit number %esi, counted from the address in %rdi; that bit is then cleared */
166 157 setc %al /* %al = 1 if the bit was previously set, else 0; NOTE(review): only %al is written, the upper bits of %eax keep their prior contents -- confirm callers test only the low byte or zero/non-zero of a byte-sized result */
167 158 .end
168 159
169 160 /*
170 161 * Call the pause instruction. To the Pentium 4 Xeon processor, it acts as
171 162 * a hint that the code sequence is a busy spin-wait loop. Without a pause
172 163 * instruction in these loops, the P4 Xeon processor may suffer a severe
173 164 * penalty when exiting the loop because the processor detects a possible
174 165 * memory order violation. Inserting the pause instruction significantly reduces
175 166 * the likelihood of a memory order violation, improving performance.
176 167 * The pause instruction is a NOP on all other IA-32 processors.
177 168 */
178 169 .inline ht_pause, 0
179 170 pause /* spin-wait loop hint; takes no operands and produces no result */
180 171 .end
181 172
182 173 /*
183 174 * inlines for update_sregs().
184 175 */
185 176 .inline __set_ds, 0
186 177 movw %di, %ds /* load segment register %ds with the low 16 bits of the first argument */
187 178 .end
188 179
189 180 .inline __set_es, 0
190 181 movw %di, %es /* load segment register %es with the low 16 bits of the first argument */
191 182 .end
192 183
193 184 .inline __set_fs, 0
194 185 movw %di, %fs /* load segment register %fs with the low 16 bits of the first argument */
195 186 .end
196 187
197 188 .inline __set_gs, 0
198 189 movw %di, %gs /* load segment register %gs with the low 16 bits of the first argument */
199 190 .end
200 191
201 192 /*
202 193 * OPTERON_ERRATUM_88 requires mfence
203 194 */
204 195 .inline __swapgs, 0
205 196 mfence /* memory fence issued immediately before swapgs, as the erratum note above requires */
206 197 swapgs /* exchange the current GS base with the value held in the kernel-GS-base MSR */
207 198 .end
208 199
209 200 /*
210 201 * prefetch 64 bytes
211 202 */
212 203
213 204 .inline prefetch_read_many,8
214 205 prefetcht0 (%rdi) /* temporal prefetch hint for the cache line containing the address in the first argument */
215 206 prefetcht0 32(%rdi) /* second hint 32 bytes further on */
216 207 .end
217 208
218 209 .inline prefetch_read_once,8
219 210 prefetchnta (%rdi) /* non-temporal prefetch hint for the cache line containing the address in the first argument */
220 211 prefetchnta 32(%rdi) /* second hint 32 bytes further on */
221 212 .end
222 213
223 214 .inline prefetch_write_many,8
224 215 prefetcht0 (%rdi) /* temporal prefetch hint for the cache line containing the address in the first argument; same instructions as prefetch_read_many */
225 216 prefetcht0 32(%rdi) /* second hint 32 bytes further on */
226 217 .end
227 218
228 219 .inline prefetch_write_once,8
229 220 prefetcht0 (%rdi) /* temporal prefetch hint for the cache line containing the address in the first argument; NOTE(review): uses prefetcht0 where prefetch_read_once uses prefetchnta -- confirm this is intentional */
230 221 prefetcht0 32(%rdi) /* second hint 32 bytes further on */
231 222 .end
↓ open down ↓ |
146 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX