/[resiprocate]/main/contrib/db/btree/bt_rsearch.c
ViewVC logotype

Contents of /main/contrib/db/btree/bt_rsearch.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5295 - (show annotations) (download)
Mon Aug 22 00:30:05 2005 UTC (14 years, 2 months ago) by jason
File MIME type: text/plain
File size: 11858 byte(s)
merged 5270:HEAD from b-directory-reorg
1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996-2004
5 * Sleepycat Software. All rights reserved.
6 */
7 /*
8 * Copyright (c) 1990, 1993, 1994, 1995, 1996
9 * Keith Bostic. All rights reserved.
10 */
11 /*
12 * Copyright (c) 1990, 1993
13 * The Regents of the University of California. All rights reserved.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * $Id: bt_rsearch.c,v 11.40 2004/07/23 17:21:09 bostic Exp $
40 */
41
42 #include "db_config.h"
43
44 #ifndef NO_SYSTEM_INCLUDES
45 #include <sys/types.h>
46 #endif
47
48 #include "db_int.h"
49 #include "dbinc/db_page.h"
50 #include "dbinc/btree.h"
51 #include "dbinc/db_shash.h"
52 #include "dbinc/lock.h"
53 #include "dbinc/mp.h"
54
55 /*
56 * __bam_rsearch --
57 * Search a btree for a record number.
58 *
59 * PUBLIC: int __bam_rsearch __P((DBC *, db_recno_t *, u_int32_t, int, int *));
60 */
61 int
62 __bam_rsearch(dbc, recnop, flags, stop, exactp)
63 DBC *dbc;
64 db_recno_t *recnop;
65 u_int32_t flags;
66 int stop, *exactp;
67 {
68 BINTERNAL *bi;
69 BTREE_CURSOR *cp;
70 DB *dbp;
71 DB_LOCK lock;
72 DB_MPOOLFILE *mpf;
73 PAGE *h;
74 RINTERNAL *ri;
75 db_indx_t adjust, deloffset, indx, top;
76 db_lockmode_t lock_mode;
77 db_pgno_t pg;
78 db_recno_t recno, t_recno, total;
79 int ret, stack, t_ret;
80
81 dbp = dbc->dbp;
82 mpf = dbp->mpf;
83 cp = (BTREE_CURSOR *)dbc->internal;
84 h = NULL;
85
86 BT_STK_CLR(cp);
87
88 /*
89 * There are several ways we search a btree tree. The flags argument
90 * specifies if we're acquiring read or write locks and if we are
91 * locking pairs of pages. In addition, if we're adding or deleting
92 * an item, we have to lock the entire tree, regardless. See btree.h
93 * for more details.
94 *
95 * If write-locking pages, we need to know whether or not to acquire a
96 * write lock on a page before getting it. This depends on how deep it
97 * is in tree, which we don't know until we acquire the root page. So,
98 * if we need to lock the root page we may have to upgrade it later,
99 * because we won't get the correct lock initially.
100 *
101 * Retrieve the root page.
102 */
103 pg = cp->root;
104 stack = LF_ISSET(S_STACK) ? 1 : 0;
105 lock_mode = stack ? DB_LOCK_WRITE : DB_LOCK_READ;
106 if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
107 return (ret);
108 if ((ret = __memp_fget(mpf, &pg, 0, &h)) != 0) {
109 /* Did not read it, so we can release the lock */
110 (void)__LPUT(dbc, lock);
111 return (ret);
112 }
113
114 /*
115 * Decide if we need to save this page; if we do, write lock it.
116 * We deliberately don't lock-couple on this call. If the tree
117 * is tiny, i.e., one page, and two threads are busily updating
118 * the root page, we're almost guaranteed deadlocks galore, as
119 * each one gets a read lock and then blocks the other's attempt
120 * for a write lock.
121 */
122 if (!stack &&
123 ((LF_ISSET(S_PARENT) && (u_int8_t)(stop + 1) >= h->level) ||
124 (LF_ISSET(S_WRITE) && h->level == LEAFLEVEL))) {
125 ret = __memp_fput(mpf, h, 0);
126 if ((t_ret = __LPUT(dbc, lock)) != 0 && ret == 0)
127 ret = t_ret;
128 if (ret != 0)
129 return (ret);
130 lock_mode = DB_LOCK_WRITE;
131 if ((ret = __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
132 return (ret);
133 if ((ret = __memp_fget(mpf, &pg, 0, &h)) != 0) {
134 /* Did not read it, so we can release the lock */
135 (void)__LPUT(dbc, lock);
136 return (ret);
137 }
138 stack = 1;
139 }
140
141 /*
142 * If appending to the tree, set the record number now -- we have the
143 * root page locked.
144 *
145 * Delete only deletes exact matches, read only returns exact matches.
146 * Note, this is different from __bam_search(), which returns non-exact
147 * matches for read.
148 *
149 * The record may not exist. We can only return the correct location
150 * for the record immediately after the last record in the tree, so do
151 * a fast check now.
152 */
153 total = RE_NREC(h);
154 if (LF_ISSET(S_APPEND)) {
155 *exactp = 0;
156 *recnop = recno = total + 1;
157 } else {
158 recno = *recnop;
159 if (recno <= total)
160 *exactp = 1;
161 else {
162 *exactp = 0;
163 if (!LF_ISSET(S_PAST_EOF) || recno > total + 1) {
164 /*
165 * Keep the page locked for serializability.
166 *
167 * XXX
168 * This leaves the root page locked, which will
169 * eliminate any concurrency. A possible fix
170 * would be to lock the last leaf page instead.
171 */
172 ret = __memp_fput(mpf, h, 0);
173 if ((t_ret =
174 __TLPUT(dbc, lock)) != 0 && ret == 0)
175 ret = t_ret;
176 return (ret == 0 ? DB_NOTFOUND : ret);
177 }
178 }
179 }
180
181 /*
182 * !!!
183 * Record numbers in the tree are 0-based, but the recno is
184 * 1-based. All of the calculations below have to take this
185 * into account.
186 */
187 for (total = 0;;) {
188 switch (TYPE(h)) {
189 case P_LBTREE:
190 case P_LDUP:
191 recno -= total;
192 /*
193 * There may be logically deleted records on the page.
194 * If there are enough, the record may not exist.
195 */
196 if (TYPE(h) == P_LBTREE) {
197 adjust = P_INDX;
198 deloffset = O_INDX;
199 } else {
200 adjust = O_INDX;
201 deloffset = 0;
202 }
203 for (t_recno = 0, indx = 0;; indx += adjust) {
204 if (indx >= NUM_ENT(h)) {
205 *exactp = 0;
206 if (!LF_ISSET(S_PAST_EOF) ||
207 recno > t_recno + 1) {
208 ret = __memp_fput(mpf, h, 0);
209 h = NULL;
210 if ((t_ret = __TLPUT(dbc,
211 lock)) != 0 && ret == 0)
212 ret = t_ret;
213 if (ret == 0)
214 ret = DB_NOTFOUND;
215 goto err;
216 }
217 }
218 if (!B_DISSET(GET_BKEYDATA(dbp, h,
219 indx + deloffset)->type) &&
220 ++t_recno == recno)
221 break;
222 }
223
224 /* Correct from 1-based to 0-based for a page offset. */
225 BT_STK_ENTER(dbp->dbenv,
226 cp, h, indx, lock, lock_mode, ret);
227 if (ret != 0)
228 goto err;
229 return (0);
230 case P_IBTREE:
231 for (indx = 0, top = NUM_ENT(h);;) {
232 bi = GET_BINTERNAL(dbp, h, indx);
233 if (++indx == top || total + bi->nrecs >= recno)
234 break;
235 total += bi->nrecs;
236 }
237 pg = bi->pgno;
238 break;
239 case P_LRECNO:
240 recno -= total;
241
242 /* Correct from 1-based to 0-based for a page offset. */
243 --recno;
244 BT_STK_ENTER(dbp->dbenv,
245 cp, h, recno, lock, lock_mode, ret);
246 if (ret != 0)
247 goto err;
248 return (0);
249 case P_IRECNO:
250 for (indx = 0, top = NUM_ENT(h);;) {
251 ri = GET_RINTERNAL(dbp, h, indx);
252 if (++indx == top || total + ri->nrecs >= recno)
253 break;
254 total += ri->nrecs;
255 }
256 pg = ri->pgno;
257 break;
258 default:
259 return (__db_pgfmt(dbp->dbenv, h->pgno));
260 }
261 --indx;
262
263 if (stack) {
264 /* Return if this is the lowest page wanted. */
265 if (LF_ISSET(S_PARENT) && stop == h->level) {
266 BT_STK_ENTER(dbp->dbenv,
267 cp, h, indx, lock, lock_mode, ret);
268 if (ret != 0)
269 goto err;
270 return (0);
271 }
272 BT_STK_PUSH(dbp->dbenv,
273 cp, h, indx, lock, lock_mode, ret);
274 if (ret != 0)
275 goto err;
276 h = NULL;
277
278 lock_mode = DB_LOCK_WRITE;
279 if ((ret =
280 __db_lget(dbc, 0, pg, lock_mode, 0, &lock)) != 0)
281 goto err;
282 } else {
283 /*
284 * Decide if we want to return a pointer to the next
285 * page in the stack. If we do, write lock it and
286 * never unlock it.
287 */
288 if ((LF_ISSET(S_PARENT) &&
289 (u_int8_t)(stop + 1) >= (u_int8_t)(h->level - 1)) ||
290 (h->level - 1) == LEAFLEVEL)
291 stack = 1;
292
293 if ((ret = __memp_fput(mpf, h, 0)) != 0)
294 goto err;
295 h = NULL;
296
297 lock_mode = stack &&
298 LF_ISSET(S_WRITE) ? DB_LOCK_WRITE : DB_LOCK_READ;
299 if ((ret = __db_lget(dbc,
300 LCK_COUPLE_ALWAYS, pg, lock_mode, 0, &lock)) != 0) {
301 /*
302 * If we fail, discard the lock we held. This
303 * is OK because this only happens when we are
304 * descending the tree holding read-locks.
305 */
306 (void)__LPUT(dbc, lock);
307 goto err;
308 }
309 }
310
311 if ((ret = __memp_fget(mpf, &pg, 0, &h)) != 0)
312 goto err;
313 }
314 /* NOTREACHED */
315
316 err: if (h != NULL && (t_ret = __memp_fput(mpf, h, 0)) != 0 && ret == 0)
317 ret = t_ret;
318
319 BT_STK_POP(cp);
320 __bam_stkrel(dbc, 0);
321
322 return (ret);
323 }
324
325 /*
326 * __bam_adjust --
327 * Adjust the tree after adding or deleting a record.
328 *
329 * PUBLIC: int __bam_adjust __P((DBC *, int32_t));
330 */
331 int
332 __bam_adjust(dbc, adjust)
333 DBC *dbc;
334 int32_t adjust;
335 {
336 BTREE_CURSOR *cp;
337 DB *dbp;
338 DB_MPOOLFILE *mpf;
339 EPG *epg;
340 PAGE *h;
341 db_pgno_t root_pgno;
342 int ret;
343
344 dbp = dbc->dbp;
345 mpf = dbp->mpf;
346 cp = (BTREE_CURSOR *)dbc->internal;
347 root_pgno = cp->root;
348
349 /* Update the record counts for the tree. */
350 for (epg = cp->sp; epg <= cp->csp; ++epg) {
351 h = epg->page;
352 if (TYPE(h) == P_IBTREE || TYPE(h) == P_IRECNO) {
353 if (DBC_LOGGING(dbc)) {
354 if ((ret = __bam_cadjust_log(dbp, dbc->txn,
355 &LSN(h), 0, PGNO(h), &LSN(h),
356 (u_int32_t)epg->indx, adjust,
357 PGNO(h) == root_pgno ?
358 CAD_UPDATEROOT : 0)) != 0)
359 return (ret);
360 } else
361 LSN_NOT_LOGGED(LSN(h));
362
363 if (TYPE(h) == P_IBTREE)
364 GET_BINTERNAL(dbp, h, epg->indx)->nrecs +=
365 adjust;
366 else
367 GET_RINTERNAL(dbp, h, epg->indx)->nrecs +=
368 adjust;
369
370 if (PGNO(h) == root_pgno)
371 RE_NREC_ADJ(h, adjust);
372
373 if ((ret = __memp_fset(mpf, h, DB_MPOOL_DIRTY)) != 0)
374 return (ret);
375 }
376 }
377 return (0);
378 }
379
380 /*
381 * __bam_nrecs --
382 * Return the number of records in the tree.
383 *
384 * PUBLIC: int __bam_nrecs __P((DBC *, db_recno_t *));
385 */
386 int
387 __bam_nrecs(dbc, rep)
388 DBC *dbc;
389 db_recno_t *rep;
390 {
391 DB *dbp;
392 DB_LOCK lock;
393 DB_MPOOLFILE *mpf;
394 PAGE *h;
395 db_pgno_t pgno;
396 int ret, t_ret;
397
398 dbp = dbc->dbp;
399 mpf = dbp->mpf;
400
401 pgno = dbc->internal->root;
402 if ((ret = __db_lget(dbc, 0, pgno, DB_LOCK_READ, 0, &lock)) != 0)
403 return (ret);
404 if ((ret = __memp_fget(mpf, &pgno, 0, &h)) != 0)
405 return (ret);
406
407 *rep = RE_NREC(h);
408
409 ret = __memp_fput(mpf, h, 0);
410 if ((t_ret = __TLPUT(dbc, lock)) != 0 && ret == 0)
411 ret = t_ret;
412
413 return (ret);
414 }
415
416 /*
417 * __bam_total --
418 * Return the number of records below a page.
419 *
420 * PUBLIC: db_recno_t __bam_total __P((DB *, PAGE *));
421 */
422 db_recno_t
423 __bam_total(dbp, h)
424 DB *dbp;
425 PAGE *h;
426 {
427 db_recno_t nrecs;
428 db_indx_t indx, top;
429
430 nrecs = 0;
431 top = NUM_ENT(h);
432
433 switch (TYPE(h)) {
434 case P_LBTREE:
435 /* Check for logically deleted records. */
436 for (indx = 0; indx < top; indx += P_INDX)
437 if (!B_DISSET(
438 GET_BKEYDATA(dbp, h, indx + O_INDX)->type))
439 ++nrecs;
440 break;
441 case P_LDUP:
442 /* Check for logically deleted records. */
443 for (indx = 0; indx < top; indx += O_INDX)
444 if (!B_DISSET(GET_BKEYDATA(dbp, h, indx)->type))
445 ++nrecs;
446 break;
447 case P_IBTREE:
448 for (indx = 0; indx < top; indx += O_INDX)
449 nrecs += GET_BINTERNAL(dbp, h, indx)->nrecs;
450 break;
451 case P_LRECNO:
452 nrecs = NUM_ENT(h);
453 break;
454 case P_IRECNO:
455 for (indx = 0; indx < top; indx += O_INDX)
456 nrecs += GET_RINTERNAL(dbp, h, indx)->nrecs;
457 break;
458 }
459
460 return (nrecs);
461 }

webmaster AT resiprocate DOT org
ViewVC Help
Powered by ViewVC 1.1.27