/[resiprocate]/main/contrib/db/btree/bt_curadj.c
ViewVC logotype

Contents of /main/contrib/db/btree/bt_curadj.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5295 - (show annotations) (download)
Mon Aug 22 00:30:05 2005 UTC (14 years, 2 months ago) by jason
File MIME type: text/plain
File size: 15624 byte(s)
merged 5270:HEAD from b-directory-reorg
1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996-2004
5 * Sleepycat Software. All rights reserved.
6 *
7 * $Id: bt_curadj.c,v 11.37 2004/03/13 14:11:33 bostic Exp $
8 */
9
10 #include "db_config.h"
11
12 #ifndef NO_SYSTEM_INCLUDES
13 #include <sys/types.h>
14 #endif
15
16 #include "db_int.h"
17 #include "dbinc/db_page.h"
18 #include "dbinc/btree.h"
19
20 static int __bam_opd_cursor __P((DB *, DBC *, db_pgno_t, u_int32_t, u_int32_t));
21
22 /*
23 * Cursor adjustments are logged if they are for subtransactions. This is
24 * because it's possible for a subtransaction to adjust cursors which will
25 * still be active after the subtransaction aborts, and so which must be
26 * restored to their previous locations. Cursors that can be both affected
27 * by our cursor adjustments and active after our transaction aborts can
28 * only be found in our parent transaction -- cursors in other transactions,
29 * including other child transactions of our parent, must have conflicting
30 * locker IDs, and so cannot be affected by adjustments in this transaction.
31 */
32
33 /*
34 * __bam_ca_delete --
35 * Update the cursors when items are deleted and when already deleted
36 * items are overwritten. Return the number of relevant cursors found.
37 *
38 * PUBLIC: int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, int));
39 */
40 int
41 __bam_ca_delete(dbp, pgno, indx, delete)
42 DB *dbp;
43 db_pgno_t pgno;
44 u_int32_t indx;
45 int delete;
46 {
47 BTREE_CURSOR *cp;
48 DB *ldbp;
49 DB_ENV *dbenv;
50 DBC *dbc;
51 int count; /* !!!: Has to contain max number of cursors. */
52
53 dbenv = dbp->dbenv;
54
55 /*
56 * Adjust the cursors. We have the page write locked, so the
57 * only other cursors that can be pointing at a page are
58 * those in the same thread of control. Unfortunately, we don't
59 * know that they're using the same DB handle, so traverse
60 * all matching DB handles in the same DB_ENV, then all cursors
61 * on each matching DB handle.
62 *
63 * Each cursor is single-threaded, so we only need to lock the
64 * list of DBs and then the list of cursors in each DB.
65 */
66 MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
67 for (count = 0, ldbp = __dblist_get(dbenv, dbp->adj_fileid);
68 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
69 ldbp = LIST_NEXT(ldbp, dblistlinks)) {
70 MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
71 for (dbc = TAILQ_FIRST(&ldbp->active_queue);
72 dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
73 cp = (BTREE_CURSOR *)dbc->internal;
74 if (cp->pgno == pgno && cp->indx == indx) {
75 /*
76 * [#8032] This assert is checking
77 * for possible race conditions where we
78 * hold a cursor position without a lock.
79 * Unfortunately, there are paths in the
80 * Btree code that do not satisfy these
81 * conditions. None of them are known to
82 * be a problem, but this assert should
83 * be re-activated when the Btree stack
84 * code is re-written.
85 DB_ASSERT(!STD_LOCKING(dbc) ||
86 cp->lock_mode != DB_LOCK_NG);
87 */
88 if (delete)
89 F_SET(cp, C_DELETED);
90 else
91 F_CLR(cp, C_DELETED);
92 ++count;
93 }
94 }
95 MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
96 }
97 MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
98
99 return (count);
100 }
101
102 /*
103 * __ram_ca_delete --
104 * Return the number of relevant cursors.
105 *
106 * PUBLIC: int __ram_ca_delete __P((DB *, db_pgno_t));
107 */
108 int
109 __ram_ca_delete(dbp, root_pgno)
110 DB *dbp;
111 db_pgno_t root_pgno;
112 {
113 DB *ldbp;
114 DBC *dbc;
115 DB_ENV *dbenv;
116 int found;
117
118 found = 0;
119 dbenv = dbp->dbenv;
120
121 /*
122 * Review the cursors. See the comment in __bam_ca_delete().
123 */
124 MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
125 for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
126 found == 0 && ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
127 ldbp = LIST_NEXT(ldbp, dblistlinks)) {
128 MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
129 for (dbc = TAILQ_FIRST(&ldbp->active_queue);
130 found == 0 && dbc != NULL; dbc = TAILQ_NEXT(dbc, links))
131 if (dbc->internal->root == root_pgno)
132 found = 1;
133 MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
134 }
135 MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
136 return (found);
137 }
138
139 /*
140 * __bam_ca_di --
141 * Adjust the cursors during a delete or insert.
142 *
143 * PUBLIC: int __bam_ca_di __P((DBC *, db_pgno_t, u_int32_t, int));
144 */
145 int
146 __bam_ca_di(my_dbc, pgno, indx, adjust)
147 DBC *my_dbc;
148 db_pgno_t pgno;
149 u_int32_t indx;
150 int adjust;
151 {
152 DB *dbp, *ldbp;
153 DB_ENV *dbenv;
154 DB_LSN lsn;
155 DB_TXN *my_txn;
156 DBC *dbc;
157 DBC_INTERNAL *cp;
158 int found, ret;
159
160 dbp = my_dbc->dbp;
161 dbenv = dbp->dbenv;
162
163 my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;
164
165 /*
166 * Adjust the cursors. See the comment in __bam_ca_delete().
167 */
168 found = 0;
169 MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
170 for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
171 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
172 ldbp = LIST_NEXT(ldbp, dblistlinks)) {
173 MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
174 for (dbc = TAILQ_FIRST(&ldbp->active_queue);
175 dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
176 if (dbc->dbtype == DB_RECNO)
177 continue;
178 cp = dbc->internal;
179 if (cp->pgno == pgno && cp->indx >= indx) {
180 /* Cursor indices should never be negative. */
181 DB_ASSERT(cp->indx != 0 || adjust > 0);
182 /* [#8032]
183 DB_ASSERT(!STD_LOCKING(dbc) ||
184 cp->lock_mode != DB_LOCK_NG);
185 */
186 cp->indx += adjust;
187 if (my_txn != NULL && dbc->txn != my_txn)
188 found = 1;
189 }
190 }
191 MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
192 }
193 MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
194
195 if (found != 0 && DBC_LOGGING(my_dbc)) {
196 if ((ret = __bam_curadj_log(dbp, my_dbc->txn, &lsn, 0,
197 DB_CA_DI, pgno, 0, 0, (u_int32_t)adjust, indx, 0)) != 0)
198 return (ret);
199 }
200
201 return (0);
202 }
203
204 /*
205 * __bam_opd_cursor -- create a new opd cursor.
206 */
207 static int
208 __bam_opd_cursor(dbp, dbc, first, tpgno, ti)
209 DB *dbp;
210 DBC *dbc;
211 db_pgno_t tpgno;
212 u_int32_t first, ti;
213 {
214 BTREE_CURSOR *cp, *orig_cp;
215 DBC *dbc_nopd;
216 int ret;
217
218 orig_cp = (BTREE_CURSOR *)dbc->internal;
219 dbc_nopd = NULL;
220
221 /*
222 * Allocate a new cursor and create the stack. If duplicates
223 * are sorted, we've just created an off-page duplicate Btree.
224 * If duplicates aren't sorted, we've just created a Recno tree.
225 *
226 * Note that in order to get here at all, there shouldn't be
227 * an old off-page dup cursor--to augment the checking db_c_newopd
228 * will do, assert this.
229 */
230 DB_ASSERT(orig_cp->opd == NULL);
231 if ((ret = __db_c_newopd(dbc, tpgno, orig_cp->opd, &dbc_nopd)) != 0)
232 return (ret);
233
234 cp = (BTREE_CURSOR *)dbc_nopd->internal;
235 cp->pgno = tpgno;
236 cp->indx = ti;
237
238 if (dbp->dup_compare == NULL) {
239 /*
240 * Converting to off-page Recno trees is tricky. The
241 * record number for the cursor is the index + 1 (to
242 * convert to 1-based record numbers).
243 */
244 cp->recno = ti + 1;
245 }
246
247 /*
248 * Transfer the deleted flag from the top-level cursor to the
249 * created one.
250 */
251 if (F_ISSET(orig_cp, C_DELETED)) {
252 F_SET(cp, C_DELETED);
253 F_CLR(orig_cp, C_DELETED);
254 }
255
256 /* Stack the cursors and reset the initial cursor's index. */
257 orig_cp->opd = dbc_nopd;
258 orig_cp->indx = first;
259 return (0);
260 }
261
262 /*
263 * __bam_ca_dup --
264 * Adjust the cursors when moving items from a leaf page to a duplicates
265 * page.
266 *
267 * PUBLIC: int __bam_ca_dup __P((DBC *,
268 * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, db_pgno_t, u_int32_t));
269 */
270 int
271 __bam_ca_dup(my_dbc, first, fpgno, fi, tpgno, ti)
272 DBC *my_dbc;
273 db_pgno_t fpgno, tpgno;
274 u_int32_t first, fi, ti;
275 {
276 BTREE_CURSOR *orig_cp;
277 DB *dbp, *ldbp;
278 DBC *dbc;
279 DB_ENV *dbenv;
280 DB_LSN lsn;
281 DB_TXN *my_txn;
282 int found, ret;
283
284 dbp = my_dbc->dbp;
285 dbenv = dbp->dbenv;
286 my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;
287
288 /*
289 * Adjust the cursors. See the comment in __bam_ca_delete().
290 */
291 found = 0;
292 MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
293 for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
294 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
295 ldbp = LIST_NEXT(ldbp, dblistlinks)) {
296 loop: MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
297 for (dbc = TAILQ_FIRST(&ldbp->active_queue);
298 dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
299 /* Find cursors pointing to this record. */
300 orig_cp = (BTREE_CURSOR *)dbc->internal;
301 if (orig_cp->pgno != fpgno || orig_cp->indx != fi)
302 continue;
303
304 /*
305 * Since we rescan the list see if this is already
306 * converted.
307 */
308 if (orig_cp->opd != NULL)
309 continue;
310
311 MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
312 /* [#8032]
313 DB_ASSERT(!STD_LOCKING(dbc) ||
314 orig_cp->lock_mode != DB_LOCK_NG);
315 */
316 if ((ret = __bam_opd_cursor(dbp,
317 dbc, first, tpgno, ti)) !=0)
318 return (ret);
319 if (my_txn != NULL && dbc->txn != my_txn)
320 found = 1;
321 /* We released the mutex to get a cursor, start over. */
322 goto loop;
323 }
324 MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
325 }
326 MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
327
328 if (found != 0 && DBC_LOGGING(my_dbc)) {
329 if ((ret = __bam_curadj_log(dbp, my_dbc->txn,
330 &lsn, 0, DB_CA_DUP, fpgno, tpgno, 0, first, fi, ti)) != 0)
331 return (ret);
332 }
333 return (0);
334 }
335
336 /*
337 * __bam_ca_undodup --
338 * Adjust the cursors when returning items to a leaf page
339 * from a duplicate page.
340 * Called only during undo processing.
341 *
342 * PUBLIC: int __bam_ca_undodup __P((DB *,
343 * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, u_int32_t));
344 */
345 int
346 __bam_ca_undodup(dbp, first, fpgno, fi, ti)
347 DB *dbp;
348 db_pgno_t fpgno;
349 u_int32_t first, fi, ti;
350 {
351 BTREE_CURSOR *orig_cp;
352 DB *ldbp;
353 DBC *dbc;
354 DB_ENV *dbenv;
355 int ret;
356
357 dbenv = dbp->dbenv;
358
359 /*
360 * Adjust the cursors. See the comment in __bam_ca_delete().
361 */
362 MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
363 for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
364 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
365 ldbp = LIST_NEXT(ldbp, dblistlinks)) {
366 loop: MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
367 for (dbc = TAILQ_FIRST(&ldbp->active_queue);
368 dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
369 orig_cp = (BTREE_CURSOR *)dbc->internal;
370
371 /*
372 * A note on the orig_cp->opd != NULL requirement here:
373 * it's possible that there's a cursor that refers to
374 * the same duplicate set, but which has no opd cursor,
375 * because it refers to a different item and we took
376 * care of it while processing a previous record.
377 */
378 if (orig_cp->pgno != fpgno ||
379 orig_cp->indx != first ||
380 orig_cp->opd == NULL ||
381 ((BTREE_CURSOR *)orig_cp->opd->internal)->indx
382 != ti)
383 continue;
384 MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
385 if ((ret = __db_c_close(orig_cp->opd)) != 0)
386 return (ret);
387 orig_cp->opd = NULL;
388 orig_cp->indx = fi;
389 /*
390 * We released the mutex to free a cursor,
391 * start over.
392 */
393 goto loop;
394 }
395 MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
396 }
397 MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
398
399 return (0);
400 }
401
402 /*
403 * __bam_ca_rsplit --
404 * Adjust the cursors when doing reverse splits.
405 *
406 * PUBLIC: int __bam_ca_rsplit __P((DBC *, db_pgno_t, db_pgno_t));
407 */
408 int
409 __bam_ca_rsplit(my_dbc, fpgno, tpgno)
410 DBC* my_dbc;
411 db_pgno_t fpgno, tpgno;
412 {
413 DB *dbp, *ldbp;
414 DBC *dbc;
415 DB_ENV *dbenv;
416 DB_LSN lsn;
417 DB_TXN *my_txn;
418 int found, ret;
419
420 dbp = my_dbc->dbp;
421 dbenv = dbp->dbenv;
422 my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;
423
424 /*
425 * Adjust the cursors. See the comment in __bam_ca_delete().
426 */
427 found = 0;
428 MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
429 for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
430 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
431 ldbp = LIST_NEXT(ldbp, dblistlinks)) {
432 MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
433 for (dbc = TAILQ_FIRST(&ldbp->active_queue);
434 dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
435 if (dbc->dbtype == DB_RECNO)
436 continue;
437 if (dbc->internal->pgno == fpgno) {
438 dbc->internal->pgno = tpgno;
439 /* [#8032]
440 DB_ASSERT(!STD_LOCKING(dbc) ||
441 dbc->internal->lock_mode != DB_LOCK_NG);
442 */
443 if (my_txn != NULL && dbc->txn != my_txn)
444 found = 1;
445 }
446 }
447 MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
448 }
449 MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
450
451 if (found != 0 && DBC_LOGGING(my_dbc)) {
452 if ((ret = __bam_curadj_log(dbp, my_dbc->txn,
453 &lsn, 0, DB_CA_RSPLIT, fpgno, tpgno, 0, 0, 0, 0)) != 0)
454 return (ret);
455 }
456 return (0);
457 }
458
459 /*
460 * __bam_ca_split --
461 * Adjust the cursors when splitting a page.
462 *
463 * PUBLIC: int __bam_ca_split __P((DBC *,
464 * PUBLIC: db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, int));
465 */
466 int
467 __bam_ca_split(my_dbc, ppgno, lpgno, rpgno, split_indx, cleft)
468 DBC *my_dbc;
469 db_pgno_t ppgno, lpgno, rpgno;
470 u_int32_t split_indx;
471 int cleft;
472 {
473 DB *dbp, *ldbp;
474 DBC *dbc;
475 DBC_INTERNAL *cp;
476 DB_ENV *dbenv;
477 DB_LSN lsn;
478 DB_TXN *my_txn;
479 int found, ret;
480
481 dbp = my_dbc->dbp;
482 dbenv = dbp->dbenv;
483 my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;
484
485 /*
486 * Adjust the cursors. See the comment in __bam_ca_delete().
487 *
488 * If splitting the page that a cursor was on, the cursor has to be
489 * adjusted to point to the same record as before the split. Most
490 * of the time we don't adjust pointers to the left page, because
491 * we're going to copy its contents back over the original page. If
492 * the cursor is on the right page, it is decremented by the number of
493 * records split to the left page.
494 */
495 found = 0;
496 MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
497 for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
498 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
499 ldbp = LIST_NEXT(ldbp, dblistlinks)) {
500 MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
501 for (dbc = TAILQ_FIRST(&ldbp->active_queue);
502 dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
503 if (dbc->dbtype == DB_RECNO)
504 continue;
505 cp = dbc->internal;
506 if (cp->pgno == ppgno) {
507 /* [#8032]
508 DB_ASSERT(!STD_LOCKING(dbc) ||
509 cp->lock_mode != DB_LOCK_NG);
510 */
511 if (my_txn != NULL && dbc->txn != my_txn)
512 found = 1;
513 if (cp->indx < split_indx) {
514 if (cleft)
515 cp->pgno = lpgno;
516 } else {
517 cp->pgno = rpgno;
518 cp->indx -= split_indx;
519 }
520 }
521 }
522 MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
523 }
524 MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
525
526 if (found != 0 && DBC_LOGGING(my_dbc)) {
527 if ((ret = __bam_curadj_log(dbp,
528 my_dbc->txn, &lsn, 0, DB_CA_SPLIT, ppgno, rpgno,
529 cleft ? lpgno : PGNO_INVALID, 0, split_indx, 0)) != 0)
530 return (ret);
531 }
532
533 return (0);
534 }
535
536 /*
537 * __bam_ca_undosplit --
538 * Adjust the cursors when undoing a split of a page.
539 * If we grew a level we will execute this for both the
540 * left and the right pages.
541 * Called only during undo processing.
542 *
543 * PUBLIC: void __bam_ca_undosplit __P((DB *,
544 * PUBLIC: db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t));
545 */
546 void
547 __bam_ca_undosplit(dbp, frompgno, topgno, lpgno, split_indx)
548 DB *dbp;
549 db_pgno_t frompgno, topgno, lpgno;
550 u_int32_t split_indx;
551 {
552 DB *ldbp;
553 DBC *dbc;
554 DB_ENV *dbenv;
555 DBC_INTERNAL *cp;
556
557 dbenv = dbp->dbenv;
558
559 /*
560 * Adjust the cursors. See the comment in __bam_ca_delete().
561 *
562 * When backing out a split, we move the cursor back
563 * to the original offset and bump it by the split_indx.
564 */
565 MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
566 for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
567 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
568 ldbp = LIST_NEXT(ldbp, dblistlinks)) {
569 MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
570 for (dbc = TAILQ_FIRST(&ldbp->active_queue);
571 dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
572 if (dbc->dbtype == DB_RECNO)
573 continue;
574 cp = dbc->internal;
575 if (cp->pgno == topgno) {
576 cp->pgno = frompgno;
577 cp->indx += split_indx;
578 } else if (cp->pgno == lpgno)
579 cp->pgno = frompgno;
580 }
581 MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
582 }
583 MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
584 }

webmaster AT resiprocate DOT org
ViewVC Help
Powered by ViewVC 1.1.27