/[resiprocate]/main/contrib/db/btree/bt_curadj.c
ViewVC logotype

Contents of /main/contrib/db/btree/bt_curadj.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 9632 - (show annotations) (download)
Wed May 2 19:24:54 2012 UTC (7 years, 7 months ago) by sgodin
File MIME type: text/plain
File size: 15913 byte(s)
-update BerkeleyDb to 4.8 in contrib
-added MaxMind GeoIP library to contrib
-added MySQL client connector library to contrib
1 /*-
2 * See the file LICENSE for redistribution information.
3 *
4 * Copyright (c) 1996-2009 Oracle. All rights reserved.
5 *
6 * $Id$
7 */
8
9 #include "db_config.h"
10
11 #include "db_int.h"
12 #include "dbinc/db_page.h"
13 #include "dbinc/btree.h"
14 #include "dbinc/mp.h"
15
16 static int __bam_opd_cursor __P((DB *, DBC *, db_pgno_t, u_int32_t, u_int32_t));
17
18 /*
19 * Cursor adjustments are logged if they are for subtransactions. This is
20 * because it's possible for a subtransaction to adjust cursors which will
21 * still be active after the subtransaction aborts, and so which must be
22 * restored to their previous locations. Cursors that can be both affected
23 * by our cursor adjustments and active after our transaction aborts can
24 * only be found in our parent transaction -- cursors in other transactions,
25 * including other child transactions of our parent, must have conflicting
26 * locker IDs, and so cannot be affected by adjustments in this transaction.
27 */
28
29 /*
30 * __bam_ca_delete --
31 * Update the cursors when items are deleted and when already deleted
32 * items are overwritten. Return the number of relevant cursors found.
33 *
34 * PUBLIC: int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, int, int *));
35 */
36 int
37 __bam_ca_delete(dbp, pgno, indx, delete, countp)
38 DB *dbp;
39 db_pgno_t pgno;
40 u_int32_t indx;
41 int delete, *countp;
42 {
43 BTREE_CURSOR *cp;
44 DB *ldbp;
45 DBC *dbc;
46 ENV *env;
47 int count; /* !!!: Has to contain max number of cursors. */
48
49 env = dbp->env;
50
51 /*
52 * Adjust the cursors. We have the page write locked, so the
53 * only other cursors that can be pointing at a page are
54 * those in the same thread of control. Unfortunately, we don't
55 * know that they're using the same DB handle, so traverse
56 * all matching DB handles in the same ENV, then all cursors
57 * on each matching DB handle.
58 *
59 * Each cursor is single-threaded, so we only need to lock the
60 * list of DBs and then the list of cursors in each DB.
61 */
62 MUTEX_LOCK(env, env->mtx_dblist);
63 FIND_FIRST_DB_MATCH(env, dbp, ldbp);
64 for (count = 0;
65 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
66 ldbp = TAILQ_NEXT(ldbp, dblistlinks)) {
67 MUTEX_LOCK(env, dbp->mutex);
68 TAILQ_FOREACH(dbc, &ldbp->active_queue, links) {
69 cp = (BTREE_CURSOR *)dbc->internal;
70 if (cp->pgno == pgno && cp->indx == indx &&
71 !MVCC_SKIP_CURADJ(dbc, pgno)) {
72 /*
73 * [#8032] This assert is checking
74 * for possible race conditions where we
75 * hold a cursor position without a lock.
76 * Unfortunately, there are paths in the
77 * Btree code that do not satisfy these
78 * conditions. None of them are known to
79 * be a problem, but this assert should
80 * be re-activated when the Btree stack
81 * code is re-written.
82 DB_ASSERT(env, !STD_LOCKING(dbc) ||
83 cp->lock_mode != DB_LOCK_NG);
84 */
85 if (delete) {
86 F_SET(cp, C_DELETED);
87 /*
88 * If we're deleting the item, we can't
89 * keep a streaming offset cached.
90 */
91 cp->stream_start_pgno = PGNO_INVALID;
92 } else
93 F_CLR(cp, C_DELETED);
94
95 #ifdef HAVE_COMPRESSION
96 /*
97 * We also set the C_COMPRESS_MODIFIED flag,
98 * which prompts the compression code to look
99 * for it's current entry again if it needs to.
100 *
101 * The flag isn't cleared, because the
102 * compression code still needs to do that even
103 * for an entry that becomes undeleted.
104 *
105 * This flag also needs to be set if an entry is
106 * updated, but since the compression code
107 * always deletes before an update, setting it
108 * here is sufficient.
109 */
110 F_SET(cp, C_COMPRESS_MODIFIED);
111 #endif
112
113 ++count;
114 }
115 }
116 MUTEX_UNLOCK(env, dbp->mutex);
117 }
118 MUTEX_UNLOCK(env, env->mtx_dblist);
119
120 if (countp != NULL)
121 *countp = count;
122 return (0);
123 }
124
125 /*
126 * __ram_ca_delete --
127 * Return if any relevant cursors found.
128 *
129 * PUBLIC: int __ram_ca_delete __P((DB *, db_pgno_t, int *));
130 */
131 int
132 __ram_ca_delete(dbp, root_pgno, foundp)
133 DB *dbp;
134 db_pgno_t root_pgno;
135 int *foundp;
136 {
137 DB *ldbp;
138 DBC *dbc;
139 ENV *env;
140 int found;
141
142 env = dbp->env;
143
144 /*
145 * Review the cursors. See the comment in __bam_ca_delete().
146 */
147 MUTEX_LOCK(env, env->mtx_dblist);
148 FIND_FIRST_DB_MATCH(env, dbp, ldbp);
149 for (found = 0;
150 found == 0 && ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
151 ldbp = TAILQ_NEXT(ldbp, dblistlinks)) {
152 MUTEX_LOCK(env, dbp->mutex);
153 TAILQ_FOREACH(dbc, &ldbp->active_queue, links)
154 if (dbc->internal->root == root_pgno &&
155 !MVCC_SKIP_CURADJ(dbc, root_pgno)) {
156 found = 1;
157 break;
158 }
159 MUTEX_UNLOCK(env, dbp->mutex);
160 }
161 MUTEX_UNLOCK(env, env->mtx_dblist);
162
163 *foundp = found;
164 return (0);
165 }
166
167 /*
168 * __bam_ca_di --
169 * Adjust the cursors during a delete or insert.
170 *
171 * PUBLIC: int __bam_ca_di __P((DBC *, db_pgno_t, u_int32_t, int));
172 */
173 int
174 __bam_ca_di(my_dbc, pgno, indx, adjust)
175 DBC *my_dbc;
176 db_pgno_t pgno;
177 u_int32_t indx;
178 int adjust;
179 {
180 DB *dbp, *ldbp;
181 DBC *dbc;
182 DBC_INTERNAL *cp;
183 DB_LSN lsn;
184 DB_TXN *my_txn;
185 ENV *env;
186 int found, ret;
187
188 dbp = my_dbc->dbp;
189 env = dbp->env;
190
191 my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;
192
193 /*
194 * Adjust the cursors. See the comment in __bam_ca_delete().
195 */
196 MUTEX_LOCK(env, env->mtx_dblist);
197 FIND_FIRST_DB_MATCH(env, dbp, ldbp);
198 for (found = 0;
199 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
200 ldbp = TAILQ_NEXT(ldbp, dblistlinks)) {
201 MUTEX_LOCK(env, dbp->mutex);
202 TAILQ_FOREACH(dbc, &ldbp->active_queue, links) {
203 if (dbc->dbtype == DB_RECNO)
204 continue;
205 cp = dbc->internal;
206 if (cp->pgno == pgno && cp->indx >= indx &&
207 (dbc == my_dbc || !MVCC_SKIP_CURADJ(dbc, pgno))) {
208 /* Cursor indices should never be negative. */
209 DB_ASSERT(env, cp->indx != 0 || adjust > 0);
210 /* [#8032]
211 DB_ASSERT(env, !STD_LOCKING(dbc) ||
212 cp->lock_mode != DB_LOCK_NG);
213 */
214 cp->indx += adjust;
215 if (my_txn != NULL && dbc->txn != my_txn)
216 found = 1;
217 }
218 }
219 MUTEX_UNLOCK(env, dbp->mutex);
220 }
221 MUTEX_UNLOCK(env, env->mtx_dblist);
222
223 if (found != 0 && DBC_LOGGING(my_dbc)) {
224 if ((ret = __bam_curadj_log(dbp, my_dbc->txn, &lsn, 0,
225 DB_CA_DI, pgno, 0, 0, (u_int32_t)adjust, indx, 0)) != 0)
226 return (ret);
227 }
228
229 return (0);
230 }
231
232 /*
233 * __bam_opd_cursor -- create a new opd cursor.
234 */
235 static int
236 __bam_opd_cursor(dbp, dbc, first, tpgno, ti)
237 DB *dbp;
238 DBC *dbc;
239 db_pgno_t tpgno;
240 u_int32_t first, ti;
241 {
242 BTREE_CURSOR *cp, *orig_cp;
243 DBC *dbc_nopd;
244 int ret;
245
246 orig_cp = (BTREE_CURSOR *)dbc->internal;
247 dbc_nopd = NULL;
248
249 /*
250 * Allocate a new cursor and create the stack. If duplicates
251 * are sorted, we've just created an off-page duplicate Btree.
252 * If duplicates aren't sorted, we've just created a Recno tree.
253 *
254 * Note that in order to get here at all, there shouldn't be
255 * an old off-page dup cursor--to augment the checking dbc_newopd
256 * will do, assert this.
257 */
258 DB_ASSERT(dbp->env, orig_cp->opd == NULL);
259 if ((ret = __dbc_newopd(dbc, tpgno, orig_cp->opd, &dbc_nopd)) != 0)
260 return (ret);
261
262 cp = (BTREE_CURSOR *)dbc_nopd->internal;
263 cp->pgno = tpgno;
264 cp->indx = ti;
265
266 if (dbp->dup_compare == NULL) {
267 /*
268 * Converting to off-page Recno trees is tricky. The
269 * record number for the cursor is the index + 1 (to
270 * convert to 1-based record numbers).
271 */
272 cp->recno = ti + 1;
273 }
274
275 /*
276 * Transfer the deleted flag from the top-level cursor to the
277 * created one.
278 */
279 if (F_ISSET(orig_cp, C_DELETED)) {
280 F_SET(cp, C_DELETED);
281 F_CLR(orig_cp, C_DELETED);
282 }
283
284 /* Stack the cursors and reset the initial cursor's index. */
285 orig_cp->opd = dbc_nopd;
286 orig_cp->indx = first;
287 return (0);
288 }
289
290 /*
291 * __bam_ca_dup --
292 * Adjust the cursors when moving items from a leaf page to a duplicates
293 * page.
294 *
295 * PUBLIC: int __bam_ca_dup __P((DBC *,
296 * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, db_pgno_t, u_int32_t));
297 */
298 int
299 __bam_ca_dup(my_dbc, first, fpgno, fi, tpgno, ti)
300 DBC *my_dbc;
301 db_pgno_t fpgno, tpgno;
302 u_int32_t first, fi, ti;
303 {
304 BTREE_CURSOR *orig_cp;
305 DB *dbp, *ldbp;
306 DBC *dbc;
307 DB_LSN lsn;
308 DB_TXN *my_txn;
309 ENV *env;
310 int found, ret, t_ret;
311
312 dbp = my_dbc->dbp;
313 env = dbp->env;
314 my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;
315 ret = 0;
316
317 /*
318 * Adjust the cursors. See the comment in __bam_ca_delete().
319 */
320 MUTEX_LOCK(env, env->mtx_dblist);
321 FIND_FIRST_DB_MATCH(env, dbp, ldbp);
322 for (found = 0;
323 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
324 ldbp = TAILQ_NEXT(ldbp, dblistlinks)) {
325 loop: MUTEX_LOCK(env, dbp->mutex);
326 TAILQ_FOREACH(dbc, &ldbp->active_queue, links) {
327 /* Find cursors pointing to this record. */
328 orig_cp = (BTREE_CURSOR *)dbc->internal;
329 if (orig_cp->pgno != fpgno || orig_cp->indx != fi ||
330 MVCC_SKIP_CURADJ(dbc, fpgno))
331 continue;
332
333 /*
334 * Since we rescan the list see if this is already
335 * converted.
336 */
337 if (orig_cp->opd != NULL)
338 continue;
339
340 MUTEX_UNLOCK(env, dbp->mutex);
341 /* [#8032]
342 DB_ASSERT(env, !STD_LOCKING(dbc) ||
343 orig_cp->lock_mode != DB_LOCK_NG);
344 */
345 if ((ret = __bam_opd_cursor(dbp,
346 dbc, first, tpgno, ti)) != 0)
347 goto err;
348 if (my_txn != NULL && dbc->txn != my_txn)
349 found = 1;
350 /* We released the mutex to get a cursor, start over. */
351 goto loop;
352 }
353 MUTEX_UNLOCK(env, dbp->mutex);
354 }
355 err: MUTEX_UNLOCK(env, env->mtx_dblist);
356
357 if (found != 0 && DBC_LOGGING(my_dbc)) {
358 if ((t_ret = __bam_curadj_log(dbp, my_dbc->txn,
359 &lsn, 0, DB_CA_DUP, fpgno, tpgno, 0, first, fi, ti)) != 0 &&
360 ret == 0)
361 ret = t_ret;
362 }
363
364 return (ret);
365 }
366
367 /*
368 * __bam_ca_undodup --
369 * Adjust the cursors when returning items to a leaf page
370 * from a duplicate page.
371 * Called only during undo processing.
372 *
373 * PUBLIC: int __bam_ca_undodup __P((DB *,
374 * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, u_int32_t));
375 */
376 int
377 __bam_ca_undodup(dbp, first, fpgno, fi, ti)
378 DB *dbp;
379 db_pgno_t fpgno;
380 u_int32_t first, fi, ti;
381 {
382 BTREE_CURSOR *orig_cp;
383 DB *ldbp;
384 DBC *dbc;
385 ENV *env;
386 int ret;
387
388 env = dbp->env;
389 ret = 0;
390
391 /*
392 * Adjust the cursors. See the comment in __bam_ca_delete().
393 */
394 MUTEX_LOCK(env, env->mtx_dblist);
395 FIND_FIRST_DB_MATCH(env, dbp, ldbp);
396 for (;
397 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
398 ldbp = TAILQ_NEXT(ldbp, dblistlinks)) {
399 loop: MUTEX_LOCK(env, dbp->mutex);
400 TAILQ_FOREACH(dbc, &ldbp->active_queue, links) {
401 orig_cp = (BTREE_CURSOR *)dbc->internal;
402
403 /*
404 * A note on the orig_cp->opd != NULL requirement here:
405 * it's possible that there's a cursor that refers to
406 * the same duplicate set, but which has no opd cursor,
407 * because it refers to a different item and we took
408 * care of it while processing a previous record.
409 */
410 if (orig_cp->pgno != fpgno ||
411 orig_cp->indx != first ||
412 orig_cp->opd == NULL || ((BTREE_CURSOR *)
413 orig_cp->opd->internal)->indx != ti ||
414 MVCC_SKIP_CURADJ(dbc, fpgno))
415 continue;
416 MUTEX_UNLOCK(env, dbp->mutex);
417 if ((ret = __dbc_close(orig_cp->opd)) != 0)
418 goto err;
419 orig_cp->opd = NULL;
420 orig_cp->indx = fi;
421 /*
422 * We released the mutex to free a cursor,
423 * start over.
424 */
425 goto loop;
426 }
427 MUTEX_UNLOCK(env, dbp->mutex);
428 }
429 err: MUTEX_UNLOCK(env, env->mtx_dblist);
430
431 return (ret);
432 }
433
434 /*
435 * __bam_ca_rsplit --
436 * Adjust the cursors when doing reverse splits.
437 *
438 * PUBLIC: int __bam_ca_rsplit __P((DBC *, db_pgno_t, db_pgno_t));
439 */
440 int
441 __bam_ca_rsplit(my_dbc, fpgno, tpgno)
442 DBC* my_dbc;
443 db_pgno_t fpgno, tpgno;
444 {
445 DB *dbp, *ldbp;
446 DBC *dbc;
447 DB_LSN lsn;
448 DB_TXN *my_txn;
449 ENV *env;
450 int found, ret;
451
452 dbp = my_dbc->dbp;
453 env = dbp->env;
454 my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;
455
456 /*
457 * Adjust the cursors. See the comment in __bam_ca_delete().
458 */
459 MUTEX_LOCK(env, env->mtx_dblist);
460 FIND_FIRST_DB_MATCH(env, dbp, ldbp);
461 for (found = 0;
462 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
463 ldbp = TAILQ_NEXT(ldbp, dblistlinks)) {
464 MUTEX_LOCK(env, dbp->mutex);
465 TAILQ_FOREACH(dbc, &ldbp->active_queue, links) {
466 if (dbc->dbtype == DB_RECNO)
467 continue;
468 if (dbc->internal->pgno == fpgno &&
469 !MVCC_SKIP_CURADJ(dbc, fpgno)) {
470 dbc->internal->pgno = tpgno;
471 /* [#8032]
472 DB_ASSERT(env, !STD_LOCKING(dbc) ||
473 dbc->internal->lock_mode != DB_LOCK_NG);
474 */
475 if (my_txn != NULL && dbc->txn != my_txn)
476 found = 1;
477 }
478 }
479 MUTEX_UNLOCK(env, dbp->mutex);
480 }
481 MUTEX_UNLOCK(env, env->mtx_dblist);
482
483 if (found != 0 && DBC_LOGGING(my_dbc)) {
484 if ((ret = __bam_curadj_log(dbp, my_dbc->txn,
485 &lsn, 0, DB_CA_RSPLIT, fpgno, tpgno, 0, 0, 0, 0)) != 0)
486 return (ret);
487 }
488 return (0);
489 }
490
491 /*
492 * __bam_ca_split --
493 * Adjust the cursors when splitting a page.
494 *
495 * PUBLIC: int __bam_ca_split __P((DBC *,
496 * PUBLIC: db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, int));
497 */
498 int
499 __bam_ca_split(my_dbc, ppgno, lpgno, rpgno, split_indx, cleft)
500 DBC *my_dbc;
501 db_pgno_t ppgno, lpgno, rpgno;
502 u_int32_t split_indx;
503 int cleft;
504 {
505 DB *dbp, *ldbp;
506 DBC *dbc;
507 DBC_INTERNAL *cp;
508 DB_LSN lsn;
509 DB_TXN *my_txn;
510 ENV *env;
511 int found, ret;
512
513 dbp = my_dbc->dbp;
514 env = dbp->env;
515 my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;
516
517 /*
518 * Adjust the cursors. See the comment in __bam_ca_delete().
519 *
520 * If splitting the page that a cursor was on, the cursor has to be
521 * adjusted to point to the same record as before the split. Most
522 * of the time we don't adjust pointers to the left page, because
523 * we're going to copy its contents back over the original page. If
524 * the cursor is on the right page, it is decremented by the number of
525 * records split to the left page.
526 */
527 MUTEX_LOCK(env, env->mtx_dblist);
528 FIND_FIRST_DB_MATCH(env, dbp, ldbp);
529 for (found = 0;
530 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
531 ldbp = TAILQ_NEXT(ldbp, dblistlinks)) {
532 MUTEX_LOCK(env, dbp->mutex);
533 TAILQ_FOREACH(dbc, &ldbp->active_queue, links) {
534 if (dbc->dbtype == DB_RECNO)
535 continue;
536 cp = dbc->internal;
537 if (cp->pgno == ppgno &&
538 !MVCC_SKIP_CURADJ(dbc, ppgno)) {
539 /* [#8032]
540 DB_ASSERT(env, !STD_LOCKING(dbc) ||
541 cp->lock_mode != DB_LOCK_NG);
542 */
543 if (my_txn != NULL && dbc->txn != my_txn)
544 found = 1;
545 if (cp->indx < split_indx) {
546 if (cleft)
547 cp->pgno = lpgno;
548 } else {
549 cp->pgno = rpgno;
550 cp->indx -= split_indx;
551 }
552 }
553 }
554 MUTEX_UNLOCK(env, dbp->mutex);
555 }
556 MUTEX_UNLOCK(env, env->mtx_dblist);
557
558 if (found != 0 && DBC_LOGGING(my_dbc)) {
559 if ((ret = __bam_curadj_log(dbp,
560 my_dbc->txn, &lsn, 0, DB_CA_SPLIT, ppgno, rpgno,
561 cleft ? lpgno : PGNO_INVALID, 0, split_indx, 0)) != 0)
562 return (ret);
563 }
564
565 return (0);
566 }
567
568 /*
569 * __bam_ca_undosplit --
570 * Adjust the cursors when undoing a split of a page.
571 * If we grew a level we will execute this for both the
572 * left and the right pages.
573 * Called only during undo processing.
574 *
575 * PUBLIC: int __bam_ca_undosplit __P((DB *,
576 * PUBLIC: db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t));
577 */
578 int
579 __bam_ca_undosplit(dbp, frompgno, topgno, lpgno, split_indx)
580 DB *dbp;
581 db_pgno_t frompgno, topgno, lpgno;
582 u_int32_t split_indx;
583 {
584 DB *ldbp;
585 DBC *dbc;
586 DBC_INTERNAL *cp;
587 ENV *env;
588
589 env = dbp->env;
590
591 /*
592 * Adjust the cursors. See the comment in __bam_ca_delete().
593 *
594 * When backing out a split, we move the cursor back
595 * to the original offset and bump it by the split_indx.
596 */
597 MUTEX_LOCK(env, env->mtx_dblist);
598 FIND_FIRST_DB_MATCH(env, dbp, ldbp);
599 for (;
600 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
601 ldbp = TAILQ_NEXT(ldbp, dblistlinks)) {
602 MUTEX_LOCK(env, dbp->mutex);
603 TAILQ_FOREACH(dbc, &ldbp->active_queue, links) {
604 if (dbc->dbtype == DB_RECNO)
605 continue;
606 cp = dbc->internal;
607 if (cp->pgno == topgno &&
608 !MVCC_SKIP_CURADJ(dbc, topgno)) {
609 cp->pgno = frompgno;
610 cp->indx += split_indx;
611 } else if (cp->pgno == lpgno &&
612 !MVCC_SKIP_CURADJ(dbc, lpgno))
613 cp->pgno = frompgno;
614 }
615 MUTEX_UNLOCK(env, dbp->mutex);
616 }
617 MUTEX_UNLOCK(env, env->mtx_dblist);
618
619 return (0);
620 }

Properties

Name Value
svn:eol-style native
svn:mime-type text/plain

webmaster AT resiprocate DOT org
ViewVC Help
Powered by ViewVC 1.1.27