/[resiprocate]/main/contrib/db/btree/bt_curadj.c
ViewVC logotype

Annotation of /main/contrib/db/btree/bt_curadj.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 5295 - (hide annotations) (download)
Mon Aug 22 00:30:05 2005 UTC (14 years, 3 months ago) by jason
File MIME type: text/plain
File size: 15624 byte(s)
merged 5270:HEAD from b-directory-reorg
1 fluffy 4437 /*-
2     * See the file LICENSE for redistribution information.
3     *
4     * Copyright (c) 1996-2004
5     * Sleepycat Software. All rights reserved.
6     *
7     * $Id: bt_curadj.c,v 11.37 2004/03/13 14:11:33 bostic Exp $
8     */
9    
10     #include "db_config.h"
11    
12     #ifndef NO_SYSTEM_INCLUDES
13     #include <sys/types.h>
14     #endif
15    
16     #include "db_int.h"
17     #include "dbinc/db_page.h"
18     #include "dbinc/btree.h"
19    
20     static int __bam_opd_cursor __P((DB *, DBC *, db_pgno_t, u_int32_t, u_int32_t));
21    
22     /*
23     * Cursor adjustments are logged if they are for subtransactions. This is
24     * because it's possible for a subtransaction to adjust cursors which will
25     * still be active after the subtransaction aborts, and so which must be
26     * restored to their previous locations. Cursors that can be both affected
27     * by our cursor adjustments and active after our transaction aborts can
28     * only be found in our parent transaction -- cursors in other transactions,
29     * including other child transactions of our parent, must have conflicting
30     * locker IDs, and so cannot be affected by adjustments in this transaction.
31     */
32    
33     /*
34     * __bam_ca_delete --
35     * Update the cursors when items are deleted and when already deleted
36     * items are overwritten. Return the number of relevant cursors found.
37     *
38     * PUBLIC: int __bam_ca_delete __P((DB *, db_pgno_t, u_int32_t, int));
39     */
40     int
41     __bam_ca_delete(dbp, pgno, indx, delete)
42     DB *dbp;
43     db_pgno_t pgno;
44     u_int32_t indx;
45     int delete;
46     {
47     BTREE_CURSOR *cp;
48     DB *ldbp;
49     DB_ENV *dbenv;
50     DBC *dbc;
51     int count; /* !!!: Has to contain max number of cursors. */
52    
53     dbenv = dbp->dbenv;
54    
55     /*
56     * Adjust the cursors. We have the page write locked, so the
57     * only other cursors that can be pointing at a page are
58     * those in the same thread of control. Unfortunately, we don't
59     * know that they're using the same DB handle, so traverse
60     * all matching DB handles in the same DB_ENV, then all cursors
61     * on each matching DB handle.
62     *
63     * Each cursor is single-threaded, so we only need to lock the
64     * list of DBs and then the list of cursors in each DB.
65     */
66     MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
67     for (count = 0, ldbp = __dblist_get(dbenv, dbp->adj_fileid);
68     ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
69     ldbp = LIST_NEXT(ldbp, dblistlinks)) {
70     MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
71     for (dbc = TAILQ_FIRST(&ldbp->active_queue);
72     dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
73     cp = (BTREE_CURSOR *)dbc->internal;
74     if (cp->pgno == pgno && cp->indx == indx) {
75     /*
76     * [#8032] This assert is checking
77     * for possible race conditions where we
78     * hold a cursor position without a lock.
79     * Unfortunately, there are paths in the
80     * Btree code that do not satisfy these
81     * conditions. None of them are known to
82     * be a problem, but this assert should
83     * be re-activated when the Btree stack
84     * code is re-written.
85     DB_ASSERT(!STD_LOCKING(dbc) ||
86     cp->lock_mode != DB_LOCK_NG);
87     */
88     if (delete)
89     F_SET(cp, C_DELETED);
90     else
91     F_CLR(cp, C_DELETED);
92     ++count;
93     }
94     }
95     MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
96     }
97     MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
98    
99     return (count);
100     }
101    
102     /*
103     * __ram_ca_delete --
104     * Return the number of relevant cursors.
105     *
106     * PUBLIC: int __ram_ca_delete __P((DB *, db_pgno_t));
107     */
108     int
109     __ram_ca_delete(dbp, root_pgno)
110     DB *dbp;
111     db_pgno_t root_pgno;
112     {
113     DB *ldbp;
114     DBC *dbc;
115     DB_ENV *dbenv;
116     int found;
117    
118     found = 0;
119     dbenv = dbp->dbenv;
120    
121     /*
122     * Review the cursors. See the comment in __bam_ca_delete().
123     */
124     MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
125     for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
126     found == 0 && ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
127     ldbp = LIST_NEXT(ldbp, dblistlinks)) {
128     MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
129     for (dbc = TAILQ_FIRST(&ldbp->active_queue);
130     found == 0 && dbc != NULL; dbc = TAILQ_NEXT(dbc, links))
131     if (dbc->internal->root == root_pgno)
132     found = 1;
133     MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
134     }
135     MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
136     return (found);
137     }
138    
139     /*
140     * __bam_ca_di --
141     * Adjust the cursors during a delete or insert.
142     *
143     * PUBLIC: int __bam_ca_di __P((DBC *, db_pgno_t, u_int32_t, int));
144     */
145     int
146     __bam_ca_di(my_dbc, pgno, indx, adjust)
147     DBC *my_dbc;
148     db_pgno_t pgno;
149     u_int32_t indx;
150     int adjust;
151     {
152     DB *dbp, *ldbp;
153     DB_ENV *dbenv;
154     DB_LSN lsn;
155     DB_TXN *my_txn;
156     DBC *dbc;
157     DBC_INTERNAL *cp;
158     int found, ret;
159    
160     dbp = my_dbc->dbp;
161     dbenv = dbp->dbenv;
162    
163     my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;
164    
165     /*
166     * Adjust the cursors. See the comment in __bam_ca_delete().
167     */
168     found = 0;
169     MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
170     for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
171     ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
172     ldbp = LIST_NEXT(ldbp, dblistlinks)) {
173     MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
174     for (dbc = TAILQ_FIRST(&ldbp->active_queue);
175     dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
176     if (dbc->dbtype == DB_RECNO)
177     continue;
178     cp = dbc->internal;
179     if (cp->pgno == pgno && cp->indx >= indx) {
180     /* Cursor indices should never be negative. */
181     DB_ASSERT(cp->indx != 0 || adjust > 0);
182     /* [#8032]
183     DB_ASSERT(!STD_LOCKING(dbc) ||
184     cp->lock_mode != DB_LOCK_NG);
185     */
186     cp->indx += adjust;
187     if (my_txn != NULL && dbc->txn != my_txn)
188     found = 1;
189     }
190     }
191     MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
192     }
193     MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
194    
195     if (found != 0 && DBC_LOGGING(my_dbc)) {
196     if ((ret = __bam_curadj_log(dbp, my_dbc->txn, &lsn, 0,
197     DB_CA_DI, pgno, 0, 0, (u_int32_t)adjust, indx, 0)) != 0)
198     return (ret);
199     }
200    
201     return (0);
202     }
203    
204     /*
205     * __bam_opd_cursor -- create a new opd cursor.
206     */
207     static int
208     __bam_opd_cursor(dbp, dbc, first, tpgno, ti)
209     DB *dbp;
210     DBC *dbc;
211     db_pgno_t tpgno;
212     u_int32_t first, ti;
213     {
214     BTREE_CURSOR *cp, *orig_cp;
215     DBC *dbc_nopd;
216     int ret;
217    
218     orig_cp = (BTREE_CURSOR *)dbc->internal;
219     dbc_nopd = NULL;
220    
221     /*
222     * Allocate a new cursor and create the stack. If duplicates
223     * are sorted, we've just created an off-page duplicate Btree.
224     * If duplicates aren't sorted, we've just created a Recno tree.
225     *
226     * Note that in order to get here at all, there shouldn't be
227     * an old off-page dup cursor--to augment the checking db_c_newopd
228     * will do, assert this.
229     */
230     DB_ASSERT(orig_cp->opd == NULL);
231     if ((ret = __db_c_newopd(dbc, tpgno, orig_cp->opd, &dbc_nopd)) != 0)
232     return (ret);
233    
234     cp = (BTREE_CURSOR *)dbc_nopd->internal;
235     cp->pgno = tpgno;
236     cp->indx = ti;
237    
238     if (dbp->dup_compare == NULL) {
239     /*
240     * Converting to off-page Recno trees is tricky. The
241     * record number for the cursor is the index + 1 (to
242     * convert to 1-based record numbers).
243     */
244     cp->recno = ti + 1;
245     }
246    
247     /*
248     * Transfer the deleted flag from the top-level cursor to the
249     * created one.
250     */
251     if (F_ISSET(orig_cp, C_DELETED)) {
252     F_SET(cp, C_DELETED);
253     F_CLR(orig_cp, C_DELETED);
254     }
255    
256     /* Stack the cursors and reset the initial cursor's index. */
257     orig_cp->opd = dbc_nopd;
258     orig_cp->indx = first;
259     return (0);
260     }
261    
262     /*
263     * __bam_ca_dup --
264     * Adjust the cursors when moving items from a leaf page to a duplicates
265     * page.
266     *
267     * PUBLIC: int __bam_ca_dup __P((DBC *,
268     * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, db_pgno_t, u_int32_t));
269     */
270     int
271     __bam_ca_dup(my_dbc, first, fpgno, fi, tpgno, ti)
272     DBC *my_dbc;
273     db_pgno_t fpgno, tpgno;
274     u_int32_t first, fi, ti;
275     {
276     BTREE_CURSOR *orig_cp;
277     DB *dbp, *ldbp;
278     DBC *dbc;
279     DB_ENV *dbenv;
280     DB_LSN lsn;
281     DB_TXN *my_txn;
282     int found, ret;
283    
284     dbp = my_dbc->dbp;
285     dbenv = dbp->dbenv;
286     my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;
287    
288     /*
289     * Adjust the cursors. See the comment in __bam_ca_delete().
290     */
291     found = 0;
292     MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
293     for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
294     ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
295     ldbp = LIST_NEXT(ldbp, dblistlinks)) {
296     loop: MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
297     for (dbc = TAILQ_FIRST(&ldbp->active_queue);
298     dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
299     /* Find cursors pointing to this record. */
300     orig_cp = (BTREE_CURSOR *)dbc->internal;
301     if (orig_cp->pgno != fpgno || orig_cp->indx != fi)
302     continue;
303    
304     /*
305     * Since we rescan the list see if this is already
306     * converted.
307     */
308     if (orig_cp->opd != NULL)
309     continue;
310    
311     MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
312     /* [#8032]
313     DB_ASSERT(!STD_LOCKING(dbc) ||
314     orig_cp->lock_mode != DB_LOCK_NG);
315     */
316     if ((ret = __bam_opd_cursor(dbp,
317     dbc, first, tpgno, ti)) !=0)
318     return (ret);
319     if (my_txn != NULL && dbc->txn != my_txn)
320     found = 1;
321     /* We released the mutex to get a cursor, start over. */
322     goto loop;
323     }
324     MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
325     }
326     MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
327    
328     if (found != 0 && DBC_LOGGING(my_dbc)) {
329     if ((ret = __bam_curadj_log(dbp, my_dbc->txn,
330     &lsn, 0, DB_CA_DUP, fpgno, tpgno, 0, first, fi, ti)) != 0)
331     return (ret);
332     }
333     return (0);
334     }
335    
336     /*
337     * __bam_ca_undodup --
338     * Adjust the cursors when returning items to a leaf page
339     * from a duplicate page.
340     * Called only during undo processing.
341     *
342     * PUBLIC: int __bam_ca_undodup __P((DB *,
343     * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, u_int32_t));
344     */
345     int
346     __bam_ca_undodup(dbp, first, fpgno, fi, ti)
347     DB *dbp;
348     db_pgno_t fpgno;
349     u_int32_t first, fi, ti;
350     {
351     BTREE_CURSOR *orig_cp;
352     DB *ldbp;
353     DBC *dbc;
354     DB_ENV *dbenv;
355     int ret;
356    
357     dbenv = dbp->dbenv;
358    
359     /*
360     * Adjust the cursors. See the comment in __bam_ca_delete().
361     */
362     MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
363     for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
364     ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
365     ldbp = LIST_NEXT(ldbp, dblistlinks)) {
366     loop: MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
367     for (dbc = TAILQ_FIRST(&ldbp->active_queue);
368     dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
369     orig_cp = (BTREE_CURSOR *)dbc->internal;
370    
371     /*
372     * A note on the orig_cp->opd != NULL requirement here:
373     * it's possible that there's a cursor that refers to
374     * the same duplicate set, but which has no opd cursor,
375     * because it refers to a different item and we took
376     * care of it while processing a previous record.
377     */
378     if (orig_cp->pgno != fpgno ||
379     orig_cp->indx != first ||
380     orig_cp->opd == NULL ||
381     ((BTREE_CURSOR *)orig_cp->opd->internal)->indx
382     != ti)
383     continue;
384     MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
385     if ((ret = __db_c_close(orig_cp->opd)) != 0)
386     return (ret);
387     orig_cp->opd = NULL;
388     orig_cp->indx = fi;
389     /*
390     * We released the mutex to free a cursor,
391     * start over.
392     */
393     goto loop;
394     }
395     MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
396     }
397     MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
398    
399     return (0);
400     }
401    
402     /*
403     * __bam_ca_rsplit --
404     * Adjust the cursors when doing reverse splits.
405     *
406     * PUBLIC: int __bam_ca_rsplit __P((DBC *, db_pgno_t, db_pgno_t));
407     */
408     int
409     __bam_ca_rsplit(my_dbc, fpgno, tpgno)
410     DBC* my_dbc;
411     db_pgno_t fpgno, tpgno;
412     {
413     DB *dbp, *ldbp;
414     DBC *dbc;
415     DB_ENV *dbenv;
416     DB_LSN lsn;
417     DB_TXN *my_txn;
418     int found, ret;
419    
420     dbp = my_dbc->dbp;
421     dbenv = dbp->dbenv;
422     my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;
423    
424     /*
425     * Adjust the cursors. See the comment in __bam_ca_delete().
426     */
427     found = 0;
428     MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
429     for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
430     ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
431     ldbp = LIST_NEXT(ldbp, dblistlinks)) {
432     MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
433     for (dbc = TAILQ_FIRST(&ldbp->active_queue);
434     dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
435     if (dbc->dbtype == DB_RECNO)
436     continue;
437     if (dbc->internal->pgno == fpgno) {
438     dbc->internal->pgno = tpgno;
439     /* [#8032]
440     DB_ASSERT(!STD_LOCKING(dbc) ||
441     dbc->internal->lock_mode != DB_LOCK_NG);
442     */
443     if (my_txn != NULL && dbc->txn != my_txn)
444     found = 1;
445     }
446     }
447     MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
448     }
449     MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
450    
451     if (found != 0 && DBC_LOGGING(my_dbc)) {
452     if ((ret = __bam_curadj_log(dbp, my_dbc->txn,
453     &lsn, 0, DB_CA_RSPLIT, fpgno, tpgno, 0, 0, 0, 0)) != 0)
454     return (ret);
455     }
456     return (0);
457     }
458    
459     /*
460     * __bam_ca_split --
461     * Adjust the cursors when splitting a page.
462     *
463     * PUBLIC: int __bam_ca_split __P((DBC *,
464     * PUBLIC: db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t, int));
465     */
466     int
467     __bam_ca_split(my_dbc, ppgno, lpgno, rpgno, split_indx, cleft)
468     DBC *my_dbc;
469     db_pgno_t ppgno, lpgno, rpgno;
470     u_int32_t split_indx;
471     int cleft;
472     {
473     DB *dbp, *ldbp;
474     DBC *dbc;
475     DBC_INTERNAL *cp;
476     DB_ENV *dbenv;
477     DB_LSN lsn;
478     DB_TXN *my_txn;
479     int found, ret;
480    
481     dbp = my_dbc->dbp;
482     dbenv = dbp->dbenv;
483     my_txn = IS_SUBTRANSACTION(my_dbc->txn) ? my_dbc->txn : NULL;
484    
485     /*
486     * Adjust the cursors. See the comment in __bam_ca_delete().
487     *
488     * If splitting the page that a cursor was on, the cursor has to be
489     * adjusted to point to the same record as before the split. Most
490     * of the time we don't adjust pointers to the left page, because
491     * we're going to copy its contents back over the original page. If
492     * the cursor is on the right page, it is decremented by the number of
493     * records split to the left page.
494     */
495     found = 0;
496     MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
497     for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
498     ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
499     ldbp = LIST_NEXT(ldbp, dblistlinks)) {
500     MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
501     for (dbc = TAILQ_FIRST(&ldbp->active_queue);
502     dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
503     if (dbc->dbtype == DB_RECNO)
504     continue;
505     cp = dbc->internal;
506     if (cp->pgno == ppgno) {
507     /* [#8032]
508     DB_ASSERT(!STD_LOCKING(dbc) ||
509     cp->lock_mode != DB_LOCK_NG);
510     */
511     if (my_txn != NULL && dbc->txn != my_txn)
512     found = 1;
513     if (cp->indx < split_indx) {
514     if (cleft)
515     cp->pgno = lpgno;
516     } else {
517     cp->pgno = rpgno;
518     cp->indx -= split_indx;
519     }
520     }
521     }
522     MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
523     }
524     MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
525    
526     if (found != 0 && DBC_LOGGING(my_dbc)) {
527     if ((ret = __bam_curadj_log(dbp,
528     my_dbc->txn, &lsn, 0, DB_CA_SPLIT, ppgno, rpgno,
529     cleft ? lpgno : PGNO_INVALID, 0, split_indx, 0)) != 0)
530     return (ret);
531     }
532    
533     return (0);
534     }
535    
536     /*
537     * __bam_ca_undosplit --
538     * Adjust the cursors when undoing a split of a page.
539     * If we grew a level we will execute this for both the
540     * left and the right pages.
541     * Called only during undo processing.
542     *
543     * PUBLIC: void __bam_ca_undosplit __P((DB *,
544     * PUBLIC: db_pgno_t, db_pgno_t, db_pgno_t, u_int32_t));
545     */
546     void
547     __bam_ca_undosplit(dbp, frompgno, topgno, lpgno, split_indx)
548     DB *dbp;
549     db_pgno_t frompgno, topgno, lpgno;
550     u_int32_t split_indx;
551     {
552     DB *ldbp;
553     DBC *dbc;
554     DB_ENV *dbenv;
555     DBC_INTERNAL *cp;
556    
557     dbenv = dbp->dbenv;
558    
559     /*
560     * Adjust the cursors. See the comment in __bam_ca_delete().
561     *
562     * When backing out a split, we move the cursor back
563     * to the original offset and bump it by the split_indx.
564     */
565     MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);
566     for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
567     ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
568     ldbp = LIST_NEXT(ldbp, dblistlinks)) {
569     MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
570     for (dbc = TAILQ_FIRST(&ldbp->active_queue);
571     dbc != NULL; dbc = TAILQ_NEXT(dbc, links)) {
572     if (dbc->dbtype == DB_RECNO)
573     continue;
574     cp = dbc->internal;
575     if (cp->pgno == topgno) {
576     cp->pgno = frompgno;
577     cp->indx += split_indx;
578     } else if (cp->pgno == lpgno)
579     cp->pgno = frompgno;
580     }
581     MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
582     }
583     MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);
584     }

webmaster AT resiprocate DOT org
ViewVC Help
Powered by ViewVC 1.1.27