/[resiprocate]/main/contrib/db/btree/bt_recno.c
ViewVC logotype

Annotation of /main/contrib/db/btree/bt_recno.c

Parent Directory | Revision Log


Revision 9632 - (hide annotations) (download)
Wed May 2 19:24:54 2012 UTC (7 years, 8 months ago) by sgodin
File MIME type: text/plain
File size: 36325 byte(s)
-update BerkeleyDb to 4.8 in contrib
-added MaxMind GeoIP library to contrib
-added MySQL client connector library to contrib
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 1997-2009 Oracle.  All rights reserved.
 *
 * $Id$
 */
8    
9     #include "db_config.h"
10    
11     #include "db_int.h"
12     #include "dbinc/db_page.h"
13     #include "dbinc/btree.h"
14     #include "dbinc/lock.h"
15 sgodin 9632 #include "dbinc/mp.h"
16 fluffy 4437
17     static int __ram_add __P((DBC *, db_recno_t *, DBT *, u_int32_t, u_int32_t));
18     static int __ram_source __P((DB *));
19     static int __ram_sread __P((DBC *, db_recno_t));
20     static int __ram_update __P((DBC *, db_recno_t, int));
21    
/*
 * In recno, there are two meanings to the on-page "deleted" flag.  If we're
 * re-numbering records, it means the record was implicitly created.  We skip
 * over implicitly created records if doing a cursor "next" or "prev", and
 * return DB_KEYEMPTY if they're explicitly requested.  If not re-numbering
 * records, it means that the record was implicitly created, or was deleted.
 * We skip over implicitly created or deleted records if doing a cursor "next"
 * or "prev", and return DB_KEYEMPTY if they're explicitly requested.
 *
 * If we're re-numbering records, then we have to detect in the cursor that
 * a record was deleted, and adjust the cursor as necessary on the next get.
 * If we're not re-numbering records, then we can detect that a record has
 * been deleted by looking at the actual on-page record, so we completely
 * ignore the cursor's delete flag.  This is different from the B+tree code.
 * It also maintains whether the cursor references a deleted record in the
 * cursor, and it doesn't always check the on-page value.
 *
 * CD_SET and CD_CLR are statements (use them with a trailing semicolon);
 * both are no-ops unless the cursor has C_RENUMBER set.  CD_ISSET is an
 * expression yielding exactly 0 or 1.
 */
#define	CD_SET(cp) do {							\
	if (F_ISSET(cp, C_RENUMBER))					\
		F_SET(cp, C_DELETED);					\
} while (0)
#define	CD_CLR(cp) do {							\
	if (F_ISSET(cp, C_RENUMBER)) {					\
		F_CLR(cp, C_DELETED);					\
		(cp)->order = INVALID_ORDER;				\
	}								\
} while (0)
#define	CD_ISSET(cp)							\
	(F_ISSET(cp, C_RENUMBER) && F_ISSET(cp, C_DELETED) ? 1 : 0)

/*
 * Macros for comparing the ordering of two cursors.
 * cp1 comes before cp2 iff one of the following holds:
 *	cp1's recno is less than cp2's recno
 *	recnos are equal, both deleted, and cp1's order is less than cp2's
 *	recnos are equal, cp1 deleted, and cp2 not deleted
 */
#define	C_LESSTHAN(cp1, cp2)						\
	(((cp1)->recno < (cp2)->recno) ||				\
	(((cp1)->recno == (cp2)->recno) &&				\
	((CD_ISSET((cp1)) && CD_ISSET((cp2)) &&				\
	(cp1)->order < (cp2)->order) ||					\
	(CD_ISSET((cp1)) && !CD_ISSET((cp2))))))

/*
 * cp1 is equal to cp2 iff their recnos and delete flags are identical,
 * and if the delete flag is set their orders are also identical.
 */
#define	C_EQUAL(cp1, cp2)						\
	((cp1)->recno == (cp2)->recno &&				\
	CD_ISSET((cp1)) == CD_ISSET((cp2)) &&				\
	(!CD_ISSET((cp1)) || (cp1)->order == (cp2)->order))

/*
 * Do we need to log the current cursor adjustment?  Only when the cursor is
 * logging and is running inside a child (nested) transaction.
 */
#define	CURADJ_LOG(dbc)							\
	(DBC_LOGGING((dbc)) && (dbc)->txn != NULL && (dbc)->txn->parent != NULL)

/*
 * After a search, copy the found page into the cursor, discarding any
 * currently held lock.
 *
 * NOTE: this macro references a variable named "dbc" from the enclosing
 * scope; it is not a macro parameter.  If releasing the old lock fails and
 * ret is still 0, ret is set to that failure code.  Use as a statement,
 * with a trailing semicolon.
 */
#define	STACK_TO_CURSOR(cp, ret) do {					\
	int __t_ret;							\
	(cp)->page = (cp)->csp->page;					\
	(cp)->pgno = (cp)->csp->page->pgno;				\
	(cp)->indx = (cp)->csp->indx;					\
	if ((__t_ret = __TLPUT(dbc, (cp)->lock)) != 0 && (ret) == 0)	\
		(ret) = __t_ret;					\
	(cp)->lock = (cp)->csp->lock;					\
	(cp)->lock_mode = (cp)->csp->lock_mode;				\
} while (0)
93    
94     /*
95     * __ram_open --
96     * Recno open function.
97     *
98 sgodin 9632 * PUBLIC: int __ram_open __P((DB *, DB_THREAD_INFO *,
99 fluffy 4437 * PUBLIC: DB_TXN *, const char *, db_pgno_t, u_int32_t));
100     */
101     int
102 sgodin 9632 __ram_open(dbp, ip, txn, name, base_pgno, flags)
103 fluffy 4437 DB *dbp;
104 sgodin 9632 DB_THREAD_INFO *ip;
105 fluffy 4437 DB_TXN *txn;
106     const char *name;
107     db_pgno_t base_pgno;
108     u_int32_t flags;
109     {
110     BTREE *t;
111     DBC *dbc;
112     int ret, t_ret;
113    
114     COMPQUIET(name, NULL);
115     t = dbp->bt_internal;
116    
117     /* Start up the tree. */
118 sgodin 9632 if ((ret = __bam_read_root(dbp, ip, txn, base_pgno, flags)) != 0)
119 fluffy 4437 return (ret);
120    
121     /*
122     * If the user specified a source tree, open it and map it in.
123     *
124     * !!!
125     * We don't complain if the user specified transactions or threads.
126     * It's possible to make it work, but you'd better know what you're
127     * doing!
128     */
129     if (t->re_source != NULL && (ret = __ram_source(dbp)) != 0)
130     return (ret);
131    
132     /* If we're snapshotting an underlying source file, do it now. */
133     if (F_ISSET(dbp, DB_AM_SNAPSHOT)) {
134     /* Allocate a cursor. */
135 sgodin 9632 if ((ret = __db_cursor(dbp, ip, NULL, &dbc, 0)) != 0)
136 fluffy 4437 return (ret);
137    
138     /* Do the snapshot. */
139     if ((ret = __ram_update(dbc,
140     DB_MAX_RECORDS, 0)) != 0 && ret == DB_NOTFOUND)
141     ret = 0;
142    
143     /* Discard the cursor. */
144 sgodin 9632 if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
145 fluffy 4437 ret = t_ret;
146     }
147    
148     return (ret);
149     }
150    
151     /*
152     * __ram_append --
153     * Recno append function.
154     *
155     * PUBLIC: int __ram_append __P((DBC *, DBT *, DBT *));
156     */
157     int
158     __ram_append(dbc, key, data)
159     DBC *dbc;
160     DBT *key, *data;
161     {
162     BTREE_CURSOR *cp;
163     int ret;
164    
165     cp = (BTREE_CURSOR *)dbc->internal;
166    
167     /*
168     * Make sure we've read in all of the backing source file. If
169     * we found the record or it simply didn't exist, add the
170     * user's record.
171     */
172     ret = __ram_update(dbc, DB_MAX_RECORDS, 0);
173     if (ret == 0 || ret == DB_NOTFOUND)
174     ret = __ram_add(dbc, &cp->recno, data, DB_APPEND, 0);
175    
176     /* Return the record number. */
177 sgodin 9632 if (ret == 0 && key != NULL)
178     ret = __db_retcopy(dbc->env, key, &cp->recno,
179 fluffy 4437 sizeof(cp->recno), &dbc->rkey->data, &dbc->rkey->ulen);
180    
181     return (ret);
182     }
183    
184     /*
185 sgodin 9632 * __ramc_del --
186     * Recno DBC->del function.
187 fluffy 4437 *
188 sgodin 9632 * PUBLIC: int __ramc_del __P((DBC *, u_int32_t));
189 fluffy 4437 */
190     int
191 sgodin 9632 __ramc_del(dbc, flags)
192 fluffy 4437 DBC *dbc;
193 sgodin 9632 u_int32_t flags;
194 fluffy 4437 {
195     BKEYDATA bk;
196     BTREE *t;
197     BTREE_CURSOR *cp;
198     DB *dbp;
199 sgodin 9632 DBT hdr, data;
200     DB_LOCK next_lock, prev_lock;
201 fluffy 4437 DB_LSN lsn;
202 sgodin 9632 db_pgno_t npgno, ppgno, save_npgno, save_ppgno;
203     int exact, nc, ret, stack, t_ret;
204 fluffy 4437
205     dbp = dbc->dbp;
206     cp = (BTREE_CURSOR *)dbc->internal;
207     t = dbp->bt_internal;
208     stack = 0;
209 sgodin 9632 save_npgno = save_ppgno = PGNO_INVALID;
210     LOCK_INIT(next_lock);
211     LOCK_INIT(prev_lock);
212     COMPQUIET(flags, 0);
213 fluffy 4437
214     /*
215     * The semantics of cursors during delete are as follows: in
216     * non-renumbering recnos, records are replaced with a marker
217     * containing a delete flag. If the record referenced by this cursor
218     * has already been deleted, we will detect that as part of the delete
219     * operation, and fail.
220     *
221     * In renumbering recnos, cursors which represent deleted items
222     * are flagged with the C_DELETED flag, and it is an error to
223     * call c_del a second time without an intervening cursor motion.
224     */
225     if (CD_ISSET(cp))
226     return (DB_KEYEMPTY);
227    
228     /* Search the tree for the key; delete only deletes exact matches. */
229 sgodin 9632 retry: if ((ret = __bam_rsearch(dbc, &cp->recno, SR_DELETE, 1, &exact)) != 0)
230 fluffy 4437 goto err;
231     if (!exact) {
232     ret = DB_NOTFOUND;
233     goto err;
234     }
235     stack = 1;
236    
237     /* Copy the page into the cursor. */
238     STACK_TO_CURSOR(cp, ret);
239     if (ret != 0)
240     goto err;
241    
242     /*
243     * If re-numbering records, the on-page deleted flag can only mean
244     * that this record was implicitly created. Applications aren't
245     * permitted to delete records they never created, return an error.
246     *
247     * If not re-numbering records, the on-page deleted flag means that
248     * this record was implicitly created, or, was deleted at some time.
249     * The former is an error because applications aren't permitted to
250     * delete records they never created, the latter is an error because
251     * if the record was "deleted", we could never have found it.
252     */
253     if (B_DISSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type)) {
254     ret = DB_KEYEMPTY;
255     goto err;
256     }
257    
258     if (F_ISSET(cp, C_RENUMBER)) {
259 sgodin 9632 /* If we are going to drop the page, lock its neighbors. */
260     if (STD_LOCKING(dbc) &&
261     NUM_ENT(cp->page) == 1 && PGNO(cp->page) != cp->root) {
262     if ((npgno = NEXT_PGNO(cp->page)) != PGNO_INVALID)
263     TRY_LOCK(dbc, npgno, save_npgno,
264     next_lock, DB_LOCK_WRITE, retry);
265     if (ret != 0)
266     goto err;
267     if ((ppgno = PREV_PGNO(cp->page)) != PGNO_INVALID)
268     TRY_LOCK(dbc, ppgno, save_ppgno,
269     prev_lock, DB_LOCK_WRITE, retry);
270     if (ret != 0)
271     goto err;
272     }
273 fluffy 4437 /* Delete the item, adjust the counts, adjust the cursors. */
274     if ((ret = __bam_ditem(dbc, cp->page, cp->indx)) != 0)
275     goto err;
276     if ((ret = __bam_adjust(dbc, -1)) != 0)
277     goto err;
278 sgodin 9632 if ((ret = __ram_ca(dbc, CA_DELETE, &nc)) != 0)
279     goto err;
280     if (nc > 0 &&
281 fluffy 4437 CURADJ_LOG(dbc) && (ret = __bam_rcuradj_log(dbp, dbc->txn,
282     &lsn, 0, CA_DELETE, cp->root, cp->recno, cp->order)) != 0)
283     goto err;
284    
285     /*
286     * If the page is empty, delete it.
287     *
288     * We never delete a root page. First, root pages of primary
289     * databases never go away, recno or otherwise. However, if
290     * it's the root page of an off-page duplicates database, then
291     * it can be deleted. We don't delete it here because we have
292     * no way of telling the primary database page holder (e.g.,
293     * the hash access method) that its page element should cleaned
294     * up because the underlying tree is gone. So, we keep the page
295     * around until the last cursor referencing the empty tree is
296     * are closed, and then clean it up.
297     */
298     if (NUM_ENT(cp->page) == 0 && PGNO(cp->page) != cp->root) {
299     /*
300     * We want to delete a single item out of the last page
301     * that we're not deleting.
302     */
303 sgodin 9632 ret = __bam_dpages(dbc, 0, BTD_RELINK);
304 fluffy 4437
305     /*
306     * Regardless of the return from __bam_dpages, it will
307     * discard our stack and pinned page.
308     */
309     stack = 0;
310     cp->page = NULL;
311 sgodin 9632 LOCK_INIT(cp->lock);
312     cp->lock_mode = DB_LOCK_NG;
313 fluffy 4437 }
314     } else {
315     /* Use a delete/put pair to replace the record with a marker. */
316     if ((ret = __bam_ditem(dbc, cp->page, cp->indx)) != 0)
317     goto err;
318    
319 sgodin 9632 B_TSET_DELETED(bk.type, B_KEYDATA);
320 fluffy 4437 bk.len = 0;
321 sgodin 9632 DB_INIT_DBT(hdr, &bk, SSZA(BKEYDATA, data));
322     DB_INIT_DBT(data, "", 0);
323 fluffy 4437 if ((ret = __db_pitem(dbc,
324     cp->page, cp->indx, BKEYDATA_SIZE(0), &hdr, &data)) != 0)
325     goto err;
326     }
327    
328     t->re_modified = 1;
329    
330     err: if (stack && (t_ret = __bam_stkrel(dbc, STK_CLRDBC)) != 0 && ret == 0)
331     ret = t_ret;
332 sgodin 9632 if ((t_ret = __TLPUT(dbc, next_lock)) != 0 && ret == 0)
333     ret = t_ret;
334     if ((t_ret = __TLPUT(dbc, prev_lock)) != 0 && ret == 0)
335     ret = t_ret;
336 fluffy 4437
337     return (ret);
338     }
339    
340     /*
341 sgodin 9632 * __ramc_get --
342     * Recno DBC->get function.
343 fluffy 4437 *
344 sgodin 9632 * PUBLIC: int __ramc_get
345 fluffy 4437 * PUBLIC: __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
346     */
347     int
348 sgodin 9632 __ramc_get(dbc, key, data, flags, pgnop)
349 fluffy 4437 DBC *dbc;
350     DBT *key, *data;
351     u_int32_t flags;
352     db_pgno_t *pgnop;
353     {
354     BTREE_CURSOR *cp;
355     DB *dbp;
356     int cmp, exact, ret;
357    
358     COMPQUIET(pgnop, NULL);
359    
360     dbp = dbc->dbp;
361     cp = (BTREE_CURSOR *)dbc->internal;
362    
363     LF_CLR(DB_MULTIPLE|DB_MULTIPLE_KEY);
364     retry: switch (flags) {
365     case DB_CURRENT:
366     /*
367     * If we're using mutable records and the deleted flag is
368     * set, the cursor is pointing at a nonexistent record;
369     * return an error.
370     */
371     if (CD_ISSET(cp))
372     return (DB_KEYEMPTY);
373     break;
374     case DB_NEXT_DUP:
375     /*
376     * If we're not in an off-page dup set, we know there's no
377     * next duplicate since recnos don't have them. If we
378     * are in an off-page dup set, the next item assuredly is
379     * a dup, so we set flags to DB_NEXT and keep going.
380     */
381     if (!F_ISSET(dbc, DBC_OPD))
382     return (DB_NOTFOUND);
383     /* FALLTHROUGH */
384     case DB_NEXT_NODUP:
385     /*
386     * Recno databases don't have duplicates, set flags to DB_NEXT
387     * and keep going.
388     */
389     /* FALLTHROUGH */
390     case DB_NEXT:
391     flags = DB_NEXT;
392     /*
393     * If record numbers are mutable: if we just deleted a record,
394     * we have to avoid incrementing the record number so that we
395     * return the right record by virtue of renumbering the tree.
396     */
397     if (CD_ISSET(cp)) {
398     /*
399     * Clear the flag, we've moved off the deleted record.
400     */
401     CD_CLR(cp);
402     break;
403     }
404    
405     if (cp->recno != RECNO_OOB) {
406     ++cp->recno;
407     break;
408     }
409     /* FALLTHROUGH */
410     case DB_FIRST:
411     flags = DB_NEXT;
412     cp->recno = 1;
413     break;
414 sgodin 9632 case DB_PREV_DUP:
415     /*
416     * If we're not in an off-page dup set, we know there's no
417     * previous duplicate since recnos don't have them. If we
418     * are in an off-page dup set, the previous item assuredly
419     * is a dup, so we set flags to DB_PREV and keep going.
420     */
421     if (!F_ISSET(dbc, DBC_OPD))
422     return (DB_NOTFOUND);
423     /* FALLTHROUGH */
424 fluffy 4437 case DB_PREV_NODUP:
425     /*
426     * Recno databases don't have duplicates, set flags to DB_PREV
427     * and keep going.
428     */
429     /* FALLTHROUGH */
430     case DB_PREV:
431     flags = DB_PREV;
432     if (cp->recno != RECNO_OOB) {
433     if (cp->recno == 1) {
434     ret = DB_NOTFOUND;
435     goto err;
436     }
437     --cp->recno;
438     break;
439     }
440     /* FALLTHROUGH */
441     case DB_LAST:
442     flags = DB_PREV;
443     if (((ret = __ram_update(dbc,
444     DB_MAX_RECORDS, 0)) != 0) && ret != DB_NOTFOUND)
445     goto err;
446     if ((ret = __bam_nrecs(dbc, &cp->recno)) != 0)
447     goto err;
448     if (cp->recno == 0) {
449     ret = DB_NOTFOUND;
450     goto err;
451     }
452     break;
453     case DB_GET_BOTHC:
454     /*
455     * If we're doing a join and these are offpage dups,
456     * we want to keep searching forward from after the
457     * current cursor position. Increment the recno by 1,
458     * then proceed as for a DB_SET.
459     *
460     * Otherwise, we know there are no additional matching
461     * data, as recnos don't have dups. return DB_NOTFOUND.
462     */
463     if (F_ISSET(dbc, DBC_OPD)) {
464     cp->recno++;
465     break;
466     }
467     ret = DB_NOTFOUND;
468     goto err;
469     /* NOTREACHED */
470     case DB_GET_BOTH:
471     case DB_GET_BOTH_RANGE:
472     /*
473     * If we're searching a set of off-page dups, we start
474     * a new linear search from the first record. Otherwise,
475     * we compare the single data item associated with the
476     * requested record for a match.
477     */
478     if (F_ISSET(dbc, DBC_OPD)) {
479     cp->recno = 1;
480     break;
481     }
482     /* FALLTHROUGH */
483     case DB_SET:
484     case DB_SET_RANGE:
485     if ((ret = __ram_getno(dbc, key, &cp->recno, 0)) != 0)
486     goto err;
487     break;
488     default:
489 sgodin 9632 ret = __db_unknown_flag(dbp->env, "__ramc_get", flags);
490 fluffy 4437 goto err;
491     }
492    
493     /*
494     * For DB_PREV, DB_LAST, DB_SET and DB_SET_RANGE, we have already
495     * called __ram_update() to make sure sufficient records have been
496     * read from the backing source file. Do it now for DB_CURRENT (if
497     * the current record was deleted we may need more records from the
498     * backing file for a DB_CURRENT operation), DB_FIRST and DB_NEXT.
499     * (We don't have to test for flags == DB_FIRST, because the switch
500     * statement above re-set flags to DB_NEXT in that case.)
501     */
502     if ((flags == DB_NEXT || flags == DB_CURRENT) && ((ret =
503     __ram_update(dbc, cp->recno, 0)) != 0) && ret != DB_NOTFOUND)
504     goto err;
505    
506     for (;; ++cp->recno) {
507     /* Search the tree for the record. */
508     if ((ret = __bam_rsearch(dbc, &cp->recno,
509 sgodin 9632 F_ISSET(dbc, DBC_RMW) ? SR_FIND_WR : SR_FIND,
510 fluffy 4437 1, &exact)) != 0)
511     goto err;
512     if (!exact) {
513     ret = DB_NOTFOUND;
514     goto err;
515     }
516    
517     /* Copy the page into the cursor. */
518     STACK_TO_CURSOR(cp, ret);
519     if (ret != 0)
520     goto err;
521    
522     /*
523     * If re-numbering records, the on-page deleted flag means this
524     * record was implicitly created. If not re-numbering records,
525     * the on-page deleted flag means this record was implicitly
526     * created, or, it was deleted at some time. Regardless, we
527     * skip such records if doing cursor next/prev operations or
528     * walking through off-page duplicates, and fail if they were
529     * requested explicitly by the application.
530     */
531     if (B_DISSET(GET_BKEYDATA(dbp, cp->page, cp->indx)->type))
532     switch (flags) {
533     case DB_NEXT:
534     case DB_PREV:
535     (void)__bam_stkrel(dbc, STK_CLRDBC);
536     goto retry;
537     case DB_GET_BOTH:
538     case DB_GET_BOTH_RANGE:
539     /*
540     * If we're an OPD tree, we don't care about
541     * matching a record number on a DB_GET_BOTH
542     * -- everything belongs to the same tree. A
543     * normal recno should give up and return
544     * DB_NOTFOUND if the matching recno is deleted.
545     */
546     if (F_ISSET(dbc, DBC_OPD)) {
547     (void)__bam_stkrel(dbc, STK_CLRDBC);
548     continue;
549     }
550     ret = DB_NOTFOUND;
551     goto err;
552     default:
553     ret = DB_KEYEMPTY;
554     goto err;
555     }
556    
557     if (flags == DB_GET_BOTH ||
558     flags == DB_GET_BOTHC || flags == DB_GET_BOTH_RANGE) {
559 sgodin 9632 if ((ret = __bam_cmp(dbc, data, cp->page, cp->indx,
560     __bam_defcmp, &cmp)) != 0)
561 fluffy 4437 return (ret);
562     if (cmp == 0)
563     break;
564     if (!F_ISSET(dbc, DBC_OPD)) {
565     ret = DB_NOTFOUND;
566     goto err;
567     }
568     (void)__bam_stkrel(dbc, STK_CLRDBC);
569     } else
570     break;
571     }
572    
573     /* Return the key if the user didn't give us one. */
574 sgodin 9632 if (!F_ISSET(dbc, DBC_OPD) && !F_ISSET(key, DB_DBT_ISSET)) {
575     ret = __db_retcopy(dbp->env,
576     key, &cp->recno, sizeof(cp->recno),
577     &dbc->rkey->data, &dbc->rkey->ulen);
578 fluffy 4437 F_SET(key, DB_DBT_ISSET);
579     }
580    
581     /* The cursor was reset, no further delete adjustment is necessary. */
582     err: CD_CLR(cp);
583    
584     return (ret);
585     }
586    
587     /*
588 sgodin 9632 * __ramc_put --
589     * Recno DBC->put function.
590 fluffy 4437 *
591 sgodin 9632 * PUBLIC: int __ramc_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *));
592 fluffy 4437 */
593     int
594 sgodin 9632 __ramc_put(dbc, key, data, flags, pgnop)
595 fluffy 4437 DBC *dbc;
596     DBT *key, *data;
597     u_int32_t flags;
598     db_pgno_t *pgnop;
599     {
600     BTREE_CURSOR *cp;
601     DB *dbp;
602     DB_LSN lsn;
603 sgodin 9632 ENV *env;
604     u_int32_t iiflags;
605 fluffy 4437 int exact, nc, ret, t_ret;
606     void *arg;
607    
608     COMPQUIET(pgnop, NULL);
609    
610     dbp = dbc->dbp;
611 sgodin 9632 env = dbp->env;
612 fluffy 4437 cp = (BTREE_CURSOR *)dbc->internal;
613    
614     /*
615     * DB_KEYFIRST and DB_KEYLAST mean different things if they're
616     * used in an off-page duplicate tree. If we're an off-page
617     * duplicate tree, they really mean "put at the beginning of the
618     * tree" and "put at the end of the tree" respectively, so translate
619     * them to something else.
620     */
621     if (F_ISSET(dbc, DBC_OPD))
622     switch (flags) {
623     case DB_KEYFIRST:
624     cp->recno = 1;
625     flags = DB_BEFORE;
626     break;
627     case DB_KEYLAST:
628     if ((ret = __ram_add(dbc,
629     &cp->recno, data, DB_APPEND, 0)) != 0)
630     return (ret);
631     if (CURADJ_LOG(dbc) &&
632     (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0,
633     CA_ICURRENT, cp->root, cp->recno, cp->order)) != 0)
634     return (ret);
635     return (0);
636     default:
637     break;
638     }
639    
640     /*
641     * Handle normal DB_KEYFIRST/DB_KEYLAST; for a recno, which has
642     * no duplicates, these are identical and mean "put the given
643     * datum at the given recno".
644     */
645 sgodin 9632 if (flags == DB_KEYFIRST || flags == DB_KEYLAST ||
646     flags == DB_NOOVERWRITE || flags == DB_OVERWRITE_DUP) {
647 fluffy 4437 ret = __ram_getno(dbc, key, &cp->recno, 1);
648     if (ret == 0 || ret == DB_NOTFOUND)
649 sgodin 9632 ret = __ram_add(dbc, &cp->recno, data, flags, 0);
650 fluffy 4437 return (ret);
651     }
652    
653     /*
654     * If we're putting with a cursor that's marked C_DELETED, we need to
655     * take special care; the cursor doesn't "really" reference the item
656     * corresponding to its current recno, but instead is "between" that
657     * record and the current one. Translate the actual insert into
658     * DB_BEFORE, and let the __ram_ca work out the gory details of what
659     * should wind up pointing where.
660     */
661     if (CD_ISSET(cp))
662     iiflags = DB_BEFORE;
663     else
664     iiflags = flags;
665    
666 sgodin 9632 split: if ((ret = __bam_rsearch(dbc, &cp->recno, SR_INSERT, 1, &exact)) != 0)
667 fluffy 4437 goto err;
668     /*
669     * An inexact match is okay; it just means we're one record past the
670     * end, which is reasonable if we're marked deleted.
671     */
672 sgodin 9632 DB_ASSERT(env, exact || CD_ISSET(cp));
673 fluffy 4437
674     /* Copy the page into the cursor. */
675     STACK_TO_CURSOR(cp, ret);
676     if (ret != 0)
677     goto err;
678    
679     ret = __bam_iitem(dbc, key, data, iiflags, 0);
680     t_ret = __bam_stkrel(dbc, STK_CLRDBC);
681    
682     if (t_ret != 0 && (ret == 0 || ret == DB_NEEDSPLIT))
683     ret = t_ret;
684     else if (ret == DB_NEEDSPLIT) {
685     arg = &cp->recno;
686     if ((ret = __bam_split(dbc, arg, NULL)) != 0)
687     goto err;
688     goto split;
689     }
690     if (ret != 0)
691     goto err;
692    
693     switch (flags) { /* Adjust the cursors. */
694     case DB_AFTER:
695 sgodin 9632 if ((ret = __ram_ca(dbc, CA_IAFTER, &nc)) != 0)
696     goto err;
697 fluffy 4437
698     /*
699     * We only need to adjust this cursor forward if we truly added
700     * the item after the current recno, rather than remapping it
701     * to DB_BEFORE.
702     */
703     if (iiflags == DB_AFTER)
704     ++cp->recno;
705    
706     /* Only log if __ram_ca found any relevant cursors. */
707     if (nc > 0 && CURADJ_LOG(dbc) &&
708     (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0, CA_IAFTER,
709     cp->root, cp->recno, cp->order)) != 0)
710     goto err;
711     break;
712     case DB_BEFORE:
713 sgodin 9632 if ((ret = __ram_ca(dbc, CA_IBEFORE, &nc)) != 0)
714     goto err;
715 fluffy 4437 --cp->recno;
716    
717     /* Only log if __ram_ca found any relevant cursors. */
718     if (nc > 0 && CURADJ_LOG(dbc) &&
719     (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0, CA_IBEFORE,
720     cp->root, cp->recno, cp->order)) != 0)
721     goto err;
722     break;
723     case DB_CURRENT:
724     /*
725     * We only need to do an adjustment if we actually
726     * added an item, which we only would have done if the
727     * cursor was marked deleted.
728     */
729 sgodin 9632 if (!CD_ISSET(cp))
730     break;
731    
732     /* Only log if __ram_ca found any relevant cursors. */
733     if ((ret = __ram_ca(dbc, CA_ICURRENT, &nc)) != 0)
734     goto err;
735     if (nc > 0 && CURADJ_LOG(dbc) &&
736 fluffy 4437 (ret = __bam_rcuradj_log(dbp, dbc->txn, &lsn, 0,
737     CA_ICURRENT, cp->root, cp->recno, cp->order)) != 0)
738     goto err;
739     break;
740     default:
741     break;
742     }
743    
744     /* Return the key if we've created a new record. */
745 sgodin 9632 if (!F_ISSET(dbc, DBC_OPD) &&
746     (flags == DB_AFTER || flags == DB_BEFORE) && key != NULL)
747     ret = __db_retcopy(env, key, &cp->recno,
748 fluffy 4437 sizeof(cp->recno), &dbc->rkey->data, &dbc->rkey->ulen);
749    
750     /* The cursor was reset, no further delete adjustment is necessary. */
751     err: CD_CLR(cp);
752    
753     return (ret);
754     }
755    
756     /*
757     * __ram_ca --
758     * Adjust cursors. Returns the number of relevant cursors.
759     *
760 sgodin 9632 * PUBLIC: int __ram_ca __P((DBC *, ca_recno_arg, int *));
761 fluffy 4437 */
762     int
763 sgodin 9632 __ram_ca(dbc_arg, op, foundp)
764 fluffy 4437 DBC *dbc_arg;
765     ca_recno_arg op;
766 sgodin 9632 int *foundp;
767 fluffy 4437 {
768     BTREE_CURSOR *cp, *cp_arg;
769     DB *dbp, *ldbp;
770     DBC *dbc;
771 sgodin 9632 ENV *env;
772 fluffy 4437 db_recno_t recno;
773 sgodin 9632 u_int32_t order;
774 fluffy 4437 int adjusted, found;
775    
776     dbp = dbc_arg->dbp;
777 sgodin 9632 env = dbp->env;
778 fluffy 4437 cp_arg = (BTREE_CURSOR *)dbc_arg->internal;
779     recno = cp_arg->recno;
780    
781     /*
782     * It only makes sense to adjust cursors if we're a renumbering
783     * recno; we should only be called if this is one.
784     */
785 sgodin 9632 DB_ASSERT(env, F_ISSET(cp_arg, C_RENUMBER));
786 fluffy 4437
787 sgodin 9632 MUTEX_LOCK(env, env->mtx_dblist);
788 fluffy 4437 /*
789     * Adjust the cursors. See the comment in __bam_ca_delete().
790 sgodin 9632 *
791 fluffy 4437 * If we're doing a delete, we need to find the highest
792     * order of any cursor currently pointing at this item,
793     * so we can assign a higher order to the newly deleted
794     * cursor. Unfortunately, this requires a second pass through
795     * the cursor list.
796     */
797     if (op == CA_DELETE) {
798 sgodin 9632 FIND_FIRST_DB_MATCH(env, dbp, ldbp);
799     for (order = 1;
800 fluffy 4437 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
801 sgodin 9632 ldbp = TAILQ_NEXT(ldbp, dblistlinks)) {
802     MUTEX_LOCK(env, dbp->mutex);
803     TAILQ_FOREACH(dbc, &ldbp->active_queue, links) {
804 fluffy 4437 cp = (BTREE_CURSOR *)dbc->internal;
805     if (cp_arg->root == cp->root &&
806     recno == cp->recno && CD_ISSET(cp) &&
807 sgodin 9632 order <= cp->order &&
808     !MVCC_SKIP_CURADJ(dbc, cp->root))
809 fluffy 4437 order = cp->order + 1;
810     }
811 sgodin 9632 MUTEX_UNLOCK(env, dbp->mutex);
812 fluffy 4437 }
813     } else
814     order = INVALID_ORDER;
815    
816     /* Now go through and do the actual adjustments. */
817 sgodin 9632 FIND_FIRST_DB_MATCH(env, dbp, ldbp);
818     for (found = 0;
819 fluffy 4437 ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
820 sgodin 9632 ldbp = TAILQ_NEXT(ldbp, dblistlinks)) {
821     MUTEX_LOCK(env, dbp->mutex);
822     TAILQ_FOREACH(dbc, &ldbp->active_queue, links) {
823 fluffy 4437 cp = (BTREE_CURSOR *)dbc->internal;
824 sgodin 9632 if (cp_arg->root != cp->root ||
825     MVCC_SKIP_CURADJ(dbc, cp->root))
826 fluffy 4437 continue;
827     ++found;
828     adjusted = 0;
829     switch (op) {
830     case CA_DELETE:
831     if (recno < cp->recno) {
832     --cp->recno;
833     /*
834     * If the adjustment made them equal,
835     * we have to merge the orders.
836     */
837     if (recno == cp->recno && CD_ISSET(cp))
838     cp->order += order;
839     } else if (recno == cp->recno &&
840     !CD_ISSET(cp)) {
841     CD_SET(cp);
842     cp->order = order;
843 sgodin 9632 /*
844     * If we're deleting the item, we can't
845     * keep a streaming offset cached.
846     */
847     cp->stream_start_pgno = PGNO_INVALID;
848 fluffy 4437 }
849     break;
850     case CA_IBEFORE:
851     /*
852     * IBEFORE is just like IAFTER, except that we
853     * adjust cursors on the current record too.
854     */
855     if (C_EQUAL(cp_arg, cp)) {
856     ++cp->recno;
857     adjusted = 1;
858     }
859     goto iafter;
860     case CA_ICURRENT:
861    
862     /*
863     * If the original cursor wasn't deleted, we
864     * just did a replacement and so there's no
865     * need to adjust anything--we shouldn't have
866     * gotten this far. Otherwise, we behave
867     * much like an IAFTER, except that all
868     * cursors pointing to the current item get
869     * marked undeleted and point to the new
870     * item.
871     */
872 sgodin 9632 DB_ASSERT(env, CD_ISSET(cp_arg));
873 fluffy 4437 if (C_EQUAL(cp_arg, cp)) {
874     CD_CLR(cp);
875     break;
876     }
877     /* FALLTHROUGH */
878     case CA_IAFTER:
879     iafter: if (!adjusted && C_LESSTHAN(cp_arg, cp)) {
880     ++cp->recno;
881     adjusted = 1;
882     }
883     if (recno == cp->recno && adjusted)
884     /*
885     * If we've moved this cursor's recno,
886     * split its order number--i.e.,
887     * decrement it by enough so that
888     * the lowest cursor moved has order 1.
889     * cp_arg->order is the split point,
890     * so decrement by one less than that.
891     */
892     cp->order -= (cp_arg->order - 1);
893     break;
894     }
895     }
896 sgodin 9632 MUTEX_UNLOCK(dbp->env, dbp->mutex);
897 fluffy 4437 }
898 sgodin 9632 MUTEX_UNLOCK(env, env->mtx_dblist);
899 fluffy 4437
900 sgodin 9632 if (foundp != NULL)
901     *foundp = found;
902     return (0);
903 fluffy 4437 }
904    
905     /*
906     * __ram_getno --
907     * Check the user's record number, and make sure we've seen it.
908     *
909     * PUBLIC: int __ram_getno __P((DBC *, const DBT *, db_recno_t *, int));
910     */
911     int
912     __ram_getno(dbc, key, rep, can_create)
913     DBC *dbc;
914     const DBT *key;
915     db_recno_t *rep;
916     int can_create;
917     {
918     DB *dbp;
919     db_recno_t recno;
920    
921     dbp = dbc->dbp;
922    
923 sgodin 9632 /* If passed an empty DBT from Java, key->data may be NULL */
924     if (key->size != sizeof(db_recno_t)) {
925     __db_errx(dbp->env, "illegal record number size");
926     return (EINVAL);
927     }
928    
929 fluffy 4437 /* Check the user's record number. */
930     if ((recno = *(db_recno_t *)key->data) == 0) {
931 sgodin 9632 __db_errx(dbp->env, "illegal record number of 0");
932 fluffy 4437 return (EINVAL);
933     }
934     if (rep != NULL)
935     *rep = recno;
936    
937     /*
938     * Btree can neither create records nor read them in. Recno can
939     * do both, see if we can find the record.
940     */
941     return (dbc->dbtype == DB_RECNO ?
942     __ram_update(dbc, recno, can_create) : 0);
943     }
944    
945     /*
946     * __ram_update --
947     * Ensure the tree has records up to and including the specified one.
948     */
949     static int
950     __ram_update(dbc, recno, can_create)
951     DBC *dbc;
952     db_recno_t recno;
953     int can_create;
954     {
955     BTREE *t;
956     DB *dbp;
957     DBT *rdata;
958     db_recno_t nrecs;
959     int ret;
960    
961     dbp = dbc->dbp;
962     t = dbp->bt_internal;
963    
964     /*
965     * If we can't create records and we've read the entire backing input
966     * file, we're done.
967     */
968     if (!can_create && t->re_eof)
969     return (0);
970    
971     /*
972     * If we haven't seen this record yet, try to get it from the original
973     * file.
974     */
975     if ((ret = __bam_nrecs(dbc, &nrecs)) != 0)
976     return (ret);
977     if (!t->re_eof && recno > nrecs) {
978     if ((ret = __ram_sread(dbc, recno)) != 0 && ret != DB_NOTFOUND)
979     return (ret);
980     if ((ret = __bam_nrecs(dbc, &nrecs)) != 0)
981     return (ret);
982     }
983    
984     /*
985     * If we can create records, create empty ones up to the requested
986     * record.
987     */
988     if (!can_create || recno <= nrecs + 1)
989     return (0);
990    
991     rdata = &dbc->my_rdata;
992     rdata->flags = 0;
993     rdata->size = 0;
994    
995     while (recno > ++nrecs)
996     if ((ret = __ram_add(dbc,
997     &nrecs, rdata, 0, BI_DELETED)) != 0)
998     return (ret);
999     return (0);
1000     }
1001    
1002     /*
1003     * __ram_source --
1004     * Load information about the backing file.
1005     */
1006     static int
1007     __ram_source(dbp)
1008     DB *dbp;
1009     {
1010     BTREE *t;
1011 sgodin 9632 ENV *env;
1012 fluffy 4437 char *source;
1013     int ret;
1014    
1015 sgodin 9632 env = dbp->env;
1016 fluffy 4437 t = dbp->bt_internal;
1017    
1018     /* Find the real name, and swap out the one we had before. */
1019 sgodin 9632 if ((ret = __db_appname(env,
1020     DB_APP_DATA, t->re_source, NULL, &source)) != 0)
1021 fluffy 4437 return (ret);
1022 sgodin 9632 __os_free(env, t->re_source);
1023 fluffy 4437 t->re_source = source;
1024    
1025     /*
1026     * !!!
1027     * It's possible that the backing source file is read-only. We don't
1028     * much care other than we'll complain if there are any modifications
1029     * when it comes time to write the database back to the source.
1030     */
1031 sgodin 9632 if ((t->re_fp = fopen(t->re_source, "rb")) == NULL) {
1032 fluffy 4437 ret = __os_get_errno();
1033 sgodin 9632 __db_err(env, ret, "%s", t->re_source);
1034 fluffy 4437 return (ret);
1035     }
1036    
1037     t->re_eof = 0;
1038     return (0);
1039     }
1040    
1041     /*
1042     * __ram_writeback --
1043     * Rewrite the backing file.
1044     *
1045     * PUBLIC: int __ram_writeback __P((DB *));
1046     */
1047     int
1048     __ram_writeback(dbp)
1049     DB *dbp;
1050     {
1051     BTREE *t;
1052     DBC *dbc;
1053     DBT key, data;
1054 sgodin 9632 DB_THREAD_INFO *ip;
1055     ENV *env;
1056 fluffy 4437 FILE *fp;
1057     db_recno_t keyno;
1058     int ret, t_ret;
1059     u_int8_t delim, *pad;
1060    
1061     t = dbp->bt_internal;
1062 sgodin 9632 env = dbp->env;
1063 fluffy 4437 fp = NULL;
1064     pad = NULL;
1065    
1066     /* If the file wasn't modified, we're done. */
1067     if (!t->re_modified)
1068     return (0);
1069    
1070     /* If there's no backing source file, we're done. */
1071     if (t->re_source == NULL) {
1072     t->re_modified = 0;
1073     return (0);
1074     }
1075    
1076 sgodin 9632 /*
1077     * We step through the records, writing each one out. Use the record
1078     * number and the dbp->get() function, instead of a cursor, so we find
1079     * and write out "deleted" or non-existent records. The DB handle may
1080     * be threaded, so allocate memory as we go.
1081     */
1082     memset(&key, 0, sizeof(key));
1083     key.size = sizeof(db_recno_t);
1084     key.data = &keyno;
1085     memset(&data, 0, sizeof(data));
1086     F_SET(&data, DB_DBT_REALLOC);
1087    
1088 fluffy 4437 /* Allocate a cursor. */
1089 sgodin 9632 ENV_GET_THREAD_INFO(env, ip);
1090     if ((ret = __db_cursor(dbp, ip, NULL, &dbc, 0)) != 0)
1091 fluffy 4437 return (ret);
1092    
1093     /*
1094     * Read any remaining records into the tree.
1095     *
1096     * !!!
1097     * This is why we can't support transactions when applications specify
1098     * backing (re_source) files. At this point we have to read in the
1099     * rest of the records from the file so that we can write all of the
1100     * records back out again, which could modify a page for which we'd
1101     * have to log changes and which we don't have locked. This could be
1102     * partially fixed by taking a snapshot of the entire file during the
1103     * DB->open as DB->open is transaction protected. But, if a checkpoint
1104     * occurs then, the part of the log holding the copy of the file could
1105     * be discarded, and that would make it impossible to recover in the
1106     * face of disaster. This could all probably be fixed, but it would
1107     * require transaction protecting the backing source file.
1108     *
1109     * XXX
1110     * This could be made to work now that we have transactions protecting
1111     * file operations. Margo has specifically asked for the privilege of
1112     * doing this work.
1113     */
1114     if ((ret =
1115     __ram_update(dbc, DB_MAX_RECORDS, 0)) != 0 && ret != DB_NOTFOUND)
1116 sgodin 9632 goto err;
1117 fluffy 4437
1118     /*
1119     * Close any existing file handle and re-open the file, truncating it.
1120     */
1121     if (t->re_fp != NULL) {
1122     if (fclose(t->re_fp) != 0) {
1123     ret = __os_get_errno();
1124 sgodin 9632 __db_err(env, ret, "%s", t->re_source);
1125 fluffy 4437 goto err;
1126     }
1127     t->re_fp = NULL;
1128     }
1129 sgodin 9632 if ((fp = fopen(t->re_source, "wb")) == NULL) {
1130 fluffy 4437 ret = __os_get_errno();
1131 sgodin 9632 __db_err(env, ret, "%s", t->re_source);
1132 fluffy 4437 goto err;
1133     }
1134    
1135     /*
1136     * We'll need the delimiter if we're doing variable-length records,
1137     * and the pad character if we're doing fixed-length records.
1138     */
1139     delim = t->re_delim;
1140     for (keyno = 1;; ++keyno) {
1141 sgodin 9632 switch (ret = __db_get(dbp, ip, NULL, &key, &data, 0)) {
1142 fluffy 4437 case 0:
1143     if (data.size != 0 &&
1144     fwrite(data.data, 1, data.size, fp) != data.size)
1145     goto write_err;
1146     break;
1147     case DB_KEYEMPTY:
1148     if (F_ISSET(dbp, DB_AM_FIXEDLEN)) {
1149     if (pad == NULL) {
1150     if ((ret = __os_malloc(
1151 sgodin 9632 env, t->re_len, &pad)) != 0)
1152 fluffy 4437 goto err;
1153     memset(pad, t->re_pad, t->re_len);
1154     }
1155     if (fwrite(pad, 1, t->re_len, fp) != t->re_len)
1156     goto write_err;
1157     }
1158     break;
1159     case DB_NOTFOUND:
1160     ret = 0;
1161     goto done;
1162     default:
1163     goto err;
1164     }
1165     if (!F_ISSET(dbp, DB_AM_FIXEDLEN) &&
1166     fwrite(&delim, 1, 1, fp) != 1) {
1167     write_err: ret = __os_get_errno();
1168 sgodin 9632 __db_err(env, ret,
1169     "%s: write failed to backing file", t->re_source);
1170 fluffy 4437 goto err;
1171     }
1172     }
1173    
1174     err:
1175     done: /* Close the file descriptor. */
1176     if (fp != NULL && fclose(fp) != 0) {
1177     t_ret = __os_get_errno();
1178 sgodin 9632 __db_err(env, t_ret, "%s", t->re_source);
1179 fluffy 4437 if (ret == 0)
1180     ret = t_ret;
1181     }
1182    
1183     /* Discard the cursor. */
1184 sgodin 9632 if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0)
1185 fluffy 4437 ret = t_ret;
1186    
1187     /* Discard memory allocated to hold the data items. */
1188     if (data.data != NULL)
1189 sgodin 9632 __os_ufree(env, data.data);
1190 fluffy 4437 if (pad != NULL)
1191 sgodin 9632 __os_free(env, pad);
1192 fluffy 4437
1193     if (ret == 0)
1194     t->re_modified = 0;
1195    
1196     return (ret);
1197     }
1198    
1199     /*
1200     * __ram_sread --
1201     * Read records from a source file.
1202     */
1203     static int
1204     __ram_sread(dbc, top)
1205     DBC *dbc;
1206     db_recno_t top;
1207     {
1208     BTREE *t;
1209     DB *dbp;
1210     DBT data, *rdata;
1211     db_recno_t recno;
1212     size_t len;
1213     int ch, ret, was_modified;
1214    
1215     t = dbc->dbp->bt_internal;
1216     dbp = dbc->dbp;
1217     was_modified = t->re_modified;
1218    
1219     if ((ret = __bam_nrecs(dbc, &recno)) != 0)
1220     return (ret);
1221    
1222     /*
1223     * Use the record key return memory, it's only a short-term use.
1224     * The record data return memory is used by __bam_iitem, which
1225     * we'll indirectly call, so use the key so as not to collide.
1226     */
1227     len = F_ISSET(dbp, DB_AM_FIXEDLEN) ? t->re_len : 256;
1228     rdata = &dbc->my_rkey;
1229     if (rdata->ulen < len) {
1230     if ((ret = __os_realloc(
1231 sgodin 9632 dbp->env, len, &rdata->data)) != 0) {
1232 fluffy 4437 rdata->ulen = 0;
1233     rdata->data = NULL;
1234     return (ret);
1235     }
1236     rdata->ulen = (u_int32_t)len;
1237     }
1238    
1239     memset(&data, 0, sizeof(data));
1240     while (recno < top) {
1241     data.data = rdata->data;
1242     data.size = 0;
1243     if (F_ISSET(dbp, DB_AM_FIXEDLEN))
1244     for (len = t->re_len; len > 0; --len) {
1245 sgodin 9632 if ((ch = fgetc(t->re_fp)) == EOF) {
1246 fluffy 4437 if (data.size == 0)
1247     goto eof;
1248     break;
1249     }
1250     ((u_int8_t *)data.data)[data.size++] = ch;
1251     }
1252     else
1253     for (;;) {
1254 sgodin 9632 if ((ch = fgetc(t->re_fp)) == EOF) {
1255 fluffy 4437 if (data.size == 0)
1256     goto eof;
1257     break;
1258     }
1259     if (ch == t->re_delim)
1260     break;
1261    
1262     ((u_int8_t *)data.data)[data.size++] = ch;
1263     if (data.size == rdata->ulen) {
1264 sgodin 9632 if ((ret = __os_realloc(dbp->env,
1265 fluffy 4437 rdata->ulen *= 2,
1266     &rdata->data)) != 0) {
1267     rdata->ulen = 0;
1268     rdata->data = NULL;
1269     return (ret);
1270     } else
1271     data.data = rdata->data;
1272     }
1273     }
1274    
1275     /*
1276     * Another process may have read this record from the input
1277     * file and stored it into the database already, in which
1278     * case we don't need to repeat that operation. We detect
1279     * this by checking if the last record we've read is greater
1280     * or equal to the number of records in the database.
1281     */
1282     if (t->re_last >= recno) {
1283     ++recno;
1284     if ((ret = __ram_add(dbc, &recno, &data, 0, 0)) != 0)
1285     goto err;
1286     }
1287     ++t->re_last;
1288     }
1289    
1290     if (0) {
1291     eof: t->re_eof = 1;
1292     ret = DB_NOTFOUND;
1293     }
1294     err: if (!was_modified)
1295     t->re_modified = 0;
1296    
1297     return (ret);
1298     }
1299    
1300     /*
1301     * __ram_add --
1302     * Add records into the tree.
1303     */
1304     static int
1305     __ram_add(dbc, recnop, data, flags, bi_flags)
1306     DBC *dbc;
1307     db_recno_t *recnop;
1308     DBT *data;
1309     u_int32_t flags, bi_flags;
1310     {
1311     BTREE_CURSOR *cp;
1312     int exact, ret, stack, t_ret;
1313    
1314     cp = (BTREE_CURSOR *)dbc->internal;
1315    
1316     retry: /* Find the slot for insertion. */
1317     if ((ret = __bam_rsearch(dbc, recnop,
1318 sgodin 9632 SR_INSERT | (flags == DB_APPEND ? SR_APPEND : 0), 1, &exact)) != 0)
1319 fluffy 4437 return (ret);
1320     stack = 1;
1321    
1322     /* Copy the page into the cursor. */
1323     STACK_TO_CURSOR(cp, ret);
1324     if (ret != 0)
1325     goto err;
1326    
1327 sgodin 9632 if (exact && flags == DB_NOOVERWRITE && !CD_ISSET(cp) &&
1328     !B_DISSET(GET_BKEYDATA(dbc->dbp, cp->page, cp->indx)->type)) {
1329     ret = DB_KEYEXIST;
1330     goto err;
1331     }
1332    
1333 fluffy 4437 /*
1334     * The application may modify the data based on the selected record
1335     * number.
1336     */
1337     if (flags == DB_APPEND && dbc->dbp->db_append_recno != NULL &&
1338     (ret = dbc->dbp->db_append_recno(dbc->dbp, data, *recnop)) != 0)
1339     goto err;
1340    
1341     /*
1342     * Select the arguments for __bam_iitem() and do the insert. If the
1343     * key is an exact match, or we're replacing the data item with a
1344     * new data item, replace the current item. If the key isn't an exact
1345     * match, we're inserting a new key/data pair, before the search
1346     * location.
1347     */
1348     switch (ret = __bam_iitem(dbc,
1349     NULL, data, exact ? DB_CURRENT : DB_BEFORE, bi_flags)) {
1350     case 0:
1351     /*
1352     * Don't adjust anything.
1353     *
1354     * If we inserted a record, no cursors need adjusting because
1355     * the only new record it's possible to insert is at the very
1356     * end of the tree. The necessary adjustments to the internal
1357     * page counts were made by __bam_iitem().
1358     *
1359     * If we overwrote a record, no cursors need adjusting because
1360     * future DBcursor->get calls will simply return the underlying
1361     * record (there's no adjustment made for the DB_CURRENT flag
1362     * when a cursor get operation immediately follows a cursor
1363     * delete operation, and the normal adjustment for the DB_NEXT
1364     * flag is still correct).
1365     */
1366     break;
1367     case DB_NEEDSPLIT:
1368     /* Discard the stack of pages and split the page. */
1369     (void)__bam_stkrel(dbc, STK_CLRDBC);
1370     stack = 0;
1371    
1372     if ((ret = __bam_split(dbc, recnop, NULL)) != 0)
1373     goto err;
1374    
1375     goto retry;
1376     /* NOTREACHED */
1377     default:
1378     goto err;
1379     }
1380    
1381     err: if (stack && (t_ret = __bam_stkrel(dbc, STK_CLRDBC)) != 0 && ret == 0)
1382     ret = t_ret;
1383    
1384     return (ret);
1385     }

Properties

Name Value
svn:eol-style native
svn:mime-type text/plain

webmaster AT resiprocate DOT org
ViewVC Help
Powered by ViewVC 1.1.27