· 7 years ago · Oct 17, 2018, 12:18 AM
1package edu.berkeley.cs186.database.table;
2
3import java.util.Arrays;
4import java.io.Closeable;
5import java.nio.ByteBuffer;
6import java.util.Iterator;
7import java.util.List;
8import java.util.NoSuchElementException;
9import java.util.TreeSet;
10
11import edu.berkeley.cs186.database.DatabaseException;
12import edu.berkeley.cs186.database.common.ArrayBacktrackingIterator;
13import edu.berkeley.cs186.database.common.BacktrackingIterator;
14import edu.berkeley.cs186.database.common.Bits;
15import edu.berkeley.cs186.database.databox.DataBox;
16import edu.berkeley.cs186.database.io.Page;
17import edu.berkeley.cs186.database.io.PageAllocator;
18import edu.berkeley.cs186.database.io.PageAllocator.PageIterator;
19
20/**
21 * # Overview
22 * A Table represents a database table with which users can insert, get,
23 * update, and delete records:
24 *
25 * // Create a brand new table t(x: int, y: int) which is persisted in the
26 * // file "t.table".
27 * List<String> fieldNames = Arrays.asList("x", "y");
28 * List<String> fieldTypes = Arrays.asList(Type.intType(), Type.intType());
29 * Schema schema = new Schema(fieldNames, fieldTypes);
30 * Table t = new Table(schema, "t", "t.table");
31 *
32 * // Insert, get, update, and delete records.
33 * List<DataBox> a = Arrays.asList(new IntDataBox(1), new IntDataBox(2));
34 * List<DataBox> b = Arrays.asList(new IntDataBox(3), new IntDataBox(4));
35 * RecordId rid = t.addRecord(a);
36 * Record ra = t.getRecord(rid);
37 * t.updateRecord(b, rid);
38 * Record rb = t.getRecord(rid);
39 * t.deleteRecord(rid);
40 *
41 * // Close the table. All tables must be closed.
42 * t.close();
43 *
44 * # Persistence
45 * Every table constructs a new PageAllocator which it uses to persist its data
46 * into a file. For example, the table above persists itself into a file
47 * "t.table". We can later run the following code to reload the table:
48 *
49 * // Load the table t from the file "t.table". Unlike above, we do not have
50 * // to specify the schema of t because it will be parsed from "t.table".
51 * Table t = new Table("t", "t.table");
52 * // Don't forget to close the table.
53 * t.close();
54 *
55 * # Storage Format
56 * Now, we discuss how tables serialize their data into files.
57 *
58 * 1. Each file begins with a header page into which tables serialize their
59 * schema.
60 * 2. All remaining pages are data pages. Every data page begins with an
61 * n-byte bitmap followed by m records. The bitmap indicates which records
62 * in the page are valid. The values of n and m are set to maximize the
63 * number of records per page (see computeDataPageNumbers for details).
64 *
65 * For example, here is a cartoon of what a table's file would look like if we
66 * had 5-byte pages and 1-byte records:
67 *
68 * Serialized Schema___________________________________
69 * / \
70 * +----------+----------+----------+----------+----------+ \
71 * Page 0 | 00000001 | 00000001 | 01111111 | 00000001 | 00000100 | |- header
72 * +----------+----------+----------+----------+----------+ /
73 * +----------+----------+----------+----------+----------+ \
74 * Page 1 | 1001xxxx | 01111010 | xxxxxxxx | xxxxxxxx | 01100001 | |
75 * +----------+----------+----------+----------+----------+ |
76 * Page 2 | 1101xxxx | 01110010 | 01100100 | xxxxxxxx | 01101111 | |- data
77 * +----------+----------+----------+----------+----------+ |
78 * Page 3 | 0011xxxx | xxxxxxxx | xxxxxxxx | 01111010 | 00100001 | |
79 * +----------+----------+----------+----------+----------+ /
80 * \________/ \________/ \________/ \________/ \________/
81 * bitmap record 0 record 1 record 2 record 3
82 *
83 * - The first page (Page 0) is the header page and contains the serialized
84 * schema.
85 * - The second page (Page 1) is a data page. The first byte of this data page
86 * is a bitmap, and the next four bytes are each records. The first and
87 * fourth bit are set indicating that record 0 and record 3 are valid.
88 * Record 1 and record 2 are invalid, so we ignore their contents.
89 * Similarly, the last four bits of the bitmap are unused, so we ignore
90 * their contents.
91 * - The third and fourth page (Page 2 and 3) are also data pages and are
92 * formatted similar to Page 1.
93 *
94 * When we add a record to a table, we add it to the very first free slot in
95 * the table. See addRecord for more information.
96 */
97public class Table implements Iterable<Record>, Closeable {
98 public static final String FILENAME_PREFIX = "db";
99 public static final String FILENAME_EXTENSION = ".table";
100
101 // The name of the database.
102 private String name;
103
104 // The filename of the file in which this table is persisted.
105 private String filename;
106
107 // The schema of the database.
108 private Schema schema;
109
110 // The allocator used to persist the database.
111 private PageAllocator allocator;
112
113 // The size (in bytes) of the bitmap found at the beginning of each data page.
114 private int bitmapSizeInBytes;
115
116 // The number of records on each data page.
117 private int numRecordsPerPage;
118
119 // The page numbers of all allocated pages which have room for more records.
120 private TreeSet<Integer> freePageNums;
121
122 // The number of records in the table.
123 private long numRecords;
124
125 // Constructors //////////////////////////////////////////////////////////////
126 /**
127 * Construct a brand new table named `name` with schema `schema` persisted in
128 * file `filename`.
129 */
130 public Table(String name, Schema schema, String filename) {
131 this.name = name;
132 this.filename = filename;
133 this.schema = schema;
134 this.allocator = new PageAllocator(filename, true);
135 this.bitmapSizeInBytes = computeBitmapSizeInBytes(Page.pageSize, schema);
136 numRecordsPerPage = computeNumRecordsPerPage(Page.pageSize, schema);
137 this.freePageNums = new TreeSet<Integer>();
138 this.numRecords = 0;
139
140 writeSchemaToHeaderPage(allocator, schema);
141 }
142
143 /**
144 * Load a table named `name` from the file `filename`. The schema of the
145 * table will be read from the header page of the file.
146 */
147 public Table(String name, String filename) throws DatabaseException {
148 this.name = name;
149 this.filename = filename;
150 this.allocator = new PageAllocator(filename, false);
151 this.schema = readSchemaFromHeaderPage(this.allocator);
152 this.bitmapSizeInBytes = computeBitmapSizeInBytes(Page.pageSize, this.schema);
153 this.numRecordsPerPage = computeNumRecordsPerPage(Page.pageSize, this.schema);
154
155 this.freePageNums = new TreeSet<Integer>();
156 this.numRecords = 0;
157
158 Iterator<Page> iter = this.allocator.iterator();
159 iter.next(); // Skip the header page.
160 while(iter.hasNext()) {
161 Page page = iter.next();
162 byte[] bitmap = getBitMap(page);
163
164 for (short i = 0; i < numRecordsPerPage; ++i) {
165 if (Bits.getBit(bitmap, i) == Bits.Bit.ONE) {
166 Record r = getRecord(new RecordId(page.getPageNum(), i));
167 numRecords++;
168 }
169 }
170
171 if (numRecordsOnPage(page) != numRecordsPerPage) {
172 freePageNums.add(page.getPageNum());
173 }
174 }
175 }
176
177 // Accessors /////////////////////////////////////////////////////////////////
178 public String getName() {
179 return name;
180 }
181
182 public String getFilename() {
183 return filename;
184 }
185
186 public Schema getSchema() {
187 return schema;
188 }
189
190 public PageAllocator getAllocator() {
191 return allocator;
192 }
193
194 public int getBitmapSizeInBytes() {
195 return bitmapSizeInBytes;
196 }
197
198 public int getNumRecordsPerPage() {
199 return numRecordsPerPage;
200 }
201
202 public long getNumRecords() {
203 return numRecords;
204 }
205
206 public int getNumDataPages() {
207 // All pages but the first are data pages.
208 return allocator.getNumPages() - 1;
209 }
210
211 public byte[] getBitMap(Page page) {
212 byte[] bytes = new byte[bitmapSizeInBytes];
213 page.getByteBuffer().get(bytes);
214 return bytes;
215 }
216
217 public static int computeBitmapSizeInBytes(int pageSize, Schema schema) {
218 // Dividing by 8 simultaneously (a) rounds down the number of records to a
219 // multiple of 8 and (b) converts bits to bytes.
220 return computeUnroundedNumRecordsPerPage(pageSize, schema) / 8;
221 }
222
223 public static int computeNumRecordsPerPage(int pageSize, Schema schema) {
224 // Dividing by 8 and then multiplying by 8 rounds down to the nearest
225 // multiple of 8.
226 return computeUnroundedNumRecordsPerPage(pageSize, schema) / 8 * 8;
227 }
228
229 // Modifiers /////////////////////////////////////////////////////////////////
230 private synchronized void insertRecord(Page page, int entryNum, Record record) {
231 int offset = bitmapSizeInBytes + (entryNum * schema.getSizeInBytes());
232 byte[] bytes = record.toBytes(schema);
233 ByteBuffer buf = page.getByteBuffer();
234 buf.position(offset);
235 buf.put(bytes);
236 }
237
238 /**
239 * addRecord adds a record to this table and returns the record id of the
240 * newly added record. freePageNums, and numRecords are updated
241 * accordingly. The record is added to the first free slot of the first free
242 * page (if one exists, otherwise one is allocated). For example, if the
243 * first free page has bitmap 0b11101000, then the record is inserted into
244 * the page with index 3 and the bitmap is updated to 0b11111000.
245 */
246 public synchronized RecordId addRecord(List<DataBox> values) throws DatabaseException {
247 Record record = schema.verify(values);
248
249 // Get a free page, allocating a new one if necessary.
250 if (freePageNums.isEmpty()) {
251 freePageNums.add(allocator.allocPage());
252 }
253 Page page = allocator.fetchPage(freePageNums.first());
254
255 // Find the first empty slot in the bitmap.
256 // entry number of the first free slot and store it in entryNum; and (2) we
257 // count the total number of entries on this page.
258 byte[] bitmap = getBitMap(page);
259 int entryNum = 0;
260 for (; entryNum < numRecordsPerPage; ++entryNum) {
261 if (Bits.getBit(bitmap, entryNum) == Bits.Bit.ZERO) {
262 break;
263 }
264 }
265 assert(entryNum < numRecordsPerPage);
266
267 // Insert the record and update the bitmap.
268 insertRecord(page, entryNum, record);
269 Bits.setBit(page.getByteBuffer(), entryNum, Bits.Bit.ONE);
270
271 // Update the metadata.
272 if (numRecordsOnPage(page) == numRecordsPerPage) {
273 freePageNums.pollFirst();
274 }
275 numRecords++;
276
277 return new RecordId(page.getPageNum(), (short) entryNum);
278 }
279
280 /**
281 * Retrieves a record from the table, throwing an exception if no such record
282 * exists.
283 */
284 public synchronized Record getRecord(RecordId rid) throws DatabaseException {
285 validateRecordId(rid);
286 Page page = allocator.fetchPage(rid.getPageNum());
287 byte[] bitmap = getBitMap(page);
288 if (Bits.getBit(bitmap, rid.getEntryNum()) == Bits.Bit.ZERO) {
289 String msg = String.format("Record %s does not exist.", rid);
290 throw new DatabaseException(msg);
291 }
292
293 int offset = bitmapSizeInBytes + (rid.getEntryNum() * schema.getSizeInBytes());
294 ByteBuffer buf = page.getByteBuffer();
295 buf.position(offset);
296 return Record.fromBytes(buf, schema);
297 }
298
299 /**
300 * Overwrites an existing record with new values and returns the existing
301 * record. An exception is thrown if rid does
302 * not correspond to an existing record in the table.
303 */
304 public synchronized Record updateRecord(List<DataBox> values, RecordId rid) throws DatabaseException {
305 validateRecordId(rid);
306 Record newRecord = schema.verify(values);
307 Record oldRecord = getRecord(rid);
308
309 Page page = allocator.fetchPage(rid.getPageNum());
310 insertRecord(page, rid.getEntryNum(), newRecord);
311 return oldRecord;
312 }
313
314 /**
315 * Deletes and returns the record specified by rid from the table and updates
316 * freePageNums, and numRecords as necessary. An exception is thrown
317 * if rid does not correspond to an existing record in the table.
318 */
319 public synchronized Record deleteRecord(RecordId rid) throws DatabaseException {
320 validateRecordId(rid);
321 Page page = allocator.fetchPage(rid.getPageNum());
322 Record record = getRecord(rid);
323 Bits.setBit(page.getByteBuffer(), rid.getEntryNum(), Bits.Bit.ZERO);
324
325 if(numRecordsOnPage(page) == numRecordsPerPage - 1) {
326 freePageNums.add(page.getPageNum());
327 }
328 numRecords--;
329
330 return record;
331 }
332
333 public void close() {
334 allocator.close();
335 }
336
337 // Helpers ///////////////////////////////////////////////////////////////////
338 private static Schema readSchemaFromHeaderPage(PageAllocator allocator) {
339 Page headerPage = allocator.fetchPage(0);
340 ByteBuffer buf = headerPage.getByteBuffer();
341 return Schema.fromBytes(buf);
342 }
343
344 private static void writeSchemaToHeaderPage(PageAllocator allocator, Schema schema) {
345 Page headerPage = allocator.fetchPage(allocator.allocPage());
346 assert(0 == headerPage.getPageNum());
347 ByteBuffer buf = headerPage.getByteBuffer();
348 buf.put(schema.toBytes());
349 }
350
351 /**
352 * Recall that every data page contains an m-byte bitmap followed by n
353 * records. The following three functions computes m and n such that n is
354 * maximized. To simplify things, we round n down to the nearest multiple of
355 * 8 if necessary. m and n are stored in bitmapSizeInBytes and
356 * numRecordsPerPage respectively.
357 *
358 * Some examples:
359 *
360 * | Page Size | Record Size | bitmapSizeInBytes | numRecordsPerPage |
361 * | --------- | ----------- | ----------------- | ----------------- |
362 * | 9 bytes | 1 byte | 1 | 8 |
363 * | 10 bytes | 1 byte | 1 | 8 |
364 * ...
365 * | 17 bytes | 1 byte | 1 | 8 |
366 * | 18 bytes | 2 byte | 2 | 16 |
367 * | 19 bytes | 2 byte | 2 | 16 |
368 */
369 private static int computeUnroundedNumRecordsPerPage(int pageSize, Schema schema) {
370 // Storing each record requires 1 bit for the bitmap and 8 *
371 // schema.getSizeInBytes() bits for the record.
372 int recordOverheadInBits = 1 + 8 * schema.getSizeInBytes();
373 int pageSizeInBits = pageSize * 8;
374 return pageSizeInBits / recordOverheadInBits;
375 }
376
377 private int numRecordsOnPage(Page page) {
378 byte[] bitmap = getBitMap(page);
379 int numRecords = 0;
380 for (int i = 0; i < numRecordsPerPage; ++i) {
381 if (Bits.getBit(bitmap, i) == Bits.Bit.ONE) {
382 numRecords++;
383 }
384 }
385 return numRecords;
386 }
387
388 private void validateRecordId(RecordId rid) throws DatabaseException {
389 int p = rid.getPageNum();
390 int e = rid.getEntryNum();
391
392 if (p == 0) {
393 throw new DatabaseException("Page 0 is a header page, not a data page.");
394 }
395
396 if (e < 0) {
397 String msg = String.format("Invalid negative entry number %d.", e);
398 throw new DatabaseException(msg);
399 }
400
401 if (e >= numRecordsPerPage) {
402 String msg = String.format(
403 "There are only %d records per page, but record %d was requested.",
404 numRecordsPerPage, e);
405 throw new DatabaseException(msg);
406 }
407 }
408
409 // Iterators /////////////////////////////////////////////////////////////////
410 public TableIterator ridIterator() {
411 return new TableIterator();
412 }
413
414 public RecordIterator iterator() {
415 return new RecordIterator(this, ridIterator());
416 }
417
418 public BacktrackingIterator<Record> blockIterator(Page[] block) {
419 return new RecordIterator(this, new RIDBlockIterator(block));
420 }
421
422 public BacktrackingIterator<Record> blockIterator(BacktrackingIterator<Page> block) {
423 return new RecordIterator(this, new RIDBlockIterator(block));
424 }
425
426 public BacktrackingIterator<Record> blockIterator(Iterator<Page> block, int maxRecords) {
427 return new RecordIterator(this, new RIDBlockIterator(block, maxRecords));
428 }
429
430 /**
431 * RIDPageIterator is a BacktrackingIterator over the RecordIds of a single
432 * page of the table.
433 *
434 * See comments on the BacktrackingIterator interface for how mark and reset
435 * should function.
436 */
437 public class RIDPageIterator implements BacktrackingIterator<RecordId> {
438 //member variables go here
439 private int pagenum;
440 private int curr;
441 private int mark;
442 private byte[] bitmap;
443 /**
444 * The following method signature is provided for guidance, but not necessary. Feel free to
445 * implement your own solution using whatever helper methods you would like.
446 */
447
448 public RIDPageIterator(Page page) {
449 this.pagenum=page.getPageNum();
450 this.bitmap=getBitMap(page);
451 }
452
453 public boolean hasNext() {
454 for(int i=curr; i<8*bitmapSizeInBytes;i++){
455 if(Bits.getBit(bitmap, i)==Bits.Bit.ONE){
456 return true;
457 }
458 }
459 return false;
460 }
461
462 public RecordId next() {
463 while (Bits.getBit(bitmap, curr)==Bits.Bit.ZERO){
464 curr++;
465 }
466 return new RecordId(pagenum,(short) curr++);
467 }
468
469 public void mark() {
470 this.mark=Math.max(0, curr-1);
471 }
472
473 public void reset() {
474 curr=mark;
475 }
476 }
477
478 /**
479 * Helper function to create a BacktrackingIterator from an Iterator of
480 * Pages, and a maximum number of pages.
481 *
482 * At most maxPages pages will be loaded into the iterator; if there are
483 * not enough pages available, then fewer pages will be used.
484 */
485 private static BacktrackingIterator<Page> getBlockFromIterator(Iterator<Page> pageIter, int maxPages) {
486 Page[] block = new Page[maxPages];
487 int numPages;
488 for (numPages = 0; numPages < maxPages && pageIter.hasNext(); ++numPages) {
489 block[numPages] = pageIter.next();
490 }
491 if (numPages < maxPages) {
492 Page[] temp = new Page[numPages];
493 System.arraycopy(block, 0, temp, 0, numPages);
494 block = temp;
495 }
496 return new ArrayBacktrackingIterator(block);
497 }
498
499 /**
500 * RIDBlockIterator is a BacktrackingIterator yielding RecordIds of a block
501 * of pages.
502 *
503 * A "block" is specified by a BacktrackingIterator of Pages: every single
504 * Page returned by the iterator is part of the block. Your code should only
505 * utilize this iterator's functionality for fetching pages, i.e. you should
506 * *not* fetch every Page from the block iterator into an array or collection.
507 *
508 * The mark and reset methods have been provided for you already, and work by
509 * saving a BacktrackingIterator of RecordIds over the appropriate page.
510 *
511 * The iterator maintains a few pieces of state:
512 * - block is simply the BacktrackingIterator<Page> specifying the pages in
513 * the block.
514 * - blockIter is a BacktrackingIterator over RecordIds of the current page we
515 * are iterating over.
516 * - prevRecordId is the last RecordId that next() returned.
517 * - nextRecordId is the next RecordId that next() will return.
518 *
519 * In addition to these, we maintain some state to help with the
520 * implementation of mark() and reset(); you should not need to use these
521 * for implementing next() and hasNext().
522 */
523 public class RIDBlockIterator implements BacktrackingIterator<RecordId> {
524 private BacktrackingIterator<Page> block = null;
525 private BacktrackingIterator<RecordId> blockIter = null;
526
527 private BacktrackingIterator<RecordId> markedBlockIter = null;
528 private RecordId markedPrevRecordId = null;
529
530 private RecordId prevRecordId = null;
531 private RecordId nextRecordId = null;
532
533 public RIDBlockIterator(BacktrackingIterator<Page> block) {
534 this.block = block;
535 }
536
537 /**
538 * This is an extra constructor that allows one to create an
539 * RIDBlockIterator by taking the first maxPages of an iterator of Pages.
540 *
541 * If there are fewer than maxPages number of Pages available in pageIter,
542 * then all remaining pages shall be used in the "block"; otherwise,
543 * only the first maxPages number of pages shall be used.
544 *
545 * Note that this also advances pageIter by maxPages, so you can do the
546 * following:
547 *
548 * Iterator<Page> pageIter = // ...
549 * RIDBlockIterator firstBlock = new RIDBlockIterator(pageIter, 100);
550 * RIDBlockIterator secondBlock = new RIDBlockIterator(pageIter, 100);
551 * RIDBlockIterator thirdBlock = new RIDBlockIterator(pageIter, 100);
552 *
553 * to get iterators over the first 100 pages, second 100 pages, and third
554 * 100 pages.
555 */
556 public RIDBlockIterator(Iterator<Page> pageIter, int maxPages) {
557 this(Table.getBlockFromIterator(pageIter, maxPages));
558 }
559
560 /**
561 * This is an extra constructor that allows one to create an
562 * RIDBlockIterator over an array of Pages.
563 *
564 * Every page in the pages array will be used in the block of pages.
565 */
566 public RIDBlockIterator(Page[] pages) {
567 this(new ArrayBacktrackingIterator(pages));
568 }
569
570 public boolean hasNext() {
571 if(blockIter!=null && blockIter.hasNext()){
572 return true;
573 }
574 if(block.hasNext()){
575 blockIter=new RIDPageIterator(block.next());
576 return this.hasNext();
577 }
578 return false;
579 }
580
581 public RecordId next(){
582 return blockIter.next();
583 }
584
585 /**
586 * Marks the last recordId returned by next().
587 *
588 * This implementation of mark simply marks and saves the current page's
589 * iterator of RecordIds.
590 */
591 public void mark(){
592 block.mark();
593 blockIter.mark();
594 markedBlockIter=blockIter;
595 }
596// public void mark() {
597// if (this.prevRecordId == null) {
598// return;
599// }
600//
601// this.block.mark();
602// this.blockIter.mark();
603// this.markedBlockIter = this.blockIter;
604// this.markedPrevRecordId = this.prevRecordId;
605// }
606
607 /**
608 * Resets to the marked recordId.
609 *
610 * This implementation of reset restores the marked page's iterator,
611 * and calls reset() on it to move it to the correct record. Some extra
612 * care is taken to ensure that we properly reset the block page iterator.
613 */
614 public void reset(){
615 blockIter=markedBlockIter;
616 block.reset();
617 block.next();
618 blockIter.reset();
619 }
620// public void reset() {
621// if (this.markedPrevRecordId == null) {
622// return;
623// }
624// this.block.reset();
625// // We don't want to get the current page again
626// this.block.next();
627// this.blockIter = this.markedBlockIter;
628// this.blockIter.reset();
629// // If we're at the end of the block, we don't want to repeat the record
630// if (!this.block.hasNext()) {
631// this.blockIter.next();
632// if (this.blockIter.hasNext()) {
633// this.blockIter.reset();
634// }
635// }
636//
637// this.prevRecordId = null;
638// this.nextRecordId = this.markedPrevRecordId;
639// }
640 }
641
642 /**
643 * A helper function that returns the same iterator passed in, but with
644 * a single page skipped.
645 */
646 private static Iterator<Page> iteratorSkipPage(Iterator<Page> iter) {
647 iter.next();
648 return iter;
649 }
650
651 /**
652 * TableIterator is an Iterator over the record IDs of a table.
653 *
654 * This is just a very thin wrapper around RIDBlockIterator, where the "block"
655 * is an iterator of all the pages of the table (minus the header page). Once
656 * RIDBlockIterator is filled in, all tests on TableIterator should
657 * automatically pass.
658 */
659 public class TableIterator extends RIDBlockIterator {
660 public TableIterator() {
661 super((BacktrackingIterator<Page>) Table.iteratorSkipPage(Table.this.allocator.iterator()));
662 }
663 }
664}