· 7 years ago · Oct 31, 2018, 07:14 PM
1package edu.berkeley.cs186.database.query;
2
3import java.util.ArrayList;
4import java.util.HashMap;
5import java.util.HashSet;
6import java.util.Iterator;
7import java.util.List;
8import java.util.Map;
9import java.util.Set;
10
11import edu.berkeley.cs186.database.Database;
12import edu.berkeley.cs186.database.DatabaseException;
13import edu.berkeley.cs186.database.databox.DataBox;
14import edu.berkeley.cs186.database.table.Record;
15import edu.berkeley.cs186.database.table.Schema;
16import edu.berkeley.cs186.database.table.Table;
17
18/**
19 * QueryPlan provides a set of functions to generate simple queries. Calling the methods corresponding
20 * to SQL syntax stores the information in the QueryPlan, and calling execute generates and executes
21 * a QueryPlan DAG.
22 */
23public class QueryPlan {
24 public enum PredicateOperator {
25 EQUALS,
26 NOT_EQUALS,
27 LESS_THAN,
28 LESS_THAN_EQUALS,
29 GREATER_THAN,
30 GREATER_THAN_EQUALS
31 }
32
33 private Database.Transaction transaction;
34 private QueryOperator finalOperator;
35 private String startTableName;
36
37 private List<String> joinTableNames;
38 private List<String> joinLeftColumnNames;
39 private List<String> joinRightColumnNames;
40 private List<String> selectColumnNames;
41 private List<PredicateOperator> selectOperators;
42 private List<DataBox> selectDataBoxes;
43 private List<String> projectColumns;
44 private String groupByColumn;
45 private boolean hasCount;
46 private String averageColumnName;
47 private String sumColumnName;
48
49 /**
50 * Creates a new QueryPlan within transaction. The base table is startTableName.
51 *
52 * @param transaction the transaction containing this query
53 * @param startTableName the source table for this query
54 */
55 public QueryPlan(Database.Transaction transaction, String startTableName) {
56 this.transaction = transaction;
57 this.startTableName = startTableName;
58
59 this.projectColumns = new ArrayList<String>();
60 this.joinTableNames = new ArrayList<String>();
61 this.joinLeftColumnNames = new ArrayList<String>();
62 this.joinRightColumnNames = new ArrayList<String>();
63
64 this.selectColumnNames = new ArrayList<String>();
65 this.selectOperators = new ArrayList<PredicateOperator>();
66 this.selectDataBoxes = new ArrayList<DataBox>();
67
68 this.hasCount = false;
69 this.averageColumnName = null;
70 this.sumColumnName = null;
71
72 this.groupByColumn = null;
73
74 this.finalOperator = null;
75 }
76
77 public QueryOperator getFinalOperator() {
78 return this.finalOperator;
79 }
80
81 /**
82 * Add a project operator to the QueryPlan with a list of column names. Can only specify one set
83 * of projections.
84 *
85 * @param columnNames the columns to project
86 * @throws QueryPlanException
87 */
88 public void project(List<String> columnNames) throws QueryPlanException {
89 if (!this.projectColumns.isEmpty()) {
90 throw new QueryPlanException("Cannot add more than one project operator to this query.");
91 }
92
93 if (columnNames.isEmpty()) {
94 throw new QueryPlanException("Cannot project no columns.");
95 }
96
97 this.projectColumns = columnNames;
98 }
99
100 /**
101 * Add a select operator. Only returns columns in which the column fulfills the predicate relative
102 * to value.
103 *
104 * @param column the column to specify the predicate on
105 * @param comparison the comparator
106 * @param value the value to compare against
107 * @throws QueryPlanException
108 */
109 public void select(String column, PredicateOperator comparison, DataBox value) throws QueryPlanException {
110 this.selectColumnNames.add(column);
111 this.selectOperators.add(comparison);
112 this.selectDataBoxes.add(value);
113 }
114
115 /**
116 * Set the group by column for this query.
117 *
118 * @param column the column to group by
119 * @throws QueryPlanException
120 */
121 public void groupBy(String column) throws QueryPlanException {
122 this.groupByColumn = column;
123 }
124
125 /**
126 * Add a count aggregate to this query. Only can specify count(*).
127 *
128 * @throws QueryPlanException
129 */
130 public void count() throws QueryPlanException {
131 this.hasCount = true;
132 }
133
134 /**
135 * Add an average on column. Can only average over integer or float columns.
136 *
137 * @param column the column to average
138 * @throws QueryPlanException
139 */
140 public void average(String column) throws QueryPlanException {
141 this.averageColumnName = column;
142 }
143
144 /**
145 * Add a sum on column. Can only sum integer or float columns
146 *
147 * @param column the column to sum
148 * @throws QueryPlanException
149 */
150 public void sum(String column) throws QueryPlanException {
151 this.sumColumnName = column;
152 }
153
154 /**
155 * Join the leftColumnName column of the existing queryplan against the rightColumnName column
156 * of tableName.
157 *
158 * @param tableName the table to join against
159 * @param leftColumnName the join column in the existing QueryPlan
160 * @param rightColumnName the join column in tableName
161 */
162 public void join(String tableName, String leftColumnName, String rightColumnName) {
163 this.joinTableNames.add(tableName);
164 this.joinLeftColumnNames.add(leftColumnName);
165 this.joinRightColumnNames.add(rightColumnName);
166 }
167
168 //Returns a 2-array of table name, column name
169 public String[] getJoinLeftColumnNameByIndex(int i) {
170 return this.joinLeftColumnNames.get(i).split("\\.");
171 }
172
173 //Returns a 2-array of table name, column name
174 public String[] getJoinRightColumnNameByIndex(int i) {
175 return this.joinRightColumnNames.get(i).split("\\.");
176 }
177
178
179 /**
180 * Generates a naive QueryPlan in which all joins are at the bottom of the DAG followed by all select
181 * predicates, an optional group by operator, and a set of projects (in that order).
182 *
183 * @return an iterator of records that is the result of this query
184 * @throws DatabaseException
185 * @throws QueryPlanException
186 */
187 public Iterator<Record> execute() throws DatabaseException, QueryPlanException {
188 String indexColumn = this.checkIndexEligible();
189
190 if (indexColumn != null) {
191 this.generateIndexPlan(indexColumn);
192 } else {
193 // start off with the start table scan as the source
194 this.finalOperator = new SequentialScanOperator(this.transaction, this.startTableName);
195
196 this.addJoins();
197 this.addSelects();
198 this.addGroupBy();
199 this.addProjects();
200 }
201
202 return this.finalOperator.execute();
203 }
204
205 /**
206 * Generates an optimal QueryPlan based on the System R cost-based query optimizer.
207 *
208 * @return an iterator of records that is the result of this query
209 * @throws DatabaseException
210 * @throws QueryPlanException
211 */
212 public Iterator<Record> executeOptimal() throws DatabaseException, QueryPlanException {
213 // Pass 1: Iterate through all single tables. For each single table, find
214 // the lowest cost QueryOperator to access that table. Construct a mapping
215 // of each table name to its lowest cost operator.
216
217
218 // Pass i: On each pass, use the results from the previous pass to find the
219 // lowest cost joins with each single table. Repeat until all tables have
220 // been joined.
221
222
223 // Get the lowest cost operator from the last pass, add GROUP BY and SELECT
224 // operators, and return an iterator on the final operator
225 Map<Set, QueryOperator> lowestCostOperatorMap = new HashMap<>();
226 Map<Set, QueryOperator> pass1CostOperatorMap = new HashMap<>();
227 //Handle the start table
228 HashSet temp = new HashSet();
229 temp.add(startTableName);
230 pass1CostOperatorMap.put(temp, minCostSingleAccess(startTableName));
231 for (String tableName : joinTableNames) {
232 temp = new HashSet();
233 temp.add(tableName);
234 pass1CostOperatorMap.put(temp, minCostSingleAccess(tableName));
235 }
236 Map<Set, QueryOperator> prevPassMap;
237 lowestCostOperatorMap = pass1CostOperatorMap;
238 for (int i = 0; i < joinTableNames.size(); i++) {
239 prevPassMap = lowestCostOperatorMap;
240 lowestCostOperatorMap = minCostJoins(prevPassMap, pass1CostOperatorMap);
241 }
242 finalOperator = minCostOperator(lowestCostOperatorMap);
243 addGroupBy();
244 addProjects();
245 return finalOperator.iterator();
246 }
247
248 /**
249 * Gets all SELECT predicates for which there exists an index on the column
250 * referenced in that predicate for the given table.
251 *
252 * @return an ArrayList of SELECT predicates
253 */
254 private List<Integer> getEligibleIndexColumns(String table) {
255 List<Integer> selectIndices = new ArrayList<Integer>();
256
257 for (int i = 0; i < this.selectColumnNames.size(); i++) {
258 String column = this.selectColumnNames.get(i);
259
260 if (this.transaction.indexExists(table, column) &&
261 this.selectOperators.get(i) != PredicateOperator.NOT_EQUALS) {
262 selectIndices.add(i);
263 }
264 }
265
266 return selectIndices;
267 }
268
269 /**
270 * Gets all columns for which there exists an index for that table
271 *
272 * @return an ArrayList of column names
273 */
274 private List<String> getAllIndexColumns(String table) throws DatabaseException {
275 List<String> indexColumns = new ArrayList<String>();
276
277 Schema schema = this.transaction.getSchema(table);
278 List<String> columnNames = schema.getFieldNames();
279
280 for (int i = 0; i < columnNames.size(); i++) {
281 String column = columnNames.get(i);
282
283 if (this.transaction.indexExists(table, column)) {
284 indexColumns.add(table + "." + column);
285 }
286 }
287
288 return indexColumns;
289 }
290
291 /**
292 * Applies all eligible SELECT predicates to a given source, except for the
293 * predicate at index except. The purpose of except is because there might
294 * be one SELECT predicate that was already used for an index scan, so no
295 * point applying it again. A SELECT predicate is represented as elements of
296 * this.selectColumnNames, this.selectOperators, and this.selectDataBoxes that
297 * correspond to the same index of these lists.
298 *
299 * @return a new QueryOperator after SELECT has been applied
300 * @throws DatabaseException
301 * @throws QueryPlanException
302 */
303 private QueryOperator addEligibleSelections(QueryOperator source, int except) throws QueryPlanException, DatabaseException {
304
305 for (int i = 0; i < this.selectColumnNames.size(); i++) {
306 if (i == except) {
307 continue;
308 }
309
310 PredicateOperator curPred = this.selectOperators.get(i);
311 DataBox curValue = this.selectDataBoxes.get(i);
312 try {
313 String colName = source.checkSchemaForColumn(source.getOutputSchema(), selectColumnNames.get(i));
314 source = new SelectOperator(source, colName, curPred, curValue);
315 } catch (QueryPlanException err) {
316 continue;
317 }
318 }
319
320 return source;
321 }
322
323 /**
324 * Finds the lowest cost QueryOperator that scans the given table. First
325 * determine the cost of a sequential scan for the given table. Then for every index that can be
326 * used on that table, determine the cost of an index scan. Keep track of
327 * the minimum cost operation. Then push down eligible projects (SELECT
328 * predicates). If an index scan was chosen, exclude that SELECT predicate when
329 * pushing down selects. This method will be called during the first pass of the search
330 * algorithm to determine the most efficient way to access each single table.
331 *
332 * @return a QueryOperator that has the lowest cost of scanning the given table which is
333 * either a SequentialScanOperator or an IndexScanOperator nested within any possible
334 * pushed down select operators
335 * @throws DatabaseException
336 * @throws QueryPlanException
337 */
338 public QueryOperator minCostSingleAccess(String table) throws DatabaseException, QueryPlanException {
339 QueryOperator minOp = null;
340
341 // Find the cost of a sequential scan of the table
342 minOp = new SequentialScanOperator(this.transaction, table);
343
344 // 1. Find the cost of a sequential scan of the table
345
346 // 2. For each eligible index column, find the cost of an index scan of the
347 // table and retain the lowest cost operator
348
349
350 // 3. Push down SELECT predicates that apply to this table and that were not
351 // used for an index scan
352 int cost = minOp.getIOCost();
353 int minIndex = -1;
354 QueryOperator currOperator;
355 List<Integer> eligibleIndexColumns = getEligibleIndexColumns(table);
356 for (Integer columnIndex: eligibleIndexColumns){
357 currOperator = new IndexScanOperator(transaction, table, selectColumnNames.get(columnIndex), selectOperators.get(columnIndex), selectDataBoxes.get(columnIndex));
358 if (cost > currOperator.getIOCost()) {
359 cost = currOperator.getIOCost();
360 minIndex = columnIndex;
361 minOp = currOperator;
362 }
363 }
364 minOp = addEligibleSelections(minOp, minIndex);
365 return minOp;
366 }
367
368 /**
369 * Given a join condition between an outer relation represented by leftOp
370 * and an inner relation represented by rightOp, find the lowest cost join
371 * operator out of all the possible join types in JoinOperator.JoinType.
372 *
373 * @return lowest cost join QueryOperator between the input operators
374 * @throws QueryPlanException
375 */
376 private QueryOperator minCostJoinType(QueryOperator leftOp,
377 QueryOperator rightOp,
378 String leftColumn,
379 String rightColumn) throws QueryPlanException,
380 DatabaseException {
381 QueryOperator minOp = null;
382
383 int minCost = Integer.MAX_VALUE;
384 List<QueryOperator> allJoins = new ArrayList<QueryOperator>();
385 allJoins.add(new SNLJOperator(leftOp, rightOp, leftColumn, rightColumn, this.transaction));
386 allJoins.add(new BNLJOperator(leftOp, rightOp, leftColumn, rightColumn, this.transaction));
387
388 for (QueryOperator join : allJoins) {
389 int joinCost = join.estimateIOCost();
390 if (joinCost < minCost) {
391 minOp = join;
392 minCost = joinCost;
393 }
394 }
395 return minOp;
396 }
397
398 /**
399 * Iterate through all table sets in the previous pass of the search. For each
400 * table set, check each join predicate to see if there is a valid join
401 * condition with a new table. If so, check the cost of each type of join and
402 * keep the minimum cost join. Construct and return a mapping of each set of
403 * table names being joined to its lowest cost join operator. A join predicate
404 * is represented as elements of this.joinTableNames, this.joinLeftColumnNames,
405 * and this.joinRightColumnNames that correspond to the same index of these lists.
406 *
407 * @return a mapping of table names to a join QueryOperator
408 * @throws QueryPlanException
409 */
410 private Map<Set, QueryOperator> minCostJoins(Map<Set, QueryOperator> prevMap,
411 Map<Set, QueryOperator> pass1Map) throws QueryPlanException,
412 DatabaseException {
413 Map<Set, QueryOperator> map = new HashMap<Set, QueryOperator>();
414
415 //We provide a basic description of the logic you have to implement
416
417 //Input: prevMap (maps a set of tables to a query operator--the operator that joins the set)
418 //Input: pass1Map (each set is a singleton with one table and single table access query operator)
419
420 //FOR EACH set of tables in prevMap:
421
422 //FOR EACH join condition listed in the query
423
424 //get the left side and the right side (table name and column)
425 /**
426 * Case 1. Set contains left table but not right, use pass1Map to
427 * fetch the right operator to access the rightTable
428 *
429 * Case 2. Set contains right table but not left, use pass1Map to
430 * fetch the right operator to access the leftTable.
431 *
432 * Case 3. Set contains neither or both the left table or right table (continue loop)
433 *
434 * --- Then given the operator, use minCostJoinType to calculate the cheapest join with that
435 * and the previously joined tables.
436 */
437
438 /**
439 * Create a new set that is the union of the new table and previously
440 * joined tables. Add to result map this value mapping to the result from
441 * minCostJoinType if it doesn't exist or it exists and cost is lower.
442 */
443 String[] leftNames;
444 String[] rightNames;
445 Set<String> currSet;
446 QueryOperator prevOp;
447 QueryOperator minOp;
448 for (Set tablesSet: prevMap.keySet()) {
449 for (int i = 0; i < joinTableNames.size(); i++){
450 leftNames = getJoinLeftColumnNameByIndex(i);
451 rightNames = getJoinRightColumnNameByIndex(i);
452 currSet = new HashSet<>();
453 prevOp = prevMap.get(tablesSet);
454 //note: leftNames[0] is the left table name and
455 //leftNames[1] is the left column name. The same pattern applies for
456 //rightNames
457
458 if (tablesSet.contains(leftNames[0]) && !(tablesSet.contains(rightNames[0]))) {
459 currSet.add(rightNames[0]);
460 minOp = minCostJoinType(prevOp, pass1Map.get(currSet), leftNames[1], rightNames[1]);
461 } else if (tablesSet.contains(rightNames[0]) && !(tablesSet.contains(leftNames[0]))){
462 currSet.add(leftNames[0]);
463 minOp = minCostJoinType(pass1Map.get(currSet),prevOp, leftNames[1], rightNames[1]);
464 } else {
465 continue;
466 }
467 for (Object table: tablesSet) {
468 currSet.add((String) table);
469 }
470 QueryOperator temp = map.get(currSet);
471 if (temp == null || minOp.getIOCost() < temp.getIOCost()){
472 map.put(currSet, minOp);
473 }
474 }
475 }
476 return map;
477 }
478
479 /**
480 * Finds the lowest cost QueryOperator in the given mapping. A mapping is
481 * generated on each pass of the search algorithm, and relates a set of tables
482 * to the lowest cost QueryOperator accessing those tables. This method is
483 * called at the end of the search algorithm after all passes have been
484 * processed.
485 *
486 * @return a QueryOperator in the given mapping
487 * @throws QueryPlanException
488 */
489 private QueryOperator minCostOperator(Map<Set, QueryOperator> map) throws QueryPlanException, DatabaseException {
490 QueryOperator minOp = null;
491 QueryOperator newOp;
492 int minCost = Integer.MAX_VALUE;
493 int newCost;
494 for (Set tables : map.keySet()) {
495 newOp = map.get(tables);
496 newCost = newOp.getIOCost();
497 if (newCost < minCost) {
498 minOp = newOp;
499 minCost = newCost;
500 }
501 }
502 return minOp;
503 }
504
505 private String checkIndexEligible() {
506 if (this.selectColumnNames.size() > 0
507 && this.groupByColumn == null
508 && this.joinTableNames.size() == 0) {
509
510 int index = 0;
511 for (String column : selectColumnNames) {
512 if (this.transaction.indexExists(this.startTableName, column)) {
513 if (this.selectOperators.get(index) != PredicateOperator.NOT_EQUALS) {
514 return column;
515 }
516 }
517
518 index++;
519 }
520 }
521
522 return null;
523 }
524
525 private void generateIndexPlan(String indexColumn) throws QueryPlanException, DatabaseException {
526 int selectIndex = this.selectColumnNames.indexOf(indexColumn);
527 PredicateOperator operator = this.selectOperators.get(selectIndex);
528 DataBox value = this.selectDataBoxes.get(selectIndex);
529
530 this.finalOperator = new IndexScanOperator(this.transaction, this.startTableName, indexColumn, operator,
531 value);
532
533 this.selectColumnNames.remove(selectIndex);
534 this.selectOperators.remove(selectIndex);
535 this.selectDataBoxes.remove(selectIndex);
536
537 this.addSelects();
538 this.addProjects();
539 }
540
541 private void addJoins() throws QueryPlanException, DatabaseException {
542 int index = 0;
543
544 for (String joinTable : this.joinTableNames) {
545 SequentialScanOperator scanOperator = new SequentialScanOperator(this.transaction, joinTable);
546
547 SNLJOperator joinOperator = new SNLJOperator(finalOperator, scanOperator,
548 this.joinLeftColumnNames.get(index), this.joinRightColumnNames.get(index), this.transaction); //changed from new JoinOperator
549
550 this.finalOperator = joinOperator;
551 index++;
552 }
553 }
554
555 private void addSelects() throws QueryPlanException, DatabaseException {
556 int index = 0;
557
558 for (String selectColumn : this.selectColumnNames) {
559 PredicateOperator operator = this.selectOperators.get(index);
560 DataBox value = this.selectDataBoxes.get(index);
561
562 SelectOperator selectOperator = new SelectOperator(this.finalOperator, selectColumn,
563 operator, value);
564
565 this.finalOperator = selectOperator;
566 index++;
567 }
568 }
569
570 private void addGroupBy() throws QueryPlanException, DatabaseException {
571 if (this.groupByColumn != null) {
572 if (this.projectColumns.size() > 2 || (this.projectColumns.size() == 1 &&
573 !this.projectColumns.get(0).equals(this.groupByColumn))) {
574 throw new QueryPlanException("Can only project columns specified in the GROUP BY clause.");
575 }
576
577 GroupByOperator groupByOperator = new GroupByOperator(this.finalOperator, this.transaction,
578 this.groupByColumn);
579
580 this.finalOperator = groupByOperator;
581 }
582 }
583
584 private void addProjects() throws QueryPlanException, DatabaseException {
585 if (!this.projectColumns.isEmpty() || this.hasCount || this.sumColumnName != null
586 || this.averageColumnName != null) {
587 ProjectOperator projectOperator = new ProjectOperator(this.finalOperator, this.projectColumns,
588 this.hasCount, this.averageColumnName, this.sumColumnName);
589
590 this.finalOperator = projectOperator;
591 }
592 }
593
594}