-
Notifications
You must be signed in to change notification settings - Fork 4.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HIVE-28551 Check the transactional table is recreated by its Id #5482
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,6 +44,7 @@ | |
import java.util.concurrent.locks.ReadWriteLock; | ||
import java.util.concurrent.locks.ReentrantReadWriteLock; | ||
import java.util.function.Supplier; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.Stream; | ||
|
||
import org.apache.hadoop.conf.Configuration; | ||
|
@@ -88,11 +89,13 @@ public final class QueryResultsCache { | |
public static class LookupInfo { | ||
private String queryText; | ||
private Supplier<ValidTxnWriteIdList> txnWriteIdListProvider; | ||
private Set<Long> txnTables; | ||
|
||
public LookupInfo(String queryText, Supplier<ValidTxnWriteIdList> txnWriteIdListProvider) { | ||
public LookupInfo(String queryText, Supplier<ValidTxnWriteIdList> txnWriteIdListProvider, Set<Long> txnTables) { | ||
super(); | ||
this.queryText = queryText; | ||
this.txnWriteIdListProvider = txnWriteIdListProvider; | ||
this.txnTables = txnTables; | ||
} | ||
|
||
public String getQueryText() { | ||
|
@@ -671,6 +674,15 @@ public void notifyTableChanged(String dbName, String tableName, long updateTime) | |
*/ | ||
private boolean entryMatches(LookupInfo lookupInfo, CacheEntry entry, Set<CacheEntry> entriesToRemove) { | ||
QueryInfo queryInfo = entry.getQueryInfo(); | ||
|
||
if(!queryInfo.getInputs() | ||
.stream() | ||
.map(ReadEntity::getTable) | ||
.map(Table::getTTable) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think that would work with the MV
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Avoid code duplication; the same thing is done in SA |
||
.map(org.apache.hadoop.hive.metastore.api.Table::getId) | ||
.collect(Collectors.toSet()).containsAll(lookupInfo.txnTables)) | ||
return false; | ||
Comment on lines
+678
to
+684
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Adding a holistic check here incurs some overhead since we duplicate some work that is already done as part of the for loop just below. Since we are already iterating over the read entities it may be better to simplify and move the check inside the loop. lookupInfo.tableIds.contains(tableUsed.getTTable().getId()) Moreover, if we detect that a certain cache entry contains a table ID that is not part of the lookupInfo we should determine if we should/can invalidate and remove that entry from the cache. In the current approach we simply bail out and leave the entry inside the cache. |
||
|
||
for (ReadEntity readEntity : queryInfo.getInputs()) { | ||
// Check that the tables used do not resolve to temp tables. | ||
if (readEntity.getType() == Type.TABLE) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15789,12 +15789,22 @@ private ValidTxnWriteIdList getQueryValidTxnWriteIdList() throws SemanticExcepti | |
return null; | ||
} | ||
|
||
private Set<Long> getTransactionedTables() throws SemanticException { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Typo: should be `getTransactionalTables` |
||
return tablesFromReadEntities(inputs) | ||
.stream() | ||
.filter(AcidUtils::isTransactionalTable) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As I wrote previously, I have the impression that it makes sense to gather the ids from all kinds of tables (not only transactional ones). |
||
.map(Table::getTTable) | ||
.map(org.apache.hadoop.hive.metastore.api.Table::getId) | ||
.collect(Collectors.toSet()); | ||
} | ||
|
||
private QueryResultsCache.LookupInfo createLookupInfoForQuery(ASTNode astNode) throws SemanticException { | ||
QueryResultsCache.LookupInfo lookupInfo = null; | ||
String queryString = getQueryStringForCache(astNode); | ||
if (queryString != null) { | ||
ValidTxnWriteIdList writeIdList = getQueryValidTxnWriteIdList(); | ||
lookupInfo = new QueryResultsCache.LookupInfo(queryString, () -> writeIdList); | ||
Set<Long> txnTables = getTransactionedTables(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Getting the table ids is almost a one-liner so you could possibly just inline the code. Set<Long> tableIds =
tablesFromReadEntities(inputs).stream().map(Table::getTTable).map(t -> t.getId()).collect(Collectors.toSet()); |
||
lookupInfo = new QueryResultsCache.LookupInfo(queryString, () -> writeIdList, txnTables); | ||
} | ||
return lookupInfo; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
set hive.support.concurrency=true; | ||
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; | ||
|
||
set hive.query.results.cache.enabled=true; | ||
set hive.query.results.cache.nontransactional.tables.enabled=false; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would be nice to add also another test case (e.g., |
||
set hive.fetch.task.conversion=none; | ||
|
||
CREATE TABLE author (fname STRING) STORED AS ORC TBLPROPERTIES('transactional'='true'); | ||
INSERT INTO author VALUES ('Victor'); | ||
SELECT fname FROM author; | ||
|
||
DROP TABLE author; | ||
|
||
CREATE TABLE author (fname STRING) STORED AS ORC TBLPROPERTIES('transactional'='true'); | ||
INSERT INTO author VALUES ('Alexander'); | ||
SELECT fname FROM author; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Consider adding a new line https://stackoverflow.com/questions/729692/why-should-text-files-end-with-a-newline |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
PREHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC TBLPROPERTIES('transactional'='true') | ||
PREHOOK: type: CREATETABLE | ||
PREHOOK: Output: database:default | ||
PREHOOK: Output: default@author | ||
POSTHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC TBLPROPERTIES('transactional'='true') | ||
POSTHOOK: type: CREATETABLE | ||
POSTHOOK: Output: database:default | ||
POSTHOOK: Output: default@author | ||
PREHOOK: query: INSERT INTO author VALUES ('Victor') | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: _dummy_database@_dummy_table | ||
PREHOOK: Output: default@author | ||
POSTHOOK: query: INSERT INTO author VALUES ('Victor') | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: _dummy_database@_dummy_table | ||
POSTHOOK: Output: default@author | ||
POSTHOOK: Lineage: author.fname SCRIPT [] | ||
PREHOOK: query: SELECT fname FROM author | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: default@author | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: SELECT fname FROM author | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: default@author | ||
#### A masked pattern was here #### | ||
Victor | ||
PREHOOK: query: DROP TABLE author | ||
PREHOOK: type: DROPTABLE | ||
PREHOOK: Input: default@author | ||
PREHOOK: Output: database:default | ||
PREHOOK: Output: default@author | ||
POSTHOOK: query: DROP TABLE author | ||
POSTHOOK: type: DROPTABLE | ||
POSTHOOK: Input: default@author | ||
POSTHOOK: Output: database:default | ||
POSTHOOK: Output: default@author | ||
PREHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC TBLPROPERTIES('transactional'='true') | ||
PREHOOK: type: CREATETABLE | ||
PREHOOK: Output: database:default | ||
PREHOOK: Output: default@author | ||
POSTHOOK: query: CREATE TABLE author (fname STRING) STORED AS ORC TBLPROPERTIES('transactional'='true') | ||
POSTHOOK: type: CREATETABLE | ||
POSTHOOK: Output: database:default | ||
POSTHOOK: Output: default@author | ||
PREHOOK: query: INSERT INTO author VALUES ('Alexander') | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: _dummy_database@_dummy_table | ||
PREHOOK: Output: default@author | ||
POSTHOOK: query: INSERT INTO author VALUES ('Alexander') | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: _dummy_database@_dummy_table | ||
POSTHOOK: Output: default@author | ||
POSTHOOK: Lineage: author.fname SCRIPT [] | ||
PREHOOK: query: SELECT fname FROM author | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: default@author | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: SELECT fname FROM author | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: default@author | ||
#### A masked pattern was here #### | ||
Alexander |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It makes sense to keep the current table ids as part of the lookup info. I would say that we need the ids from all tables, not only from the transactional ones. The problem that we observed here seems to affect non-transactional tables as well, since recreating a table gives a new id.
Consider renaming the field to
tableIds
to better indicate its content. Also the field can be made final.