-
Notifications
You must be signed in to change notification settings - Fork 84
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[controller] do not delay RT topic deletion if the store is no longer hybrid #1234
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3507,8 +3507,7 @@ private void deleteOneStoreVersion(String clusterName, String storeName, int ver | |
} | ||
PubSubTopic rtTopic = pubSubTopicRepository.getTopic(Version.composeRealTimeTopic(storeName)); | ||
if (!store.isHybrid() && getTopicManager().containsTopic(rtTopic)) { | ||
store = resources.getStoreMetadataRepository().getStore(storeName); | ||
safeDeleteRTTopic(clusterName, storeName, store); | ||
safeDeleteRTTopic(clusterName, storeName); | ||
} | ||
} | ||
} | ||
|
@@ -3523,17 +3522,26 @@ private boolean hasFatalDataValidationError(PushMonitor pushMonitor, String topi | |
} | ||
} | ||
|
||
private void safeDeleteRTTopic(String clusterName, String storeName, Store store) { | ||
private void safeDeleteRTTopic(String clusterName, String storeName) { | ||
Store store = getHelixVeniceClusterResources(clusterName).getStoreMetadataRepository().getStore(storeName); | ||
if (canDeleteRTTopic(clusterName, storeName, store)) { | ||
deleteRTTopic(clusterName, storeName); | ||
} else { | ||
LOGGER.warn("Topic deletion for topic: {} is delayed.", Version.composeRealTimeTopic(storeName)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is it easy to log the reason for delayed deletion? |
||
} | ||
} | ||
|
||
public boolean canDeleteRTTopic(String clusterName, String storeName, Store store) { | ||
// Perform RT cleanup checks for batch only store that used to be hybrid. Check the local versions first | ||
// to see if any version is still using RT and then also check other fabrics before deleting the RT. Since | ||
// we perform this check everytime when a store version is deleted we can afford to do best effort | ||
// approach if some fabrics are unavailable or out of sync (temporarily). | ||
boolean canDeleteRT = !Version.containsHybridVersion(store.getVersions()); | ||
return !Version.containsHybridVersion(store.getVersions()) && canDeleteRTTopic(clusterName, storeName); | ||
} | ||
|
||
public boolean canDeleteRTTopic(String clusterName, String storeName) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we rename the function somehow to reflect the fact that we are querying the child controllers for their opinions? |
||
Map<String, ControllerClient> controllerClientMap = getControllerClientMap(clusterName); | ||
for (Map.Entry<String, ControllerClient> controllerClientEntry: controllerClientMap.entrySet()) { | ||
if (!canDeleteRT) { | ||
return; | ||
} | ||
StoreResponse storeResponse = controllerClientEntry.getValue().getStore(storeName); | ||
if (storeResponse.isError()) { | ||
LOGGER.warn( | ||
|
@@ -3542,24 +3550,29 @@ private void safeDeleteRTTopic(String clusterName, String storeName, Store store | |
clusterName, | ||
controllerClientEntry.getKey(), | ||
storeResponse.getError()); | ||
return; | ||
return false; | ||
} | ||
if (Version.containsHybridVersion(storeResponse.getStore().getVersions())) { | ||
return false; | ||
} | ||
canDeleteRT = !Version.containsHybridVersion(storeResponse.getStore().getVersions()); | ||
} | ||
if (canDeleteRT) { | ||
String rtTopicToDelete = Version.composeRealTimeTopic(storeName); | ||
truncateKafkaTopic(rtTopicToDelete); | ||
return true; | ||
} | ||
|
||
private void deleteRTTopic(String clusterName, String storeName) { | ||
Map<String, ControllerClient> controllerClientMap = getControllerClientMap(clusterName); | ||
String rtTopicToDelete = Version.composeRealTimeTopic(storeName); | ||
truncateKafkaTopic(rtTopicToDelete); | ||
for (ControllerClient controllerClient: controllerClientMap.values()) { | ||
controllerClient.deleteKafkaTopic(rtTopicToDelete); | ||
} | ||
Comment on lines
+3565
to
+3568
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. these lines are duplicated later. Can we extract them into a small helper function? |
||
// Check if there is incremental push topic exist. If yes, delete it and send out to let other controller to | ||
// delete it. | ||
String incrementalPushRTTopicToDelete = Version.composeSeparateRealTimeTopic(storeName); | ||
if (getTopicManager().containsTopic(pubSubTopicRepository.getTopic(incrementalPushRTTopicToDelete))) { | ||
truncateKafkaTopic(incrementalPushRTTopicToDelete); | ||
for (ControllerClient controllerClient: controllerClientMap.values()) { | ||
controllerClient.deleteKafkaTopic(rtTopicToDelete); | ||
} | ||
// Check if there is incremental push topic exist. If yes, delete it and send out to let other controller to | ||
// delete it. | ||
String incrementalPushRTTopicToDelete = Version.composeSeparateRealTimeTopic(storeName); | ||
if (getTopicManager().containsTopic(pubSubTopicRepository.getTopic(incrementalPushRTTopicToDelete))) { | ||
truncateKafkaTopic(incrementalPushRTTopicToDelete); | ||
for (ControllerClient controllerClient: controllerClientMap.values()) { | ||
controllerClient.deleteKafkaTopic(incrementalPushRTTopicToDelete); | ||
} | ||
controllerClient.deleteKafkaTopic(incrementalPushRTTopicToDelete); | ||
} | ||
} | ||
} | ||
|
@@ -7366,7 +7379,7 @@ public void configureActiveActiveReplication( | |
if (storeName.isPresent()) { | ||
/** | ||
* Legacy stores venice_system_store_davinci_push_status_store_<cluster_name> still exist. | ||
* But {@link com.linkedin.venice.helix.HelixReadOnlyStoreRepositoryAdapter#getStore(String)} cannot find | ||
* But {@link HelixReadOnlyStoreRepositoryAdapter#getStore(String)} cannot find | ||
* them by store names. Skip davinci push status stores until legacy znodes are cleaned up. | ||
*/ | ||
VeniceSystemStoreType systemStoreType = VeniceSystemStoreType.getSystemStoreType(storeName.get()); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks like the conditions for RT topic deletion exist in multiple places, e.g. L3509, canDeleteRTTopic. is it possible to merge these conditions into a single function for simplicity?