diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt index f8f3867dc..d531d844d 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/AlertingPlugin.kt @@ -6,6 +6,7 @@ package org.opensearch.alerting import org.opensearch.action.ActionRequest +import org.opensearch.alerting.action.DocLevelMonitorFanOutAction import org.opensearch.alerting.action.ExecuteMonitorAction import org.opensearch.alerting.action.ExecuteWorkflowAction import org.opensearch.alerting.action.GetDestinationsAction @@ -55,6 +56,7 @@ import org.opensearch.alerting.transport.TransportAcknowledgeAlertAction import org.opensearch.alerting.transport.TransportAcknowledgeChainedAlertAction import org.opensearch.alerting.transport.TransportDeleteMonitorAction import org.opensearch.alerting.transport.TransportDeleteWorkflowAction +import org.opensearch.alerting.transport.TransportDocLevelMonitorFanOutAction import org.opensearch.alerting.transport.TransportExecuteMonitorAction import org.opensearch.alerting.transport.TransportExecuteWorkflowAction import org.opensearch.alerting.transport.TransportGetAlertsAction @@ -220,6 +222,7 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R ActionPlugin.ActionHandler(AlertingActions.DELETE_WORKFLOW_ACTION_TYPE, TransportDeleteWorkflowAction::class.java), ActionPlugin.ActionHandler(ExecuteWorkflowAction.INSTANCE, TransportExecuteWorkflowAction::class.java), ActionPlugin.ActionHandler(GetRemoteIndexesAction.INSTANCE, TransportGetRemoteIndexesAction::class.java), + ActionPlugin.ActionHandler(DocLevelMonitorFanOutAction.INSTANCE, TransportDocLevelMonitorFanOutAction::class.java) ) } @@ -254,6 +257,8 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R val settings = environment.settings() val lockService = LockService(client, clusterService) alertIndices = AlertIndices(settings, client, threadPool, clusterService) + val alertService = AlertService(client, xContentRegistry, alertIndices) + val triggerService = TriggerService(scriptService) runner = MonitorRunnerService .registerClusterService(clusterService) .registerClient(client) @@ -264,8 +269,8 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R .registerThreadPool(threadPool) .registerAlertIndices(alertIndices) .registerInputService(InputService(client, scriptService, namedWriteableRegistry, xContentRegistry, clusterService, settings)) - .registerTriggerService(TriggerService(scriptService)) - .registerAlertService(AlertService(client, xContentRegistry, alertIndices)) + .registerTriggerService(triggerService) + .registerAlertService(alertService) .registerDocLevelMonitorQueries(DocLevelMonitorQueries(client, clusterService)) .registerJvmStats(JvmStats.jvmStats()) .registerWorkflowService(WorkflowService(client, xContentRegistry)) @@ -296,7 +301,17 @@ internal class AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R DeleteMonitorService.initialize(client, lockService) - return listOf(sweeper, scheduler, runner, scheduledJobIndices, docLevelMonitorQueries, destinationMigrationCoordinator, lockService) + return listOf( + sweeper, + scheduler, + runner, + scheduledJobIndices, + docLevelMonitorQueries, + destinationMigrationCoordinator, + lockService, + alertService, + triggerService + ) } override fun getSettings(): List> { @@ -327,6 +342,7 @@ internal class 
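A note for readers new to the plugin wiring above: the `ActionHandler` entry added to `getActions()` is the standard OpenSearch pattern pairing an `ActionType` singleton with a server-side `HandledTransportAction`. A minimal, self-contained sketch of that pattern follows; the `Ping*` names are hypothetical and not part of this PR — the real pair is `DocLevelMonitorFanOutAction`/`TransportDocLevelMonitorFanOutAction`, whose request and response classes appear later in this diff.

```kotlin
import org.opensearch.action.ActionRequest
import org.opensearch.action.ActionRequestValidationException
import org.opensearch.action.ActionType
import org.opensearch.action.support.ActionFilters
import org.opensearch.action.support.HandledTransportAction
import org.opensearch.common.inject.Inject
import org.opensearch.core.action.ActionListener
import org.opensearch.core.action.ActionResponse
import org.opensearch.core.common.io.stream.StreamInput
import org.opensearch.core.common.io.stream.StreamOutput
import org.opensearch.tasks.Task
import org.opensearch.transport.TransportService

// Hypothetical request/response pair; the real ones carry monitor state.
class PingRequest() : ActionRequest() {
    constructor(sin: StreamInput) : this() // headers ignored for brevity
    override fun validate(): ActionRequestValidationException? = null
}

class PingResponse(val pong: Boolean) : ActionResponse() {
    constructor(sin: StreamInput) : this(sin.readBoolean())
    override fun writeTo(out: StreamOutput) = out.writeBoolean(pong)
}

// The ActionType singleton names the action and knows how to read its response.
class PingAction private constructor() : ActionType<PingResponse>(NAME, ::PingResponse) {
    companion object {
        val INSTANCE = PingAction()
        const val NAME = "cluster:admin/opensearch/alerting/example/ping"
    }
}

// The transport action executes server-side; the ActionHandler registered in
// getActions() is what routes client.execute(PingAction.INSTANCE, ...) here.
class TransportPingAction @Inject constructor(
    transportService: TransportService,
    actionFilters: ActionFilters,
) : HandledTransportAction<PingRequest, PingResponse>(PingAction.NAME, transportService, actionFilters, ::PingRequest) {
    override fun doExecute(task: Task, request: PingRequest, listener: ActionListener<PingResponse>) {
        listener.onResponse(PingResponse(pong = true))
    }
}
```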
AlertingPlugin : PainlessExtension, ActionPlugin, ScriptPlugin, R AlertingSettings.ALERT_HISTORY_RETENTION_PERIOD, AlertingSettings.ALERTING_MAX_MONITORS, AlertingSettings.PERCOLATE_QUERY_DOCS_SIZE_MEMORY_PERCENTAGE_LIMIT, + AlertingSettings.DOC_LEVEL_MONITOR_FAN_OUT_NODES, DOC_LEVEL_MONITOR_SHARD_FETCH_SIZE, AlertingSettings.PERCOLATE_QUERY_MAX_NUM_DOCS_IN_MEMORY, AlertingSettings.REQUEST_TIMEOUT, diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/BucketLevelMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/BucketLevelMonitorRunner.kt index 47fef3edb..b1e876291 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/BucketLevelMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/BucketLevelMonitorRunner.kt @@ -55,6 +55,7 @@ import org.opensearch.search.aggregations.AggregatorFactories import org.opensearch.search.aggregations.bucket.composite.CompositeAggregationBuilder import org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder import org.opensearch.search.builder.SearchSourceBuilder +import org.opensearch.transport.TransportService import java.time.Instant import java.util.UUID @@ -68,7 +69,8 @@ object BucketLevelMonitorRunner : MonitorRunner() { periodEnd: Instant, dryrun: Boolean, workflowRunContext: WorkflowRunContext?, - executionId: String + executionId: String, + transportService: TransportService ): MonitorRunResult { val roles = MonitorRunnerService.getRolesForMonitor(monitor) logger.debug("Running monitor: ${monitor.name} with roles: $roles Thread: ${Thread.currentThread().name}") diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/DocumentLevelMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/DocumentLevelMonitorRunner.kt index cc4b26a79..cc15ad71a 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/DocumentLevelMonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/DocumentLevelMonitorRunner.kt @@ -7,93 +7,51 @@ package org.opensearch.alerting import org.apache.logging.log4j.LogManager import org.opensearch.ExceptionsHelper -import org.opensearch.OpenSearchStatusException -import org.opensearch.action.DocWriteRequest -import org.opensearch.action.admin.indices.refresh.RefreshAction -import org.opensearch.action.admin.indices.refresh.RefreshRequest -import org.opensearch.action.bulk.BulkRequest -import org.opensearch.action.bulk.BulkResponse -import org.opensearch.action.get.MultiGetItemResponse -import org.opensearch.action.get.MultiGetRequest -import org.opensearch.action.index.IndexRequest -import org.opensearch.action.search.SearchAction -import org.opensearch.action.search.SearchRequest -import org.opensearch.action.search.SearchResponse -import org.opensearch.alerting.model.AlertContext +import org.opensearch.Version +import org.opensearch.action.ActionListenerResponseHandler +import org.opensearch.action.support.GroupedActionListener +import org.opensearch.alerting.action.DocLevelMonitorFanOutAction +import org.opensearch.alerting.action.DocLevelMonitorFanOutRequest +import org.opensearch.alerting.action.DocLevelMonitorFanOutResponse +import org.opensearch.alerting.model.ActionRunResult import org.opensearch.alerting.model.DocumentLevelTriggerRunResult import org.opensearch.alerting.model.IndexExecutionContext import org.opensearch.alerting.model.InputRunResults -import org.opensearch.alerting.model.MonitorMetadata import org.opensearch.alerting.model.MonitorRunResult -import org.opensearch.alerting.model.userErrorMessage -import 
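The new `AlertingSettings.DOC_LEVEL_MONITOR_FAN_OUT_NODES` entry registered above is defined in `AlertingSettings`, which this diff does not include. A plausible shape is sketched below — the key, default, and minimum are assumptions for illustration, not the PR's actual values:

```kotlin
import org.opensearch.common.settings.Setting

// Illustrative only: the real key/default/bounds live in AlertingSettings.
val DOC_LEVEL_MONITOR_FAN_OUT_NODES: Setting<Int> = Setting.intSetting(
    "plugins.alerting.monitor.doc_level_monitor_fan_out_nodes",
    1000, // assumed default: high enough to mean "use all eligible data nodes"
    1,    // assumed minimum: at least one node must run the fan-out
    Setting.Property.NodeScope,
    Setting.Property.Dynamic
)
```

`MonitorRunnerService` (later in this diff) reads the initial value with `.get(monitorCtx.settings)` and keeps `monitorCtx.totalNodesFanOut` current through `clusterSettings.addSettingsUpdateConsumer`, matching the other dynamic settings in this list.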
org.opensearch.alerting.opensearchapi.convertToMap -import org.opensearch.alerting.opensearchapi.suspendUntil -import org.opensearch.alerting.script.DocumentLevelTriggerExecutionContext import org.opensearch.alerting.util.AlertingException import org.opensearch.alerting.util.IndexUtils -import org.opensearch.alerting.util.defaultToPerExecutionAction -import org.opensearch.alerting.util.getActionExecutionPolicy -import org.opensearch.alerting.util.getCancelAfterTimeInterval -import org.opensearch.alerting.util.parseSampleDocTags -import org.opensearch.alerting.util.printsSampleDocData import org.opensearch.alerting.workflow.WorkflowRunContext -import org.opensearch.client.node.NodeClient import org.opensearch.cluster.metadata.IndexMetadata -import org.opensearch.cluster.routing.Preference +import org.opensearch.cluster.node.DiscoveryNode import org.opensearch.cluster.routing.ShardRouting import org.opensearch.cluster.service.ClusterService -import org.opensearch.common.unit.TimeValue -import org.opensearch.common.xcontent.XContentFactory -import org.opensearch.common.xcontent.XContentType -import org.opensearch.commons.alerting.AlertingPluginInterface -import org.opensearch.commons.alerting.action.PublishFindingsRequest -import org.opensearch.commons.alerting.action.SubscribeFindingsResponse -import org.opensearch.commons.alerting.model.ActionExecutionResult -import org.opensearch.commons.alerting.model.Alert import org.opensearch.commons.alerting.model.DocLevelMonitorInput import org.opensearch.commons.alerting.model.DocLevelQuery -import org.opensearch.commons.alerting.model.DocumentLevelTrigger -import org.opensearch.commons.alerting.model.Finding import org.opensearch.commons.alerting.model.Monitor -import org.opensearch.commons.alerting.model.action.PerAlertActionScope -import org.opensearch.commons.alerting.util.string import org.opensearch.core.action.ActionListener -import org.opensearch.core.common.bytes.BytesReference +import org.opensearch.core.common.breaker.CircuitBreakingException +import org.opensearch.core.common.io.stream.Writeable +import org.opensearch.core.index.shard.ShardId import org.opensearch.core.rest.RestStatus -import org.opensearch.core.xcontent.ToXContent -import org.opensearch.core.xcontent.XContentBuilder import org.opensearch.index.IndexNotFoundException -import org.opensearch.index.query.BoolQueryBuilder -import org.opensearch.index.query.Operator -import org.opensearch.index.query.QueryBuilders import org.opensearch.index.seqno.SequenceNumbers -import org.opensearch.indices.IndexClosedException -import org.opensearch.percolator.PercolateQueryBuilderExt -import org.opensearch.search.SearchHit -import org.opensearch.search.SearchHits -import org.opensearch.search.builder.SearchSourceBuilder -import org.opensearch.search.fetch.subphase.FetchSourceContext -import org.opensearch.search.sort.SortOrder +import org.opensearch.node.NodeClosedException +import org.opensearch.transport.ActionNotFoundTransportException +import org.opensearch.transport.ConnectTransportException +import org.opensearch.transport.RemoteTransportException +import org.opensearch.transport.TransportException +import org.opensearch.transport.TransportRequestOptions +import org.opensearch.transport.TransportService import java.io.IOException import java.time.Instant -import java.util.UUID -import java.util.stream.Collectors +import kotlin.coroutines.resume +import kotlin.coroutines.resumeWithException +import kotlin.coroutines.suspendCoroutine import kotlin.math.max class 
DocumentLevelMonitorRunner : MonitorRunner() { private val logger = LogManager.getLogger(javaClass) - var nonPercolateSearchesTimeTakenStat = 0L - var percolateQueriesTimeTakenStat = 0L - var totalDocsQueriedStat = 0L - var docTransformTimeTakenStat = 0L - var totalDocsSizeInBytesStat = 0L - var docsSizeOfBatchInBytes = 0L - /* Contains list of docs source that are held in memory to submit to percolate query against query index. - * Docs are fetched from the source index per shard and transformed.*/ - val transformedDocs = mutableListOf>() - - // Maps a finding ID to the related document. - private val findingIdToDocSource = mutableMapOf() + private var totalTimeTakenStat = 0L override suspend fun runMonitor( monitor: Monitor, @@ -102,9 +60,11 @@ class DocumentLevelMonitorRunner : MonitorRunner() { periodEnd: Instant, dryrun: Boolean, workflowRunContext: WorkflowRunContext?, - executionId: String + executionId: String, + transportService: TransportService ): MonitorRunResult { logger.debug("Document-level-monitor is running ...") + val startTime = System.currentTimeMillis() val isTempMonitor = dryrun || monitor.id == Monitor.NO_ID var monitorResult = MonitorRunResult(monitor.name, periodStart, periodEnd) monitorCtx.findingsToTriggeredQueries = mutableMapOf() @@ -149,10 +109,6 @@ class DocumentLevelMonitorRunner : MonitorRunner() { val updatedLastRunContext = lastRunContext.toMutableMap() - val queryToDocIds = mutableMapOf>() - val inputRunResults = mutableMapOf>() - val docsToQueries = mutableMapOf>() - try { // Resolve all passed indices to concrete indices val allConcreteIndices = IndexUtils.resolveAllIndices( @@ -186,6 +142,7 @@ class DocumentLevelMonitorRunner : MonitorRunner() { val concreteIndicesSeenSoFar = mutableListOf() val updatedIndexNames = mutableListOf() + val docLevelMonitorFanOutResponses: MutableList = mutableListOf() docLevelMonitorInput.indices.forEach { indexName -> var concreteIndices = IndexUtils.resolveAllIndices( listOf(indexName), @@ -236,7 +193,11 @@ class DocumentLevelMonitorRunner : MonitorRunner() { if (IndexUtils.isAlias(indexName, monitorCtx.clusterService!!.state()) || IndexUtils.isDataStream(indexName, monitorCtx.clusterService!!.state()) ) { - if (concreteIndexName == IndexUtils.getWriteIndex(indexName, monitorCtx.clusterService!!.state())) { + if (concreteIndexName == IndexUtils.getWriteIndex( + indexName, + monitorCtx.clusterService!!.state() + ) + ) { updatedLastRunContext.remove(lastWriteIndex) updatedLastRunContext[concreteIndexName] = indexUpdatedRunContext } @@ -251,30 +212,9 @@ class DocumentLevelMonitorRunner : MonitorRunner() { // update lastRunContext if its a temp monitor as we only want to view the last bit of data then // TODO: If dryrun, we should make it so we limit the search as this could still potentially give us lots of data if (isTempMonitor) { - indexLastRunContext[shard] = max(-1, (indexUpdatedRunContext[shard] as String).toInt() - 10) - } - } - - val fieldsToBeQueried = mutableSetOf() - if (monitorCtx.fetchOnlyQueryFieldNames) { - for (it in queries) { - if (it.queryFieldNames.isEmpty()) { - fieldsToBeQueried.clear() - logger.debug( - "Monitor ${monitor.id} : " + - "Doc Level query ${it.id} : ${it.query} doesn't have queryFieldNames populated. " + - "Cannot optimize monitor to fetch only query-relevant fields. " + - "Querying entire doc source." 
- ) - break - } - fieldsToBeQueried.addAll(it.queryFieldNames) + indexLastRunContext[shard] = + max(-1, (indexUpdatedRunContext[shard] as String).toInt() - 10) } - if (fieldsToBeQueried.isNotEmpty()) - logger.debug( - "Monitor ${monitor.id} Querying only fields " + - "${fieldsToBeQueried.joinToString()} instead of entire _source of documents" - ) } val indexExecutionContext = IndexExecutionContext( queries, @@ -282,90 +222,150 @@ class DocumentLevelMonitorRunner : MonitorRunner() { indexUpdatedRunContext, updatedIndexName, concreteIndexName, + updatedIndexNames, + concreteIndices, conflictingFields.toList(), matchingDocIdsPerIndex?.get(concreteIndexName), ) - fetchShardDataAndMaybeExecutePercolateQueries( - monitor, + val shards = mutableSetOf() + shards.addAll(indexUpdatedRunContext.keys) + shards.remove("index") + shards.remove("shards_count") + + val nodeMap = getNodes(monitorCtx) + val nodeShardAssignments = distributeShards( monitorCtx, - indexExecutionContext, - monitorMetadata, - inputRunResults, - docsToQueries, - updatedIndexNames, - concreteIndicesSeenSoFar, - ArrayList(fieldsToBeQueried) - ) { shard, maxSeqNo -> // function passed to update last run context with new max sequence number - indexExecutionContext.updatedLastRunContext[shard] = maxSeqNo - } - } - } - /* if all indices are covered still in-memory docs size limit is not breached we would need to submit - the percolate query at the end */ - if (transformedDocs.isNotEmpty()) { - performPercolateQueryAndResetCounters( - monitorCtx, - monitor, - monitorMetadata, - updatedIndexNames, - concreteIndicesSeenSoFar, - inputRunResults, - docsToQueries, - ) - } - monitorResult = monitorResult.copy(inputResults = InputRunResults(listOf(inputRunResults))) + nodeMap.keys.toList(), + shards.toList(), + concreteIndexName + ) + + val responses: Collection = suspendCoroutine { cont -> + val listener = GroupedActionListener( + object : ActionListener> { + override fun onResponse(response: Collection) { + cont.resume(response) + } + + override fun onFailure(e: Exception) { + if (e.cause is Exception) + cont.resumeWithException(e.cause as Exception) + else + cont.resumeWithException(e) + } + }, + nodeShardAssignments.size + ) + val responseReader = Writeable.Reader { + DocLevelMonitorFanOutResponse(it) + } + for (node in nodeMap) { + if (nodeShardAssignments.containsKey(node.key)) { + val docLevelMonitorFanOutRequest = DocLevelMonitorFanOutRequest( + monitor, + dryrun, + monitorMetadata, + executionId, + indexExecutionContext, + nodeShardAssignments[node.key]!!.toList(), + concreteIndicesSeenSoFar, + workflowRunContext + ) - /* - populate the map queryToDocIds with pairs of - this fixes the issue of passing id, name, tags fields of DocLevelQuery object correctly to TriggerExpressionParser - */ - queries.forEach { - if (inputRunResults.containsKey(it.id)) { - queryToDocIds[it] = inputRunResults[it.id]!! 
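The `suspendCoroutine` + `GroupedActionListener` combination in the runner above is what lets one coroutine fan requests out to N nodes and suspend until all N callbacks arrive. A toy, dependency-free illustration of the same shape follows (note: the real `GroupedActionListener` records failures and reports them only after all callbacks return; this sketch short-circuits on the first failure for brevity):

```kotlin
import java.util.concurrent.ConcurrentLinkedQueue
import java.util.concurrent.atomic.AtomicInteger
import kotlin.coroutines.resume
import kotlin.coroutines.resumeWithException
import kotlin.coroutines.suspendCoroutine
import kotlinx.coroutines.runBlocking

// Toy stand-ins for ActionListener / GroupedActionListener.
interface Listener<T> {
    fun onResponse(value: T)
    fun onFailure(e: Exception)
}

// Collects `groupSize` responses, then delegates once.
class GroupedListener<T>(private val delegate: Listener<Collection<T>>, groupSize: Int) : Listener<T> {
    private val results = ConcurrentLinkedQueue<T>()
    private val pending = AtomicInteger(groupSize)
    override fun onResponse(value: T) {
        results.add(value)
        if (pending.decrementAndGet() == 0) delegate.onResponse(results.toList())
    }
    override fun onFailure(e: Exception) = delegate.onFailure(e) // simplified short-circuit
}

// Suspend until every node has answered, mirroring the runner's fan-out.
suspend fun fanOut(nodes: List<String>, send: (String, Listener<String>) -> Unit): Collection<String> =
    suspendCoroutine { cont ->
        val grouped = GroupedListener(object : Listener<Collection<String>> {
            override fun onResponse(value: Collection<String>) = cont.resume(value)
            override fun onFailure(e: Exception) = cont.resumeWithException(e)
        }, nodes.size)
        nodes.forEach { node -> send(node, grouped) }
    }

fun main() = runBlocking {
    val replies = fanOut(listOf("node-1", "node-2")) { node, l -> l.onResponse("ack from $node") }
    println(replies) // [ack from node-1, ack from node-2]
}
```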
+ transportService.sendRequest( + node.value, + DocLevelMonitorFanOutAction.NAME, + docLevelMonitorFanOutRequest, + TransportRequestOptions.EMPTY, + object : ActionListenerResponseHandler( + listener, + responseReader + ) { + override fun handleException(e: TransportException) { + val cause = e.unwrapCause() + if (cause is ConnectTransportException || + ( + e is RemoteTransportException && + ( + cause is NodeClosedException || + cause is CircuitBreakingException || + cause is ActionNotFoundTransportException + ) + ) + ) { + val localNode = monitorCtx.clusterService!!.localNode() + // retry in local node + transportService.sendRequest( + localNode, + DocLevelMonitorFanOutAction.NAME, + docLevelMonitorFanOutRequest, + TransportRequestOptions.EMPTY, + object : + ActionListenerResponseHandler( + listener, + responseReader + ) { + override fun handleException(e: TransportException) { + logger.error("Fan out retry failed in node ${localNode.id}", e) + listener.onResponse( + DocLevelMonitorFanOutResponse( + "", + "", + "", + mutableMapOf(), + exception = if (e.cause is AlertingException) { + e.cause as AlertingException + } else { + AlertingException.wrap(e) as AlertingException + } + ) + ) + } + + override fun handleResponse(response: DocLevelMonitorFanOutResponse) { + listener.onResponse(response) + } + } + ) + } else { + logger.error("Fan out failed in node ${node.key}", e) + listener.onResponse( + DocLevelMonitorFanOutResponse( + "", + "", + "", + mutableMapOf(), + exception = if (e.cause is AlertingException) { + e.cause as AlertingException + } else { + AlertingException.wrap(e) as AlertingException + } + ) + ) + } + } + + override fun handleResponse(response: DocLevelMonitorFanOutResponse) { + listener.onResponse(response) + } + } + ) + } + } + } + docLevelMonitorFanOutResponses.addAll(responses) } } - val idQueryMap: Map = queries.associateBy { it.id } - - val triggerResults = mutableMapOf() - // If there are no triggers defined, we still want to generate findings - if (monitor.triggers.isEmpty()) { - if (dryrun == false && monitor.id != Monitor.NO_ID) { - createFindings(monitor, monitorCtx, docsToQueries, idQueryMap, true) - } - } else { - monitor.triggers.forEach { - triggerResults[it.id] = runForEachDocTrigger( - monitorCtx, - monitorResult, - it as DocumentLevelTrigger, - monitor, - idQueryMap, - docsToQueries, - queryToDocIds, - dryrun, - executionId = executionId, - workflowRunContext = workflowRunContext - ) - } + val isFanOutSuccessful = checkAndThrowExceptionIfAllFanOutsFailed(docLevelMonitorFanOutResponses) + if (isFanOutSuccessful != null) { + throw isFanOutSuccessful } - // Don't update monitor if this is a test monitor + updateLastRunContextFromFanOutResponses(docLevelMonitorFanOutResponses, updatedLastRunContext) + val triggerResults = buildTriggerResults(docLevelMonitorFanOutResponses) + val inputRunResults = buildInputRunResults(docLevelMonitorFanOutResponses) if (!isTempMonitor) { - // If any error happened during trigger execution, upsert monitor error alert - val errorMessage = constructErrorMessageFromTriggerResults(triggerResults = triggerResults) - if (errorMessage.isNotEmpty()) { - monitorCtx.alertService!!.upsertMonitorErrorAlert( - monitor = monitor, - errorMessage = errorMessage, - executionId = executionId, - workflowRunContext - ) - } else { - onSuccessfulMonitorRun(monitorCtx, monitor) - } - MonitorMetadataService.upsertMetadata( monitorMetadata.copy(lastRunContext = updatedLastRunContext), true @@ -374,9 +374,8 @@ class DocumentLevelMonitorRunner : 
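The nested `handleException` handlers above encode a single policy: if the target node is unreachable or went away mid-flight (`ConnectTransportException`, or a `RemoteTransportException` wrapping `NodeClosedException`, `CircuitBreakingException`, or `ActionNotFoundTransportException`), retry the fan-out once on the local node; every other failure — including a failed local retry — is converted into a `DocLevelMonitorFanOutResponse` carrying an `AlertingException`. That conversion matters because the `GroupedActionListener` is sized to `nodeShardAssignments.size` and must receive exactly that many callbacks. The retry predicate could be factored out roughly like this (sketch, not part of the PR):

```kotlin
import org.opensearch.core.common.breaker.CircuitBreakingException
import org.opensearch.node.NodeClosedException
import org.opensearch.transport.ActionNotFoundTransportException
import org.opensearch.transport.ConnectTransportException
import org.opensearch.transport.RemoteTransportException
import org.opensearch.transport.TransportException

// True when the failure means "this node can't serve the request right now",
// i.e. the same work may still succeed if re-dispatched to the local node.
fun shouldRetryLocally(e: TransportException): Boolean {
    val cause = e.unwrapCause()
    return cause is ConnectTransportException ||
        (
            e is RemoteTransportException &&
                (
                    cause is NodeClosedException ||
                        cause is CircuitBreakingException ||
                        cause is ActionNotFoundTransportException
                    )
            )
}
```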
MonitorRunner() { // Clean up any queries created by the dry run monitor monitorCtx.docLevelMonitorQueries!!.deleteDocLevelQueriesOnDryRun(monitorMetadata) } - // TODO: Update the Document as part of the Trigger and return back the trigger action result - return monitorResult.copy(triggerResults = triggerResults) + return monitorResult.copy(triggerResults = triggerResults, inputResults = inputRunResults) } catch (e: Exception) { val errorMessage = ExceptionsHelper.detailedMessage(e) monitorCtx.alertService!!.upsertMonitorErrorAlert(monitor, errorMessage, executionId, workflowRunContext) @@ -388,282 +387,134 @@ class DocumentLevelMonitorRunner : MonitorRunner() { ) return monitorResult.copy(error = alertingException, inputResults = InputRunResults(emptyList(), alertingException)) } finally { + val endTime = System.currentTimeMillis() + totalTimeTakenStat = endTime - startTime logger.debug( - "PERF_DEBUG_STATS: Monitor ${monitor.id} " + - "Time spent on fetching data from shards in millis: $nonPercolateSearchesTimeTakenStat" - ) - logger.debug( - "PERF_DEBUG_STATS: Monitor {} Time spent on percolate queries in millis: {}", - monitor.id, - percolateQueriesTimeTakenStat - ) - logger.debug( - "PERF_DEBUG_STATS: Monitor {} Time spent on transforming doc fields in millis: {}", + "Monitor {} Time spent on monitor run: {}", monitor.id, - docTransformTimeTakenStat + totalTimeTakenStat ) - logger.debug("PERF_DEBUG_STATS: Monitor {} Num docs queried: {}", monitor.id, totalDocsQueriedStat) } } - private suspend fun onSuccessfulMonitorRun(monitorCtx: MonitorRunnerExecutionContext, monitor: Monitor) { - monitorCtx.alertService!!.clearMonitorErrorAlert(monitor) - if (monitor.dataSources.alertsHistoryIndex != null) { - monitorCtx.alertService!!.moveClearedErrorAlertsToHistory( - monitor.id, - monitor.dataSources.alertsIndex, - monitor.dataSources.alertsHistoryIndex!! - ) - } - } + private fun updateLastRunContextFromFanOutResponses( + docLevelMonitorFanOutResponses: MutableList, + updatedLastRunContext: MutableMap>, + ) { - private fun constructErrorMessageFromTriggerResults( - triggerResults: MutableMap? 
= null - ): String { - var errorMessage = "" - if (triggerResults != null) { - val triggersErrorBuilder = StringBuilder() - triggerResults.forEach { - if (it.value.error != null) { - triggersErrorBuilder.append("[${it.key}]: [${it.value.error!!.userErrorMessage()}]").append(" | ") + // Prepare updatedLastRunContext for each index + for (indexName in updatedLastRunContext.keys) { + for (fanOutResponse in docLevelMonitorFanOutResponses) { + if (fanOutResponse.exception == null) { + // fanOutResponse.lastRunContexts //updatedContexts for relevant shards + val indexLastRunContext = updatedLastRunContext[indexName] as MutableMap + + if (fanOutResponse.lastRunContexts.contains("index") && fanOutResponse.lastRunContexts["index"] == indexName) { + fanOutResponse.lastRunContexts.keys.forEach { + + val seq_no = fanOutResponse.lastRunContexts[it].toString().toIntOrNull() + if ( + it != "shards_count" && + it != "index" && + seq_no != null && + seq_no >= 0 + ) { + indexLastRunContext[it] = seq_no + } + } + } } } - if (triggersErrorBuilder.isNotEmpty()) { - errorMessage = "Trigger errors: $triggersErrorBuilder" - } } - return errorMessage } - private suspend fun runForEachDocTrigger( - monitorCtx: MonitorRunnerExecutionContext, - monitorResult: MonitorRunResult, - trigger: DocumentLevelTrigger, - monitor: Monitor, - idQueryMap: Map, - docsToQueries: MutableMap>, - queryToDocIds: Map>, - dryrun: Boolean, - workflowRunContext: WorkflowRunContext?, - executionId: String - ): DocumentLevelTriggerRunResult { - val triggerCtx = DocumentLevelTriggerExecutionContext(monitor, trigger) - val triggerResult = monitorCtx.triggerService!!.runDocLevelTrigger(monitor, trigger, queryToDocIds) - - val triggerFindingDocPairs = mutableListOf>() - - // TODO: Implement throttling for findings - val findingToDocPairs = createFindings( - monitor, - monitorCtx, - docsToQueries, - idQueryMap, - !dryrun && monitor.id != Monitor.NO_ID, - executionId - ) - - findingToDocPairs.forEach { - // Only pick those entries whose docs have triggers associated with them - if (triggerResult.triggeredDocs.contains(it.second)) { - triggerFindingDocPairs.add(Pair(it.first, it.second)) - } - } - - val actionCtx = triggerCtx.copy( - triggeredDocs = triggerResult.triggeredDocs, - relatedFindings = findingToDocPairs.map { it.first }, - error = monitorResult.error ?: triggerResult.error - ) - - if (printsSampleDocData(trigger) && triggerFindingDocPairs.isNotEmpty()) - getDocSources( - findingToDocPairs = findingToDocPairs, - monitorCtx = monitorCtx, - monitor = monitor - ) - - val alerts = mutableListOf() - val alertContexts = mutableListOf() - triggerFindingDocPairs.forEach { - val alert = monitorCtx.alertService!!.composeDocLevelAlert( - listOf(it.first), - listOf(it.second), - triggerCtx, - monitorResult.alertError() ?: triggerResult.alertError(), - executionId = executionId, - workflorwRunContext = workflowRunContext - ) - alerts.add(alert) - - val docSource = findingIdToDocSource[alert.findingIds.first()]?.response?.convertToMap() - - alertContexts.add( - AlertContext( - alert = alert, - associatedQueries = alert.findingIds.flatMap { findingId -> - monitorCtx.findingsToTriggeredQueries?.getOrDefault(findingId, emptyList()) ?: emptyList() - }, - sampleDocs = listOfNotNull(docSource) - ) - ) - } - - val shouldDefaultToPerExecution = defaultToPerExecutionAction( - monitorCtx.maxActionableAlertCount, - monitorId = monitor.id, - triggerId = trigger.id, - totalActionableAlertCount = alerts.size, - monitorOrTriggerError = actionCtx.error - ) - - for 
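To make `updateLastRunContextFromFanOutResponses` concrete: each successful fan-out response carries a per-index run context keyed by shard number plus the bookkeeping keys `index` and `shards_count`, and only non-negative sequence numbers from the matching index are copied back into the merged context. The same merge on toy data, as a self-contained sketch:

```kotlin
fun mergeRunContexts(
    updated: MutableMap<String, MutableMap<String, Any>>,
    responses: List<Map<String, Any>> // each fan-out response's lastRunContexts
) {
    for ((indexName, indexCtx) in updated) {
        for (ctx in responses) {
            if (ctx["index"] != indexName) continue // context belongs to another index
            for ((key, value) in ctx) {
                val seqNo = value.toString().toIntOrNull()
                if (key != "index" && key != "shards_count" && seqNo != null && seqNo >= 0) {
                    indexCtx[key] = seqNo // shard key -> highest seen sequence number
                }
            }
        }
    }
}

fun main() {
    val updated = mutableMapOf("logs-1" to mutableMapOf<String, Any>("0" to -1, "1" to -1))
    val fanOut = listOf(mapOf<String, Any>("index" to "logs-1", "shards_count" to 2, "0" to 42))
    mergeRunContexts(updated, fanOut)
    println(updated) // {logs-1={0=42, 1=-1}}
}
```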
(action in trigger.actions) { - val actionExecutionScope = action.getActionExecutionPolicy(monitor)!!.actionExecutionScope - if (actionExecutionScope is PerAlertActionScope && !shouldDefaultToPerExecution) { - for (alertContext in alertContexts) { - val actionResults = this.runAction(action, actionCtx.copy(alerts = listOf(alertContext)), monitorCtx, monitor, dryrun) - triggerResult.actionResultsMap.getOrPut(alertContext.alert.id) { mutableMapOf() } - triggerResult.actionResultsMap[alertContext.alert.id]?.set(action.id, actionResults) - } - } else if (alertContexts.isNotEmpty()) { - val actionResults = this.runAction(action, actionCtx.copy(alerts = alertContexts), monitorCtx, monitor, dryrun) - for (alert in alerts) { - triggerResult.actionResultsMap.getOrPut(alert.id) { mutableMapOf() } - triggerResult.actionResultsMap[alert.id]?.set(action.id, actionResults) - } - } - } - - // Alerts are saved after the actions since if there are failures in the actions, they can be stated in the alert - if (!dryrun && monitor.id != Monitor.NO_ID) { - val updatedAlerts = alerts.map { alert -> - val actionResults = triggerResult.actionResultsMap.getOrDefault(alert.id, emptyMap()) - val actionExecutionResults = actionResults.values.map { actionRunResult -> - ActionExecutionResult(actionRunResult.actionId, actionRunResult.executionTime, if (actionRunResult.throttled) 1 else 0) - } - alert.copy(actionExecutionResults = actionExecutionResults) - } - - monitorCtx.retryPolicy?.let { - monitorCtx.alertService!!.saveAlerts( - monitor.dataSources, - updatedAlerts, - it, - routingId = monitor.id - ) + private fun checkAndThrowExceptionIfAllFanOutsFailed( + docLevelMonitorFanOutResponses: MutableList + ): AlertingException? { + val exceptions = mutableListOf() + for (res in docLevelMonitorFanOutResponses) { + if (res.exception == null) { + return null + } else { + exceptions.add(res.exception) } } - return triggerResult + return AlertingException.merge(*exceptions.toTypedArray()) } - /** - * 1. Bulk index all findings based on shouldCreateFinding flag - * 2. invoke publishFinding() to kickstart auto-correlations - * 3. Returns a list of pairs for finding id to doc id - */ - private suspend fun createFindings( - monitor: Monitor, - monitorCtx: MonitorRunnerExecutionContext, - docsToQueries: MutableMap>, - idQueryMap: Map, - shouldCreateFinding: Boolean, - workflowExecutionId: String? = null, - ): List> { - - val findingDocPairs = mutableListOf>() - val findings = mutableListOf() - val indexRequests = mutableListOf() - val findingsToTriggeredQueries = mutableMapOf>() - - docsToQueries.forEach { - val triggeredQueries = it.value.map { queryId -> idQueryMap[queryId]!! 
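`checkAndThrowExceptionIfAllFanOutsFailed` above tolerates partial failure: as soon as one response carries no exception it returns null and the run proceeds with whatever succeeded; only when every node failed does it merge the exceptions into one `AlertingException` for the caller to throw. The same contract restated on plain values (sketch; `addSuppressed` stands in for `AlertingException.merge`):

```kotlin
// null     -> at least one fan-out succeeded; the monitor run continues.
// non-null -> every fan-out failed; the aggregate aborts the run.
fun aggregateIfAllFailed(failures: List<Exception?>): Exception? {
    if (failures.isEmpty() || failures.any { it == null }) return null
    return failures.filterNotNull()
        .reduce { acc, e -> acc.apply { addSuppressed(e) } }
}

fun main() {
    println(aggregateIfAllFailed(listOf(null, RuntimeException("node-2 failed")))) // null
    println(aggregateIfAllFailed(listOf(RuntimeException("a"), RuntimeException("b")))) // a (b suppressed)
}
```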
} - - // Before the "|" is the doc id and after the "|" is the index - val docIndex = it.key.split("|") - - val finding = Finding( - id = UUID.randomUUID().toString(), - relatedDocIds = listOf(docIndex[0]), - correlatedDocIds = listOf(docIndex[0]), - monitorId = monitor.id, - monitorName = monitor.name, - index = docIndex[1], - docLevelQueries = triggeredQueries, - timestamp = Instant.now(), - executionId = workflowExecutionId - ) - findingDocPairs.add(Pair(finding.id, it.key)) - findings.add(finding) - findingsToTriggeredQueries[finding.id] = triggeredQueries - - val findingStr = - finding.toXContent(XContentBuilder.builder(XContentType.JSON.xContent()), ToXContent.EMPTY_PARAMS) - .string() - logger.debug("Findings: $findingStr") + private fun buildTriggerResults( + docLevelMonitorFanOutResponses: MutableList, + ): MutableMap { + val triggerResults = mutableMapOf() + val triggerErrorMap = mutableMapOf>() + for (res in docLevelMonitorFanOutResponses) { + if (res.exception == null) { + for (triggerId in res.triggerResults.keys) { + val documentLevelTriggerRunResult = res.triggerResults[triggerId] + if (documentLevelTriggerRunResult != null) { + if (false == triggerResults.contains(triggerId)) { + triggerResults[triggerId] = documentLevelTriggerRunResult + triggerErrorMap[triggerId] = if (documentLevelTriggerRunResult.error != null) { + val error = if (documentLevelTriggerRunResult.error is AlertingException) { + documentLevelTriggerRunResult.error as AlertingException + } else { + AlertingException.wrap(documentLevelTriggerRunResult.error!!) as AlertingException + } + mutableListOf(error) + } else { + mutableListOf() + } + } else { + val currVal = triggerResults[triggerId] + val newTriggeredDocs = mutableListOf() + newTriggeredDocs.addAll(currVal!!.triggeredDocs) + newTriggeredDocs.addAll(documentLevelTriggerRunResult.triggeredDocs) + val newActionResults = mutableMapOf>() + newActionResults.putAll(currVal.actionResultsMap) + newActionResults.putAll(documentLevelTriggerRunResult.actionResultsMap) + triggerResults[triggerId] = currVal.copy( + triggeredDocs = newTriggeredDocs, + actionResultsMap = newActionResults + ) - if (shouldCreateFinding) { - indexRequests += IndexRequest(monitor.dataSources.findingsIndex) - .source(findingStr, XContentType.JSON) - .id(finding.id) - .opType(DocWriteRequest.OpType.CREATE) + if (documentLevelTriggerRunResult.error != null) { + triggerErrorMap[triggerId]!!.add(documentLevelTriggerRunResult.error as AlertingException) + } + } + } + } } } - if (indexRequests.isNotEmpty()) { - bulkIndexFindings(monitor, monitorCtx, indexRequests) - } - - try { - findings.forEach { finding -> - publishFinding(monitor, monitorCtx, finding) + triggerErrorMap.forEach { triggerId, errorList -> + if (errorList.isNotEmpty()) { + triggerResults[triggerId]!!.error = AlertingException.merge(*errorList.toTypedArray()) } - } catch (e: Exception) { - // suppress exception - logger.error("Optional finding callback failed", e) } - - if (monitorCtx.findingsToTriggeredQueries == null) monitorCtx.findingsToTriggeredQueries = findingsToTriggeredQueries - else monitorCtx.findingsToTriggeredQueries = monitorCtx.findingsToTriggeredQueries!! 
+ findingsToTriggeredQueries - - return findingDocPairs + return triggerResults } - private suspend fun bulkIndexFindings( - monitor: Monitor, - monitorCtx: MonitorRunnerExecutionContext, - indexRequests: List - ) { - indexRequests.chunked(monitorCtx.findingsIndexBatchSize).forEach { batch -> - val bulkResponse: BulkResponse = monitorCtx.client!!.suspendUntil { - bulk(BulkRequest().add(batch), it) - } - if (bulkResponse.hasFailures()) { - bulkResponse.items.forEach { item -> - if (item.isFailed) { - logger.error("Failed indexing the finding ${item.id} of monitor [${monitor.id}]") + private fun buildInputRunResults(docLevelMonitorFanOutResponses: MutableList): InputRunResults { + val inputRunResults = mutableMapOf>() + val errors: MutableList = mutableListOf() + for (response in docLevelMonitorFanOutResponses) { + if (response.exception == null) { + if (response.inputResults.error != null) { + if (response.inputResults.error is AlertingException) { + errors.add(response.inputResults.error) + } else { + errors.add(AlertingException.wrap(response.inputResults.error) as AlertingException) + } + } + val partialResult = response.inputResults.results + for (result in partialResult) { + for (id in result.keys) { + inputRunResults.getOrPut(id) { mutableSetOf() }.addAll(result[id] as Collection) } } - } else { - logger.debug("[${bulkResponse.items.size}] All findings successfully indexed.") } } - monitorCtx.client!!.execute(RefreshAction.INSTANCE, RefreshRequest(monitor.dataSources.findingsIndex)) - } - - private fun publishFinding( - monitor: Monitor, - monitorCtx: MonitorRunnerExecutionContext, - finding: Finding - ) { - val publishFindingsRequest = PublishFindingsRequest(monitor.id, finding) - AlertingPluginInterface.publishFinding( - monitorCtx.client!! as NodeClient, - publishFindingsRequest, - object : ActionListener { - override fun onResponse(response: SubscribeFindingsResponse) {} - - override fun onFailure(e: Exception) {} - } - ) + return InputRunResults(listOf(inputRunResults), if (!errors.isEmpty()) AlertingException.merge(*errors.toTypedArray()) else null) } private fun initializeNewLastRunContext( @@ -712,436 +563,44 @@ class DocumentLevelMonitorRunner : MonitorRunner() { return allShards.filter { it.primary() }.size } - /** 1. Fetch data per shard for given index. (only 10000 docs are fetched. - * needs to be converted to scroll if not performant enough) - * 2. Transform documents to conform to format required for percolate query - * 3a. Check if docs in memory are crossing threshold defined by setting. - * 3b. 
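`buildInputRunResults` performs the symmetric aggregation for query hits: every node reports a partial map from query id to the set of matched `docId|concreteIndex` keys, and those sets are unioned so each query's result spans all shards and nodes. On toy data:

```kotlin
fun main() {
    // Two nodes report overlapping partial results for the same queries.
    val perNode = listOf(
        mapOf("queryA" to setOf("doc1|logs-1", "doc2|logs-1")),
        mapOf("queryA" to setOf("doc2|logs-1", "doc3|logs-1"), "queryB" to setOf("doc9|logs-1"))
    )
    val merged = mutableMapOf<String, MutableSet<String>>()
    for (partial in perNode)
        for ((id, docs) in partial)
            merged.getOrPut(id) { mutableSetOf() }.addAll(docs)
    println(merged) // {queryA=[doc1|logs-1, doc2|logs-1, doc3|logs-1], queryB=[doc9|logs-1]}
}
```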
If yes, perform percolate query and update docToQueries Map with all hits from percolate queries */ - private suspend fun fetchShardDataAndMaybeExecutePercolateQueries( - monitor: Monitor, - monitorCtx: MonitorRunnerExecutionContext, - indexExecutionCtx: IndexExecutionContext, - monitorMetadata: MonitorMetadata, - inputRunResults: MutableMap>, - docsToQueries: MutableMap>, - monitorInputIndices: List, - concreteIndices: List, - fieldsToBeQueried: List, - updateLastRunContext: (String, String) -> Unit - ) { - val count: Int = indexExecutionCtx.updatedLastRunContext["shards_count"] as Int - for (i: Int in 0 until count) { - val shard = i.toString() - try { - val prevSeqNo = indexExecutionCtx.lastRunContext[shard].toString().toLongOrNull() - val from = prevSeqNo ?: SequenceNumbers.NO_OPS_PERFORMED - var to: Long = Long.MAX_VALUE - while (to >= from) { - val hits: SearchHits = searchShard( - monitorCtx, - indexExecutionCtx.concreteIndexName, - shard, - from, - to, - indexExecutionCtx.docIds, - fieldsToBeQueried, - ) - if (hits.hits.isEmpty()) { - if (to == Long.MAX_VALUE) { - updateLastRunContext(shard, (prevSeqNo ?: SequenceNumbers.NO_OPS_PERFORMED).toString()) // didn't find any docs - } - break - } - if (to == Long.MAX_VALUE) { // max sequence number of shard needs to be computed - updateLastRunContext(shard, hits.hits[0].seqNo.toString()) - } - val leastSeqNoFromHits = hits.hits.last().seqNo - to = leastSeqNoFromHits - 1 - val startTime = System.currentTimeMillis() - transformedDocs.addAll( - transformSearchHitsAndReconstructDocs( - hits, - indexExecutionCtx.indexName, - indexExecutionCtx.concreteIndexName, - monitor.id, - indexExecutionCtx.conflictingFields, - ) - ) - if ( - transformedDocs.isNotEmpty() && - shouldPerformPercolateQueryAndFlushInMemoryDocs(transformedDocs.size, monitorCtx) - ) { - performPercolateQueryAndResetCounters( - monitorCtx, - monitor, - monitorMetadata, - monitorInputIndices, - concreteIndices, - inputRunResults, - docsToQueries, - ) - } - docTransformTimeTakenStat += System.currentTimeMillis() - startTime - } - } catch (e: Exception) { - logger.error( - "Monitor ${monitor.id} :" + - "Failed to run fetch data from shard [$shard] of index [${indexExecutionCtx.concreteIndexName}]. 
" + - "Error: ${e.message}", - e - ) - if (e is IndexClosedException) { - throw e - } - } - if ( - transformedDocs.isNotEmpty() && - shouldPerformPercolateQueryAndFlushInMemoryDocs(transformedDocs.size, monitorCtx) - ) { - performPercolateQueryAndResetCounters( - monitorCtx, - monitor, - monitorMetadata, - monitorInputIndices, - concreteIndices, - inputRunResults, - docsToQueries, - ) - } - } - } - - private fun shouldPerformPercolateQueryAndFlushInMemoryDocs( - numDocs: Int, - monitorCtx: MonitorRunnerExecutionContext, - ): Boolean { - return isInMemoryDocsSizeExceedingMemoryLimit(docsSizeOfBatchInBytes, monitorCtx) || - isInMemoryNumDocsExceedingMaxDocsPerPercolateQueryLimit(numDocs, monitorCtx) - } - - private suspend fun performPercolateQueryAndResetCounters( - monitorCtx: MonitorRunnerExecutionContext, - monitor: Monitor, - monitorMetadata: MonitorMetadata, - monitorInputIndices: List, - concreteIndices: List, - inputRunResults: MutableMap>, - docsToQueries: MutableMap>, - ) { - try { - val percolateQueryResponseHits = runPercolateQueryOnTransformedDocs( - monitorCtx, - transformedDocs, - monitor, - monitorMetadata, - concreteIndices, - monitorInputIndices, - ) - - percolateQueryResponseHits.forEach { hit -> - var id = hit.id - concreteIndices.forEach { id = id.replace("_${it}_${monitor.id}", "") } - monitorInputIndices.forEach { id = id.replace("_${it}_${monitor.id}", "") } - val docIndices = hit.field("_percolator_document_slot").values.map { it.toString().toInt() } - docIndices.forEach { idx -> - val docIndex = "${transformedDocs[idx].first}|${transformedDocs[idx].second.concreteIndexName}" - inputRunResults.getOrPut(id) { mutableSetOf() }.add(docIndex) - docsToQueries.getOrPut(docIndex) { mutableListOf() }.add(id) - } - } - totalDocsQueriedStat += transformedDocs.size.toLong() - } finally { - transformedDocs.clear() - docsSizeOfBatchInBytes = 0 - } - } - - /** Executes search query on given shard of given index to fetch docs with sequene number greater than prevSeqNo. - * This method hence fetches only docs from shard which haven't been queried before - */ - private suspend fun searchShard( - monitorCtx: MonitorRunnerExecutionContext, - index: String, - shard: String, - prevSeqNo: Long?, - maxSeqNo: Long, - docIds: List? = null, - fieldsToFetch: List, - ): SearchHits { - if (prevSeqNo?.equals(maxSeqNo) == true && maxSeqNo != 0L) { - return SearchHits.empty() - } - val boolQueryBuilder = BoolQueryBuilder() - boolQueryBuilder.filter(QueryBuilders.rangeQuery("_seq_no").gt(prevSeqNo).lte(maxSeqNo)) - - if (!docIds.isNullOrEmpty()) { - boolQueryBuilder.filter(QueryBuilders.termsQuery("_id", docIds)) - } - - val request: SearchRequest = SearchRequest() - .indices(index) - .preference("_shards:$shard") - .source( - SearchSourceBuilder() - .version(true) - .sort("_seq_no", SortOrder.DESC) - .seqNoAndPrimaryTerm(true) - .query(boolQueryBuilder) - .size(monitorCtx.docLevelMonitorShardFetchSize) - ) - .preference(Preference.PRIMARY_FIRST.type()) - request.cancelAfterTimeInterval = TimeValue.timeValueMinutes( - getCancelAfterTimeInterval() - ) - if (monitorCtx.fetchOnlyQueryFieldNames && fieldsToFetch.isNotEmpty()) { - request.source().fetchSource(false) - for (field in fieldsToFetch) { - request.source().fetchField(field) - } - } - val response: SearchResponse = monitorCtx.client!!.suspendUntil { monitorCtx.client!!.search(request, it) } - if (response.status() !== RestStatus.OK) { - throw IOException("Failed to search shard: [$shard] in index [$index]. 
Response status is ${response.status()}") - } - nonPercolateSearchesTimeTakenStat += response.took.millis - return response.hits + private fun getNodes(monitorCtx: MonitorRunnerExecutionContext): Map { + return monitorCtx.clusterService!!.state().nodes.dataNodes.filter { it.value.version >= Version.CURRENT } } - /** Executes percolate query on the docs against the monitor's query index and return the hits from the search response*/ - private suspend fun runPercolateQueryOnTransformedDocs( + private fun distributeShards( monitorCtx: MonitorRunnerExecutionContext, - docs: MutableList>, - monitor: Monitor, - monitorMetadata: MonitorMetadata, - concreteIndices: List, - monitorInputIndices: List, - ): SearchHits { - val indices = docs.stream().map { it.second.indexName }.distinct().collect(Collectors.toList()) - val boolQueryBuilder = BoolQueryBuilder().must(buildShouldClausesOverPerIndexMatchQueries(indices)) - val percolateQueryBuilder = - PercolateQueryBuilderExt("query", docs.map { it.second.docSource }, XContentType.JSON) - if (monitor.id.isNotEmpty()) { - boolQueryBuilder.must(QueryBuilders.matchQuery("monitor_id", monitor.id).operator(Operator.AND)) - } - boolQueryBuilder.filter(percolateQueryBuilder) - val queryIndices = - docs.map { monitorMetadata.sourceToQueryIndexMapping[it.second.indexName + monitor.id] }.distinct() - if (queryIndices.isEmpty()) { - val message = - "Monitor ${monitor.id}: Failed to resolve query Indices from source indices during monitor execution!" + - " sourceIndices: $monitorInputIndices" - logger.error(message) - throw AlertingException.wrap( - OpenSearchStatusException(message, RestStatus.INTERNAL_SERVER_ERROR) - ) - } - - val searchRequest = - SearchRequest().indices(*queryIndices.toTypedArray()).preference(Preference.PRIMARY_FIRST.type()) - val searchSourceBuilder = SearchSourceBuilder() - searchSourceBuilder.query(boolQueryBuilder) - searchRequest.source(searchSourceBuilder) - logger.debug( - "Monitor ${monitor.id}: " + - "Executing percolate query for docs from source indices " + - "$monitorInputIndices against query index $queryIndices" - ) - var response: SearchResponse - - try { - searchRequest.cancelAfterTimeInterval = TimeValue.timeValueMinutes( - getCancelAfterTimeInterval() - ) - - response = monitorCtx.client!!.suspendUntil { - monitorCtx.client!!.execute(SearchAction.INSTANCE, searchRequest, it) - } - } catch (e: Exception) { - throw IllegalStateException( - "Monitor ${monitor.id}:" + - " Failed to run percolate search for sourceIndex [${concreteIndices.joinToString()}] " + - "and queryIndex [${queryIndices.joinToString()}] for ${docs.size} document(s)", - e - ) - } - - if (response.status() !== RestStatus.OK) { - throw IOException( - "Monitor ${monitor.id}: Failed to search percolate index: ${queryIndices.joinToString()}. " + - "Response status is ${response.status()}" - ) - } - logger.debug("Monitor ${monitor.id} PERF_DEBUG: Percolate query time taken millis = ${response.took}") - percolateQueriesTimeTakenStat += response.took.millis - return response.hits - } - /** we cannot use terms query because `index` field's mapping is of type TEXT and not keyword. 
Refer doc-level-queries.json*/ - private fun buildShouldClausesOverPerIndexMatchQueries(indices: List): BoolQueryBuilder { - val boolQueryBuilder = QueryBuilders.boolQuery() - indices.forEach { boolQueryBuilder.should(QueryBuilders.matchQuery("index", it)) } - return boolQueryBuilder - } - - /** Transform field names and index names in all the search hits to format required to run percolate search against them. - * Hits are transformed using method transformDocumentFieldNames() */ - private fun transformSearchHitsAndReconstructDocs( - hits: SearchHits, + allNodes: List, + shards: List, index: String, - concreteIndex: String, - monitorId: String, - conflictingFields: List, - ): List> { - return hits.mapNotNull(fun(hit: SearchHit): Pair? { - try { - val sourceMap = if (hit.hasSource()) { - hit.sourceAsMap - } else { - constructSourceMapFromFieldsInHit(hit) - } - transformDocumentFieldNames( - sourceMap, - conflictingFields, - "_${index}_$monitorId", - "_${concreteIndex}_$monitorId", - "" - ) - var xContentBuilder = XContentFactory.jsonBuilder().map(sourceMap) - val sourceRef = BytesReference.bytes(xContentBuilder) - docsSizeOfBatchInBytes += sourceRef.ramBytesUsed() - totalDocsSizeInBytesStat += sourceRef.ramBytesUsed() - return Pair(hit.id, TransformedDocDto(index, concreteIndex, hit.id, sourceRef)) - } catch (e: Exception) { - logger.error("Monitor $monitorId: Failed to transform payload $hit for percolate query", e) - // skip any document which we fail to transform because we anyway won't be able to run percolate queries on them. - return null - } - }) - } - - private fun constructSourceMapFromFieldsInHit(hit: SearchHit): MutableMap { - if (hit.fields == null) - return mutableMapOf() - val sourceMap: MutableMap = mutableMapOf() - for (field in hit.fields) { - if (field.value.values != null && field.value.values.isNotEmpty()) - if (field.value.values.size == 1) { - sourceMap[field.key] = field.value.values[0] - } else sourceMap[field.key] = field.value.values + ): Map> { + val totalShards = shards.size + val numFanOutNodes = allNodes.size.coerceAtMost((totalShards + 1) / 2) + val totalNodes = monitorCtx.totalNodesFanOut.coerceAtMost(numFanOutNodes) + val shardsPerNode = totalShards / totalNodes + var shardsRemaining = totalShards % totalNodes + + val shardIdList = shards.map { + ShardId(monitorCtx.clusterService!!.state().metadata.index(index).index, it.toInt()) } - return sourceMap - } - - /** - * Traverses document fields in leaves recursively and appends [fieldNameSuffixIndex] to field names with same names - * but different mappings & [fieldNameSuffixPattern] to field names which have unique names. 
- * - * Example for index name is my_log_index and Monitor ID is TReewWdsf2gdJFV: - * { { - * "a": { "a": { - * "b": 1234 ----> "b_my_log_index_TReewWdsf2gdJFV": 1234 - * } } - * } - * - * @param jsonAsMap Input JSON (as Map) - * @param fieldNameSuffix Field suffix which is appended to existing field name - */ - private fun transformDocumentFieldNames( - jsonAsMap: MutableMap, - conflictingFields: List, - fieldNameSuffixPattern: String, - fieldNameSuffixIndex: String, - fieldNamePrefix: String - ) { - val tempMap = mutableMapOf() - val it: MutableIterator> = jsonAsMap.entries.iterator() - while (it.hasNext()) { - val entry = it.next() - if (entry.value is Map<*, *>) { - transformDocumentFieldNames( - entry.value as MutableMap, - conflictingFields, - fieldNameSuffixPattern, - fieldNameSuffixIndex, - if (fieldNamePrefix == "") entry.key else "$fieldNamePrefix.${entry.key}" - ) - } else if (!entry.key.endsWith(fieldNameSuffixPattern) && !entry.key.endsWith(fieldNameSuffixIndex)) { - var alreadyReplaced = false - conflictingFields.forEach { conflictingField -> - if (conflictingField == "$fieldNamePrefix.${entry.key}" || (fieldNamePrefix == "" && conflictingField == entry.key)) { - tempMap["${entry.key}$fieldNameSuffixIndex"] = entry.value - it.remove() - alreadyReplaced = true - } - } - if (!alreadyReplaced) { - tempMap["${entry.key}$fieldNameSuffixPattern"] = entry.value - it.remove() - } + val nodes = allNodes.subList(0, totalNodes) + + val nodeShardAssignments = mutableMapOf>() + var idx = 0 + for (node in nodes) { + val nodeShardAssignment = mutableSetOf() + for (i in 1..shardsPerNode) { + nodeShardAssignment.add(shardIdList[idx++]) } + nodeShardAssignments[node] = nodeShardAssignment } - jsonAsMap.putAll(tempMap) - } - - /** - * Returns true, if the docs fetched from shards thus far amount to less than threshold - * amount of percentage (default:10. setting is dynamic and configurable) of the total heap size or not. - * - */ - private fun isInMemoryDocsSizeExceedingMemoryLimit(docsBytesSize: Long, monitorCtx: MonitorRunnerExecutionContext): Boolean { - var thresholdPercentage = monitorCtx.percQueryDocsSizeMemoryPercentageLimit - val heapMaxBytes = monitorCtx.jvmStats!!.mem.heapMax.bytes - val thresholdBytes = (thresholdPercentage.toDouble() / 100.0) * heapMaxBytes - - return docsBytesSize > thresholdBytes - } - - private fun isInMemoryNumDocsExceedingMaxDocsPerPercolateQueryLimit(numDocs: Int, monitorCtx: MonitorRunnerExecutionContext): Boolean { - var maxNumDocsThreshold = monitorCtx.percQueryMaxNumDocsInMemory - return numDocs >= maxNumDocsThreshold - } - - /** - * Performs an mGet request to retrieve the documents associated with findings. - * - * When possible, this will only retrieve the document fields that are specifically - * referenced for printing in the mustache template. - */ - private suspend fun getDocSources( - findingToDocPairs: List>, - monitorCtx: MonitorRunnerExecutionContext, - monitor: Monitor - ) { - val docFieldTags = parseSampleDocTags(monitor.triggers) - val request = MultiGetRequest() - // Perform mGet request in batches. 
- findingToDocPairs.chunked(monitorCtx.findingsIndexBatchSize).forEach { batch -> - batch.forEach { (findingId, docIdAndIndex) -> - val docIdAndIndexSplit = docIdAndIndex.split("|") - val docId = docIdAndIndexSplit[0] - val concreteIndex = docIdAndIndexSplit[1] - if (findingId.isNotEmpty() && docId.isNotEmpty() && concreteIndex.isNotEmpty()) { - val docItem = MultiGetRequest.Item(concreteIndex, docId) - if (docFieldTags.isNotEmpty()) - docItem.fetchSourceContext(FetchSourceContext(true, docFieldTags.toTypedArray(), emptyArray())) - request.add(docItem) - } - val response = monitorCtx.client!!.suspendUntil { monitorCtx.client!!.multiGet(request, it) } - response.responses.forEach { item -> - findingIdToDocSource[findingId] = item - } + for (node in nodes) { + if (shardsRemaining == 0) { + break } + nodeShardAssignments[node]!!.add(shardIdList[idx++]) + --shardsRemaining } + return nodeShardAssignments } - - /** - * POJO holding information about each doc's concrete index, id, input index pattern/alias/datastream name - * and doc source. A list of these POJOs would be passed to percolate query execution logic. - */ - data class TransformedDocDto( - var indexName: String, - var concreteIndexName: String, - var docId: String, - var docSource: BytesReference - ) } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunner.kt index f8d5fe686..870b39031 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunner.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunner.kt @@ -32,6 +32,7 @@ import org.opensearch.commons.alerting.model.Table import org.opensearch.commons.alerting.model.action.Action import org.opensearch.commons.notifications.model.NotificationConfigInfo import org.opensearch.core.common.Strings +import org.opensearch.transport.TransportService import java.time.Instant abstract class MonitorRunner { @@ -43,7 +44,8 @@ abstract class MonitorRunner { periodEnd: Instant, dryRun: Boolean, workflowRunContext: WorkflowRunContext? = null, - executionId: String + executionId: String, + transportService: TransportService ): MonitorRunResult<*> suspend fun runAction( diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerExecutionContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerExecutionContext.kt index eefad1b6b..459743f9d 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerExecutionContext.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerExecutionContext.kt @@ -61,5 +61,6 @@ data class MonitorRunnerExecutionContext( AlertingSettings.DEFAULT_PERCOLATE_QUERY_DOCS_SIZE_MEMORY_PERCENTAGE_LIMIT, @Volatile var docLevelMonitorShardFetchSize: Int = AlertingSettings.DEFAULT_DOC_LEVEL_MONITOR_SHARD_FETCH_SIZE, + @Volatile var totalNodesFanOut: Int = AlertingSettings.DEFAULT_FAN_OUT_NODES, @Volatile var lockService: LockService? 
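The arithmetic in `distributeShards` above caps the node count twice — at the `DOC_LEVEL_MONITOR_FAN_OUT_NODES` setting (`monitorCtx.totalNodesFanOut`) and at `(totalShards + 1) / 2`, so each selected node receives at least one and typically about two shards when shards are scarce — then deals each node an even base allotment plus at most one remainder shard. The same math on plain integers, as a runnable sketch:

```kotlin
// Returns the number of shards assigned to each selected node.
fun assignments(shardCount: Int, nodeCount: Int, fanOutSetting: Int): List<Int> {
    val eligible = nodeCount.coerceAtMost((shardCount + 1) / 2) // avoid nearly idle nodes
    val used = fanOutSetting.coerceAtMost(eligible)             // honor the dynamic setting
    val base = shardCount / used
    val remainder = shardCount % used
    return List(used) { node -> base + if (node < remainder) 1 else 0 }
}

fun main() {
    println(assignments(shardCount = 7, nodeCount = 5, fanOutSetting = 1000)) // [2, 2, 2, 1]
    println(assignments(shardCount = 7, nodeCount = 5, fanOutSetting = 2))    // [4, 3]
}
```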
= null ) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt index 4ff0bee73..3ddff1b03 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/MonitorRunnerService.kt @@ -14,6 +14,12 @@ import org.apache.logging.log4j.LogManager import org.opensearch.action.bulk.BackoffPolicy import org.opensearch.action.search.TransportSearchAction.SEARCH_CANCEL_AFTER_TIME_INTERVAL_SETTING import org.opensearch.action.support.master.AcknowledgedResponse +import org.opensearch.alerting.action.ExecuteMonitorAction +import org.opensearch.alerting.action.ExecuteMonitorRequest +import org.opensearch.alerting.action.ExecuteMonitorResponse +import org.opensearch.alerting.action.ExecuteWorkflowAction +import org.opensearch.alerting.action.ExecuteWorkflowRequest +import org.opensearch.alerting.action.ExecuteWorkflowResponse import org.opensearch.alerting.alerts.AlertIndices import org.opensearch.alerting.alerts.AlertMover.Companion.moveAlerts import org.opensearch.alerting.core.JobRunner @@ -50,6 +56,7 @@ import org.opensearch.cluster.metadata.IndexNameExpressionResolver import org.opensearch.cluster.service.ClusterService import org.opensearch.common.lifecycle.AbstractLifecycleComponent import org.opensearch.common.settings.Settings +import org.opensearch.common.unit.TimeValue import org.opensearch.commons.alerting.model.Alert import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.ScheduledJob @@ -63,6 +70,7 @@ import org.opensearch.script.Script import org.opensearch.script.ScriptService import org.opensearch.script.TemplateScript import org.opensearch.threadpool.ThreadPool +import org.opensearch.transport.TransportService import java.time.Instant import java.time.LocalDateTime import java.time.ZoneOffset @@ -220,6 +228,11 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon monitorCtx.docLevelMonitorShardFetchSize = it } + monitorCtx.totalNodesFanOut = AlertingSettings.DOC_LEVEL_MONITOR_FAN_OUT_NODES.get(monitorCtx.settings) + monitorCtx.clusterService!!.clusterSettings.addSettingsUpdateConsumer(AlertingSettings.DOC_LEVEL_MONITOR_FAN_OUT_NODES) { + monitorCtx.totalNodesFanOut = it + } + return this } @@ -313,11 +326,20 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon monitorCtx.lockService!!.acquireLock(job, it) } ?: return@launch logger.debug("lock ${lock!!.lockId} acquired") - logger.debug( - "PERF_DEBUG: executing workflow ${job.id} on node " + - monitorCtx.clusterService!!.state().nodes().localNode.id - ) - runJob(job, periodStart, periodEnd, false) + + monitorCtx.client!!.suspendUntil { + monitorCtx.client!!.execute( + ExecuteWorkflowAction.INSTANCE, + ExecuteWorkflowRequest( + false, + TimeValue(periodEnd.toEpochMilli()), + job.id, + job, + TimeValue(periodStart.toEpochMilli()) + ), + it + ) + } } finally { monitorCtx.client!!.suspendUntil { monitorCtx.lockService!!.release(lock, it) } logger.debug("lock ${lock!!.lockId} released") @@ -336,7 +358,20 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon "PERF_DEBUG: executing ${job.monitorType} ${job.id} on node " + monitorCtx.clusterService!!.state().nodes().localNode.id ) - runJob(job, periodStart, periodEnd, false) + val executeMonitorRequest = ExecuteMonitorRequest( + false, + TimeValue(periodEnd.toEpochMilli()), + job.id, + 
job, + TimeValue(periodStart.toEpochMilli()) + ) + monitorCtx.client!!.suspendUntil { + monitorCtx.client!!.execute( + ExecuteMonitorAction.INSTANCE, + executeMonitorRequest, + it + ) + } } finally { monitorCtx.client!!.suspendUntil { monitorCtx.lockService!!.release(lock, it) } logger.debug("lock ${lock!!.lockId} released") @@ -349,11 +384,23 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon } } - suspend fun runJob(workflow: Workflow, periodStart: Instant, periodEnd: Instant, dryrun: Boolean): WorkflowRunResult { - return CompositeWorkflowRunner.runWorkflow(workflow, monitorCtx, periodStart, periodEnd, dryrun) + suspend fun runJob( + workflow: Workflow, + periodStart: Instant, + periodEnd: Instant, + dryrun: Boolean, + transportService: TransportService + ): WorkflowRunResult { + return CompositeWorkflowRunner.runWorkflow(workflow, monitorCtx, periodStart, periodEnd, dryrun, transportService) } - suspend fun runJob(job: ScheduledJob, periodStart: Instant, periodEnd: Instant, dryrun: Boolean): MonitorRunResult<*> { + suspend fun runJob( + job: ScheduledJob, + periodStart: Instant, + periodEnd: Instant, + dryrun: Boolean, + transportService: TransportService + ): MonitorRunResult<*> { // Updating the scheduled job index at the start of monitor execution runs for when there is an upgrade the the schema mapping // has not been updated. if (!IndexUtils.scheduledJobIndexUpdated && monitorCtx.clusterService != null && monitorCtx.client != null) { @@ -373,7 +420,7 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon if (job is Workflow) { logger.info("Executing scheduled workflow - id: ${job.id}, periodStart: $periodStart, periodEnd: $periodEnd, dryrun: $dryrun") - CompositeWorkflowRunner.runWorkflow(workflow = job, monitorCtx, periodStart, periodEnd, dryrun) + CompositeWorkflowRunner.runWorkflow(workflow = job, monitorCtx, periodStart, periodEnd, dryrun, transportService) } val monitor = job as Monitor val executionId = "${monitor.id}_${LocalDateTime.now(ZoneOffset.UTC)}_${UUID.randomUUID()}" @@ -382,11 +429,35 @@ object MonitorRunnerService : JobRunner, CoroutineScope, AbstractLifecycleCompon "periodEnd: $periodEnd, dryrun: $dryrun, executionId: $executionId" ) val runResult = if (monitor.isBucketLevelMonitor()) { - BucketLevelMonitorRunner.runMonitor(monitor, monitorCtx, periodStart, periodEnd, dryrun, executionId = executionId) + BucketLevelMonitorRunner.runMonitor( + monitor, + monitorCtx, + periodStart, + periodEnd, + dryrun, + executionId = executionId, + transportService = transportService + ) } else if (monitor.isDocLevelMonitor()) { - DocumentLevelMonitorRunner().runMonitor(monitor, monitorCtx, periodStart, periodEnd, dryrun, executionId = executionId) + DocumentLevelMonitorRunner().runMonitor( + monitor, + monitorCtx, + periodStart, + periodEnd, + dryrun, + executionId = executionId, + transportService = transportService + ) } else { - QueryLevelMonitorRunner.runMonitor(monitor, monitorCtx, periodStart, periodEnd, dryrun, executionId = executionId) + QueryLevelMonitorRunner.runMonitor( + monitor, + monitorCtx, + periodStart, + periodEnd, + dryrun, + executionId = executionId, + transportService = transportService + ) } return runResult } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/QueryLevelMonitorRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/QueryLevelMonitorRunner.kt index 3d9ab5a78..b975af728 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/QueryLevelMonitorRunner.kt +++ 
b/alerting/src/main/kotlin/org/opensearch/alerting/QueryLevelMonitorRunner.kt @@ -17,6 +17,7 @@ import org.opensearch.alerting.workflow.WorkflowRunContext import org.opensearch.commons.alerting.model.Alert import org.opensearch.commons.alerting.model.Monitor import org.opensearch.commons.alerting.model.QueryLevelTrigger +import org.opensearch.transport.TransportService import java.time.Instant object QueryLevelMonitorRunner : MonitorRunner() { @@ -29,7 +30,8 @@ object QueryLevelMonitorRunner : MonitorRunner() { periodEnd: Instant, dryrun: Boolean, workflowRunContext: WorkflowRunContext?, - executionId: String + executionId: String, + transportService: TransportService ): MonitorRunResult { val roles = MonitorRunnerService.getRolesForMonitor(monitor) logger.debug("Running monitor: ${monitor.name} with roles: $roles Thread: ${Thread.currentThread().name}") diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutAction.kt new file mode 100644 index 000000000..c03d95942 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutAction.kt @@ -0,0 +1,15 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.action + +import org.opensearch.action.ActionType + +class DocLevelMonitorFanOutAction private constructor() : ActionType(NAME, ::DocLevelMonitorFanOutResponse) { + companion object { + val INSTANCE = DocLevelMonitorFanOutAction() + const val NAME = "cluster:admin/opensearch/alerting/monitor/doclevel/fanout" + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutRequest.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutRequest.kt new file mode 100644 index 000000000..7b16b8961 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutRequest.kt @@ -0,0 +1,101 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.action + +import org.opensearch.action.ActionRequest +import org.opensearch.action.ActionRequestValidationException +import org.opensearch.alerting.model.IndexExecutionContext +import org.opensearch.alerting.model.MonitorMetadata +import org.opensearch.alerting.workflow.WorkflowRunContext +import org.opensearch.commons.alerting.model.Monitor +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.index.shard.ShardId +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.ToXContentObject +import org.opensearch.core.xcontent.XContentBuilder +import java.io.IOException + +class DocLevelMonitorFanOutRequest : ActionRequest, ToXContentObject { + val monitor: Monitor + val dryRun: Boolean + val monitorMetadata: MonitorMetadata + val executionId: String + val indexExecutionContext: IndexExecutionContext + val shardIds: List + val concreteIndicesSeenSoFar: List + val workflowRunContext: WorkflowRunContext? 
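+ + // Wire-format note: the StreamInput constructor below must read fields in exactly the order writeTo() writes them (monitor, dryRun, monitorMetadata, executionId, shardIds, concreteIndicesSeenSoFar, workflowRunContext presence flag + value, indexExecutionContext); a mismatch between the two corrupts the transport stream.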
+ + constructor( + monitor: Monitor, + dryRun: Boolean, + monitorMetadata: MonitorMetadata, + executionId: String, + indexExecutionContext: IndexExecutionContext, + shardIds: List, + concreteIndicesSeenSoFar: List, + workflowRunContext: WorkflowRunContext?, + ) : super() { + this.monitor = monitor + this.dryRun = dryRun + this.monitorMetadata = monitorMetadata + this.executionId = executionId + this.indexExecutionContext = indexExecutionContext + this.shardIds = shardIds + this.concreteIndicesSeenSoFar = concreteIndicesSeenSoFar + this.workflowRunContext = workflowRunContext + require(shardIds.isNotEmpty()) { "shard_ids cannot be null or empty" } + } + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + monitor = Monitor.readFrom(sin)!!, + dryRun = sin.readBoolean(), + monitorMetadata = MonitorMetadata.readFrom(sin), + executionId = sin.readString(), + shardIds = sin.readList(::ShardId), + concreteIndicesSeenSoFar = sin.readStringList(), + workflowRunContext = if (sin.readBoolean()) { + WorkflowRunContext(sin) + } else null, + indexExecutionContext = IndexExecutionContext(sin) + ) + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + monitor.writeTo(out) + out.writeBoolean(dryRun) + monitorMetadata.writeTo(out) + out.writeString(executionId) + out.writeCollection(shardIds) + out.writeStringCollection(concreteIndicesSeenSoFar) + out.writeBoolean(workflowRunContext != null) + workflowRunContext?.writeTo(out) + indexExecutionContext.writeTo(out) + } + + override fun validate(): ActionRequestValidationException? { + var actionValidationException: ActionRequestValidationException? = null + if (shardIds.isEmpty()) { + actionValidationException = ActionRequestValidationException() + actionValidationException.addValidationError("shard_ids is null or empty") + } + return actionValidationException + } + + @Throws(IOException::class) + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + builder.startObject() + .field("monitor", monitor) + .field("dry_run", dryRun) + .field("execution_id", executionId) + .field("index_execution_context", indexExecutionContext) + .field("shard_ids", shardIds) + .field("concrete_indices", concreteIndicesSeenSoFar) + .field("workflow_run_context", workflowRunContext) + return builder.endObject() + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutResponse.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutResponse.kt new file mode 100644 index 000000000..8c74de356 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutResponse.kt @@ -0,0 +1,92 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.action + +import org.opensearch.alerting.model.DocumentLevelTriggerRunResult +import org.opensearch.alerting.model.InputRunResults +import org.opensearch.alerting.util.AlertingException +import org.opensearch.core.action.ActionResponse +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.ToXContentObject +import org.opensearch.core.xcontent.XContentBuilder +import java.io.IOException + +class DocLevelMonitorFanOutResponse : ActionResponse, ToXContentObject { + val nodeId: String + val executionId: String + val monitorId: String + val lastRunContexts: MutableMap + val inputResults:
InputRunResults + val triggerResults: Map + val exception: AlertingException? + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + nodeId = sin.readString(), + executionId = sin.readString(), + monitorId = sin.readString(), + lastRunContexts = sin.readMap()!! as MutableMap, + inputResults = InputRunResults.readFrom(sin), + triggerResults = suppressWarning(sin.readMap(StreamInput::readString, DocumentLevelTriggerRunResult::readFrom)), + exception = sin.readException() + ) + + constructor( + nodeId: String, + executionId: String, + monitorId: String, + lastRunContexts: MutableMap, + inputResults: InputRunResults = InputRunResults(), // partial, + triggerResults: Map = mapOf(), + exception: AlertingException? = null + ) : super() { + this.nodeId = nodeId + this.executionId = executionId + this.monitorId = monitorId + this.lastRunContexts = lastRunContexts + this.inputResults = inputResults + this.triggerResults = triggerResults + this.exception = exception + } + + @Throws(IOException::class) + override fun writeTo(out: StreamOutput) { + out.writeString(nodeId) + out.writeString(executionId) + out.writeString(monitorId) + out.writeMap(lastRunContexts) + inputResults.writeTo(out) + out.writeMap( + triggerResults, + StreamOutput::writeString, + { stream, stats -> stats.writeTo(stream) } + ) + out.writeException(exception) + } + + @Throws(IOException::class) + override fun toXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { + builder.startObject() + .field("node_id", nodeId) + .field("execution_id", executionId) + .field("monitor_id", monitorId) + .field("last_run_contexts", lastRunContexts) + .field("input_results", inputResults) + .field("trigger_results", triggerResults) + .field("exception", exception) + .endObject() + return builder + } + + companion object { + @Suppress("UNCHECKED_CAST") + fun suppressWarning(map: MutableMap?): Map { + return map as Map + } + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorRequest.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorRequest.kt index ecc504677..c7b699dfc 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorRequest.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteMonitorRequest.kt @@ -18,17 +18,20 @@ class ExecuteMonitorRequest : ActionRequest { val requestEnd: TimeValue val monitorId: String? val monitor: Monitor? + val requestStart: TimeValue? constructor( dryrun: Boolean, requestEnd: TimeValue, monitorId: String?, - monitor: Monitor? + monitor: Monitor?, + requestStart: TimeValue? = null ) : super() { this.dryrun = dryrun this.requestEnd = requestEnd this.monitorId = monitorId this.monitor = monitor + this.requestStart = requestStart } @Throws(IOException::class) @@ -38,7 +41,8 @@ class ExecuteMonitorRequest : ActionRequest { sin.readOptionalString(), // monitorId if (sin.readBoolean()) { Monitor.readFrom(sin) // monitor - } else null + } else null, + sin.readOptionalTimeValue() ) override fun validate(): ActionRequestValidationException? 
{ @@ -56,5 +60,6 @@ class ExecuteMonitorRequest : ActionRequest { } else { out.writeBoolean(false) } + out.writeOptionalTimeValue(requestStart) } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteWorkflowRequest.kt b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteWorkflowRequest.kt index 3b3d48ed2..104448cce 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteWorkflowRequest.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/action/ExecuteWorkflowRequest.kt @@ -22,15 +22,18 @@ class ExecuteWorkflowRequest : ActionRequest { val requestEnd: TimeValue val workflowId: String? val workflow: Workflow? + val requestStart: TimeValue? constructor( dryrun: Boolean, requestEnd: TimeValue, workflowId: String?, workflow: Workflow?, + requestStart: TimeValue? = null, ) : super() { this.dryrun = dryrun this.requestEnd = requestEnd + this.requestStart = requestStart this.workflowId = workflowId this.workflow = workflow } @@ -42,7 +45,8 @@ class ExecuteWorkflowRequest : ActionRequest { sin.readOptionalString(), if (sin.readBoolean()) { Workflow.readFrom(sin) - } else null + } else null, + sin.readOptionalTimeValue() ) override fun validate(): ActionRequestValidationException? { @@ -59,6 +63,7 @@ class ExecuteWorkflowRequest : ActionRequest { override fun writeTo(out: StreamOutput) { out.writeBoolean(dryrun) out.writeTimeValue(requestEnd) + out.writeOptionalTimeValue(requestStart) out.writeOptionalString(workflowId) if (workflow != null) { out.writeBoolean(true) diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/model/DocumentLevelTriggerRunResult.kt b/alerting/src/main/kotlin/org/opensearch/alerting/model/DocumentLevelTriggerRunResult.kt index 9d98aab42..a89d89da7 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/model/DocumentLevelTriggerRunResult.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/model/DocumentLevelTriggerRunResult.kt @@ -25,7 +25,7 @@ data class DocumentLevelTriggerRunResult( triggerName = sin.readString(), error = sin.readException(), triggeredDocs = sin.readStringList(), - actionResultsMap = sin.readMap() as MutableMap> + actionResultsMap = readActionResults(sin) ) override fun internalXContent(builder: XContentBuilder, params: ToXContent.Params): XContentBuilder { @@ -39,7 +39,15 @@ data class DocumentLevelTriggerRunResult( override fun writeTo(out: StreamOutput) { super.writeTo(out) out.writeStringCollection(triggeredDocs) - out.writeMap(actionResultsMap as Map) + out.writeInt(actionResultsMap.size) + actionResultsMap.forEach { (alert, actionResults) -> + out.writeString(alert) + out.writeInt(actionResults.size) + actionResults.forEach { (id, result) -> + out.writeString(id) + result.writeTo(out) + } + } } companion object { @@ -48,5 +56,27 @@ data class DocumentLevelTriggerRunResult( fun readFrom(sin: StreamInput): TriggerRunResult { return DocumentLevelTriggerRunResult(sin) } + + @JvmStatic + fun readActionResults(sin: StreamInput): MutableMap> { + val actionResultsMapReconstruct: MutableMap> = mutableMapOf() + val size = sin.readInt() + var idx = 0 + while (idx < size) { + val alert = sin.readString() + val actionResultsSize = sin.readInt() + val actionRunResultElem = mutableMapOf() + var i = 0 + while (i < actionResultsSize) { + val actionId = sin.readString() + val actionResult = ActionRunResult.readFrom(sin) + actionRunResultElem[actionId] = actionResult + ++i + } + actionResultsMapReconstruct[alert] = actionRunResultElem + ++idx + } + return actionResultsMapReconstruct + } 
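+ + // Note: this size-prefixed encoding mirrors writeTo() above; the nested alert-id -> (action-id -> ActionRunResult) map is written and read explicitly so that deserialization yields concretely typed maps instead of relying on a generic readMap() cast.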
} } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/model/IndexExecutionContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/model/IndexExecutionContext.kt index e7aa707f9..b36460d96 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/model/IndexExecutionContext.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/model/IndexExecutionContext.kt @@ -6,14 +6,62 @@ package org.opensearch.alerting.model import org.opensearch.commons.alerting.model.DocLevelQuery +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.core.common.io.stream.StreamOutput +import org.opensearch.core.common.io.stream.Writeable +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.XContentBuilder +import java.io.IOException -/** DTO that contains all the necessary context for fetching data from shard and performing percolate queries */ data class IndexExecutionContext( val queries: List, - val lastRunContext: MutableMap, - val updatedLastRunContext: MutableMap, + val lastRunContext: MutableMap, // previous execution + val updatedLastRunContext: MutableMap, // without sequence numbers val indexName: String, val concreteIndexName: String, + val updatedIndexNames: List, + val concreteIndexNames: List, val conflictingFields: List, - val docIds: List? = null, -) + val docIds: List? = emptyList(), +) : Writeable, ToXContent { + + @Throws(IOException::class) + constructor(sin: StreamInput) : this( + queries = sin.readList { DocLevelQuery(sin) }, + lastRunContext = sin.readMap(), + updatedLastRunContext = sin.readMap(), + indexName = sin.readString(), + concreteIndexName = sin.readString(), + updatedIndexNames = sin.readStringList(), + concreteIndexNames = sin.readStringList(), + conflictingFields = sin.readStringList(), + docIds = sin.readOptionalStringList() + ) + + override fun writeTo(out: StreamOutput?) 
{ + out!!.writeCollection(queries) + out.writeMap(lastRunContext) + out.writeMap(updatedLastRunContext) + out.writeString(indexName) + out.writeString(concreteIndexName) + out.writeStringCollection(updatedIndexNames) + out.writeStringCollection(concreteIndexNames) + out.writeStringCollection(conflictingFields) + out.writeOptionalStringCollection(docIds) + } + + override fun toXContent(builder: XContentBuilder?, params: ToXContent.Params?): XContentBuilder { + builder!!.startObject() + .field("queries", queries) + .field("last_run_context", lastRunContext) + .field("updated_last_run_context", updatedLastRunContext) + .field("index_name", indexName) + .field("concrete_index_name", concreteIndexName) + .field("updated_index_names", updatedIndexNames) + .field("concrete_index_names", concreteIndexNames) + .field("conflicting_fields", conflictingFields) + .field("doc_ids", docIds) + .endObject() + return builder + } +} diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt b/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt index 21ef8f1bc..f782be9b5 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/settings/AlertingSettings.kt @@ -21,6 +21,7 @@ class AlertingSettings { const val DEFAULT_PERCOLATE_QUERY_NUM_DOCS_IN_MEMORY = 50000 const val DEFAULT_PERCOLATE_QUERY_DOCS_SIZE_MEMORY_PERCENTAGE_LIMIT = 10 const val DEFAULT_DOC_LEVEL_MONITOR_SHARD_FETCH_SIZE = 10000 + const val DEFAULT_FAN_OUT_NODES = 1000 val ALERTING_MAX_MONITORS = Setting.intSetting( "plugins.alerting.monitor.max_monitors", @@ -214,5 +215,13 @@ class AlertingSettings { 1, Setting.Property.NodeScope, Setting.Property.Dynamic ) + + val DOC_LEVEL_MONITOR_FAN_OUT_NODES = Setting.intSetting( + "plugins.alerting.monitor.doc_level_monitor_fan_out_nodes", + DEFAULT_FAN_OUT_NODES, + 1, + Int.MAX_VALUE, + Setting.Property.NodeScope, Setting.Property.Dynamic + ) } } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDocLevelMonitorFanOutAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDocLevelMonitorFanOutAction.kt new file mode 100644 index 000000000..343bac978 --- /dev/null +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportDocLevelMonitorFanOutAction.kt @@ -0,0 +1,1184 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.transport + +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.launch +import org.apache.logging.log4j.LogManager +import org.opensearch.OpenSearchSecurityException +import org.opensearch.OpenSearchStatusException +import org.opensearch.action.DocWriteRequest +import org.opensearch.action.admin.indices.refresh.RefreshAction +import org.opensearch.action.admin.indices.refresh.RefreshRequest +import org.opensearch.action.bulk.BackoffPolicy +import org.opensearch.action.bulk.BulkRequest +import org.opensearch.action.bulk.BulkResponse +import org.opensearch.action.get.MultiGetItemResponse +import org.opensearch.action.get.MultiGetRequest +import org.opensearch.action.index.IndexRequest +import org.opensearch.action.search.SearchAction +import org.opensearch.action.search.SearchRequest +import org.opensearch.action.search.SearchResponse +import org.opensearch.action.support.ActionFilters +import org.opensearch.action.support.HandledTransportAction +import
org.opensearch.alerting.AlertService +import org.opensearch.alerting.MonitorRunnerService +import org.opensearch.alerting.TriggerService +import org.opensearch.alerting.action.DocLevelMonitorFanOutAction +import org.opensearch.alerting.action.DocLevelMonitorFanOutRequest +import org.opensearch.alerting.action.DocLevelMonitorFanOutResponse +import org.opensearch.alerting.action.GetDestinationsAction +import org.opensearch.alerting.action.GetDestinationsRequest +import org.opensearch.alerting.action.GetDestinationsResponse +import org.opensearch.alerting.model.ActionRunResult +import org.opensearch.alerting.model.AlertContext +import org.opensearch.alerting.model.DocumentLevelTriggerRunResult +import org.opensearch.alerting.model.IndexExecutionContext +import org.opensearch.alerting.model.InputRunResults +import org.opensearch.alerting.model.MonitorMetadata +import org.opensearch.alerting.model.MonitorRunResult +import org.opensearch.alerting.model.destination.Destination +import org.opensearch.alerting.model.destination.DestinationContextFactory +import org.opensearch.alerting.model.userErrorMessage +import org.opensearch.alerting.opensearchapi.InjectorContextElement +import org.opensearch.alerting.opensearchapi.convertToMap +import org.opensearch.alerting.opensearchapi.suspendUntil +import org.opensearch.alerting.opensearchapi.withClosableContext +import org.opensearch.alerting.script.DocumentLevelTriggerExecutionContext +import org.opensearch.alerting.script.QueryLevelTriggerExecutionContext +import org.opensearch.alerting.script.TriggerExecutionContext +import org.opensearch.alerting.settings.AlertingSettings +import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_BACKOFF_COUNT +import org.opensearch.alerting.settings.AlertingSettings.Companion.ALERT_BACKOFF_MILLIS +import org.opensearch.alerting.settings.AlertingSettings.Companion.DOC_LEVEL_MONITOR_FETCH_ONLY_QUERY_FIELDS_ENABLED +import org.opensearch.alerting.settings.AlertingSettings.Companion.DOC_LEVEL_MONITOR_SHARD_FETCH_SIZE +import org.opensearch.alerting.settings.AlertingSettings.Companion.FINDINGS_INDEXING_BATCH_SIZE +import org.opensearch.alerting.settings.AlertingSettings.Companion.MAX_ACTIONABLE_ALERT_COUNT +import org.opensearch.alerting.settings.AlertingSettings.Companion.PERCOLATE_QUERY_DOCS_SIZE_MEMORY_PERCENTAGE_LIMIT +import org.opensearch.alerting.settings.AlertingSettings.Companion.PERCOLATE_QUERY_MAX_NUM_DOCS_IN_MEMORY +import org.opensearch.alerting.settings.DestinationSettings +import org.opensearch.alerting.util.AlertingException +import org.opensearch.alerting.util.defaultToPerExecutionAction +import org.opensearch.alerting.util.destinationmigration.NotificationActionConfigs +import org.opensearch.alerting.util.destinationmigration.NotificationApiUtils +import org.opensearch.alerting.util.destinationmigration.getTitle +import org.opensearch.alerting.util.destinationmigration.publishLegacyNotification +import org.opensearch.alerting.util.destinationmigration.sendNotification +import org.opensearch.alerting.util.getActionExecutionPolicy +import org.opensearch.alerting.util.isAllowed +import org.opensearch.alerting.util.isTestAction +import org.opensearch.alerting.util.parseSampleDocTags +import org.opensearch.alerting.util.printsSampleDocData +import org.opensearch.alerting.util.use +import org.opensearch.alerting.workflow.WorkflowRunContext +import org.opensearch.client.Client +import org.opensearch.client.node.NodeClient +import org.opensearch.cluster.routing.Preference +import 
org.opensearch.cluster.service.ClusterService +import org.opensearch.common.inject.Inject +import org.opensearch.common.settings.Settings +import org.opensearch.common.xcontent.XContentFactory +import org.opensearch.common.xcontent.XContentType +import org.opensearch.commons.alerting.AlertingPluginInterface +import org.opensearch.commons.alerting.action.PublishFindingsRequest +import org.opensearch.commons.alerting.action.SubscribeFindingsResponse +import org.opensearch.commons.alerting.model.ActionExecutionResult +import org.opensearch.commons.alerting.model.Alert +import org.opensearch.commons.alerting.model.DocLevelMonitorInput +import org.opensearch.commons.alerting.model.DocLevelQuery +import org.opensearch.commons.alerting.model.DocumentLevelTrigger +import org.opensearch.commons.alerting.model.Finding +import org.opensearch.commons.alerting.model.Monitor +import org.opensearch.commons.alerting.model.Table +import org.opensearch.commons.alerting.model.action.Action +import org.opensearch.commons.alerting.model.action.PerAlertActionScope +import org.opensearch.commons.alerting.util.string +import org.opensearch.commons.notifications.model.NotificationConfigInfo +import org.opensearch.core.action.ActionListener +import org.opensearch.core.common.Strings +import org.opensearch.core.common.bytes.BytesReference +import org.opensearch.core.rest.RestStatus +import org.opensearch.core.xcontent.NamedXContentRegistry +import org.opensearch.core.xcontent.ToXContent +import org.opensearch.core.xcontent.XContentBuilder +import org.opensearch.index.query.BoolQueryBuilder +import org.opensearch.index.query.Operator +import org.opensearch.index.query.QueryBuilders +import org.opensearch.index.seqno.SequenceNumbers +import org.opensearch.indices.IndexClosedException +import org.opensearch.monitor.jvm.JvmStats +import org.opensearch.percolator.PercolateQueryBuilderExt +import org.opensearch.script.Script +import org.opensearch.script.ScriptService +import org.opensearch.script.TemplateScript +import org.opensearch.search.SearchHit +import org.opensearch.search.SearchHits +import org.opensearch.search.builder.SearchSourceBuilder +import org.opensearch.search.fetch.subphase.FetchSourceContext +import org.opensearch.search.sort.SortOrder +import org.opensearch.tasks.Task +import org.opensearch.transport.TransportService +import java.io.IOException +import java.time.Instant +import java.util.UUID +import java.util.stream.Collectors + +private val log = LogManager.getLogger(TransportDocLevelMonitorFanOutAction::class.java) +private val scope: CoroutineScope = CoroutineScope(Dispatchers.IO) + +class TransportDocLevelMonitorFanOutAction +@Inject constructor( + transportService: TransportService, + val client: Client, + val actionFilters: ActionFilters, + val clusterService: ClusterService, + val triggerService: TriggerService, + val alertService: AlertService, + val scriptService: ScriptService, + val settings: Settings, + val xContentRegistry: NamedXContentRegistry +) : HandledTransportAction( + DocLevelMonitorFanOutAction.NAME, transportService, actionFilters, ::DocLevelMonitorFanOutRequest +), + SecureTransportAction { + var nonPercolateSearchesTimeTakenStat = 0L + var percolateQueriesTimeTakenStat = 0L + var totalDocsQueriedStat = 0L + var docTransformTimeTakenStat = 0L + var totalDocsSizeInBytesStat = 0L + var docsSizeOfBatchInBytes = 0L + var findingsToTriggeredQueries: Map> = mutableMapOf() + + @Volatile var percQueryMaxNumDocsInMemory: Int = PERCOLATE_QUERY_MAX_NUM_DOCS_IN_MEMORY.get(settings) + 
@Volatile var percQueryDocsSizeMemoryPercentageLimit: Int = PERCOLATE_QUERY_DOCS_SIZE_MEMORY_PERCENTAGE_LIMIT.get(settings) + @Volatile var docLevelMonitorShardFetchSize: Int = DOC_LEVEL_MONITOR_SHARD_FETCH_SIZE.get(settings) + @Volatile var findingsIndexBatchSize: Int = FINDINGS_INDEXING_BATCH_SIZE.get(settings) + @Volatile var maxActionableAlertCount: Long = MAX_ACTIONABLE_ALERT_COUNT.get(settings) + @Volatile var retryPolicy = BackoffPolicy.constantBackoff(ALERT_BACKOFF_MILLIS.get(settings), ALERT_BACKOFF_COUNT.get(settings)) + @Volatile var allowList: List = DestinationSettings.ALLOW_LIST.get(settings) + @Volatile var fetchOnlyQueryFieldNames = DOC_LEVEL_MONITOR_FETCH_ONLY_QUERY_FIELDS_ENABLED.get(settings) + + init { + clusterService.clusterSettings.addSettingsUpdateConsumer(PERCOLATE_QUERY_MAX_NUM_DOCS_IN_MEMORY) { + percQueryMaxNumDocsInMemory = it + } + clusterService.clusterSettings.addSettingsUpdateConsumer(PERCOLATE_QUERY_DOCS_SIZE_MEMORY_PERCENTAGE_LIMIT) { + percQueryDocsSizeMemoryPercentageLimit = it + } + clusterService.clusterSettings.addSettingsUpdateConsumer(DOC_LEVEL_MONITOR_SHARD_FETCH_SIZE) { + docLevelMonitorShardFetchSize = it + } + clusterService.clusterSettings.addSettingsUpdateConsumer(FINDINGS_INDEXING_BATCH_SIZE) { + findingsIndexBatchSize = it + } + clusterService.clusterSettings.addSettingsUpdateConsumer(MAX_ACTIONABLE_ALERT_COUNT) { + maxActionableAlertCount = it + } + clusterService.clusterSettings.addSettingsUpdateConsumer(ALERT_BACKOFF_MILLIS, ALERT_BACKOFF_COUNT) { millis, count -> + retryPolicy = BackoffPolicy.constantBackoff(millis, count) + } + clusterService.clusterSettings.addSettingsUpdateConsumer(DestinationSettings.ALLOW_LIST) { + allowList = it + } + clusterService.clusterSettings.addSettingsUpdateConsumer(DOC_LEVEL_MONITOR_FETCH_ONLY_QUERY_FIELDS_ENABLED) { + fetchOnlyQueryFieldNames = it + } + } + + @Volatile + override var filterByEnabled = AlertingSettings.FILTER_BY_BACKEND_ROLES.get(settings) + + override fun doExecute( + task: Task, + request: DocLevelMonitorFanOutRequest, + listener: ActionListener + ) { + scope.launch { + executeMonitor(request, listener) + } + } + + private suspend fun executeMonitor( + request: DocLevelMonitorFanOutRequest, + listener: ActionListener + ) { + try { + val monitor = request.monitor + var monitorResult = MonitorRunResult(monitor.name, Instant.now(), Instant.now()) + val updatedIndexNames = request.indexExecutionContext.updatedIndexNames + val monitorMetadata = request.monitorMetadata + val shardIds = request.shardIds + val indexExecutionContext = request.indexExecutionContext + val concreteIndicesSeenSoFar = request.concreteIndicesSeenSoFar + val dryrun = request.dryRun + val executionId = request.executionId + val workflowRunContext = request.workflowRunContext + + val queryToDocIds = mutableMapOf>() + val inputRunResults = mutableMapOf>() + val docsToQueries = mutableMapOf>() + val transformedDocs = mutableListOf>() + val findingIdToDocSource = mutableMapOf() + val isTempMonitor = dryrun || monitor.id == Monitor.NO_ID + + val docLevelMonitorInput = request.monitor.inputs[0] as DocLevelMonitorInput + val queries: List = docLevelMonitorInput.queries + val fieldsToBeQueried = mutableSetOf() + if (fetchOnlyQueryFieldNames) { + for (it in queries) { + if (it.queryFieldNames.isEmpty()) { + fieldsToBeQueried.clear() + log.debug( + "Monitor ${request.monitor.id} : " + + "Doc Level query ${it.id} : ${it.query} doesn't have queryFieldNames populated. 
" + + "Cannot optimize monitor to fetch only query-relevant fields. " + + "Querying entire doc source." + ) + break + } + fieldsToBeQueried.addAll(it.queryFieldNames) + } + if (fieldsToBeQueried.isNotEmpty()) { + log.debug( + "Monitor ${monitor.id} Querying only fields " + + "${fieldsToBeQueried.joinToString()} instead of entire _source of documents" + ) + } + } + + fetchShardDataAndMaybeExecutePercolateQueries( + monitor, + indexExecutionContext, + monitorMetadata, + inputRunResults, + docsToQueries, + updatedIndexNames, + concreteIndicesSeenSoFar, + ArrayList(fieldsToBeQueried), + shardIds.map { it.id }, + transformedDocs + ) { shard, maxSeqNo -> // function passed to update last run context with new max sequence number + indexExecutionContext.updatedLastRunContext[shard] = maxSeqNo + } + if (transformedDocs.isNotEmpty()) { + performPercolateQueryAndResetCounters( + monitor, + monitorMetadata, + updatedIndexNames, + concreteIndicesSeenSoFar, + inputRunResults, + docsToQueries, + transformedDocs + ) + } + monitorResult = monitorResult.copy(inputResults = InputRunResults(listOf(inputRunResults))) + + /* + populate the map queryToDocIds with pairs of + this fixes the issue of passing id, name, tags fields of DocLevelQuery object correctly to TriggerExpressionParser + */ + queries.forEach { + if (inputRunResults.containsKey(it.id)) { + queryToDocIds[it] = inputRunResults[it.id]!! + } + } + + val idQueryMap: Map = queries.associateBy { it.id } + + val triggerResults = mutableMapOf() + // If there are no triggers defined, we still want to generate findings + if (monitor.triggers.isEmpty()) { + if (dryrun == false && monitor.id != Monitor.NO_ID) { + createFindings(monitor, docsToQueries, idQueryMap, true) + } + } else { + monitor.triggers.forEach { + triggerResults[it.id] = runForEachDocTrigger( + monitorResult, + it as DocumentLevelTrigger, + monitor, + idQueryMap, + docsToQueries, + queryToDocIds, + dryrun, + executionId = executionId, + findingIdToDocSource, + workflowRunContext = workflowRunContext + ) + } + } + + if (!isTempMonitor) { + // If any error happened during trigger execution, upsert monitor error alert + val errorMessage = constructErrorMessageFromTriggerResults(triggerResults = triggerResults) + log.info(errorMessage) + if (errorMessage.isNotEmpty()) { + alertService.upsertMonitorErrorAlert( + monitor = monitor, + errorMessage = errorMessage, + executionId = executionId, + workflowRunContext + ) + } else { + onSuccessfulMonitorRun(monitor) + } + } + + listener.onResponse( + DocLevelMonitorFanOutResponse( + nodeId = clusterService.localNode().id, + executionId = request.executionId, + monitorId = monitor.id, + indexExecutionContext.updatedLastRunContext, + InputRunResults(listOf(inputRunResults)), + triggerResults + ) + ) + } catch (e: Exception) { + log.error("${request.monitor.id} Failed to run fan_out on node ${clusterService.localNode().id} due to error $e") + listener.onFailure(AlertingException.wrap(e)) + } + } + + private suspend fun runForEachDocTrigger( + monitorResult: MonitorRunResult, + trigger: DocumentLevelTrigger, + monitor: Monitor, + idQueryMap: Map, + docsToQueries: MutableMap>, + queryToDocIds: Map>, + dryrun: Boolean, + executionId: String, + findingIdToDocSource: MutableMap, + workflowRunContext: WorkflowRunContext? 
+ ): DocumentLevelTriggerRunResult { + val triggerCtx = DocumentLevelTriggerExecutionContext(monitor, trigger) + val triggerResult = triggerService.runDocLevelTrigger(monitor, trigger, queryToDocIds) + + val triggerFindingDocPairs = mutableListOf>() + + // TODO: Implement throttling for findings + val findingToDocPairs = createFindings( + monitor, + docsToQueries, + idQueryMap, + !dryrun && monitor.id != Monitor.NO_ID, + executionId + ) + + findingToDocPairs.forEach { + // Only pick those entries whose docs have triggers associated with them + if (triggerResult.triggeredDocs.contains(it.second)) { + triggerFindingDocPairs.add(Pair(it.first, it.second)) + } + } + + val actionCtx = triggerCtx.copy( + triggeredDocs = triggerResult.triggeredDocs, + relatedFindings = findingToDocPairs.map { it.first }, + error = monitorResult.error ?: triggerResult.error + ) + + if (printsSampleDocData(trigger) && triggerFindingDocPairs.isNotEmpty()) + getDocSources( + findingToDocPairs = findingToDocPairs, + monitor = monitor, + findingIdToDocSource = findingIdToDocSource + ) + + val alerts = mutableListOf() + val alertContexts = mutableListOf() + triggerFindingDocPairs.forEach { + val alert = alertService.composeDocLevelAlert( + listOf(it.first), + listOf(it.second), + triggerCtx, + monitorResult.alertError() ?: triggerResult.alertError(), + executionId = executionId, + workflorwRunContext = workflowRunContext + ) + alerts.add(alert) + + val docSource = findingIdToDocSource[alert.findingIds.first()]?.response?.convertToMap() + + alertContexts.add( + AlertContext( + alert = alert, + associatedQueries = alert.findingIds.flatMap { findingId -> + findingsToTriggeredQueries.getOrDefault(findingId, emptyList()) ?: emptyList() + }, + sampleDocs = listOfNotNull(docSource) + ) + ) + } + + val shouldDefaultToPerExecution = defaultToPerExecutionAction( + maxActionableAlertCount, + monitorId = monitor.id, + triggerId = trigger.id, + totalActionableAlertCount = alerts.size, + monitorOrTriggerError = actionCtx.error + ) + + for (action in trigger.actions) { + val actionExecutionScope = action.getActionExecutionPolicy(monitor)!!.actionExecutionScope + if (actionExecutionScope is PerAlertActionScope && !shouldDefaultToPerExecution) { + for (alertContext in alertContexts) { + val actionResults = this.runAction(action, actionCtx.copy(alerts = listOf(alertContext)), monitor, dryrun) + triggerResult.actionResultsMap.getOrPut(alertContext.alert.id) { mutableMapOf() } + triggerResult.actionResultsMap[alertContext.alert.id]?.set(action.id, actionResults) + } + } else if (alertContexts.isNotEmpty()) { + val actionResults = this.runAction(action, actionCtx.copy(alerts = alertContexts), monitor, dryrun) + for (alert in alerts) { + triggerResult.actionResultsMap.getOrPut(alert.id) { mutableMapOf() } + triggerResult.actionResultsMap[alert.id]?.set(action.id, actionResults) + } + } + } + + // Alerts are saved after the actions since if there are failures in the actions, they can be stated in the alert + if (!dryrun && monitor.id != Monitor.NO_ID) { + val updatedAlerts = alerts.map { alert -> + val actionResults = triggerResult.actionResultsMap.getOrDefault(alert.id, emptyMap()) + val actionExecutionResults = actionResults.values.map { actionRunResult -> + ActionExecutionResult(actionRunResult.actionId, actionRunResult.executionTime, if (actionRunResult.throttled) 1 else 0) + } + alert.copy(actionExecutionResults = actionExecutionResults) + } + + retryPolicy.let { + alertService.saveAlerts( + monitor.dataSources, + updatedAlerts, + it, + 
routingId = monitor.id + ) + } + } + return triggerResult + } + + /** + * 1. Bulk index all findings based on shouldCreateFinding flag + * 2. invoke publishFinding() to kickstart auto-correlations + * 3. Returns a list of pairs for finding id to doc id + */ + private suspend fun createFindings( + monitor: Monitor, + docsToQueries: MutableMap>, + idQueryMap: Map, + shouldCreateFinding: Boolean, + workflowExecutionId: String? = null, + ): List> { + + val findingDocPairs = mutableListOf>() + val findings = mutableListOf() + val indexRequests = mutableListOf() + val findingsToTriggeredQueries = mutableMapOf>() + + docsToQueries.forEach { + val triggeredQueries = it.value.map { queryId -> idQueryMap[queryId]!! } + + // Before the "|" is the doc id and after the "|" is the index + val docIndex = it.key.split("|") + + val finding = Finding( + id = UUID.randomUUID().toString(), + relatedDocIds = listOf(docIndex[0]), + correlatedDocIds = listOf(docIndex[0]), + monitorId = monitor.id, + monitorName = monitor.name, + index = docIndex[1], + docLevelQueries = triggeredQueries, + timestamp = Instant.now(), + executionId = workflowExecutionId + ) + findingDocPairs.add(Pair(finding.id, it.key)) + findings.add(finding) + findingsToTriggeredQueries[finding.id] = triggeredQueries + + val findingStr = + finding.toXContent(XContentBuilder.builder(XContentType.JSON.xContent()), ToXContent.EMPTY_PARAMS) + .string() + log.debug("Findings: $findingStr") + + if (shouldCreateFinding) { + indexRequests += IndexRequest(monitor.dataSources.findingsIndex) + .source(findingStr, XContentType.JSON) + .id(finding.id) + .opType(DocWriteRequest.OpType.CREATE) + } + } + + if (indexRequests.isNotEmpty()) { + bulkIndexFindings(monitor, indexRequests) + } + + try { + findings.forEach { finding -> + publishFinding(monitor, finding) + } + } catch (e: Exception) { + // suppress exception + log.error("Optional finding callback failed", e) + } + this.findingsToTriggeredQueries += findingsToTriggeredQueries + + return findingDocPairs + } + + private suspend fun bulkIndexFindings( + monitor: Monitor, + indexRequests: List + ) { + indexRequests.chunked(findingsIndexBatchSize).forEach { batch -> + val bulkResponse: BulkResponse = client.suspendUntil { + bulk(BulkRequest().add(batch), it) + } + if (bulkResponse.hasFailures()) { + bulkResponse.items.forEach { item -> + if (item.isFailed) { + log.error("Failed indexing the finding ${item.id} of monitor [${monitor.id}]") + } + } + } else { + log.debug("[${bulkResponse.items.size}] All findings successfully indexed.") + } + } + client.execute(RefreshAction.INSTANCE, RefreshRequest(monitor.dataSources.findingsIndex)) + } + + private fun publishFinding( + monitor: Monitor, + finding: Finding + ) { + val publishFindingsRequest = PublishFindingsRequest(monitor.id, finding) + AlertingPluginInterface.publishFinding( + client as NodeClient, + publishFindingsRequest, + object : ActionListener { + override fun onResponse(response: SubscribeFindingsResponse) {} + + override fun onFailure(e: Exception) {} + } + ) + } + + suspend fun runAction( + action: Action, + ctx: TriggerExecutionContext, + monitor: Monitor, + dryrun: Boolean + ): ActionRunResult { + return try { + if (ctx is QueryLevelTriggerExecutionContext && !MonitorRunnerService.isActionActionable(action, ctx.alert)) { + return ActionRunResult(action.id, action.name, mapOf(), true, null, null) + } + val actionOutput = mutableMapOf() + actionOutput[Action.SUBJECT] = if (action.subjectTemplate != null) + compileTemplate(action.subjectTemplate!!, 
ctx) + else "" + actionOutput[Action.MESSAGE] = compileTemplate(action.messageTemplate, ctx) + if (Strings.isNullOrEmpty(actionOutput[Action.MESSAGE])) { + throw IllegalStateException("Message content missing in the Destination with id: ${action.destinationId}") + } + if (!dryrun) { + client.threadPool().threadContext.stashContext().use { + withClosableContext( + InjectorContextElement( + monitor.id, + settings, + client.threadPool().threadContext, + monitor.user?.roles, + monitor.user + ) + ) { + actionOutput[Action.MESSAGE_ID] = getConfigAndSendNotification( + action, + actionOutput[Action.SUBJECT], + actionOutput[Action.MESSAGE]!! + ) + } + } + } + ActionRunResult( + action.id, + action.name, + actionOutput, + false, + Instant.ofEpochMilli(client.threadPool().absoluteTimeInMillis()), + null + ) + } catch (e: Exception) { + ActionRunResult(action.id, action.name, mapOf(), false, Instant.ofEpochMilli(client.threadPool().absoluteTimeInMillis()), e) + } + } + + protected suspend fun getConfigAndSendNotification( + action: Action, + subject: String?, + message: String + ): String { + val config = getConfigForNotificationAction(action) + if (config.destination == null && config.channel == null) { + throw IllegalStateException("Unable to find a Notification Channel or Destination config with id [${action.destinationId}]") + } + + // Adding a check on TEST_ACTION Destination type here to avoid supporting it as a LegacyBaseMessage type + // just for Alerting integration tests + if (config.destination?.isTestAction() == true) { + return "test action" + } + + if (config.destination?.isAllowed(allowList) == false) { + throw IllegalStateException( + "Monitor contains a Destination type that is not allowed: ${config.destination.type}" + ) + } + + var actionResponseContent = "" + actionResponseContent = config.channel + ?.sendNotification( + client, + config.channel.getTitle(subject), + message + ) ?: actionResponseContent + + actionResponseContent = config.destination + ?.buildLegacyBaseMessage(subject, message, getDestinationContextFactory().getDestinationContext(config.destination)) + ?.publishLegacyNotification(client) + ?: actionResponseContent + + return actionResponseContent + } + + /** 1. Fetch data per shard for given index. (only 10000 docs are fetched. + * needs to be converted to scroll if not performant enough) + * 2. Transform documents to conform to format required for percolate query + * 3a. Check if docs in memory are crossing threshold defined by setting. + * 3b. 
If yes, perform percolate query and update docToQueries Map with all hits from percolate queries */ + private suspend fun fetchShardDataAndMaybeExecutePercolateQueries( + monitor: Monitor, + indexExecutionCtx: IndexExecutionContext, + monitorMetadata: MonitorMetadata, + inputRunResults: MutableMap>, + docsToQueries: MutableMap>, + monitorInputIndices: List, + concreteIndices: List, + fieldsToBeQueried: List, + shardList: List, + transformedDocs: MutableList>, + updateLastRunContext: (String, String) -> Unit + ) { + for (shardId in shardList) { + val shard = shardId.toString() + try { + val prevSeqNo = indexExecutionCtx.lastRunContext[shard].toString().toLongOrNull() + val from = prevSeqNo ?: SequenceNumbers.NO_OPS_PERFORMED + var to: Long = Long.MAX_VALUE + while (to >= from) { + val hits: SearchHits = searchShard( + indexExecutionCtx.concreteIndexName, + shard, + from, + to, + indexExecutionCtx.docIds, + fieldsToBeQueried, + ) + if (hits.hits.isEmpty()) { + if (to == Long.MAX_VALUE) { + updateLastRunContext(shard, (prevSeqNo ?: SequenceNumbers.NO_OPS_PERFORMED).toString()) // didn't find any docs + } + break + } + if (to == Long.MAX_VALUE) { // max sequence number of shard needs to be computed + updateLastRunContext(shard, hits.hits[0].seqNo.toString()) + } + val leastSeqNoFromHits = hits.hits.last().seqNo + to = leastSeqNoFromHits - 1 + val startTime = System.currentTimeMillis() + transformedDocs.addAll( + transformSearchHitsAndReconstructDocs( + hits, + indexExecutionCtx.indexName, + indexExecutionCtx.concreteIndexName, + monitor.id, + indexExecutionCtx.conflictingFields, + ) + ) + if ( + transformedDocs.isNotEmpty() && + shouldPerformPercolateQueryAndFlushInMemoryDocs(transformedDocs.size) + ) { + performPercolateQueryAndResetCounters( + monitor, + monitorMetadata, + monitorInputIndices, + concreteIndices, + inputRunResults, + docsToQueries, + transformedDocs + ) + } + docTransformTimeTakenStat += System.currentTimeMillis() - startTime + } + } catch (e: Exception) { + log.error( + "Monitor ${monitor.id} :" + + "Failed to fetch data from shard [$shard] of index [${indexExecutionCtx.concreteIndexName}]. 
" + + "Error: ${e.message}", + e + ) + if (e is IndexClosedException) { + throw e + } + } + if ( + transformedDocs.isNotEmpty() && + shouldPerformPercolateQueryAndFlushInMemoryDocs(transformedDocs.size) + ) { + performPercolateQueryAndResetCounters( + monitor, + monitorMetadata, + monitorInputIndices, + concreteIndices, + inputRunResults, + docsToQueries, + transformedDocs + ) + } + } + } + + private suspend fun performPercolateQueryAndResetCounters( + monitor: Monitor, + monitorMetadata: MonitorMetadata, + monitorInputIndices: List, + concreteIndices: List, + inputRunResults: MutableMap>, + docsToQueries: MutableMap>, + transformedDocs: MutableList> + ) { + try { + val percolateQueryResponseHits = runPercolateQueryOnTransformedDocs( + transformedDocs, + monitor, + monitorMetadata, + concreteIndices, + monitorInputIndices, + ) + + percolateQueryResponseHits.forEach { hit -> + var id = hit.id + concreteIndices.forEach { id = id.replace("_${it}_${monitor.id}", "") } + monitorInputIndices.forEach { id = id.replace("_${it}_${monitor.id}", "") } + val docIndices = hit.field("_percolator_document_slot").values.map { it.toString().toInt() } + docIndices.forEach { idx -> + val docIndex = "${transformedDocs[idx].first}|${transformedDocs[idx].second.concreteIndexName}" + inputRunResults.getOrPut(id) { mutableSetOf() }.add(docIndex) + docsToQueries.getOrPut(docIndex) { mutableListOf() }.add(id) + } + } + totalDocsQueriedStat += transformedDocs.size.toLong() + } finally { + transformedDocs.clear() + docsSizeOfBatchInBytes = 0 + } + } + + /** Executes percolate query on the docs against the monitor's query index and return the hits from the search response*/ + private suspend fun runPercolateQueryOnTransformedDocs( + docs: MutableList>, + monitor: Monitor, + monitorMetadata: MonitorMetadata, + concreteIndices: List, + monitorInputIndices: List, + ): SearchHits { + val indices = docs.stream().map { it.second.indexName }.distinct().collect(Collectors.toList()) + val boolQueryBuilder = BoolQueryBuilder().must(buildShouldClausesOverPerIndexMatchQueries(indices)) + val percolateQueryBuilder = + PercolateQueryBuilderExt("query", docs.map { it.second.docSource }, XContentType.JSON) + if (monitor.id.isNotEmpty()) { + boolQueryBuilder.must(QueryBuilders.matchQuery("monitor_id", monitor.id).operator(Operator.AND)) + } + boolQueryBuilder.filter(percolateQueryBuilder) + val queryIndices = + docs.map { monitorMetadata.sourceToQueryIndexMapping[it.second.indexName + monitor.id] }.distinct() + if (queryIndices.isEmpty()) { + val message = + "Monitor ${monitor.id}: Failed to resolve query Indices from source indices during monitor execution!" 
+ + " sourceIndices: $monitorInputIndices" + log.error(message) + throw AlertingException.wrap( + OpenSearchStatusException(message, RestStatus.INTERNAL_SERVER_ERROR) + ) + } + + val searchRequest = + SearchRequest().indices(*queryIndices.toTypedArray()).preference(Preference.PRIMARY_FIRST.type()) + val searchSourceBuilder = SearchSourceBuilder() + searchSourceBuilder.query(boolQueryBuilder) + searchRequest.source(searchSourceBuilder) + log.debug( + "Monitor ${monitor.id}: " + + "Executing percolate query for docs from source indices " + + "$monitorInputIndices against query index $queryIndices" + ) + var response: SearchResponse + try { + response = client.suspendUntil { + client.execute(SearchAction.INSTANCE, searchRequest, it) + } + } catch (e: Exception) { + throw IllegalStateException( + "Monitor ${monitor.id}:" + + " Failed to run percolate search for sourceIndex [${concreteIndices.joinToString()}] " + + "and queryIndex [${queryIndices.joinToString()}] for ${docs.size} document(s)", + e + ) + } + + if (response.status() !== RestStatus.OK) { + throw IOException( + "Monitor ${monitor.id}: Failed to search percolate index: ${queryIndices.joinToString()}. " + + "Response status is ${response.status()}" + ) + } + log.debug("Monitor ${monitor.id} PERF_DEBUG: Percolate query time taken millis = ${response.took}") + percolateQueriesTimeTakenStat += response.took.millis + return response.hits + } + + /** we cannot use terms query because `index` field's mapping is of type TEXT and not keyword. Refer doc-level-queries.json*/ + private fun buildShouldClausesOverPerIndexMatchQueries(indices: List): BoolQueryBuilder { + val boolQueryBuilder = QueryBuilders.boolQuery() + indices.forEach { boolQueryBuilder.should(QueryBuilders.matchQuery("index", it)) } + return boolQueryBuilder + } + + /** Executes search query on given shard of given index to fetch docs with sequene number greater than prevSeqNo. + * This method hence fetches only docs from shard which haven't been queried before + */ + private suspend fun searchShard( + index: String, + shard: String, + prevSeqNo: Long?, + maxSeqNo: Long, + docIds: List? = null, + fieldsToFetch: List, + ): SearchHits { + if (prevSeqNo?.equals(maxSeqNo) == true && maxSeqNo != 0L) { + return SearchHits.empty() + } + val boolQueryBuilder = BoolQueryBuilder() + boolQueryBuilder.filter(QueryBuilders.rangeQuery("_seq_no").gt(prevSeqNo).lte(maxSeqNo)) + + if (!docIds.isNullOrEmpty()) { + boolQueryBuilder.filter(QueryBuilders.termsQuery("_id", docIds)) + } + + val request: SearchRequest = SearchRequest() + .indices(index) + .preference("_shards:$shard") + .source( + SearchSourceBuilder() + .version(true) + .sort("_seq_no", SortOrder.DESC) + .seqNoAndPrimaryTerm(true) + .query(boolQueryBuilder) + .size(docLevelMonitorShardFetchSize) + ) + + if (fieldsToFetch.isNotEmpty() && fetchOnlyQueryFieldNames) { + request.source().fetchSource(false) + for (field in fieldsToFetch) { + request.source().fetchField(field) + } + } + val response: SearchResponse = client.suspendUntil { client.search(request, it) } + if (response.status() !== RestStatus.OK) { + throw IOException("Failed to search shard: [$shard] in index [$index]. Response status is ${response.status()}") + } + nonPercolateSearchesTimeTakenStat += response.took.millis + return response.hits + } + + /** Transform field names and index names in all the search hits to format required to run percolate search against them. 
+ * Hits are transformed using method transformDocumentFieldNames() */ + private fun transformSearchHitsAndReconstructDocs( + hits: SearchHits, + index: String, + concreteIndex: String, + monitorId: String, + conflictingFields: List, + ): List> { + return hits.mapNotNull(fun(hit: SearchHit): Pair? { + try { + val sourceMap = if (hit.hasSource()) { + hit.sourceAsMap + } else { + constructSourceMapFromFieldsInHit(hit) + } + transformDocumentFieldNames( + sourceMap, + conflictingFields, + "_${index}_$monitorId", + "_${concreteIndex}_$monitorId", + "" + ) + var xContentBuilder = XContentFactory.jsonBuilder().map(sourceMap) + val sourceRef = BytesReference.bytes(xContentBuilder) + docsSizeOfBatchInBytes += sourceRef.ramBytesUsed() + totalDocsSizeInBytesStat += sourceRef.ramBytesUsed() + return Pair( + hit.id, + TransformedDocDto(index, concreteIndex, hit.id, sourceRef) + ) + } catch (e: Exception) { + log.error("Monitor $monitorId: Failed to transform payload $hit for percolate query", e) + // skip any document which we fail to transform because we anyway won't be able to run percolate queries on them. + return null + } + }) + } + + private fun constructSourceMapFromFieldsInHit(hit: SearchHit): MutableMap { + if (hit.fields == null) + return mutableMapOf() + val sourceMap: MutableMap = mutableMapOf() + for (field in hit.fields) { + if (field.value.values != null && field.value.values.isNotEmpty()) + if (field.value.values.size == 1) { + sourceMap[field.key] = field.value.values[0] + } else sourceMap[field.key] = field.value.values + } + return sourceMap + } + + /** + * Traverses document fields in leaves recursively and appends [fieldNameSuffixIndex] to field names with same names + * but different mappings & [fieldNameSuffixPattern] to field names which have unique names. 
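+ * Conflicting fields (same name but different mappings across the concrete indices) receive the concrete-index suffix; all remaining fields receive the index-pattern suffix.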
+ * + * Example for index name my_log_index and monitor ID TReewWdsf2gdJFV: + * { "a": { "b": 1234 } } ----> { "a": { "b_my_log_index_TReewWdsf2gdJFV": 1234 } } + * + * @param jsonAsMap Input JSON (as Map) + * @param conflictingFields Field names that share a name but have different mappings across the concrete indices + * @param fieldNameSuffixPattern Suffix (input index pattern + monitor id) appended to non-conflicting field names + * @param fieldNameSuffixIndex Suffix (concrete index name + monitor id) appended to conflicting field names + * @param fieldNamePrefix Dotted path of the enclosing fields, used to match nested conflicting fields + */ + private fun transformDocumentFieldNames( + jsonAsMap: MutableMap, + conflictingFields: List, + fieldNameSuffixPattern: String, + fieldNameSuffixIndex: String, + fieldNamePrefix: String + ) { + val tempMap = mutableMapOf() + val it: MutableIterator> = jsonAsMap.entries.iterator() + while (it.hasNext()) { + val entry = it.next() + if (entry.value is Map<*, *>) { + transformDocumentFieldNames( + entry.value as MutableMap, + conflictingFields, + fieldNameSuffixPattern, + fieldNameSuffixIndex, + if (fieldNamePrefix == "") entry.key else "$fieldNamePrefix.${entry.key}" + ) + } else if (!entry.key.endsWith(fieldNameSuffixPattern) && !entry.key.endsWith(fieldNameSuffixIndex)) { + var alreadyReplaced = false + conflictingFields.forEach { conflictingField -> + if (conflictingField == "$fieldNamePrefix.${entry.key}" || (fieldNamePrefix == "" && conflictingField == entry.key)) { + tempMap["${entry.key}$fieldNameSuffixIndex"] = entry.value + it.remove() + alreadyReplaced = true + } + } + if (!alreadyReplaced) { + tempMap["${entry.key}$fieldNameSuffixPattern"] = entry.value + it.remove() + } + } + } + jsonAsMap.putAll(tempMap) + } + + private fun shouldPerformPercolateQueryAndFlushInMemoryDocs( + numDocs: Int + ): Boolean { + return isInMemoryDocsSizeExceedingMemoryLimit(docsSizeOfBatchInBytes) || + isInMemoryNumDocsExceedingMaxDocsPerPercolateQueryLimit(numDocs) + } + + /** + * Returns true if the docs fetched from shards so far take up more than the threshold percentage + * (default: 10; the setting is dynamic and configurable) of the max heap size. + */ + private fun isInMemoryDocsSizeExceedingMemoryLimit(docsBytesSize: Long): Boolean { + val thresholdPercentage = percQueryDocsSizeMemoryPercentageLimit + val heapMaxBytes = JvmStats.jvmStats().mem.heapMax.bytes + val thresholdBytes = (thresholdPercentage.toDouble() / 100.0) * heapMaxBytes + + return docsBytesSize > thresholdBytes + } + + private fun isInMemoryNumDocsExceedingMaxDocsPerPercolateQueryLimit(numDocs: Int): Boolean { + val maxNumDocsThreshold = percQueryMaxNumDocsInMemory + return numDocs >= maxNumDocsThreshold + } + + /** + * Performs an mGet request to retrieve the documents associated with findings. + * + * When possible, this will only retrieve the document fields that are specifically + * referenced for printing in the mustache template. + */ + private suspend fun getDocSources( + findingToDocPairs: List>, + monitor: Monitor, + findingIdToDocSource: MutableMap + ) { + val docFieldTags = parseSampleDocTags(monitor.triggers) + val request = MultiGetRequest() + + // Perform mGet request in batches.
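+ // The findings indexing batch size setting is reused here to bound how many finding-to-doc pairs are processed per mGet batch.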
+ findingToDocPairs.chunked(findingsIndexBatchSize).forEach { batch -> + batch.forEach { (findingId, docIdAndIndex) -> + val docIdAndIndexSplit = docIdAndIndex.split("|") + val docId = docIdAndIndexSplit[0] + val concreteIndex = docIdAndIndexSplit[1] + if (findingId.isNotEmpty() && docId.isNotEmpty() && concreteIndex.isNotEmpty()) { + val docItem = MultiGetRequest.Item(concreteIndex, docId) + if (docFieldTags.isNotEmpty()) + docItem.fetchSourceContext(FetchSourceContext(true, docFieldTags.toTypedArray(), emptyArray())) + request.add(docItem) + } + val response = client.suspendUntil { client.multiGet(request, it) } + response.responses.forEach { item -> + findingIdToDocSource[findingId] = item + } + } + } + } + + /** + * The "destination" ID referenced in a Monitor Action could either be a Notification config or a Destination config + * depending on whether the background migration process has already migrated it from a Destination to a Notification config. + * + * To cover both of these cases, the Notification config will take precedence and if it is not found, the Destination will be retrieved. + */ + private suspend fun getConfigForNotificationAction( + action: Action + ): NotificationActionConfigs { + var destination: Destination? = null + var notificationPermissionException: Exception? = null + + var channel: NotificationConfigInfo? = null + try { + channel = + NotificationApiUtils.getNotificationConfigInfo(client as NodeClient, action.destinationId) + } catch (e: OpenSearchSecurityException) { + notificationPermissionException = e + } + + // If the channel was not found, try to retrieve the Destination + if (channel == null) { + destination = try { + val table = Table( + "asc", + "destination.name.keyword", + null, + 1, + 0, + null + ) + val getDestinationsRequest = GetDestinationsRequest( + action.destinationId, + 0L, + null, + table, + "ALL" + ) + + val getDestinationsResponse: GetDestinationsResponse = client.suspendUntil { + client.execute(GetDestinationsAction.INSTANCE, getDestinationsRequest, it) + } + getDestinationsResponse.destinations.firstOrNull() + } catch (e: IllegalStateException) { + // Catching the exception thrown when the Destination was not found so the NotificationActionConfigs object can be returned + null + } catch (e: OpenSearchSecurityException) { + if (notificationPermissionException != null) + throw notificationPermissionException + else + throw e + } + + if (destination == null && notificationPermissionException != null) + throw notificationPermissionException + } + + return NotificationActionConfigs(destination, channel) + } + + private fun getDestinationContextFactory(): DestinationContextFactory { + val destinationSettings = DestinationSettings.loadDestinationSettings(settings) + return DestinationContextFactory(client, xContentRegistry, destinationSettings) + } + + private fun compileTemplate(template: Script, ctx: TriggerExecutionContext): String { + return scriptService.compile(template, TemplateScript.CONTEXT) + .newInstance(template.params + mapOf("ctx" to ctx.asTemplateArg())) + .execute() + } + + private suspend fun onSuccessfulMonitorRun(monitor: Monitor) { + alertService.clearMonitorErrorAlert(monitor) + if (monitor.dataSources.alertsHistoryIndex != null) { + alertService.moveClearedErrorAlertsToHistory( + monitor.id, + monitor.dataSources.alertsIndex, + monitor.dataSources.alertsHistoryIndex!! + ) + } + } + + private fun constructErrorMessageFromTriggerResults( + triggerResults: MutableMap? 
+
+    private fun getDestinationContextFactory(): DestinationContextFactory {
+        val destinationSettings = DestinationSettings.loadDestinationSettings(settings)
+        return DestinationContextFactory(client, xContentRegistry, destinationSettings)
+    }
+
+    private fun compileTemplate(template: Script, ctx: TriggerExecutionContext): String {
+        return scriptService.compile(template, TemplateScript.CONTEXT)
+            .newInstance(template.params + mapOf("ctx" to ctx.asTemplateArg()))
+            .execute()
+    }
+
+    private suspend fun onSuccessfulMonitorRun(monitor: Monitor) {
+        alertService.clearMonitorErrorAlert(monitor)
+        if (monitor.dataSources.alertsHistoryIndex != null) {
+            alertService.moveClearedErrorAlertsToHistory(
+                monitor.id,
+                monitor.dataSources.alertsIndex,
+                monitor.dataSources.alertsHistoryIndex!!
+            )
+        }
+    }
+
+    private fun constructErrorMessageFromTriggerResults(
+        triggerResults: MutableMap<String, DocumentLevelTriggerRunResult>? = null
+    ): String {
+        var errorMessage = ""
+        if (triggerResults != null) {
+            val triggersErrorBuilder = StringBuilder()
+            triggerResults.forEach {
+                if (it.value.error != null) {
+                    triggersErrorBuilder.append("[${it.key}]: [${it.value.error!!.userErrorMessage()}]").append(" | ")
+                }
+            }
+            if (triggersErrorBuilder.isNotEmpty()) {
+                errorMessage = "Trigger errors: $triggersErrorBuilder"
+            }
+        }
+        return errorMessage
+    }
+
+    /**
+     * POJO holding information about each doc's concrete index, id, input index pattern/alias/datastream name
+     * and doc source. A list of these POJOs is passed to the percolate query execution logic.
+     */
+    data class TransformedDocDto(
+        var indexName: String,
+        var concreteIndexName: String,
+        var docId: String,
+        var docSource: BytesReference
+    )
+}
diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorAction.kt
index b0de10ff0..3f01f470c 100644
--- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorAction.kt
+++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteMonitorAction.kt
@@ -47,7 +47,7 @@
 private val log = LogManager.getLogger(TransportExecuteMonitorAction::class.java)
 private val scope: CoroutineScope = CoroutineScope(Dispatchers.IO)
 
 class TransportExecuteMonitorAction @Inject constructor(
-    transportService: TransportService,
+    private val transportService: TransportService,
     private val client: Client,
     private val clusterService: ClusterService,
    private val runner: MonitorRunnerService,
@@ -72,14 +72,20 @@
    // stored on the threadContext set by the security plugin when using the Alerting plugin with the Security plugin.
// runner.launch(ElasticThreadContextElement(client.threadPool().threadContext)) { runner.launch { - val (periodStart, periodEnd) = + val (periodStart, periodEnd) = if (execMonitorRequest.requestStart != null) { + Pair( + Instant.ofEpochMilli(execMonitorRequest.requestStart.millis), + Instant.ofEpochMilli(execMonitorRequest.requestEnd.millis) + ) + } else { monitor.schedule.getPeriodEndingAt(Instant.ofEpochMilli(execMonitorRequest.requestEnd.millis)) + } try { log.info( "Executing monitor from API - id: ${monitor.id}, type: ${monitor.monitorType.name}, " + "periodStart: $periodStart, periodEnd: $periodEnd, dryrun: ${execMonitorRequest.dryrun}" ) - val monitorRunResult = runner.runJob(monitor, periodStart, periodEnd, execMonitorRequest.dryrun) + val monitorRunResult = runner.runJob(monitor, periodStart, periodEnd, execMonitorRequest.dryrun, transportService) withContext(Dispatchers.IO) { actionListener.onResponse(ExecuteMonitorResponse(monitorRunResult)) } diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteWorkflowAction.kt b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteWorkflowAction.kt index 037628e9e..0d9fe527a 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteWorkflowAction.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/transport/TransportExecuteWorkflowAction.kt @@ -39,7 +39,7 @@ import java.time.Instant private val log = LogManager.getLogger(TransportExecuteWorkflowAction::class.java) class TransportExecuteWorkflowAction @Inject constructor( - transportService: TransportService, + private val transportService: TransportService, private val client: Client, private val runner: MonitorRunnerService, actionFilters: ActionFilters, @@ -59,15 +59,27 @@ class TransportExecuteWorkflowAction @Inject constructor( client.threadPool().threadContext.stashContext().use { val executeWorkflow = fun(workflow: Workflow) { runner.launch { - val (periodStart, periodEnd) = + val (periodStart, periodEnd) = if (execWorkflowRequest.requestStart != null) { + Pair( + Instant.ofEpochMilli(execWorkflowRequest.requestStart.millis), + Instant.ofEpochMilli(execWorkflowRequest.requestEnd.millis) + ) + } else { workflow.schedule.getPeriodEndingAt(Instant.ofEpochMilli(execWorkflowRequest.requestEnd.millis)) + } try { log.info( "Executing workflow from API - id: ${workflow.id}, periodStart: $periodStart, periodEnd: $periodEnd, " + "dryrun: ${execWorkflowRequest.dryrun}" ) val workflowRunResult = - MonitorRunnerService.runJob(workflow, periodStart, periodEnd, execWorkflowRequest.dryrun) + MonitorRunnerService.runJob( + workflow, + periodStart, + periodEnd, + execWorkflowRequest.dryrun, + transportService = transportService + ) withContext(Dispatchers.IO, { actionListener.onResponse( ExecuteWorkflowResponse( diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/util/AlertingException.kt b/alerting/src/main/kotlin/org/opensearch/alerting/util/AlertingException.kt index 4127afaa2..3ed7a8674 100644 --- a/alerting/src/main/kotlin/org/opensearch/alerting/util/AlertingException.kt +++ b/alerting/src/main/kotlin/org/opensearch/alerting/util/AlertingException.kt @@ -20,7 +20,7 @@ private val log = LogManager.getLogger(AlertingException::class.java) /** * Converts into a user friendly message. 
 */
-class AlertingException(message: String, val status: RestStatus, ex: Exception) : OpenSearchException(message, ex) {
+class AlertingException(message: String, val status: RestStatus, val ex: Exception) : OpenSearchException(message, ex) {
 
     override fun status(): RestStatus {
         return status
@@ -69,5 +69,21 @@ class AlertingException(message: String, val status: RestStatus, ex: Exception)
         // Below logic is to set friendly message to error.root_cause.reason.
         return AlertingException(friendlyMsg, status, Exception("${ex.javaClass.name}: ${ex.message}"))
     }
+
+    /**
+     * Merges multiple AlertingExceptions into one, concatenating their friendly messages and
+     * their unwrapped causes. The merged exception keeps the RestStatus of the first argument;
+     * callers must pass at least one exception.
+     */
+    @JvmStatic
+    fun merge(vararg ex: AlertingException): AlertingException {
+        var friendlyMsg = ""
+        var unwrappedExceptionMsg = ""
+        ex.forEach {
+            if (friendlyMsg != "") {
+                friendlyMsg += ", ${it.message}"
+                unwrappedExceptionMsg += ", ${it.ex.message}"
+            } else {
+                friendlyMsg = it.message.orEmpty()
+                unwrappedExceptionMsg = "${it.ex.message}"
+            }
+        }
+        return AlertingException(friendlyMsg, ex.first().status, Exception(unwrappedExceptionMsg))
+    }
     }
 }
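A usage note for the new merge helper; the scenario below is illustrative, not part of this diff. It concatenates the friendly messages and the unwrapped causes of its arguments, and requires at least one argument since it calls ex.first():

// Hypothetical usage: collapse several failures (e.g. per-node doc-level fan-out
// failures) into a single AlertingException with one combined message.
val combined = AlertingException.merge(
    AlertingException("node A failed", RestStatus.INTERNAL_SERVER_ERROR, Exception("boom")),
    AlertingException("node B failed", RestStatus.INTERNAL_SERVER_ERROR, Exception("bang"))
)
log.error("fan out failed: ${combined.message}", combined)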
diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/CompositeWorkflowRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/CompositeWorkflowRunner.kt
index a22b09bdc..fc20962e3 100644
--- a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/CompositeWorkflowRunner.kt
+++ b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/CompositeWorkflowRunner.kt
@@ -39,6 +39,7 @@
 import org.opensearch.index.query.QueryBuilders
 import org.opensearch.index.query.QueryBuilders.boolQuery
 import org.opensearch.index.query.QueryBuilders.existsQuery
 import org.opensearch.index.query.QueryBuilders.termsQuery
+import org.opensearch.transport.TransportService
 import java.time.Instant
 import java.time.LocalDateTime
 import java.time.ZoneOffset
@@ -54,6 +55,7 @@ object CompositeWorkflowRunner : WorkflowRunner() {
         periodStart: Instant,
         periodEnd: Instant,
         dryRun: Boolean,
+        transportService: TransportService
     ): WorkflowRunResult {
         val workflowExecutionStartTime = Instant.now()
@@ -135,7 +137,16 @@ object CompositeWorkflowRunner : WorkflowRunner() {
             try {
                 dataSources = delegateMonitor.dataSources
                 val delegateRunResult =
-                    runDelegateMonitor(delegateMonitor, monitorCtx, periodStart, periodEnd, dryRun, workflowRunContext, executionId)
+                    runDelegateMonitor(
+                        delegateMonitor,
+                        monitorCtx,
+                        periodStart,
+                        periodEnd,
+                        dryRun,
+                        workflowRunContext,
+                        executionId,
+                        transportService
+                    )
                 resultList.add(delegateRunResult!!)
             } catch (ex: Exception) {
                 logger.error("Error executing workflow delegate monitor ${delegate.monitorId}", ex)
@@ -238,6 +249,7 @@ object CompositeWorkflowRunner : WorkflowRunner() {
         dryRun: Boolean,
         workflowRunContext: WorkflowRunContext,
         executionId: String,
+        transportService: TransportService
     ): MonitorRunResult<*>? {
         if (delegateMonitor.isBucketLevelMonitor()) {
@@ -248,7 +260,8 @@
                 periodEnd,
                 dryRun,
                 workflowRunContext,
-                executionId
+                executionId,
+                transportService
             )
         } else if (delegateMonitor.isDocLevelMonitor()) {
             return DocumentLevelMonitorRunner().runMonitor(
@@ -258,7 +271,8 @@
                 periodEnd,
                 dryRun,
                 workflowRunContext,
-                executionId
+                executionId,
+                transportService
             )
         } else if (delegateMonitor.isQueryLevelMonitor()) {
             return QueryLevelMonitorRunner.runMonitor(
@@ -268,7 +282,8 @@
                 periodEnd,
                 dryRun,
                 workflowRunContext,
-                executionId
+                executionId,
+                transportService
             )
         } else {
             throw AlertingException.wrap(
diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunContext.kt b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunContext.kt
index 14488a16a..8ebb2be9c 100644
--- a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunContext.kt
+++ b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunContext.kt
@@ -5,11 +5,51 @@
 package org.opensearch.alerting.workflow
 
+import org.opensearch.core.common.io.stream.StreamInput
+import org.opensearch.core.common.io.stream.StreamOutput
+import org.opensearch.core.common.io.stream.Writeable
+import org.opensearch.core.xcontent.ToXContent
+import org.opensearch.core.xcontent.ToXContentObject
+import org.opensearch.core.xcontent.XContentBuilder
+
 data class WorkflowRunContext(
     // In case of dry run it's a randomly generated id, while in other cases it's the workflowId
     val workflowId: String,
     val workflowMetadataId: String,
     val chainedMonitorId: String?,
     val matchingDocIdsPerIndex: Map<String, List<String>>,
-    val auditDelegateMonitorAlerts: Boolean
-)
+    val auditDelegateMonitorAlerts: Boolean,
+) : Writeable, ToXContentObject {
+    companion object {
+        fun readFrom(sin: StreamInput): WorkflowRunContext {
+            return WorkflowRunContext(sin)
+        }
+    }
+
+    constructor(sin: StreamInput) : this(
+        sin.readString(),
+        sin.readString(),
+        sin.readOptionalString(),
+        sin.readMap() as Map<String, List<String>>,
+        sin.readBoolean()
+    )
+
+    override fun writeTo(out: StreamOutput) {
+        out.writeString(workflowId)
+        out.writeString(workflowMetadataId)
+        out.writeOptionalString(chainedMonitorId)
+        out.writeMap(matchingDocIdsPerIndex)
+        out.writeBoolean(auditDelegateMonitorAlerts)
+    }
+
+    override fun toXContent(builder: XContentBuilder, params: ToXContent.Params?): XContentBuilder {
+        builder.startObject()
+            .field("workflow_id", workflowId)
+            .field("workflow_metadata_id", workflowMetadataId)
+            .field("chained_monitor_id", chainedMonitorId)
+            .field("matching_doc_ids_per_index", matchingDocIdsPerIndex)
+            .field("audit_delegate_monitor_alerts", auditDelegateMonitorAlerts)
+            .endObject()
+        return builder
+    }
+}
diff --git a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunner.kt b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunner.kt
index 4b954b168..037ab0592 100644
--- a/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunner.kt
+++ b/alerting/src/main/kotlin/org/opensearch/alerting/workflow/WorkflowRunner.kt
@@ -34,6 +34,7 @@
 import org.opensearch.commons.notifications.model.NotificationConfigInfo
 import org.opensearch.core.common.Strings
 import org.opensearch.script.Script
 import org.opensearch.script.TemplateScript
+import org.opensearch.transport.TransportService
 import java.time.Instant
 
@@ -42,7 +43,8 @@
abstract class WorkflowRunner { monitorCtx: MonitorRunnerExecutionContext, periodStart: Instant, periodEnd: Instant, - dryRun: Boolean + dryRun: Boolean, + transportService: TransportService ): WorkflowRunResult suspend fun runAction( diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/AlertingRestTestCase.kt b/alerting/src/test/kotlin/org/opensearch/alerting/AlertingRestTestCase.kt index f1ae92260..c7d548c42 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/AlertingRestTestCase.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/AlertingRestTestCase.kt @@ -903,6 +903,17 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { return response } + public fun indexDoc(client: RestClient, index: String, doc: String, refresh: Boolean = true): Response { + val requestBody = StringEntity(doc, APPLICATION_JSON) + val params = if (refresh) mapOf("refresh" to "true") else mapOf() + val response = client.makeRequest("POST", "$index/_doc?op_type=create", params, requestBody) + assertTrue( + "Unable to index doc: '${doc.take(15)}...' to index: '$index'", + listOf(RestStatus.OK, RestStatus.CREATED).contains(response.restStatus()) + ) + return response + } + protected fun deleteDoc(index: String, id: String, refresh: Boolean = true): Response { val params = if (refresh) mapOf("refresh" to "true") else mapOf() val response = client().makeRequest("DELETE", "$index/_doc/$id", params) @@ -925,6 +936,20 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { return index } + protected fun createTestIndex(index: String = randomAlphaOfLength(10).lowercase(Locale.ROOT), settings: Settings): String { + createIndex( + index, settings, + """ + "properties" : { + "test_strict_date_time" : { "type" : "date", "format" : "strict_date_time" }, + "test_field" : { "type" : "keyword" }, + "number" : { "type" : "keyword" } + } + """.trimIndent() + ) + return index + } + protected fun createTestIndex(index: String, mapping: String): String { createIndex(index, Settings.EMPTY, mapping.trimIndent()) return index @@ -1023,7 +1048,7 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { client().makeRequest("DELETE", "_data_stream/$datastream") } - protected fun createIndexAlias(alias: String, mappings: String?) { + protected fun createIndexAlias(alias: String, mappings: String?, setting: String? = "") { val indexPattern = "$alias*" var componentTemplateMappings = "\"properties\": {" + " \"netflow.destination_transport_port\":{ \"type\": \"long\" }," + @@ -1032,9 +1057,10 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { if (mappings != null) { componentTemplateMappings = mappings } - createComponentTemplateWithMappings( + createComponentTemplateWithMappingsAndSettings( "my_alias_component_template-$alias", - componentTemplateMappings + componentTemplateMappings, + setting ) createComposableIndexTemplate( "my_index_template_alias-$alias", @@ -1070,6 +1096,17 @@ abstract class AlertingRestTestCase : ODFERestTestCase() { ) } + protected fun createComponentTemplateWithMappingsAndSettings(componentTemplateName: String, mappings: String?, setting: String?) 
{
+        val body = """{"template" : { "mappings": {$mappings}, "settings": {$setting} }}"""
+        client().makeRequest(
+            "PUT",
+            "_component_template/$componentTemplateName",
+            emptyMap(),
+            StringEntity(body, ContentType.APPLICATION_JSON),
+            BasicHeader("Content-Type", "application/json")
+        )
+    }
+
     protected fun createComposableIndexTemplate(
         templateName: String,
         indexPatterns: List<String>,
diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/DocumentMonitorRunnerIT.kt b/alerting/src/test/kotlin/org/opensearch/alerting/DocumentMonitorRunnerIT.kt
index f53438c1f..460e92111 100644
--- a/alerting/src/test/kotlin/org/opensearch/alerting/DocumentMonitorRunnerIT.kt
+++ b/alerting/src/test/kotlin/org/opensearch/alerting/DocumentMonitorRunnerIT.kt
@@ -14,6 +14,7 @@
 import org.opensearch.alerting.core.lock.LockService
 import org.opensearch.alerting.settings.AlertingSettings
 import org.opensearch.client.Response
 import org.opensearch.client.ResponseException
+import org.opensearch.common.settings.Settings
 import org.opensearch.common.xcontent.json.JsonXContent
 import org.opensearch.commons.alerting.model.Alert
 import org.opensearch.commons.alerting.model.DataSources
@@ -2644,9 +2645,107 @@ class DocumentMonitorRunnerIT : AlertingRestTestCase() {
         }
     }
 
-    @Suppress("UNCHECKED_CAST")
-    /** helper that returns a field in a json map whose values are all json objects */
-    private fun Map<String, Any>.objectMap(key: String): Map<String, Map<String, Any>> {
-        return this[key] as Map<String, Map<String, Any>>
+    fun `test document-level monitor fanout which generates alerts and findings`() {
+        val testIndex = createTestIndex(settings = Settings.builder().put("number_of_shards", "7").build())
+        val testTime = DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(ZonedDateTime.now().truncatedTo(MILLIS))
+        val testDoc = """{
+            "message" : "This is an error from IAD region",
+            "test_strict_date_time" : "$testTime",
+            "test_field" : "us-west-2"
+        }"""
+
+        val docQuery = DocLevelQuery(query = "test_field:\"us-west-2\"", fields = listOf(), name = "3")
+        val docLevelInput = DocLevelMonitorInput("description", listOf(testIndex), listOf(docQuery))
+
+        val trigger = randomDocumentLevelTrigger(condition = ALWAYS_RUN)
+        val monitor = createMonitor(
+            randomDocumentLevelMonitor(
+                inputs = listOf(docLevelInput),
+                triggers = listOf(trigger),
+                enabled = true,
+                schedule = IntervalSchedule(1, ChronoUnit.MINUTES)
+            )
+        )
+        assertNotNull(monitor.id)
+
+        indexDoc(testIndex, "1", testDoc)
+        indexDoc(testIndex, "2", testDoc)
+        indexDoc(testIndex, "4", testDoc)
+        indexDoc(testIndex, "5", testDoc)
+        indexDoc(testIndex, "6", testDoc)
+        indexDoc(testIndex, "7", testDoc)
+
+        OpenSearchTestCase.waitUntil(
+            { searchFindings(monitor).size == 6 && searchAlertsWithFilter(monitor).size == 6 }, 2, TimeUnit.MINUTES
+        )
+
+        indexDoc(testIndex, "11", testDoc)
+        indexDoc(testIndex, "12", testDoc)
+        indexDoc(testIndex, "14", testDoc)
+        indexDoc(testIndex, "15", testDoc)
+        indexDoc(testIndex, "16", testDoc)
+        indexDoc(testIndex, "17", testDoc)
+
+        OpenSearchTestCase.waitUntil(
+            { searchFindings(monitor).size == 6 && searchAlertsWithFilter(monitor).size == 6 }, 2, TimeUnit.MINUTES
+        )
+    }
+
+    fun `test document-level monitor fanout when aliases contain indices with multiple shards`() {
+        val aliasName = "test-alias"
+        createIndexAlias(
+            aliasName,
+            """
+                "properties" : {
+                    "test_strict_date_time" : { "type" : "date", "format" : "strict_date_time" },
+                    "test_field" : { "type" : "keyword" },
+                    "number" : { "type" : "keyword" }
+                }
+            """.trimIndent(),
+            "\"index.number_of_shards\": 7"
+        )
+
+        val docQuery =
DocLevelQuery(query = "test_field:\"us-west-2\"", fields = listOf(), name = "3") + val docLevelInput = DocLevelMonitorInput("description", listOf(aliasName), listOf(docQuery)) + + val action = randomAction(template = randomTemplateScript("Hello {{ctx.monitor.name}}"), destinationId = createDestination().id) + val monitor = createMonitor( + randomDocumentLevelMonitor( + inputs = listOf(docLevelInput), + triggers = listOf(randomDocumentLevelTrigger(condition = ALWAYS_RUN, actions = listOf(action))), + enabled = true, + schedule = IntervalSchedule(1, ChronoUnit.MINUTES) + ) + ) + + val testTime = DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(ZonedDateTime.now().truncatedTo(MILLIS)) + val testDoc = """{ + "@timestamp": "$testTime", + "message" : "This is an error from IAD region", + "test_strict_date_time" : "$testTime", + "test_field" : "us-west-2" + }""" + indexDoc(aliasName, "1", testDoc) + indexDoc(aliasName, "2", testDoc) + indexDoc(aliasName, "4", testDoc) + indexDoc(aliasName, "5", testDoc) + indexDoc(aliasName, "6", testDoc) + indexDoc(aliasName, "7", testDoc) + OpenSearchTestCase.waitUntil( + { searchFindings(monitor).size == 6 }, 2, TimeUnit.MINUTES + ) + + rolloverDatastream(aliasName) + indexDoc(aliasName, "11", testDoc) + indexDoc(aliasName, "12", testDoc) + indexDoc(aliasName, "14", testDoc) + indexDoc(aliasName, "15", testDoc) + indexDoc(aliasName, "16", testDoc) + indexDoc(aliasName, "17", testDoc) + OpenSearchTestCase.waitUntil( + { searchFindings(monitor).size == 6 }, 2, TimeUnit.MINUTES + ) + + deleteDataStream(aliasName) } } diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/MonitorDataSourcesIT.kt b/alerting/src/test/kotlin/org/opensearch/alerting/MonitorDataSourcesIT.kt index 4c273361d..7480a02c7 100644 --- a/alerting/src/test/kotlin/org/opensearch/alerting/MonitorDataSourcesIT.kt +++ b/alerting/src/test/kotlin/org/opensearch/alerting/MonitorDataSourcesIT.kt @@ -843,7 +843,10 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { .get() Assert.assertTrue(getAlertsResponse != null) Assert.assertTrue(getAlertsResponse.alerts.size == 1) - Assert.assertTrue(getAlertsResponse.alerts[0].errorMessage == "IndexClosedException[closed]") + Assert.assertTrue( + getAlertsResponse.alerts[0].errorMessage == + "AlertingException[closed]; nested: Exception[org.opensearch.indices.IndexClosedException: closed]; " + ) // Reopen index client().admin().indices().open(OpenIndexRequest(index)).get() // Close queryIndex @@ -861,7 +864,10 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { .get() Assert.assertTrue(getAlertsResponse != null) Assert.assertTrue(getAlertsResponse.alerts.size == 1) - Assert.assertTrue(getAlertsResponse.alerts[0].errorHistory[0].message == "IndexClosedException[closed]") + Assert.assertTrue( + getAlertsResponse.alerts[0].errorHistory[0].message == + "AlertingException[closed]; nested: Exception[org.opensearch.indices.IndexClosedException: closed]; " + ) Assert.assertEquals(1, getAlertsResponse.alerts[0].errorHistory.size) Assert.assertTrue(getAlertsResponse.alerts[0].errorMessage!!.contains("Failed to run percolate search")) } @@ -950,7 +956,10 @@ class MonitorDataSourcesIT : AlertingSingleNodeTestCase() { .get() Assert.assertTrue(getAlertsResponse != null) Assert.assertEquals(1, getAlertsResponse.alerts.size) - Assert.assertTrue(getAlertsResponse.alerts[0].errorMessage == "IndexClosedException[closed]") + Assert.assertTrue( + getAlertsResponse.alerts[0].errorMessage == + "AlertingException[closed]; nested: 
Exception[org.opensearch.indices.IndexClosedException: closed]; "
+            )
         Assert.assertNull(getAlertsResponse.alerts[0].endTime)
 
         // Open index to have monitor run successfully
@@ -973,7 +982,10 @@
             .get()
         Assert.assertTrue(getAlertsResponse != null)
         Assert.assertEquals(1, getAlertsResponse.alerts.size)
-        Assert.assertTrue(getAlertsResponse.alerts[0].errorMessage == "IndexClosedException[closed]")
+        Assert.assertTrue(
+            getAlertsResponse.alerts[0].errorMessage ==
+                "AlertingException[closed]; nested: Exception[org.opensearch.indices.IndexClosedException: closed]; "
+        )
         Assert.assertNotNull(getAlertsResponse.alerts[0].endTime)
     }
@@ -1035,7 +1047,10 @@
 
         Assert.assertTrue(getAlertsResponse != null)
         Assert.assertEquals(1 + 10, getAlertsResponse.alerts.size)
-        val newErrorAlert = getAlertsResponse.alerts.firstOrNull { it.errorMessage == "IndexClosedException[closed]" }
+        val newErrorAlert = getAlertsResponse.alerts.firstOrNull {
+            it.errorMessage ==
+                "AlertingException[closed]; nested: Exception[org.opensearch.indices.IndexClosedException: closed]; "
+        }
         Assert.assertNotNull(newErrorAlert)
         Assert.assertNull(newErrorAlert!!.endTime)
diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/TestHelpers.kt b/alerting/src/test/kotlin/org/opensearch/alerting/TestHelpers.kt
index e99c9635e..6a66579ef 100644
--- a/alerting/src/test/kotlin/org/opensearch/alerting/TestHelpers.kt
+++ b/alerting/src/test/kotlin/org/opensearch/alerting/TestHelpers.kt
@@ -802,3 +802,9 @@ fun randomAlertContext(
         sampleDocs = sampleDocs
     )
 }
+
+@Suppress("UNCHECKED_CAST")
+/** helper that returns a field in a json map whose values are all json objects */
+fun Map<String, Any>.objectMap(key: String): Map<String, Map<String, Any>> {
+    return this[key] as Map<String, Map<String, Any>>
+}
diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutRequestTests.kt b/alerting/src/test/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutRequestTests.kt
new file mode 100644
index 000000000..05d59063c
--- /dev/null
+++ b/alerting/src/test/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutRequestTests.kt
@@ -0,0 +1,90 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.alerting.action
+
+import org.opensearch.alerting.ALWAYS_RUN
+import org.opensearch.alerting.model.ActionExecutionTime
+import org.opensearch.alerting.model.IndexExecutionContext
+import org.opensearch.alerting.model.MonitorMetadata
+import org.opensearch.alerting.randomDocumentLevelMonitor
+import org.opensearch.alerting.randomDocumentLevelTrigger
+import org.opensearch.alerting.workflow.WorkflowRunContext
+import org.opensearch.common.io.stream.BytesStreamOutput
+import org.opensearch.commons.alerting.model.DocLevelMonitorInput
+import org.opensearch.commons.alerting.model.DocLevelQuery
+import org.opensearch.commons.alerting.model.IntervalSchedule
+import org.opensearch.commons.alerting.model.Monitor
+import org.opensearch.commons.alerting.model.Workflow
+import org.opensearch.core.common.io.stream.StreamInput
+import org.opensearch.core.index.shard.ShardId
+import org.opensearch.index.seqno.SequenceNumbers
+import org.opensearch.test.OpenSearchTestCase
+import java.time.Instant
+import java.time.temporal.ChronoUnit
+import java.util.UUID
+
+class DocLevelMonitorFanOutRequestTests : OpenSearchTestCase() {
+
+    fun `test doc level monitor fan out request as stream`() {
+        val docQuery = DocLevelQuery(query =
"test_field:\"us-west-2\"", fields = listOf(), name = "3") + val docLevelInput = DocLevelMonitorInput("description", listOf("test-index"), listOf(docQuery)) + + val trigger = randomDocumentLevelTrigger(condition = ALWAYS_RUN) + val monitor = randomDocumentLevelMonitor( + inputs = listOf(docLevelInput), + triggers = listOf(trigger), + enabled = true, + schedule = IntervalSchedule(1, ChronoUnit.MINUTES) + ) + val monitorMetadata = MonitorMetadata( + "test", + SequenceNumbers.UNASSIGNED_SEQ_NO, + SequenceNumbers.UNASSIGNED_PRIMARY_TERM, + Monitor.NO_ID, + listOf(ActionExecutionTime("", Instant.now())), + mutableMapOf("index" to mutableMapOf("1" to "1")), + mutableMapOf("test-index" to ".opensearch-sap-test_windows-queries-000001") + ) + val indexExecutionContext = IndexExecutionContext( + listOf(docQuery), + mutableMapOf("index" to mutableMapOf("1" to "1")), + mutableMapOf("index" to mutableMapOf("1" to "1")), + "test-index", + "test-index", + listOf("test-index"), + listOf("test-index"), + listOf("test-field"), + listOf("1", "2") + ) + val workflowRunContext = WorkflowRunContext( + Workflow.NO_ID, + Workflow.NO_ID, + Monitor.NO_ID, + mutableMapOf("index" to listOf("1")), + true + ) + val docLevelMonitorFanOutRequest = DocLevelMonitorFanOutRequest( + monitor, + false, + monitorMetadata, + UUID.randomUUID().toString(), + indexExecutionContext, + listOf(ShardId("test-index", UUID.randomUUID().toString(), 0)), + listOf("test-index"), + workflowRunContext + ) + val out = BytesStreamOutput() + docLevelMonitorFanOutRequest.writeTo(out) + val sin = StreamInput.wrap(out.bytes().toBytesRef().bytes) + val newDocLevelMonitorFanOutRequest = DocLevelMonitorFanOutRequest(sin) + assertEquals(docLevelMonitorFanOutRequest.monitor, newDocLevelMonitorFanOutRequest.monitor) + assertEquals(docLevelMonitorFanOutRequest.executionId, newDocLevelMonitorFanOutRequest.executionId) + assertEquals(docLevelMonitorFanOutRequest.monitorMetadata, newDocLevelMonitorFanOutRequest.monitorMetadata) + assertEquals(docLevelMonitorFanOutRequest.indexExecutionContext, newDocLevelMonitorFanOutRequest.indexExecutionContext) + assertEquals(docLevelMonitorFanOutRequest.shardIds, newDocLevelMonitorFanOutRequest.shardIds) + assertEquals(docLevelMonitorFanOutRequest.workflowRunContext, newDocLevelMonitorFanOutRequest.workflowRunContext) + } +} diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutResponseTests.kt b/alerting/src/test/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutResponseTests.kt new file mode 100644 index 000000000..c3b1d38a3 --- /dev/null +++ b/alerting/src/test/kotlin/org/opensearch/alerting/action/DocLevelMonitorFanOutResponseTests.kt @@ -0,0 +1,56 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.alerting.action + +import org.opensearch.alerting.model.InputRunResults +import org.opensearch.alerting.randomDocumentLevelTriggerRunResult +import org.opensearch.common.io.stream.BytesStreamOutput +import org.opensearch.core.common.io.stream.StreamInput +import org.opensearch.test.OpenSearchTestCase + +class DocLevelMonitorFanOutResponseTests : OpenSearchTestCase() { + fun `test doc level monitor fan out response with errors as stream`() { + val docLevelMonitorFanOutResponse = DocLevelMonitorFanOutResponse( + "nodeid", + "eid", + "monitorId", + mutableMapOf("index" to mutableMapOf("1" to "1")), + InputRunResults(error = null), + mapOf("1" to randomDocumentLevelTriggerRunResult(), "2" to 
randomDocumentLevelTriggerRunResult())
+        )
+        val out = BytesStreamOutput()
+        docLevelMonitorFanOutResponse.writeTo(out)
+        val sin = StreamInput.wrap(out.bytes().toBytesRef().bytes)
+        val newDocLevelMonitorFanOutResponse = DocLevelMonitorFanOutResponse(sin)
+        assertEquals(docLevelMonitorFanOutResponse.nodeId, newDocLevelMonitorFanOutResponse.nodeId)
+        assertEquals(docLevelMonitorFanOutResponse.executionId, newDocLevelMonitorFanOutResponse.executionId)
+        assertEquals(docLevelMonitorFanOutResponse.monitorId, newDocLevelMonitorFanOutResponse.monitorId)
+        assertEquals(docLevelMonitorFanOutResponse.lastRunContexts, newDocLevelMonitorFanOutResponse.lastRunContexts)
+        assertEquals(docLevelMonitorFanOutResponse.inputResults, newDocLevelMonitorFanOutResponse.inputResults)
+        assertEquals(docLevelMonitorFanOutResponse.triggerResults, newDocLevelMonitorFanOutResponse.triggerResults)
+    }
+
+    fun `test doc level monitor fan out response as stream`() {
+        val response = DocLevelMonitorFanOutResponse(
+            "nodeid",
+            "eid",
+            "monitorId",
+            mapOf("index" to mapOf("1" to "1")) as MutableMap<String, Any>,
+            InputRunResults(),
+            mapOf("1" to randomDocumentLevelTriggerRunResult(), "2" to randomDocumentLevelTriggerRunResult())
+        )
+        val out = BytesStreamOutput()
+        response.writeTo(out)
+        val sin = StreamInput.wrap(out.bytes().toBytesRef().bytes)
+        val newResponse = DocLevelMonitorFanOutResponse(sin)
+        assertEquals(response.nodeId, newResponse.nodeId)
+        assertEquals(response.executionId, newResponse.executionId)
+        assertEquals(response.monitorId, newResponse.monitorId)
+        assertEquals(response.lastRunContexts, newResponse.lastRunContexts)
+        assertEquals(response.inputResults, newResponse.inputResults)
+        assertEquals(response.triggerResults, newResponse.triggerResults)
+    }
+}
diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/action/GetFindingsRequestTests.kt b/alerting/src/test/kotlin/org/opensearch/alerting/action/GetFindingsRequestTests.kt
index d1bd6f7e3..990da66fa 100644
--- a/alerting/src/test/kotlin/org/opensearch/alerting/action/GetFindingsRequestTests.kt
+++ b/alerting/src/test/kotlin/org/opensearch/alerting/action/GetFindingsRequestTests.kt
@@ -9,6 +9,7 @@
 import org.opensearch.common.io.stream.BytesStreamOutput
 import org.opensearch.commons.alerting.action.GetFindingsRequest
 import org.opensearch.commons.alerting.model.Table
 import org.opensearch.core.common.io.stream.StreamInput
+import org.opensearch.index.query.BoolQueryBuilder
 import org.opensearch.test.OpenSearchTestCase
 
 class GetFindingsRequestTests : OpenSearchTestCase() {
@@ -17,7 +18,13 @@
 
         val table = Table("asc", "sortString", null, 1, 0, "")
 
-        val req = GetFindingsRequest("2121", table, "1", "finding_index_name")
+        val req = GetFindingsRequest(
+            "2121",
+            table,
+            "1",
+            "finding_index_name",
+            boolQueryBuilder = BoolQueryBuilder()
+        )
         assertNotNull(req)
 
         val out = BytesStreamOutput()
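All of the new stream tests follow one round-trip shape: write the object to a BytesStreamOutput, wrap the resulting bytes in a StreamInput, and reconstruct. A generic helper capturing that shape (hypothetical, not part of this change; it uses only the calls visible in the tests above):

// Hypothetical helper for the serialize/deserialize round trip used by the
// fan-out request/response tests. T is any Writeable with a StreamInput reader.
fun <T : Writeable> roundTrip(original: T, reader: (StreamInput) -> T): T {
    val out = BytesStreamOutput()
    original.writeTo(out)
    val sin = StreamInput.wrap(out.bytes().toBytesRef().bytes)
    return reader(sin)
}

// Usage: assertEquals(request, roundTrip(request) { DocLevelMonitorFanOutRequest(it) })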
diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/bwc/AlertingBackwardsCompatibilityIT.kt b/alerting/src/test/kotlin/org/opensearch/alerting/bwc/AlertingBackwardsCompatibilityIT.kt
index 2c77fd480..280074964 100644
--- a/alerting/src/test/kotlin/org/opensearch/alerting/bwc/AlertingBackwardsCompatibilityIT.kt
+++ b/alerting/src/test/kotlin/org/opensearch/alerting/bwc/AlertingBackwardsCompatibilityIT.kt
@@ -8,15 +8,27 @@
 package org.opensearch.alerting.bwc
 
 import org.apache.hc.core5.http.ContentType.APPLICATION_JSON
 import org.apache.hc.core5.http.io.entity.StringEntity
 import org.opensearch.alerting.ALERTING_BASE_URI
+import org.opensearch.alerting.ALWAYS_RUN
 import org.opensearch.alerting.AlertingRestTestCase
 import org.opensearch.alerting.makeRequest
+import org.opensearch.alerting.objectMap
+import org.opensearch.alerting.randomDocumentLevelMonitor
+import org.opensearch.alerting.randomDocumentLevelTrigger
+import org.opensearch.client.Node
+import org.opensearch.client.Request
 import org.opensearch.common.settings.Settings
 import org.opensearch.common.xcontent.XContentType
+import org.opensearch.commons.alerting.model.DocLevelMonitorInput
+import org.opensearch.commons.alerting.model.DocLevelQuery
+import org.opensearch.commons.alerting.model.IntervalSchedule
 import org.opensearch.commons.alerting.model.Monitor
 import org.opensearch.core.rest.RestStatus
 import org.opensearch.index.query.QueryBuilders
 import org.opensearch.search.builder.SearchSourceBuilder
 import org.opensearch.test.OpenSearchTestCase
+import java.time.ZonedDateTime
+import java.time.format.DateTimeFormatter
+import java.time.temporal.ChronoUnit
 import java.util.concurrent.TimeUnit
 
 class AlertingBackwardsCompatibilityIT : AlertingRestTestCase() {
@@ -92,6 +104,32 @@ class AlertingBackwardsCompatibilityIT : AlertingRestTestCase() {
         }
     }
 
+    @Throws(Exception::class)
+    @Suppress("UNCHECKED_CAST")
+    fun `test backwards compatibility for doc-level monitors`() {
+        val uri = getPluginUri()
+        val responseMap = getAsMap(uri)["nodes"] as Map<String, Map<String, Any>>
+        for (response in responseMap.values) {
+            val plugins = response["plugins"] as List<Map<String, Any>>
+            val pluginNames = plugins.map { plugin -> plugin["name"] }.toSet()
+            when (CLUSTER_TYPE) {
+                ClusterType.OLD -> {
+                    assertTrue(pluginNames.contains("opensearch-alerting"))
+                    createDocLevelMonitor()
+                }
+                ClusterType.MIXED -> {
+                    assertTrue(pluginNames.contains("opensearch-alerting"))
+                    verifyMonitorExecutionSuccess()
+                }
+                ClusterType.UPGRADED -> {
+                    assertTrue(pluginNames.contains("opensearch-alerting"))
+                    verifyMonitorExecutionSuccess()
+                }
+            }
+            break
+        }
+    }
+
     private enum class ClusterType {
         OLD,
         MIXED,
@@ -179,6 +217,25 @@
         assertTrue("Create monitor response has incorrect version", createdVersion > 0)
     }
 
+    private fun createDocLevelMonitor(): Pair<Monitor, String> {
+        val testIndex = createTestIndex(index = "test-index", settings = Settings.builder().put("number_of_shards", "7").build())
+        val docQuery = DocLevelQuery(id = "4", query = "test_field:\"us-west-2\"", fields = listOf(), name = "4")
+        val docLevelInput = DocLevelMonitorInput("description", listOf(testIndex), listOf(docQuery))
+
+        val trigger = randomDocumentLevelTrigger(condition = ALWAYS_RUN)
+        val monitor = createMonitor(
+            randomDocumentLevelMonitor(
+                name = "test-monitor",
+                inputs = listOf(docLevelInput),
+                triggers = listOf(trigger),
+                enabled = true,
+                schedule = IntervalSchedule(1, ChronoUnit.MINUTES)
+            )
+        )
+        assertNotNull(monitor.id)
+        return Pair(monitor, testIndex)
+    }
+
     @Throws(Exception::class)
     @Suppress("UNCHECKED_CAST")
     private fun verifyMonitorExists(uri: String) {
@@ -193,7 +250,7 @@
         val xcp = createParser(XContentType.JSON.xContent(), searchResponse.entity.content)
         val hits = xcp.map()["hits"]!! as Map<String, Map<String, Any>>
         val numberDocsFound = hits["total"]?.get("value")
-        assertEquals("Unexpected number of Monitors returned", 1, numberDocsFound)
+        assertEquals("Unexpected number of Monitors returned", 2, numberDocsFound)
     }
 
     @Throws(Exception::class)
@@ -219,4 +276,56 @@
         assertEquals("Some nodes in stats response failed", totalNodes, successfulNodes)
         assertEquals("Not all nodes are on schedule", totalNodes, nodesOnSchedule)
     }
+
+    @Suppress("UNCHECKED_CAST")
+    private fun verifyMonitorExecutionSuccess() {
+        val nodes = mutableListOf<Node>()
+        val nodesResponse = client().performRequest(Request("GET", "/_nodes")).asMap()
+        (nodesResponse["nodes"] as Map<String, Any>).forEach { nodeId, nodeInfo ->
+            val host = ((nodeInfo as Map<String, Any>)["http"] as Map<String, Any>)["publish_address"]
+            val version = nodeInfo["version"].toString()
+            if (nodes.isEmpty() && version >= "3.0.0") {
+                logger.info("use node-$nodeId")
+                client().nodes.forEach {
+                    if (it.host.toHostString().contains(host.toString())) {
+                        nodes.add(it)
+                    }
+                }
+            }
+        }
+        val client = client()
+        client.setNodes(nodes)
+        val searchResponse = searchMonitors()
+        val monitorId = searchResponse.hits.hits.filter { it.sourceAsMap["name"] == "test-monitor" }.first().id
+        val testIndex = "test-index"
+
+        val testTime = DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(ZonedDateTime.now().truncatedTo(ChronoUnit.MILLIS))
+        val testDoc = """{
+            "message" : "This is an error from IAD region",
+            "test_strict_date_time" : "$testTime",
+            "test_field" : "us-west-2"
+        }"""
+
+        indexDoc(client(), testIndex, testDoc)
+
+        var passed = false
+        OpenSearchTestCase.waitUntil({
+            try {
+                val response = executeMonitor(client, monitorId)
+                val output = entityAsMap(response)
+                assertEquals("test-monitor", output["monitor_name"])
+                @Suppress("UNCHECKED_CAST")
+                val searchResult = (output.objectMap("input_results")["results"] as List<Map<String, Any>>).first()
+                @Suppress("UNCHECKED_CAST")
+                val matchingDocsToQuery = searchResult["4"] as List<String>
+                passed = matchingDocsToQuery.isNotEmpty()
+                return@waitUntil true
+            } catch (e: AssertionError) {
+                return@waitUntil false
+            }
+        }, 1, TimeUnit.MINUTES)
+        assertTrue("Monitor execution did not find matching documents before the timeout", passed)
+    }
 }
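One caveat on the polling in these tests: OpenSearchTestCase.waitUntil returns whether the condition was met before the timeout, and several of the new fan-out tests discard that value, so a timeout alone does not fail them. A stricter variant, using the same calls as above, would be:

// Sketch: assert the result of waitUntil instead of discarding it.
val conditionMet = OpenSearchTestCase.waitUntil(
    { searchFindings(monitor).size == 6 },
    2, TimeUnit.MINUTES
)
assertTrue("Timed out waiting for the expected findings", conditionMet)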
diff --git a/alerting/src/test/kotlin/org/opensearch/alerting/model/WriteableTests.kt b/alerting/src/test/kotlin/org/opensearch/alerting/model/WriteableTests.kt
index 6ef15f8d8..c1d3ac304 100644
--- a/alerting/src/test/kotlin/org/opensearch/alerting/model/WriteableTests.kt
+++ b/alerting/src/test/kotlin/org/opensearch/alerting/model/WriteableTests.kt
@@ -5,23 +5,24 @@
 package org.opensearch.alerting.model
 
+import org.junit.Assert
 import org.opensearch.alerting.model.destination.email.EmailAccount
 import org.opensearch.alerting.model.destination.email.EmailGroup
-import org.opensearch.alerting.randomActionRunResult
 import org.opensearch.alerting.randomBucketLevelMonitorRunResult
 import org.opensearch.alerting.randomBucketLevelTriggerRunResult
 import org.opensearch.alerting.randomDocumentLevelMonitorRunResult
-import org.opensearch.alerting.randomDocumentLevelTriggerRunResult
 import org.opensearch.alerting.randomEmailAccount
 import org.opensearch.alerting.randomEmailGroup
 import org.opensearch.alerting.randomInputRunResults
 import org.opensearch.alerting.randomQueryLevelMonitorRunResult
 import org.opensearch.alerting.randomQueryLevelTriggerRunResult
+import org.opensearch.common.UUIDs
 import org.opensearch.common.io.stream.BytesStreamOutput
 import org.opensearch.commons.alerting.model.SearchInput
 import org.opensearch.core.common.io.stream.StreamInput
 import org.opensearch.search.builder.SearchSourceBuilder
 import org.opensearch.test.OpenSearchTestCase
+import java.time.Instant
 
 class WriteableTests : OpenSearchTestCase() {
@@ -126,4 +127,35 @@ class WriteableTests : OpenSearchTestCase() {
         val newEmailGroup = EmailGroup.readFrom(sin)
         assertEquals("Round tripping EmailGroup doesn't work", emailGroup, newEmailGroup)
     }
+
+    fun `test DocumentLevelTriggerRunResult as stream`() {
+        val runResult = randomDocumentLevelTriggerRunResult()
+        val out = BytesStreamOutput()
+        runResult.writeTo(out)
+        val sin = StreamInput.wrap(out.bytes().toBytesRef().bytes)
+        val newRunResult = DocumentLevelTriggerRunResult(sin)
+        Assert.assertEquals("Round tripping DocumentLevelTriggerRunResult doesn't work", newRunResult, runResult)
+    }
+
+    fun randomDocumentLevelTriggerRunResult(): DocumentLevelTriggerRunResult {
+        val map = mutableMapOf<String, ActionRunResult>()
+        map["key1"] = randomActionRunResult()
+        map["key2"] = randomActionRunResult()
+        return DocumentLevelTriggerRunResult(
+            "trigger-name",
+            mutableListOf(UUIDs.randomBase64UUID().toString()),
+            null,
+            mutableMapOf(Pair("alertId", map))
+        )
+    }
+
+    fun randomActionRunResult(): ActionRunResult {
+        val map = mutableMapOf<String, String>()
+        map["key1"] = "val1"
+        map["key2"] = "val2"
+        return ActionRunResult(
+            "1234", "test-action", map,
+            false, Instant.now(), null
+        )
+    }
 }
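A note on the helpers above: Kotlin's Map.plus returns a new map and leaves the receiver unchanged, which is why indexed assignment must be used to populate the maps; a plus call whose result is discarded silently builds nothing:

// Map.plus vs. indexed assignment on a MutableMap.
val m = mutableMapOf<String, Int>()
m.plus("a" to 1)   // returns a new map; m is still empty
m["a"] = 1         // mutates m in place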
queryToDocIds[DocLevelQuery("id1456", "sigma-456", listOf(), "", emptyList())] = mutableSetOf("1", "2", "3") Assert.assertEquals("query[name=sigma-123] query[id=id1456] || ", equation.toString()) Assert.assertEquals(mutableSetOf("6", "3", "7", "1", "2", "3"), equation.evaluate(queryToDocIds)) } @@ -67,7 +67,7 @@ class TriggerExpressionResolverTests : OpenSearchTestCase() { val equation = TriggerExpressionParser(eqString).parse() val queryToDocIds = mutableMapOf>() queryToDocIds[DocLevelQuery("", "sigma-123", listOf(), "", emptyList())] = mutableSetOf("6", "8", "7") - queryToDocIds[DocLevelQuery("", "", listOf(), "", mutableListOf("tag=sev2"))] = emptySet() + queryToDocIds[DocLevelQuery("", "sigma-456", listOf(), "", mutableListOf("tag=sev2"))] = emptySet() Assert.assertEquals("query[name=sigma-123] query[tag=sev2] || ", equation.toString()) Assert.assertEquals(mutableSetOf("6", "8", "7"), equation.evaluate(queryToDocIds)) } @@ -88,7 +88,7 @@ class TriggerExpressionResolverTests : OpenSearchTestCase() { val queryToDocIds = mutableMapOf>() queryToDocIds[DocLevelQuery("", "sigma-123", listOf(), "", emptyList())] = mutableSetOf("1", "2", "3", "11") queryToDocIds[DocLevelQuery("", "sigma-456", listOf(), "", emptyList())] = mutableSetOf("3", "4", "5") - queryToDocIds[DocLevelQuery("id_new", "", listOf(), "", emptyList())] = mutableSetOf("11", "12", "13") + queryToDocIds[DocLevelQuery("id_new", "sigma-789", listOf(), "", emptyList())] = mutableSetOf("11", "12", "13") Assert.assertEquals("query[name=sigma-123] query[name=sigma-456] ! && ", equation.toString()) Assert.assertEquals(mutableSetOf("1", "2", "11"), equation.evaluate(queryToDocIds)) } @@ -98,8 +98,8 @@ class TriggerExpressionResolverTests : OpenSearchTestCase() { val equation = TriggerExpressionParser(eqString).parse() val queryToDocIds = mutableMapOf>() queryToDocIds[DocLevelQuery("", "sigma-123", listOf(), "", emptyList())] = mutableSetOf("6", "3", "7") - queryToDocIds[DocLevelQuery("id1456", "", listOf(), "", emptyList())] = mutableSetOf("11", "12", "15") - queryToDocIds[DocLevelQuery("id_new", "", listOf(), "", emptyList())] = mutableSetOf("11", "12", "13") + queryToDocIds[DocLevelQuery("id1456", "sigma-456", listOf(), "", emptyList())] = mutableSetOf("11", "12", "15") + queryToDocIds[DocLevelQuery("id_new", "sigma-789", listOf(), "", emptyList())] = mutableSetOf("11", "12", "13") Assert.assertEquals("query[name=sigma-123] query[id=id1456] ! || ", equation.toString()) Assert.assertEquals(mutableSetOf("6", "3", "7", "13"), equation.evaluate(queryToDocIds)) } @@ -110,9 +110,9 @@ class TriggerExpressionResolverTests : OpenSearchTestCase() { val queryToDocIds = mutableMapOf>() queryToDocIds[DocLevelQuery("", "sigma-123", listOf(), "", emptyList())] = mutableSetOf("1", "2", "3") - queryToDocIds[DocLevelQuery("id_random1", "", listOf(), "", mutableListOf("sev1"))] = mutableSetOf("2", "3", "4") + queryToDocIds[DocLevelQuery("id_random1", "sigma-456", listOf(), "", mutableListOf("sev1"))] = mutableSetOf("2", "3", "4") queryToDocIds[DocLevelQuery("", "sigma-789", listOf(), "", emptyList())] = mutableSetOf("11", "12", "13") - queryToDocIds[DocLevelQuery("id-2aw34", "", listOf(), "", emptyList())] = mutableSetOf("13", "14", "15") + queryToDocIds[DocLevelQuery("id-2aw34", "sigma-101112", listOf(), "", emptyList())] = mutableSetOf("13", "14", "15") Assert.assertEquals( "query[name=sigma-123] query[tag=sev1] && query[name=sigma-789] ! query[id=id-2aw34] || ! 
|| ", diff --git a/alerting/src/test/resources/sample.pem b/alerting/src/test/resources/sample.pem index a1fc20a77..b690a603d 100644 --- a/alerting/src/test/resources/sample.pem +++ b/alerting/src/test/resources/sample.pem @@ -1,9 +1,9 @@ -----BEGIN CERTIFICATE----- -MIIEPDCCAySgAwIBAgIUZjrlDPP8azRDPZchA/XEsx0X2iIwDQYJKoZIhvcNAQEL +MIIEPDCCAySgAwIBAgIUaYSlET3nzsotWTrWueVPPh10yLYwDQYJKoZIhvcNAQEL BQAwgY8xEzARBgoJkiaJk/IsZAEZFgNjb20xFzAVBgoJkiaJk/IsZAEZFgdleGFt cGxlMRkwFwYDVQQKDBBFeGFtcGxlIENvbSBJbmMuMSEwHwYDVQQLDBhFeGFtcGxl IENvbSBJbmMuIFJvb3QgQ0ExITAfBgNVBAMMGEV4YW1wbGUgQ29tIEluYy4gUm9v -dCBDQTAeFw0yMzA4MjkwNDIzMTJaFw0zMzA4MjYwNDIzMTJaMFcxCzAJBgNVBAYT +dCBDQTAeFw0yNDAyMjAxNzAzMjVaFw0zNDAyMTcxNzAzMjVaMFcxCzAJBgNVBAYT AmRlMQ0wCwYDVQQHDAR0ZXN0MQ0wCwYDVQQKDARub2RlMQ0wCwYDVQQLDARub2Rl MRswGQYDVQQDDBJub2RlLTAuZXhhbXBsZS5jb20wggEiMA0GCSqGSIb3DQEBAQUA A4IBDwAwggEKAoIBAQCm93kXteDQHMAvbUPNPW5pyRHKDD42XGWSgq0k1D29C/Ud @@ -16,10 +16,10 @@ BEAwPogFKgMEBQWCEm5vZGUtMC5leGFtcGxlLmNvbYIJbG9jYWxob3N0hxAAAAAA AAAAAAAAAAAAAAABhwR/AAABMAsGA1UdDwQEAwIF4DAdBgNVHSUEFjAUBggrBgEF BQcDAQYIKwYBBQUHAwIwDAYDVR0TAQH/BAIwADAdBgNVHQ4EFgQU0/qDQaY10jIo wCjLUpz/HfQXyt8wHwYDVR0jBBgwFoAUF4ffoFrrZhKn1dD4uhJFPLcrAJwwDQYJ -KoZIhvcNAQELBQADggEBAD2hkndVih6TWxoe/oOW0i2Bq7ScNO/n7/yHWL04HJmR -MaHv/Xjc8zLFLgHuHaRvC02ikWIJyQf5xJt0Oqu2GVbqXH9PBGKuEP2kCsRRyU27 -zTclAzfQhqmKBTYQ/3lJ3GhRQvXIdYTe+t4aq78TCawp1nSN+vdH/1geG6QjMn5N -1FU8tovDd4x8Ib/0dv8RJx+n9gytI8n/giIaDCEbfLLpe4EkV5e5UNpOnRgJjjuy -vtZutc81TQnzBtkS9XuulovDE0qI+jQrKkKu8xgGLhgH0zxnPkKtUg2I3Aq6zl1L -zYkEOUF8Y25J6WeY88Yfnc0iigI+Pnz5NK8R9GL7TYo= +KoZIhvcNAQELBQADggEBAGbij5WyF0dKhQodQfTiFDb73ygU6IyeJkFSnxF67gDz +pQJZKFvXuVBa3cGP5e7Qp3TK50N+blXGH0xXeIV9lXeYUk4hVfBlp9LclZGX8tGi +7Xa2enMvIt5q/Yg3Hh755ZxnDYxCoGkNOXUmnMusKstE0YzvZ5Gv6fcRKFBUgZLh +hUBqIEAYly1EqH/y45APiRt3Nor1yF6zEI4TnL0yNrHw6LyQkUNCHIGMJLfnJQ9L +camMGIXOx60kXNMTigF9oXXwixWAnDM9y3QT8QXA7hej/4zkbO+vIeV/7lGUdkyg +PAi92EvyxmsliEMyMR0VINl8emyobvfwa7oMeWMR+hg= -----END CERTIFICATE----- diff --git a/alerting/src/test/resources/test-kirk.jks b/alerting/src/test/resources/test-kirk.jks index 6dbc51e71..6c8c5ef77 100644 Binary files a/alerting/src/test/resources/test-kirk.jks and b/alerting/src/test/resources/test-kirk.jks differ