Add per-service LLM usage metrics (tokens, cost, request count).

Summary of this patch:
  * LLMDcUtil: adds name/description/unit constants for seven
    llm.service.* metrics (cost, input/output cost, total/input/output
    tokens, request count).
  * LLMRawMetricRegistry: registers those seven metrics next to the existing
    per-model ones.
  * LLMDc: every OtelMetric is now aggregated twice - once per model (as
    before) and once per (service name, model). collectData() is split into
    updateModelAggregation(), processModelMetrics() and
    processServiceMetrics(); the token-price lookup (exact
    "<aiSystem>.<modelId>.<io>" key, then "<aiSystem>.*.<io>", then 0.0) and
    the FORCE_BACKWARD_COMPATIBLE check move into getTokenPrice() and
    isForceBackwardCompatible().
  * MetricsCollectorService: OtelMetric carries the service name passed to
    processTokenMetric()/processDurationMetric().

Review notes:
  * Costs are multiplied by 10000 before being reported unless
    FORCE_BACKWARD_COMPATIBLE=true (see the in-code comment).
  * NOTE(review): the llm.service.* metrics are registered with "model_id" as
    the last RawMetric constructor argument, while their data points are
    looked up by service name and carry a "service_name" attribute - confirm
    that this is intended.

diff --git a/llm/src/main/java/com/instana/dc/llm/LLMDcUtil.java b/llm/src/main/java/com/instana/dc/llm/LLMDcUtil.java
index 4474d75..32f2461 100644
--- a/llm/src/main/java/com/instana/dc/llm/LLMDcUtil.java
+++ b/llm/src/main/java/com/instana/dc/llm/LLMDcUtil.java
@@ -70,4 +70,32 @@ public class LLMDcUtil {
     public static final String LLM_REQ_COUNT_NAME = "llm.request.count";
     public static final String LLM_REQ_COUNT_DESC = "The total count of watsonx calls by interval";
     public static final String LLM_REQ_COUNT_UNIT = "{count}";
+
+    public static final String LLM_SERVICE_COST_NAME = "llm.service.usage.cost";
+    public static final String LLM_SERVICE_COST_DESC = "The total cost of watsonx calls by interval";
+    public static final String LLM_SERVICE_COST_UNIT = "{cost}";
+
+    public static final String LLM_SERVICE_INPUT_COST_NAME = "llm.service.usage.input_cost";
+    public static final String LLM_SERVICE_INPUT_COST_DESC = "The input cost of watsonx calls by interval";
+    public static final String LLM_SERVICE_INPUT_COST_UNIT = "{cost}";
+
+    public static final String LLM_SERVICE_OUTPUT_COST_NAME = "llm.service.usage.output_cost";
+    public static final String LLM_SERVICE_OUTPUT_COST_DESC = "The output cost of watsonx calls by interval";
+    public static final String LLM_SERVICE_OUTPUT_COST_UNIT = "{cost}";
+
+    public static final String LLM_SERVICE_TOKEN_NAME = "llm.service.usage.total_tokens";
+    public static final String LLM_SERVICE_TOKEN_DESC = "The total tokens of watsonx calls by interval";
+    public static final String LLM_SERVICE_TOKEN_UNIT = "{token}";
+
+    public static final String LLM_SERVICE_INPUT_TOKEN_NAME = "llm.service.usage.input_tokens";
+    public static final String LLM_SERVICE_INPUT_TOKEN_DESC = "The input tokens of watsonx calls by interval";
+    public static final String LLM_SERVICE_INPUT_TOKEN_UNIT = "{token}";
+
+    public static final String LLM_SERVICE_OUTPUT_TOKEN_NAME = "llm.service.usage.output_tokens";
+    public static final String LLM_SERVICE_OUTPUT_TOKEN_DESC = "The output tokens of watsonx calls by interval";
+    public static final String LLM_SERVICE_OUTPUT_TOKEN_UNIT = "{token}";
+
+    public static final String LLM_SERVICE_REQ_COUNT_NAME = "llm.service.request.count";
+    public static final String LLM_SERVICE_REQ_COUNT_DESC = "The total count of watsonx calls by interval";
+    public static final String LLM_SERVICE_REQ_COUNT_UNIT = "{count}";
 }
\ No newline at end of file
diff --git a/llm/src/main/java/com/instana/dc/llm/LLMRawMetricRegistry.java b/llm/src/main/java/com/instana/dc/llm/LLMRawMetricRegistry.java
index d2126cb..33f5cae 100644
--- a/llm/src/main/java/com/instana/dc/llm/LLMRawMetricRegistry.java
+++ b/llm/src/main/java/com/instana/dc/llm/LLMRawMetricRegistry.java
@@ -30,6 +30,27 @@
 import static com.instana.dc.llm.LLMDcUtil.LLM_REQ_COUNT_DESC;
 import static com.instana.dc.llm.LLMDcUtil.LLM_REQ_COUNT_NAME;
 import static com.instana.dc.llm.LLMDcUtil.LLM_REQ_COUNT_UNIT;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_COST_DESC;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_COST_NAME;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_COST_UNIT;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_INPUT_COST_DESC;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_INPUT_COST_NAME;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_INPUT_COST_UNIT;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_INPUT_TOKEN_DESC;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_INPUT_TOKEN_NAME;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_INPUT_TOKEN_UNIT;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_OUTPUT_COST_DESC;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_OUTPUT_COST_NAME;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_OUTPUT_COST_UNIT;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_OUTPUT_TOKEN_DESC;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_OUTPUT_TOKEN_NAME;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_OUTPUT_TOKEN_UNIT;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_REQ_COUNT_DESC;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_REQ_COUNT_NAME;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_REQ_COUNT_UNIT;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_TOKEN_DESC;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_TOKEN_NAME;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_TOKEN_UNIT;
 import static com.instana.dc.llm.LLMDcUtil.LLM_STATUS_DESC;
 import static com.instana.dc.llm.LLMDcUtil.LLM_STATUS_NAME;
 import static com.instana.dc.llm.LLMDcUtil.LLM_STATUS_UNIT;
@@ -43,7 +64,7 @@ import com.instana.dc.RawMetric;
 
 public class LLMRawMetricRegistry {
 
-    private final Map<String, RawMetric> map = new ConcurrentHashMap<String, RawMetric>() {{
+    private final Map<String, RawMetric> map = new ConcurrentHashMap<>() {{
         put(LLM_STATUS_NAME, new RawMetric(GAUGE, LLM_STATUS_NAME, LLM_STATUS_DESC, LLM_STATUS_UNIT, true, null));
         put(LLM_DURATION_NAME, new RawMetric(GAUGE, LLM_DURATION_NAME, LLM_DURATION_DESC, LLM_DURATION_UNIT, true, "model_id"));
         put(LLM_DURATION_MAX_NAME, new RawMetric(GAUGE, LLM_DURATION_MAX_NAME, LLM_DURATION_MAX_DESC, LLM_DURATION_MAX_UNIT, true, "model_id"));
@@ -54,6 +75,14 @@ public class LLMRawMetricRegistry {
         put(LLM_INPUT_TOKEN_NAME, new RawMetric(GAUGE, LLM_INPUT_TOKEN_NAME, LLM_INPUT_TOKEN_DESC, LLM_INPUT_TOKEN_UNIT, false, "model_id"));
         put(LLM_OUTPUT_TOKEN_NAME, new RawMetric(GAUGE, LLM_OUTPUT_TOKEN_NAME, LLM_OUTPUT_TOKEN_DESC, LLM_OUTPUT_TOKEN_UNIT, false, "model_id"));
         put(LLM_REQ_COUNT_NAME, new RawMetric(UPDOWN_COUNTER, LLM_REQ_COUNT_NAME, LLM_REQ_COUNT_DESC, LLM_REQ_COUNT_UNIT, false, "model_id"));
+
+        put(LLM_SERVICE_COST_NAME, new RawMetric(GAUGE, LLM_SERVICE_COST_NAME, LLM_SERVICE_COST_DESC, LLM_SERVICE_COST_UNIT, false, "model_id"));
+        put(LLM_SERVICE_INPUT_COST_NAME, new RawMetric(GAUGE, LLM_SERVICE_INPUT_COST_NAME, LLM_SERVICE_INPUT_COST_DESC, LLM_SERVICE_INPUT_COST_UNIT, false, "model_id"));
+        put(LLM_SERVICE_OUTPUT_COST_NAME, new RawMetric(GAUGE, LLM_SERVICE_OUTPUT_COST_NAME, LLM_SERVICE_OUTPUT_COST_DESC, LLM_SERVICE_OUTPUT_COST_UNIT, false, "model_id"));
+        put(LLM_SERVICE_TOKEN_NAME, new RawMetric(GAUGE, LLM_SERVICE_TOKEN_NAME, LLM_SERVICE_TOKEN_DESC, LLM_SERVICE_TOKEN_UNIT, false, "model_id"));
+        put(LLM_SERVICE_INPUT_TOKEN_NAME, new RawMetric(GAUGE, LLM_SERVICE_INPUT_TOKEN_NAME, LLM_SERVICE_INPUT_TOKEN_DESC, LLM_SERVICE_INPUT_TOKEN_UNIT, false, "model_id"));
+        put(LLM_SERVICE_OUTPUT_TOKEN_NAME, new RawMetric(GAUGE, LLM_SERVICE_OUTPUT_TOKEN_NAME, LLM_SERVICE_OUTPUT_TOKEN_DESC, LLM_SERVICE_OUTPUT_TOKEN_UNIT, false, "model_id"));
+        put(LLM_SERVICE_REQ_COUNT_NAME, new RawMetric(UPDOWN_COUNTER, LLM_SERVICE_REQ_COUNT_NAME, LLM_SERVICE_REQ_COUNT_DESC, LLM_SERVICE_REQ_COUNT_UNIT, false, "model_id"));
     }};
 
     public Map<String, RawMetric> getMap() {
diff --git a/llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java b/llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java
index b4cb467..fdfabbf 100644
--- a/llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java
+++ b/llm/src/main/java/com/instana/dc/llm/impl/llm/LLMDc.java
@@ -15,6 +15,13 @@
 import static com.instana.dc.llm.LLMDcUtil.LLM_OUTPUT_TOKEN_NAME;
 import static com.instana.dc.llm.LLMDcUtil.LLM_PRICES_PROPERTIES;
 import static com.instana.dc.llm.LLMDcUtil.LLM_REQ_COUNT_NAME;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_COST_NAME;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_INPUT_COST_NAME;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_INPUT_TOKEN_NAME;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_OUTPUT_COST_NAME;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_OUTPUT_TOKEN_NAME;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_REQ_COUNT_NAME;
+import static com.instana.dc.llm.LLMDcUtil.LLM_SERVICE_TOKEN_NAME;
 import static com.instana.dc.llm.LLMDcUtil.LLM_STATUS_NAME;
 import static com.instana.dc.llm.LLMDcUtil.LLM_TOKEN_NAME;
 import static com.instana.dc.llm.LLMDcUtil.OTEL_AGENTLESS_MODE;
@@ -42,20 +49,20 @@ public class LLMDc extends AbstractLLMDc {
 
     private static final Logger logger = Logger.getLogger(LLMDc.class.getName());
 
-    private HashMap<String, ModelAggregation> modelAggrMap = new HashMap<>();
-    private MetricsCollectorService metricsCollector = new MetricsCollectorService();
-    private Boolean otelAgentlessMode = Boolean.FALSE;
-    private Integer callbackInterval = DEFAULT_LLM_CLBK_INTERVAL;
-    private Integer otelPollInterval = DEFAULT_LLM_POLL_INTERVAL;
-    private HashMap<String, Double> llmTokenPrices = new HashMap<>();
-    private int listenPort = 0;
+    private final HashMap<String, ModelAggregation> modelAggrMap = new HashMap<>();
+    private final HashMap<String, HashMap<String, ModelAggregation>> serviceModelAggrMap = new HashMap<>();
+    private final MetricsCollectorService metricsCollector = new MetricsCollectorService();
+    private final Boolean otelAgentlessMode;
+    private final Integer otelPollInterval;
+    private final HashMap<String, Double> llmTokenPrices = new HashMap<>();
+    private final int listenPort;
 
     /**
      * The poll rate in the configuration, in seconds. In other words, the number of
      * seconds between calls to Watsonx.
      */
 
-    private class ModelAggregation {
+    private static class ModelAggregation {
         private final String modelId;
         private final String aiSystem;
         private long deltaInputTokens;
@@ -116,7 +123,7 @@ public void resetDeltaValues() {
     public LLMDc(Map<String, Object> properties, CustomDcConfig cdcConfig) throws Exception {
         super(properties, cdcConfig);
         otelAgentlessMode = (Boolean) properties.getOrDefault(OTEL_AGENTLESS_MODE, Boolean.FALSE);
-        callbackInterval = (Integer) properties.getOrDefault(CALLBACK_INTERVAL, DEFAULT_LLM_CLBK_INTERVAL);
+        Integer callbackInterval = (Integer) properties.getOrDefault(CALLBACK_INTERVAL, DEFAULT_LLM_CLBK_INTERVAL);
         otelPollInterval = (Integer) properties.getOrDefault(POLLING_INTERVAL, callbackInterval);
         listenPort = (int) properties.getOrDefault(SERVICE_LISTEN_PORT, 8000);
 
@@ -194,30 +201,32 @@ public void collectData() {
 
         for (OtelMetric metric : otelMetrics) {
             try {
-                String modelId = metric.getModelId();
-                String aiSystem = metric.getAiSystem();
-                long inputTokens = metric.getDeltaInputTokens();
-                long outputTokens = metric.getDeltaOutputTokens();
-                long duration = metric.getDeltaDuration();
-                long requestCount = metric.getDeltaRequestCount();
-
-                ModelAggregation modelAggr = modelAggrMap.get(modelId);
-                if (modelAggr == null) {
-                    modelAggr = new ModelAggregation(modelId, aiSystem);
-                    modelAggrMap.put(modelId, modelAggr);
-                }
-                modelAggr.addDeltaInputTokens(inputTokens);
-                modelAggr.addDeltaOutputTokens(outputTokens);
-                modelAggr.addDeltaDuration(duration);
-                modelAggr.addDeltaRequestCount(requestCount);
-
+                updateModelAggregation(metric, modelAggrMap);
+                updateModelAggregation(metric,
+                        serviceModelAggrMap.computeIfAbsent(metric.getServiceName(), k -> new HashMap<>()));
             } catch (Exception e) {
                 e.printStackTrace();
             }
         }
 
         logger.info("-----------------------------------------");
-        for(Map.Entry<String, ModelAggregation> entry : modelAggrMap.entrySet()){
+        int divisor = Boolean.TRUE.equals(otelAgentlessMode) ? 1 : otelPollInterval;
+        processModelMetrics(divisor);
+        logger.info("-----------------------------------------");
+        processServiceMetrics(divisor);
+        logger.info("-----------------------------------------");
+    }
+
+    private void updateModelAggregation(OtelMetric metric, Map<String, ModelAggregation> modelAggrMap) {
+        ModelAggregation modelAggr = modelAggrMap.computeIfAbsent(metric.getModelId(), k -> new ModelAggregation(k, metric.getAiSystem()));
+        modelAggr.addDeltaInputTokens(metric.getDeltaInputTokens());
+        modelAggr.addDeltaOutputTokens(metric.getDeltaOutputTokens());
+        modelAggr.addDeltaDuration(metric.getDeltaDuration());
+        modelAggr.addDeltaRequestCount(metric.getDeltaRequestCount());
+    }
+
+    private void processModelMetrics(int divisor) {
+        for (Map.Entry<String, ModelAggregation> entry : modelAggrMap.entrySet()) {
             ModelAggregation aggr = entry.getValue();
             String modelId = aggr.getModelId();
             String aiSystem = aggr.getAiSystem();
@@ -228,52 +237,31 @@ public void collectData() {
             long maxDurationSoFar = aggr.getMaxDurationSoFar();
             aggr.resetDeltaValues();
 
-            long avgDurationPerReq = deltaRequestCount == 0 ? 0 : deltaDuration/deltaRequestCount;
-            if(avgDurationPerReq > maxDurationSoFar) {
+            long avgDurationPerReq = deltaRequestCount == 0 ? 0 : deltaDuration / deltaRequestCount;
+            if (avgDurationPerReq > maxDurationSoFar) {
                 maxDurationSoFar = avgDurationPerReq;
                 aggr.setMaxDurationSoFar(maxDurationSoFar);
             }
 
-            int divisor = otelAgentlessMode? 1:otelPollInterval;
-
-            String inputPriceKey = aiSystem + "." + modelId + ".input";
-            Double priceInputTokens = llmTokenPrices.get(inputPriceKey.toLowerCase());
-            if(priceInputTokens == null) {
-                String inputFlatPriceKey = aiSystem + ".*.input";
-                priceInputTokens = llmTokenPrices.get(inputFlatPriceKey.toLowerCase());
-                if (priceInputTokens == null) {
-                    priceInputTokens = 0.0;
-                }
-            }
-            String outputPriceKey = aiSystem + "." + modelId + ".output";
-            Double priceOutputTokens = llmTokenPrices.get(outputPriceKey.toLowerCase());
-            if(priceOutputTokens == null) {
-                String outputFlatPriceKey = aiSystem + ".*.output";
-                priceOutputTokens = llmTokenPrices.get(outputFlatPriceKey.toLowerCase());
-                if (priceOutputTokens == null) {
-                    priceOutputTokens = 0.0;
-                }
-            }
-            double intervalReqCount = (double)deltaRequestCount/divisor;
-            double intervalInputTokens = (double)deltaInputTokens/divisor;
-            double intervalOutputTokens = (double)deltaOutputTokens/divisor;
+            double intervalReqCount = (double) deltaRequestCount / divisor;
+            double intervalInputTokens = (double) deltaInputTokens / divisor;
+            double intervalOutputTokens = (double) deltaOutputTokens / divisor;
             double intervalTotalTokens = intervalInputTokens + intervalOutputTokens;
 
-            double intervalInputCost = intervalInputTokens/1000 * priceInputTokens;
-            double intervalOutputCost = intervalOutputTokens/1000 * priceOutputTokens;
+            double intervalInputCost = intervalInputTokens / 1000 * getTokenPrice(aiSystem, modelId, "input");
+            double intervalOutputCost = intervalOutputTokens / 1000 * getTokenPrice(aiSystem, modelId, "output");
             double intervalTotalCost = intervalInputCost + intervalOutputCost;
 
-            // This costs are 10000 times the actual value to prevent very small numbers from being rounded off. 
+            // These costs are 10000 times the actual value to prevent very small numbers from being rounded off.
             // And it will be adjusted to the correct value on UI.
-            String backwardCompatible = System.getenv("FORCE_BACKWARD_COMPATIBLE");
-            if (backwardCompatible != null && backwardCompatible.equalsIgnoreCase("true")) {
-                System.out.printf("FORCE_BACKWARD_COMPATIBLE is set.");
+            if (isForceBackwardCompatible()) {
+                System.out.println("FORCE_BACKWARD_COMPATIBLE is set.");
             } else {
                 intervalTotalCost = intervalTotalCost * 10000;
                 intervalInputCost = intervalInputCost * 10000;
                 intervalOutputCost = intervalOutputCost * 10000;
             }
-            
+
             System.out.printf("Metrics for model %s of %s:%n", modelId, aiSystem);
             System.out.println(" - Average Duration : " + avgDurationPerReq + " ms");
             System.out.println(" - Maximum Duration : " + maxDurationSoFar + " ms");
@@ -301,6 +289,70 @@ public void collectData() {
             getRawMetric(LLM_OUTPUT_TOKEN_NAME).getDataPoint(modelIdExt).setValue(intervalOutputTokens, attributes);
             getRawMetric(LLM_REQ_COUNT_NAME).getDataPoint(modelIdExt).setValue(intervalReqCount, attributes);
         }
-        logger.info("-----------------------------------------");
+    }
+
+    private void processServiceMetrics(int divisor) {
+        for (Map.Entry<String, HashMap<String, ModelAggregation>> serviceAggrEntry : serviceModelAggrMap.entrySet()) {
+            double serviceIntervalReqCount = 0.0;
+            double serviceIntervalInputTokens = 0.0, serviceIntervalOutputTokens = 0.0, serviceIntervalTotalTokens;
+            double serviceIntervalInputCost = 0.0, serviceIntervalOutputCost = 0.0, serviceIntervalTotalCost;
+            for (Map.Entry<String, ModelAggregation> entry : serviceAggrEntry.getValue().entrySet()) {
+                ModelAggregation aggr = entry.getValue();
+                String modelId = aggr.getModelId();
+                String aiSystem = aggr.getAiSystem();
+
+                serviceIntervalReqCount += (double) aggr.getDeltaRequestCount() / divisor;
+
+                double intervalInputTokens = (double) aggr.getDeltaInputTokens() / divisor;
+                serviceIntervalInputTokens += intervalInputTokens;
+                serviceIntervalInputCost += intervalInputTokens / 1000 * getTokenPrice(aiSystem, modelId, "input");
+
+                double intervalOutputTokens = (double) aggr.getDeltaOutputTokens() / divisor;
+                serviceIntervalOutputTokens += intervalOutputTokens;
+                serviceIntervalOutputCost += intervalOutputTokens / 1000 * getTokenPrice(aiSystem, modelId, "output");
+
+                aggr.resetDeltaValues();
+            }
+            serviceIntervalTotalTokens = serviceIntervalInputTokens + serviceIntervalOutputTokens;
+            serviceIntervalTotalCost = serviceIntervalInputCost + serviceIntervalOutputCost;
+
+            // These costs are 10000 times the actual value to prevent very small numbers from being rounded off.
+            // And it will be adjusted to the correct value on UI.
+            if (isForceBackwardCompatible()) {
+                System.out.println("FORCE_BACKWARD_COMPATIBLE is set.");
+            } else {
+                serviceIntervalTotalCost = serviceIntervalTotalCost * 10000;
+                serviceIntervalInputCost = serviceIntervalInputCost * 10000;
+                serviceIntervalOutputCost = serviceIntervalOutputCost * 10000;
+            }
+
+            String serviceName = serviceAggrEntry.getKey();
+            System.out.printf("Metrics for service %s:%n", serviceName);
+            System.out.println(" - Interval Tokens : " + serviceIntervalTotalTokens);
+            System.out.println(" - Interval Input Tokens : " + serviceIntervalInputTokens);
+            System.out.println(" - Interval Output Tokens : " + serviceIntervalOutputTokens);
+            System.out.println(" - Interval Cost : " + serviceIntervalTotalCost);
+            System.out.println(" - Interval Input Cost : " + serviceIntervalInputCost);
+            System.out.println(" - Interval Output Cost : " + serviceIntervalOutputCost);
+            System.out.println(" - Interval Request : " + serviceIntervalReqCount);
+
+            Map<String, Object> attributes = Map.of("service_name", serviceName);
+            getRawMetric(LLM_SERVICE_COST_NAME).getDataPoint(serviceName).setValue(serviceIntervalTotalCost, attributes);
+            getRawMetric(LLM_SERVICE_INPUT_COST_NAME).getDataPoint(serviceName).setValue(serviceIntervalInputCost, attributes);
+            getRawMetric(LLM_SERVICE_OUTPUT_COST_NAME).getDataPoint(serviceName).setValue(serviceIntervalOutputCost, attributes);
+            getRawMetric(LLM_SERVICE_TOKEN_NAME).getDataPoint(serviceName).setValue(serviceIntervalTotalTokens, attributes);
+            getRawMetric(LLM_SERVICE_INPUT_TOKEN_NAME).getDataPoint(serviceName).setValue(serviceIntervalInputTokens, attributes);
+            getRawMetric(LLM_SERVICE_OUTPUT_TOKEN_NAME).getDataPoint(serviceName).setValue(serviceIntervalOutputTokens, attributes);
+            getRawMetric(LLM_SERVICE_REQ_COUNT_NAME).getDataPoint(serviceName).setValue(serviceIntervalReqCount, attributes);
+        }
+    }
+
+    private static boolean isForceBackwardCompatible() {
+        return "true".equalsIgnoreCase(System.getenv("FORCE_BACKWARD_COMPATIBLE"));
+    }
+
+    private Double getTokenPrice(String aiSystem, String modelId, String io) {
+        return llmTokenPrices.getOrDefault(String.join(".", aiSystem, modelId, io).toLowerCase(),
+                llmTokenPrices.getOrDefault((aiSystem + ".*." + io).toLowerCase(), 0.0));
     }
 }
diff --git a/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java b/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java
index 53dd3ca..0fd76d7 100644
--- a/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java
+++ b/llm/src/main/java/com/instana/dc/llm/impl/llm/MetricsCollectorService.java
@@ -33,6 +33,7 @@ class MetricsCollectorService extends MetricsServiceGrpc.MetricsServiceImplBase
 
     public class OtelMetric {
         private String modelId;
+        private String serviceName;
         private double inputTokenSum;
         private double outputTokenSum;
         private double durationSum;
@@ -50,6 +51,7 @@ public OtelMetric() {}
 
         public OtelMetric(OtelMetric other) {
             this.modelId = other.modelId;
+            this.serviceName = other.serviceName;
             this.inputTokenSum = other.inputTokenSum;
             this.outputTokenSum = other.outputTokenSum;
             this.durationSum = other.durationSum;
@@ -68,6 +70,10 @@ public String getModelId() {
             return modelId;
         }
 
+        public String getServiceName() {
+            return serviceName;
+        }
+
         public String getAiSystem() {
             return aiSystem;
         }
@@ -120,6 +126,10 @@ public void setModelId(String modelId) {
             this.modelId = modelId;
         }
 
+        public void setServiceName(String serviceName) {
+            this.serviceName = serviceName;
+        }
+
         public void setAiSystem(String aiSystem) {
             this.aiSystem = aiSystem;
         }
@@ -305,6 +315,7 @@ private void processTokenMetric(Metric metric, String serviceName) {
                     exportMetrics.put(modelKey, otelMetric);
                     otelMetric.setModelId(modelId);
                     otelMetric.setAiSystem(aiSystem);
+                    otelMetric.setServiceName(serviceName);
                 }
 
                 if (tokenType.compareTo("input") == 0) {
@@ -371,6 +382,7 @@ private void processDurationMetric(Metric metric, String serviceName) {
                     exportMetrics.put(modelKey, otelMetric);
                     otelMetric.setModelId(modelId);
                     otelMetric.setAiSystem(aiSystem);
+                    otelMetric.setServiceName(serviceName);
                 }
 
                 long lastStartTime = otelMetric.getLastDurationStartTime();