Skip to content

Commit

Permalink
Refactor multimodal input/output handling classes
Browse files Browse the repository at this point in the history
Consolidated `MultiModalInput` and `MultiModalOutput` into `MultiModalData`.
Replaced `MultiModalInputs` and `MultiModalOutputs` with `MultiModalDataCollection`.
Updated `ChatGPTIPAProvider.cpp`, `ExternalIPAResponse.h`, `IPARequest.h`, `IPAResponse.h`, `InputModalityComponentListener.h`, `InputNotificationMediator.h`, `ModalityManager.h`, `OutputModalityComponent.h`, `ExternalIPAResponse.cpp`, `IPARequest.cpp`, and `IPAResponse.cpp` to use new classes.
Removed `MultiModalOutput.h` and `MultiModalOutputs.h`.
Updated `CMakeLists.txt` to reflect new file names and correct formatting.
Simplified and unified multimodal data handling.
  • Loading branch information
schnelle committed Dec 17, 2024
1 parent fe02241 commit 88de619
Show file tree
Hide file tree
Showing 37 changed files with 154 additions and 447 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@

#include <w3c/voiceinteraction/ipa/TextModalityType.h>

#include "w3c/voiceinteraction/ipa/reference/TextMultiModalInput.h"
#include "w3c/voiceinteraction/ipa/reference/TextMultiModalOutput.h"
#include "w3c/voiceinteraction/ipa/reference/TextMultiModalData.h"

#include "w3c/voiceinteraction/ipa/reference/external/ipa/chatgpt/ChatGPTIPAProvider.h"
#include "w3c/voiceinteraction/ipa/reference/external/ipa/chatgpt/ChatGPTMessage.h"
Expand Down Expand Up @@ -107,13 +106,13 @@ const std::shared_ptr<ExternalIPAResponse> ChatGPTIPAProvider::processInput(
req.model = std::string("gpt-3.5-turbo");
ChatGPTMessage systemMessage {"system",
"You are a standards maniac."};
std::shared_ptr<MultiModalInputs> multiModalInputs =
std::shared_ptr<MultiModalDataCollection> multiModalInputs =
request->getMultiModalInputs();
std::shared_ptr<MultiModalInput> input =
multiModalInputs->getMultiModalInput(TextMultiModalInput::MODALITY);
std::shared_ptr<MultiModalData> input =
multiModalInputs->getMultiModalData(TextMultiModalInput::MODALITY);
std::shared_ptr<TextMultiModalInput> textInput =
std::dynamic_pointer_cast<TextMultiModalInput>(input);
const std::string& text = textInput->getTextInput();
const std::string& text = textInput->getText();
ChatGPTMessage userMessage { "user", text };
req.messages = std::vector({ systemMessage, userMessage });
req.temperature = 1;
Expand Down Expand Up @@ -196,11 +195,11 @@ const std::shared_ptr<ExternalIPAResponse> ChatGPTIPAProvider::processInput(
nlohmann::json responseData = nlohmann::json::parse(response);
ChatGPTJSONResponse parsedResponse = responseData;
std::string textOutput = parsedResponse.choices[0].message.content;
std::shared_ptr<MultiModalOutput> output =
std::make_shared<TextMultiModalOutput>(textOutput);
std::shared_ptr<MultiModalOutputs> outputs =
std::make_shared<MultiModalOutputs>();
outputs->addMultiModalOutput(output);
std::shared_ptr<MultiModalData> output =
std::make_shared<TextMultiModalInput>(textOutput);
std::shared_ptr<MultiModalDataCollection> outputs =
std::make_shared<MultiModalDataCollection>();
outputs->addMultiModalData(output);
std::shared_ptr<ExternalIPAResponse> out =
std::make_shared<ExternalIPAResponse>(request->getSessionId(),
request->getRequestId(), outputs, nullptr);
Expand Down
16 changes: 7 additions & 9 deletions source/w3cipa/w3cipademo/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,14 @@ target_link_libraries(${PROJECT_NAME} w3cipachatgptipaprovider)
target_link_libraries(${PROJECT_NAME} w3cipareferenceimplementation)
target_link_libraries(${PROJECT_NAME} w3cipaframework)

install(
TARGETS ${PROJECT_NAME}
LIBRARY DESTINATION lib
RUNTIME DESTINATION bin
install(TARGETS ${PROJECT_NAME}
LIBRARY DESTINATION lib
RUNTIME DESTINATION bin
)
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/log4cplus.properties DESTINATION config)
install(DIRECTORY ${W3CIPA_OPEN_SOURCE_SRC}/bin/ DESTINATION bin)
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/log4cplus.properties
DESTINATION config)
install(DIRECTORY ${W3CIPA_OPEN_SOURCE_SRC}/bin/
DESTINATION bin)

#
# Add configuration files
Expand All @@ -47,6 +48,3 @@ set(PROJECT_CONFIG_DIR ${CMAKE_CURRENT_BINARY_DIR}/config)
file(MAKE_DIRECTORY ${PROJECT_CONFIG_DIR})
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/log4cplus.properties
${PROJECT_CONFIG_DIR}/log4cplus.properties COPYONLY)



12 changes: 4 additions & 8 deletions source/w3cipa/w3cipaframework/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,8 @@ set(HEADERS
include/w3c/voiceinteraction/ipa/LanguageDependent.h
include/w3c/voiceinteraction/ipa/MetaData.h
include/w3c/voiceinteraction/ipa/ModalityType.h
include/w3c/voiceinteraction/ipa/MultiModalInput.h
include/w3c/voiceinteraction/ipa/MultiModalInputs.h
include/w3c/voiceinteraction/ipa/MultiModalOutput.h
include/w3c/voiceinteraction/ipa/MultiModalOutputs.h
include/w3c/voiceinteraction/ipa/MultiModalData.h
include/w3c/voiceinteraction/ipa/MultiModalDataCollection.h
include/w3c/voiceinteraction/ipa/RequestId.h
include/w3c/voiceinteraction/ipa/SemanticInterpretation.h
include/w3c/voiceinteraction/ipa/SessionId.h
Expand Down Expand Up @@ -75,10 +73,8 @@ set(SOURCES
src/w3c/voiceinteraction/ipa/IPAResponse.cpp
src/w3c/voiceinteraction/ipa/Language.cpp
src/w3c/voiceinteraction/ipa/Metadata.cpp
src/w3c/voiceinteraction/ipa/MultiModalInput.cpp
src/w3c/voiceinteraction/ipa/MultiModalInputs.cpp
src/w3c/voiceinteraction/ipa/MultiModalOutput.cpp
src/w3c/voiceinteraction/ipa/MultiModalOutputs.cpp
src/w3c/voiceinteraction/ipa/MultiModalData.cpp
src/w3c/voiceinteraction/ipa/MultiModalDataCollection.cpp
src/w3c/voiceinteraction/ipa/RequestId.cpp
src/w3c/voiceinteraction/ipa/SessionId.cpp
src/w3c/voiceinteraction/ipa/TextModalityType.cpp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

#include "CallResult.h"
#include "ErrorMessage.h"
#include "MultiModalOutputs.h"
#include "MultiModalDataCollection.h"
#include "IPAData.h"
#include "SemanticInterpretation.h"

Expand All @@ -41,7 +41,7 @@ class ExternalIPAResponse : public IPAData {
*/
ExternalIPAResponse(const std::shared_ptr<SessionId>& sessionIdentifier,
const std::shared_ptr<RequestId>& requestIdentifier,
const std::shared_ptr<MultiModalOutputs>& multiModalOutputs,
const std::shared_ptr<MultiModalDataCollection>& multiModalOutputs,
const std::shared_ptr<SemanticInterpretation> semanticInterpretation);

/**
Expand All @@ -63,7 +63,7 @@ class ExternalIPAResponse : public IPAData {
* Returns the multimodal outputs to be returned to the client.
* @return The multimodal outputs to be returned to the client.
*/
const std::shared_ptr<MultiModalOutputs> getMultiModalOutputs() const;
const std::shared_ptr<MultiModalDataCollection> getMultiModalOutputs() const;

/**
* Returns the multimodal outputs to be returned to the client.
Expand All @@ -90,7 +90,7 @@ class ExternalIPAResponse : public IPAData {
const std::shared_ptr<ErrorMessage> getErrorMessage() const;
private:
/** The multimodal outputs to be returned to the client. */
std::shared_ptr<MultiModalOutputs> outputs;
std::shared_ptr<MultiModalDataCollection> outputs;
/** Semantic interpretation of an utterance. */
std::shared_ptr<SemanticInterpretation> interpretation;
/** Optionally a caught error. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include <memory>

#include "AudioData.h"
#include "MultiModalInputs.h"
#include "MultiModalDataCollection.h"
#include "MetaData.h"
#include "IPAData.h"

Expand All @@ -41,7 +41,7 @@ class IPARequest : public IPAData {
*/
IPARequest(const std::shared_ptr<SessionId>& sessionIdentifier,
const std::shared_ptr<RequestId>& requestIdentifier,
const std::shared_ptr<MultiModalInputs>& multiModalInputs,
const std::shared_ptr<MultiModalDataCollection>& multiModalInputs,
const std::shared_ptr<AudioData>& audioDataToSend,
const std::shared_ptr<MetaData> metaDataToSend);

Expand All @@ -60,7 +60,7 @@ class IPARequest : public IPAData {
* Returns the multimodal inputs to be sent to the client.
* @return The multimodal inputs to be sent to the client.
*/
const std::shared_ptr<MultiModalInputs>& getMultiModalInputs();
const std::shared_ptr<MultiModalDataCollection>& getMultiModalInputs();

/**
* Returns the metadata of the request.
Expand All @@ -72,7 +72,7 @@ class IPARequest : public IPAData {
/** The audio data. */
std::shared_ptr<AudioData> audioData;
/** The multimodal inputs to be sent to the client. */
std::shared_ptr<MultiModalInputs> inputs;
std::shared_ptr<MultiModalDataCollection> inputs;
/** The metadata. */
std::shared_ptr<MetaData> metaData;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include <memory>

#include "AudioData.h"
#include "MultiModalOutputs.h"
#include "MultiModalDataCollection.h"
#include "MetaData.h"
#include "IPAData.h"

Expand All @@ -35,14 +35,14 @@ class IPAResponse : public IPAData {
* Constructs a new object.
* @param sessionIdentifier the session identifier
* @param requestIdentifier the request identifier
* @param multiModalOutputs The multimodal outputs to be returned to the
* @param multiModalData The multimodal data elements to be returned to the
* client.
* @param audioDataToSend audio data to send
* @param metaDataToSend meta data to send
*/
IPAResponse(const std::shared_ptr<SessionId>& sessionIdentifier,
const std::shared_ptr<RequestId>& requestIdentifier,
const std::shared_ptr<MultiModalOutputs>& multiModalOutputs,
const std::shared_ptr<MultiModalDataCollection>& multiModalData,
const std::shared_ptr<AudioData>& audioDataToSend,
const std::shared_ptr<MetaData> metaDataToSend);

Expand All @@ -61,7 +61,7 @@ class IPAResponse : public IPAData {
* Returns the multimodal outputs to be returned to the client.
* @return The multimodal outputs to be returned to the client.
*/
virtual const std::shared_ptr<MultiModalOutputs> getMultiModalOutputs();
virtual const std::shared_ptr<MultiModalDataCollection> getMultiModalOutputs();

/**
* Returns the metadata of the request.
Expand All @@ -73,7 +73,7 @@ class IPAResponse : public IPAData {
/** The audio data. */
std::shared_ptr<AudioData> audioData;
/** The multimodal outputs to be returned to the client. */
std::shared_ptr<MultiModalOutputs> outputs;
std::shared_ptr<MultiModalDataCollection> outputs;
/** The metadata. */
std::shared_ptr<MetaData> metaData;
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
* [1] https://www.w3.org/Consortium/Legal/copyright-software
*/

#if !defined(MULTI_MODAL_INPUT_H)
#define MULTI_MODAL_INPUT_H
#if !defined(MULTI_MODAL_DATA_H)
#define MULTI_MODAL_DATA_H

#include "ModalityType.h"

Expand All @@ -20,23 +20,22 @@ namespace voiceinteraction {
namespace ipa {

/**
* This interface is used to represent input from a modality.
* This interface is used to represent input or output from a modality.
* @author Dirk Schnelle-Walka
*/
class MultiModalInput
{
class MultiModalData {

public:
/**
* Constructs a new object.
* @param modality The modality of this input.
*/
MultiModalInput(const ModalityType modality);
MultiModalData(const ModalityType modality);

/**
* Destroys the object.
*/
virtual ~MultiModalInput();
virtual ~MultiModalData();

/**
* Retrieves a string identifying the modality of this input.
Expand All @@ -54,4 +53,4 @@ class MultiModalInput
} // namespace voiceinteraction
} // namespace w3c

#endif // !defined(MULTI_MODAL_INPUT_H)
#endif // !defined(MULTI_MODAL_DATA_H)
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,15 @@
* [1] https://www.w3.org/Consortium/Legal/copyright-software
*/

#if !defined(MULTIMODALINPUTS_H)
#define MULTIMODALINPUTS_H
#if !defined(MULTIMODALDATACOLLECTION_H)
#define MULTIMODALDATACOLLECTION_H

#include <memory>
#include <map>
#include <list>

#include "ModalityType.h"
#include "MultiModalInput.h"
#include "MultiModalData.h"
#include "Language.h"

namespace w3c {
Expand All @@ -30,56 +30,61 @@ namespace ipa {
* allows for only one input per modality.
* @author Dirk Schnelle-Walka
*/
class MultiModalInputs
{
class MultiModalDataCollection {

public:
/**
* Constructs an empty set of multimodal inputs.
*/
MultiModalInputs();
MultiModalDataCollection();

/**
* Destroys the set of multimodal inputs.
*/
virtual ~MultiModalInputs();
virtual ~MultiModalDataCollection();

/**
* Adds a multimodal input to the set. An existing entry for the
* provided modality will be overwritten.
* @param input The multimodal input to add.
*/
void addMultiModalInput(const std::shared_ptr<MultiModalInput>& input);
void addMultiModalData(const std::shared_ptr<MultiModalData>& input);

/**
* Returns the multimodal input for the given modality.
* Returns the multimodal data elements for the given modality.
* @param modality The modality to return.
* @return The multimodal input for the given modality, {@code nullptr} if
* @return The multimodal data for the given modality, {@code nullptr} if
* the modality could not be found.
*/
std::shared_ptr<MultiModalInput> getMultiModalInput(const ModalityType& modality) const;
std::shared_ptr<MultiModalData> getMultiModalData(const ModalityType& modality) const;

/**
* Retrieves all modality types provided in this input.
* Retrieves all modality types provided in this multimodal data collection.
* @return list of all modality types
*/
std::list<ModalityType> getInputModalities() const;
std::list<ModalityType> getMultiModalData() const;

/**
* Retrieves all languages supported by the input.
* Retrieves all languages supported by this multimodal data collection.
* @return list of all supported languages
*/
std::list<Language> getInputLanguages() const;

private:
/**
* The map of known multimodal inputs.
* Retrieves all modality types provided.
* @return list of all modality types
*/
std::list<ModalityType> getModalityTypes() const;

private:
/**
* The map of known multimodal data elements.
*/
std::map<ModalityType, std::shared_ptr<MultiModalInput>> inputs;
std::map<ModalityType, std::shared_ptr<MultiModalData>> multiModalData;
};

} // namespace ipa
} // namespace voiceinteraction
} // namespace w3c

#endif // !defined(MULTIMODALINPUTS_H)
#endif // !defined(MULTIMODALDATACOLLECTION_H)
Loading

0 comments on commit 88de619

Please sign in to comment.