diff --git a/3rdparty/voice_text/.gitignore b/3rdparty/voice_text/.gitignore deleted file mode 100644 index 46bf7136a..000000000 --- a/3rdparty/voice_text/.gitignore +++ /dev/null @@ -1 +0,0 @@ -src/voice_text.cpp diff --git a/3rdparty/voice_text/CMakeLists.txt b/3rdparty/voice_text/CMakeLists.txt index ef07dcceb..74441c5a4 100644 --- a/3rdparty/voice_text/CMakeLists.txt +++ b/3rdparty/voice_text/CMakeLists.txt @@ -1,11 +1,16 @@ -cmake_minimum_required(VERSION 2.8.3) +cmake_minimum_required(VERSION 3.5.1) project(voice_text) + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-write-strings") + find_package(catkin REQUIRED COMPONENTS dynamic_reconfigure roscpp message_generation) -find_package(Boost REQUIRED COMPONENTS filesystem) +find_package(Boost REQUIRED COMPONENTS + filesystem) generate_dynamic_reconfigure_options( cfg/VoiceText.cfg @@ -20,57 +25,19 @@ generate_messages() catkin_package(CATKIN_DEPENDS message_runtime) -file(GLOB VT_ROOT /usr/vt/*/*) -if(NOT VT_ROOT) - message(WARNING "VoiceText directory should be /usr/vt/*/* (e.g., /usr/vt/sayaka/M16) but is not found") - set(VT_ROOT /usr/vt/sayaka/M16) # default value for following configure_file -else() - if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "4") - set(VT_LIB_PATH_OLD ${VT_ROOT}/bin/x86_32/RAMIO/libvt_jpn.so) # e.g., /usr/vt/sayaka/M16/bin/x86_32/RAMIO/libvt_jpn.so - set(VT_LIB_PATH_NEW ${VT_ROOT}/bin/LINUX32_GLIBC3/RAMIO/libvt_jpn.so) # e.g., /usr/vt/risa/H16/bin/LINUX32_GLIBC3/RAMIO/libvt_jpn.so - elseif("${CMAKE_SIZEOF_VOID_P}" STREQUAL "8") - set(VT_LIB_PATH_OLD ${VT_ROOT}/bin/x86_64/RAMIO/libvt_jpn.so) # e.g., /usr/vt/sayaka/M16/bin/x86_64/RAMIO/libvt_jpn.so - set(VT_LIB_PATH_NEW ${VT_ROOT}/bin/LINUX64_GLIBC3/RAMIO/libvt_jpn.so) # e.g., /usr/vt/risa/H16/bin/LINUX64_GLIBC3/RAMIO/libvt_jpn.so - endif() - if(EXISTS ${VT_LIB_PATH_OLD}) - set(VT_LIB_PATH ${VT_LIB_PATH_OLD}) - else() - if(EXISTS ${VT_LIB_PATH_NEW}) - set(VT_LIB_PATH ${VT_LIB_PATH_NEW}) - endif() - endif() - 
if(VT_LIB_PATH) - message(WARNING "VoiceText library is found at ${VT_LIB_PATH}") - else() - message(WARNING "VoiceText library is not found at ${VT_LIB_PATH_OLD} or ${VT_LIB_PATH_NEW}") - endif() -endif() -configure_file(src/voice_text.cpp.in ${PROJECT_SOURCE_DIR}/src/voice_text.cpp) - - include_directories( - ${Boost_INCLUDE_DIRS} - ${catkin_INCLUDE_DIRS} - ) - add_executable(voice_text src/voice_text.cpp) - add_dependencies(voice_text ${PROJECT_NAME}_generate_messages_cpp ${PROJECT_NAME}_gencfg) - set_target_properties(voice_text PROPERTIES COMPILE_FLAGS -D_REENTRANT) +include_directories( + include + ${Boost_INCLUDE_DIRS} + ${catkin_INCLUDE_DIRS} +) -if(NOT VT_LIB_PATH) - message(WARNING "Building dummy library") - add_library(vt_dummy src/dummy/vt_dummy.cpp) - set_target_properties(vt_dummy PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}) - set_target_properties(vt_dummy PROPERTIES LIBRARY_OUTPUT_NAME vt_jpn) - set_target_properties(voice_text PROPERTIES COMPILE_FLAGS -DUSE_DUMMY_INCLUDE) - set(VT_LIB_PATH ${PROJECT_BINARY_DIR}/libvt_jpn.so) -endif() +add_executable(voice_text src/voice_text.cpp src/vt_handler.cpp) +add_dependencies(voice_text ${PROJECT_NAME}_generate_messages_cpp ${PROJECT_NAME}_gencfg) +set_target_properties(voice_text PROPERTIES COMPILE_FLAGS -D_REENTRANT) - target_link_libraries(voice_text - ${catkin_LIBRARIES} - ${VT_LIB_PATH} -lm -lpthread - ) -if(NOT EXISTS ${VT_LIB_PATH}) - add_dependencies(voice_text vt_dummy) -endif() +target_link_libraries(voice_text + ${catkin_LIBRARIES} -lm -lpthread -ldl +) install(TARGETS voice_text # do not install vt_dummy target, that should be installed from voice_text library ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} diff --git a/3rdparty/voice_text/README.md b/3rdparty/voice_text/README.md index ed1159ecf..bea0daf47 100644 --- a/3rdparty/voice_text/README.md +++ b/3rdparty/voice_text/README.md @@ -5,9 +5,20 @@ ROS Interface for HOYA VoiceText Speech Synthesis Engine ## Installation 
-1. Install VoiceText SDK -2. Put license file -3. Build this package +### 1. Install VoiceText SDK +#### If you have the VoiceText SDK installer binary, please follow the official guide and install both the engine and the SDK. +#### If you don't have the SDK installer binary but have the ReadSpeaker API binary, please follow the guide below. +1. Install the VoiceText engine by following the official guide +2. Copy the VoiceText API binaries to the VoiceText binary directory + The VoiceText API package includes binary libraries and a header file. You have to copy them to a specific directory by executing the following commands. + ```bash + cd /path_to_api_package_directory # e.g. cd ~/Downloads/RS_VTAPI_SDK_Linux_4.3.0.2/20201113_VTAPI4.3.0.2_LINUX + cd bin/x64 # cd to x86 instead if your system has an x86 architecture + # This assumes the VoiceText engine's talker is hikari and its type is D16. If yours differ, please use the appropriate directory. + sudo cp -a * /usr/vt/hikari/D16/bin # Don't forget -a, so that symbolic links are not broken. + ``` +### 2. Put license file +### 3. 
Build this package ```bash cd /path/to/catkin_workspace diff --git a/3rdparty/voice_text/include/vt_handler.h b/3rdparty/voice_text/include/vt_handler.h new file mode 100644 index 000000000..f52b20dd3 --- /dev/null +++ b/3rdparty/voice_text/include/vt_handler.h @@ -0,0 +1,105 @@ +/* + * vt_handler.h + * Author: Yoshiki Obinata + */ + +#ifndef VT_HANDLER_H_ +#define VT_HANDLER_H_ + +#include +#include +#include +#include +#include + +// logging +#include + +#include "vt_jpn.h" +#include "vtapi.h" + +#if __x86_64__ || __ppc64__ +#define ENV64 +#else +#define ENV32 +#endif + +typedef enum VT_TYPE{ +NO_VT, +VT_SDK, +VT_API +} VT_Types; + +namespace fs = boost::filesystem; + +class VTHandler{ + public: + VTHandler(const std::string license_path, const std::string db_path); + ~VTHandler(); + bool VTH_TextToFile(const int pitch, const int speed, const int volume, const int pause, + const std::string text, const std::string wave_path); + + private: + void* dl_handle; + VT_Types vt_type; + + // define handle + bool LoadSym(); + + // Load symbols + // Related to VoiceText SDK + std::vector VTSDK_func_ = { + "VT_LOADTTS_JPN", + "VT_UNLOADTTS_JPN", + "VT_GetTTSInfo_JPN", + "VT_TextToFile_JPN" + }; + + // Related to ReadSpeaker API + std::vector VTAPI_func_ = { + "VTAPI_Init", + "VTAPI_CreateHandle", + "VTAPI_SetLicenseFolder", + "VTAPI_GetEngine", + "VTAPI_SetEngineHandle", + "VTAPI_SetAttr", + "VTAPI_SetOutputFile", + "VTAPI_TextToFile", + "VTAPI_GetLastErrorInfo", + "VTAPI_ReleaseHandle", + "VTAPI_UnloadEngine", + "VTAPI_Exit" + }; + + // symbol map + std::map VTSDK_s_map_; + std::map VTAPI_s_map_; + + // Load Functions + // Related to VoiceText SDK + short (*VT_LOADTTS_JPN)(HWND, int, char*, char*); + void (*VT_UNLOADTTS_JPN)(int); + int (*VT_GetTTSInfo_JPN)(int, char*, void*, int); + short (*VT_TextToFile_JPN)(int, char*, char*, int, int, int, int, int, int, int); + + // Related to ReadSpeaker API + int (*VTAPI_Init)(char*); + VTAPI_HANDLE (*VTAPI_CreateHandle)(); + void 
(*VTAPI_SetLicenseFolder)(char*); + VTAPI_ENGINE_HANDLE (*VTAPI_GetEngine)(char*, char*); + int (*VTAPI_SetEngineHandle)(VTAPI_HANDLE, VTAPI_ENGINE_HANDLE); + int (*VTAPI_SetAttr)(VTAPI_HANDLE, int, int); + int (*VTAPI_SetOutputFile)(VTAPI_HANDLE, char*, int); + int (*VTAPI_TextToFile)(VTAPI_HANDLE, void*, int, int); + VTAPI_ERRS_INFO* (*VTAPI_GetLastErrorInfo)(VTAPI_HANDLE); + void (*VTAPI_ReleaseHandle)(VTAPI_HANDLE); + int (*VTAPI_UnloadEngine)(VTAPI_ENGINE_HANDLE); + void (*VTAPI_Exit)(); + + // ReadSpeaker API handler + VTAPI_HANDLE hVTAPI; + VTAPI_ENGINE_HANDLE hEngine; +}; + + +#endif // VT_HANDLER_H_ diff --git a/3rdparty/voice_text/include/vt_jpn.h b/3rdparty/voice_text/include/vt_jpn.h new file mode 100644 index 000000000..6a8000d23 --- /dev/null +++ b/3rdparty/voice_text/include/vt_jpn.h @@ -0,0 +1,294 @@ +/* +* Copyright (c) 2004 Voiceware Co., Ltd., All rights reserved. +* +* VoiceText +*/ + +#ifndef VT_JPN_H +#define VT_JPN_H + +#if defined(__cplusplus) + extern "C" { +#endif + +#if !defined(VT_BASIC_DEFINE) + #if defined(WIN32) + #if !defined(_DllMode) + #define _DllMode(_type_) __declspec( dllimport ) _type_ + #endif + #else + #if !defined(_DllMode) + #define _DllMode(_type_) extern _type_ + #endif + typedef int HWND; + #endif +#endif + + + + +/*===========================================================================*/ +/* Text format (used in texttype) */ +#if !defined(VT_BASIC_DEFINE) + #if !defined(VT_TEXT_FMT_PLAIN_TEXT) + #define VT_TEXT_FMT_PLAIN_TEXT 0 + #endif + + #if !defined(VT_TEXT_FMT_JEITA) + #define VT_TEXT_FMT_JEITA 4 + #endif + + #if !defined(VT_TEXT_FMT_JEITA_PLUS) + #define VT_TEXT_FMT_JEITA_PLUS 6 + #endif +#endif + + + +/*===========================================================================*/ +/* LOAD & UNLOAD */ +#if !defined(VT_BASIC_DEFINE) + /* Return Value */ + #define VT_LOADTTS_SUCCESS 0 + #define VT_LOADTTS_ERROR_CONFLICT_DBPATH 1 + #define VT_LOADTTS_ERROR_TTS_STRUCTURE 2 + #define VT_LOADTTS_ERROR_TAGGER 3 + 
#define VT_LOADTTS_ERROR_BREAK_INDEX 4 + #define VT_LOADTTS_ERROR_TPP_DICT 5 + #define VT_LOADTTS_ERROR_TABLE 6 + #define VT_LOADTTS_ERROR_UNIT_INDEX 7 + #define VT_LOADTTS_ERROR_PROSODY_DB 8 + #define VT_LOADTTS_ERROR_PCM_DB 9 + #define VT_LOADTTS_ERROR_PM_DB 10 + #define VT_LOADTTS_ERROR_UNKNOWN 11 +#endif + +_DllMode(short) VT_LOADTTS_JPN(HWND hWnd, int nSpeakerID, char *db_path, char *licensefile); +_DllMode(void) VT_UNLOADTTS_JPN(int nSpeakerID); + + + +/*===========================================================================*/ +/* Load/Unload UserDict API */ +#if !defined(VT_BASIC_DEFINE) + /* Return Value */ + #define VT_LOAD_USERDICT_SUCCESS (1) + #define VT_LOAD_USERDICT_ERROR_INVALID_INDEX (-1) + #define VT_LOAD_USERDICT_ERROR_INDEX_BUSY (-2) + #define VT_LOAD_USERDICT_ERROR_LOAD_FAIL (-3) + #define VT_LOAD_USERDICT_ERROR_UNKNOWN (-4) + + #define VT_UNLOAD_USERDICT_SUCCESS (1) + #define VT_UNLOAD_USERDICT_ERROR_NULL_INDEX (-1) + #define VT_UNLOAD_USERDICT_ERROR_INVALID_INDEX (-2) + #define VT_UNLOAD_USERDICT_ERROR_UNKNOWN (-3) +#endif + +_DllMode(short) VT_LOAD_UserDict_JPN(int dictidx, char *filename); +_DllMode(short) VT_UNLOAD_UserDict_JPN(int dictidx); + + + +/*===========================================================================*/ +/* SOUND CARD API */ +#if !defined(VT_BASIC_DEFINE) + /* Return Value */ + #define VT_PLAY_API_SUCCESS (1) + #define VT_PLAY_API_ERROR_CREATE_THREAD (-1) + #define VT_PLAY_API_ERROR_NULL_TEXT (-2) + #define VT_PLAY_API_ERROR_EMPTY_TEXT (-3) + #define VT_PLAY_API_ERROR_DB_NOT_LOADED (-4) + #define VT_PLAY_API_ERROR_INITPLAY (-5) + #define VT_PLAY_API_ERROR_UNKNOWN (-6) +#endif + +#if defined(WIN32) + _DllMode(short) VT_PLAYTTS_JPN(HWND hcaller, UINT umsg, char *text_buff, int nSpeakerID, int pitch, int speed, int volume, int pause, int dictidx, int texttype); + _DllMode(void) VT_STOPTTS_JPN(void); + _DllMode(void) VT_RESTARTTTS_JPN(void); + _DllMode(void) VT_PAUSETTS_JPN(void); +#endif + + + 
+/*===========================================================================*/ +/* FILE WRITE API */ +#if !defined(VT_BASIC_DEFINE) + /* Return Value */ + #define VT_FILE_API_SUCCESS (1) + #define VT_FILE_API_ERROR_INVALID_FORMAT (-1) + #define VT_FILE_API_ERROR_CREATE_THREAD (-2) + #define VT_FILE_API_ERROR_NULL_TEXT (-3) + #define VT_FILE_API_ERROR_EMPTY_TEXT (-4) + #define VT_FILE_API_ERROR_DB_NOT_LOADED (-5) + #define VT_FILE_API_ERROR_OUT_FILE_OPEN (-6) + #define VT_FILE_API_ERROR_UNKNOWN (-7) + + /* Audio Format */ + enum { + VT_FILE_API_FMT_S16PCM = 0, + VT_FILE_API_FMT_ALAW = 1, + VT_FILE_API_FMT_MULAW = 2, + VT_FILE_API_FMT_DADPCM = 3, + VT_FILE_API_FMT_S16PCM_WAVE = 4, + VT_FILE_API_FMT_U08PCM_WAVE = 5, + // VT_FILE_API_FMT_IMA_WAVE = 6, /* not supported! */ + VT_FILE_API_FMT_ALAW_WAVE = 7, + VT_FILE_API_FMT_MULAW_WAVE = 8, + VT_FILE_API_FMT_MULAW_AU = 9, + }; +#endif + +_DllMode(short) VT_TextToFile_JPN(int fmt, char *tts_text, char *filename, int nSpeakerID, int pitch, int speed, int volume, int pause, int dictidx, int texttype); + + + +/*===========================================================================*/ +/* BUFFER I/O API */ +#if !defined(VT_BASIC_DEFINE) + /* Return Value */ + #define VT_BUFFER_API_PROCESSING (0) + #define VT_BUFFER_API_DONE (1) + #define VT_BUFFER_API_ERROR_INVALID_FORMAT (-1) + #define VT_BUFFER_API_ERROR_CREATE_THREAD (-2) + #define VT_BUFFER_API_ERROR_NULL_TEXT (-3) + #define VT_BUFFER_API_ERROR_EMPTY_TEXT (-4) + #define VT_BUFFER_API_ERROR_NULL_BUFFER (-5) + #define VT_BUFFER_API_ERROR_DB_NOT_LOADED (-6) + #define VT_BUFFER_API_ERROR_THREAD_BUSY (-7) + #define VT_BUFFER_API_ERROR_ABNORMAL_CONDITION (-8) + #define VT_BUFFER_API_ERROR_UNKNOWN (-9) + + /* Audio Format */ + enum { + VT_BUFFER_API_FMT_S16PCM = VT_FILE_API_FMT_S16PCM, + VT_BUFFER_API_FMT_ALAW = VT_FILE_API_FMT_ALAW, + VT_BUFFER_API_FMT_MULAW = VT_FILE_API_FMT_MULAW, + VT_BUFFER_API_FMT_DADPCM = VT_FILE_API_FMT_DADPCM, + }; +#endif + +_DllMode(int) 
VT_TextToBuffer_JPN(int fmt, char *tts_text, char *output_buff, int *output_len, int flag, int nThreadID, int nSpeakerID, int pitch, int speed, int volume, int pause, int dictidx, int texttype); + + + +/*===========================================================================*/ +/* CONFIGURE API */ +_DllMode(void) VT_SetPitchSpeedVolumePause_JPN(int pitch, int speed, int volume, int pause, int nSpeakerID); +_DllMode(void) VT_SetCommaPause_JPN(int pause, int nSpeakerID); + + + +/*=========================================================================== +SYNOPSIS + int VT_GetTTSInfo_JPN(int request, char *licensefile, void *value, int valuesize); + +PARAMETERS + request + VT_BUILD_DATE (char*): library build date + VT_VERIFY_CODE (int *): verification result(licensefile is required) + VT_MAX_CHANNEL (int *): max no. of possible channels(licensefile is required) + VT_DB_DIRECTORY (char*): default root DB fold name + VT_LOAD_SUCCESS_CODE (int *): return value, when db loading is success + VT_MAX_SPEAKER (int *): max no. of speaker ( >= 0 ) + VT_DEF_SPEAKER (int *): default speaker id ( >= 0 && < max no. of speaker ) + VT_CODEPAGE (int *): supported ansi codepage (WIN32 only) + VT_DB_ACCESS_MODE (int *): file or ram i/o ? (file:0, ram:1) + VT_FIXED_POINT_SUPPORT (int *): fixed point simulated or not? 
(float:0, fixed:1) + VT_SAMPLING_FREQUENCY (int *): current sampling frequency (8000, 11025, 16000 ) + VT_MAX_PITCH_RATE (int *): max value of pitch rate (%) + VT_DEF_PITCH_RATE (int *): default value of pitch rate (%) + VT_MIN_PITCH_RATE (int *): min value of pitch rate (%) + VT_MAX_SPEED_RATE (int *): max value of speed rate (%) + VT_DEF_SPEED_RATE (int *): default value of speed rate (%) + VT_MIN_SPEED_RATE (int *): min value of speed rate (%) + VT_MAX_VOLUME (int *): max value of volume (%) + VT_DEF_VOLUME (int *): default value of volume (%) + VT_MIN_VOLUME (int *): min value of volume (%) + VT_MAX_SENT_PAUSE (int *): max value of sentence pause (msec) + VT_DEF_SENT_PAUSE (int *): default value of sentence pause (msec) + VT_MIN_SENT_PAUSE (int *): min value of sentence pause (msec) + VT_DB_BUILD_DATE (char*): embedded db build date (for embedded engine only) + VT_MAX_COMMA_PAUSE (int *): max value of comma pause (msec) + VT_DEF_COMMA_PAUSE (int *): default value of comma pause (msec) + VT_MIN_COMMA_PAUSE (int *): min value of comma pause (msec) + + licensefile + if NULL, use default licensefile. + + value + VT_DB_DIRECTORY and VT_BUILD_DATE requests are (char *), and any other request is (int *) + + valuesize + maximum length of value in characters + +RETURN VALUE + On success, zero(VT_INFO_SUCCESS) is returned. 
+ On error, the return value depends on the operation: + VT_INFO_ERROR_NOT_SUPPORTED_REQUEST (1) + VT_INFO_ERROR_INVALID_REQUEST (2) + VT_INFO_ERROR_NULL_VALUE (3) + VT_INFO_ERROR_SHORT_LENGTH_VALUE (4) + VT_INFO_ERROR_UNKNOWN (5) +===========================================================================*/ + +#if !defined(VT_BASIC_DEFINE) + /* Return Value */ + #define VT_INFO_SUCCESS (0) + #define VT_INFO_ERROR_NOT_SUPPORTED_REQUEST (1) + #define VT_INFO_ERROR_INVALID_REQUEST (2) + #define VT_INFO_ERROR_NULL_VALUE (3) + #define VT_INFO_ERROR_SHORT_LENGTH_VALUE (4) + #define VT_INFO_ERROR_UNKNOWN (5) + + /* Request */ + enum + { + VT_BUILD_DATE = 0, + VT_VERIFY_CODE = 1, + VT_MAX_CHANNEL = 2, + VT_DB_DIRECTORY = 3, + VT_LOAD_SUCCESS_CODE = 4, + VT_MAX_SPEAKER = 5, + VT_DEF_SPEAKER = 6, + VT_CODEPAGE = 7, + VT_DB_ACCESS_MODE = 8, + VT_FIXED_POINT_SUPPORT = 9, + VT_SAMPLING_FREQUENCY = 10, + VT_MAX_PITCH_RATE = 11, + VT_DEF_PITCH_RATE = 12, + VT_MIN_PITCH_RATE = 13, + VT_MAX_SPEED_RATE = 14, + VT_DEF_SPEED_RATE = 15, + VT_MIN_SPEED_RATE = 16, + VT_MAX_VOLUME = 17, + VT_DEF_VOLUME = 18, + VT_MIN_VOLUME = 19, + VT_MAX_SENT_PAUSE = 20, + VT_DEF_SENT_PAUSE = 21, + VT_MIN_SENT_PAUSE = 22, + VT_DB_BUILD_DATE = 23, + VT_MAX_COMMA_PAUSE = 24, + VT_DEF_COMMA_PAUSE = 25, + VT_MIN_COMMA_PAUSE = 26, + VT_MAX_SYMBOL_OPEN_PAUSE = 27, + VT_DEF_SYMBOL_OPEN_PAUSE = 28, + VT_MIN_SYMBOL_OPEN_PAUSE = 29, + VT_MAX_SYMBOL_CLOSE_PAUSE = 30, + VT_DEF_SYMBOL_CLOSE_PAUSE = 31, + VT_MIN_SYMBOL_CLOSE_PAUSE = 32, + }; +#endif + +_DllMode(int) VT_GetTTSInfo_JPN(int request, char *licensefile, void *value, int valuesize); + + +#if !defined(VT_BASIC_DEFINE) + #define VT_BASIC_DEFINE +#endif + +#if defined(__cplusplus) + } +#endif + +#endif /* VT_JPN_H */ diff --git a/3rdparty/voice_text/include/vtapi.h b/3rdparty/voice_text/include/vtapi.h new file mode 100644 index 000000000..9996df717 --- /dev/null +++ b/3rdparty/voice_text/include/vtapi.h @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2000-2019 
ReadSpeaker + * All Rights Reserved. + */ + +#ifndef _VTAPI_H_ +#define _VTAPI_H_ + +#if defined(__cplusplus) +extern "C" +{ +#endif + + +#if !defined(VTAPI_decl) +# if defined(WIN32) || defined(WINCE) +# define VTAPI_decl __declspec( dllexport ) +# define _CRTDBG_MAP_ALLOC +# include +# if defined(WINCE) +# else +# include +# endif + #else + #define VTAPI_decl extern + #endif +#endif + + +#if defined(WIN32) || defined(WINCE) +#pragma warning(disable:4996) +#include +#else +#if !defined(HWND_DEFINE) +#define HWND_DEFINE +typedef int HWND; +#endif +#if !defined(DWORD_DEFINE) +#define DWORD_DEFINE +typedef unsigned long DWORD; +#endif +#if !defined(UINT_DEFINE) +#define UINT_DEFINE +typedef unsigned int UINT; +#endif +#endif + +typedef struct VOICE_INFO* VTAPI_HANDLE; +typedef struct ENGINE_INFO* VTAPI_ENGINE_HANDLE; +#if 1 //defined(USE_NEW_USERDICT_MANAGER) +typedef struct USERDICT_INFO* VTAPI_USERDICT_HANDLE; +#endif + +#define VTAPI_VERSION "4.3.0.2" + +typedef enum OUTPUTFORMAT +{ + FORMAT_16PCM = 0, + FORMAT_8PCM = 1, + FORMAT_ALAW_PCM = 2, + FORMAT_MULAW_PCM = 3, + FORMAT_ADPCM_PCM = 4, + FORMAT_16PCM_WAV = 5, + FORMAT_8PCM_WAV = 6, + FORMAT_ALAW_WAV = 7, + FORMAT_MULAW_WAV = 8, + + FORMAT_MAX +} Output_Format; + +typedef enum TEXTTYPES +{ + TEXT_FORMAT_DEFAULT = 0, // multibyte + TEXT_FORMAT_UTF8 = 2 +} Text_Types; + +typedef enum AUDIOEVENTTYPES +{ + AUDIO_EVENT_NO_EVENTS = 0, + AUDIO_EVENT_START_INPUT_STREAM = 1, + AUDIO_EVENT_END_INPUT_STREAM = 2, + AUDIO_EVENT_VOICE_CHANGE = 4, + AUDIO_EVENT_TTS_MARK = 8, + AUDIO_EVENT_WORD_BOUNDARY = 16, + AUDIO_EVENT_SENTENCE_BOUNDARY = 32, + AUDIO_EVENT_BUFFER = 64, + AUDIO_EVENT_BUFFER_END = 128, + AUDIO_EVENT_FAILURE = 256, + AUDIO_EVENT_READY_PLAY = 512, + AUDIO_EVENT_ALL = 1023 +} Audio_Event_Types; + +typedef enum ATTRFLAGS +{ + ATTR_PITCH = 0, + ATTR_SPEED = 1, + ATTR_VOLUME = 2, + ATTR_PAUSE = 3, + ATTR_COMMAPAUSE = 5 +} Attr_Flags; + +typedef enum PRIORITYTYPES +{ + PRIOR_DEFAULT = 0, + PRIOR_OVERLAP = 1, + 
PRIOR_ALERT = 2, +} Priority_Types; + +#define MAX_STR_LEN 256 +#define MAX_ERR_MSG 512 + +typedef struct _audio_format +{ + int nChannel; + int nBits; + int nSampling; +} VTAPI_AUDIO_FORMAT; + +typedef struct _audio_info +{ + VTAPI_AUDIO_FORMAT audiofmt; + int nDevNo; +} VTAPI_AUDIO_INFO; + +typedef struct +{ + int nId; + int nTextStartOffset; // Text position of Word or Sentence or Mark + int nTextEndOffset; // Text position of Word or Sentence or Mark + char szStrValue[MAX_STR_LEN]; // Name of Mark + + int nOffsetInStream; // Buffer position of Mark + int nOffsetInBuffer; // Buffer position of Mark + + int nError; // Error // VTAPI4 Version over 4.2.2.x + + int nBufferSize; // Buffer size of Word or FrameBuffer + char* pFrameBuffer; // FrameBuffer +} VTAPI_AUDIO_EVENT; + + +typedef struct _vtapi_errs +{ + int nErr; + char szMsg[MAX_ERR_MSG]; +} VTAPI_ERRS_INFO; + + +#define VTAPI_SUCCESS 0 + +#define VTAPI_INVALID_PARAM_ERROR -1 +#define VTAPI_INVALID_VALUE_ERROR -2 +#define VTAPI_INCORRECT_SET_ERROR -3 +#define VTAPI_INVALID_ID_OR_THREAD_ERROR -4 +#define VTAPI_USERDICT_ERROR -5 +#define VTAPI_SSML_SYNTAX_ERROR -6 +#define VTAPI_MEMORY_ALLOC_ERROR -7 +#define VTAPI_NOT_OPENBUFFER_ERROR -8 +#define VTAPI_VTSSML_INVALID_ERROR -9 +#define VTAPI_INVALID_ENGINE_ERROR -10 +#define VTAPI_OVER_CHANNEL_ERROR -11 +#define VTAPI_NODATA_ERROR -12 +#define VTAPI_NOT_SUPPORTED_ERROR -13 + +#define VTAPI_DLL_VTPLAY_NOT_LINKING -21 +#define VTAPI_DLL_VTSSML_NOT_LINKING -22 +#define VTAPI_DLL_VTSAVE_NOT_LINKING -23 +#define VTAPI_DLL_VTCONV_NOT_LINKING -24 +#define VTAPI_DLL_VTEFFECT_NOT_LINKING -25 + + + +// API for VTAPI + +// VTAPI_INIT Return Value : You can check by XOR the lower values. 
+// If all Library links succeed, 0 or 0x11000 +#define VTAPI_LIB_VTPLAY_NOT_LINKING 0x1 +#define VTAPI_LIB_VTSAVE_NOT_LINKING 0x10 +#define VTAPI_LIB_VTCONV_NOT_LINKING 0x100 +#define VTAPI_LIB_VTSSML_NOT_LINKING 0x1000 + +VTAPI_decl int VTAPI_Init(char *szWorkingDir); // It is automatically Initiated when using other functions. +VTAPI_decl void VTAPI_Exit(); +VTAPI_decl void VTAPI_SetLicenseFolder(char *szPath); + +// API for VTAPI-Voice +VTAPI_decl VTAPI_HANDLE VTAPI_CreateHandle(); +VTAPI_decl void VTAPI_ReleaseHandle(VTAPI_HANDLE hVTAPI); + + +VTAPI_decl int VTAPI_SetEngineHandle(VTAPI_HANDLE hVTAPI, VTAPI_ENGINE_HANDLE hEngine); +VTAPI_decl VTAPI_ENGINE_HANDLE VTAPI_GetEngineHandle(VTAPI_HANDLE hVTAPI); + +VTAPI_decl int VTAPI_SetOutputAudio(VTAPI_HANDLE hVTAPI, VTAPI_AUDIO_INFO stAudioInfo); +VTAPI_decl int VTAPI_SetOutputFile(VTAPI_HANDLE hVTAPI, char *pszFileName, int nOutputFormat); +VTAPI_decl int VTAPI_SetOutputBuffer(VTAPI_HANDLE hVTAPI, int nOutputFormat); + +VTAPI_decl int VTAPI_SetAttr(VTAPI_HANDLE hVTAPI, int nFlag, int nValue); +VTAPI_decl int VTAPI_GetAttr(VTAPI_HANDLE hVTAPI, int nFlag, int *nValue); + + +#if !defined(__VTAPI_DEFINED_CALLBACK_FUNCTION__) +#define __VTAPI_DEFINED_CALLBACK_FUNCTION__ +typedef void (*LPPEventProc)(VTAPI_HANDLE hVTAPI, int nEventType, void *pParam); +#endif + +VTAPI_decl int VTAPI_SetEvent(VTAPI_HANDLE hVTAPI, void *pEventProc, int nEventTypes); + +#if defined(WIN32) || defined(WINCE) +// API for Audio Output (VTAPI_Speak - File Output included) +VTAPI_decl int VTAPI_Speak(VTAPI_HANDLE hVTAPI, void *pszText, int nTextLength, int nTextType); +VTAPI_decl int VTAPI_Stop(VTAPI_HANDLE hVTAPI); +VTAPI_decl int VTAPI_Pause(VTAPI_HANDLE hVTAPI); +VTAPI_decl int VTAPI_Resume(VTAPI_HANDLE hVTAPI); +VTAPI_decl int VTAPI_SetPriority(VTAPI_HANDLE hVTAPI, int nPrior); +#endif + +// API for FileSave +VTAPI_decl int VTAPI_TextToFile(VTAPI_HANDLE hVTAPI, void *pszText, int nTextLength, int nTextType); + +// API for Buffer +typedef 
enum BUFSIZE_VALUE +{ + VAL_DEFAULT = -1, + VAL_ONEBUF = 0, +} BUFSIZE_VALUE; +VTAPI_decl int VTAPI_BufferOpen(VTAPI_HANDLE hVTAPI, void *pszText, int nTextLength, int nTextType, int nBufferSize); +VTAPI_decl int VTAPI_GetBuffer(VTAPI_HANDLE hVTAPI, char** pFrameBuffer); +VTAPI_decl void VTAPI_FreeBuffer(char* ptr); +VTAPI_decl int VTAPI_BufferClose(VTAPI_HANDLE hVTAPI); + +VTAPI_decl VTAPI_ERRS_INFO *VTAPI_GetLastErrorInfo(VTAPI_HANDLE hVTAPI); + +VTAPI_decl int VTAPI_SetUserData(VTAPI_HANDLE hVTAPI, void *ptr); +VTAPI_decl void *VTAPI_GetUserData(VTAPI_HANDLE hVTAPI); + +VTAPI_decl void VTAPI_GetVersion(char output[100]); + +// API for tts engine +VTAPI_decl VTAPI_ENGINE_HANDLE VTAPI_GetEngine(char *pszSpeaker, char *pszType) ; +// support for not installed engines +VTAPI_decl VTAPI_ENGINE_HANDLE VTAPI_AddNewEngineInfo(char *pszSpeaker, char *pszType, int nSpeakerID, char *pszDBPath, char *pszLang, char *pszGender, int nCodePage, char *pszISOCode, char *pszVendor, int nSamplingRate); + +VTAPI_decl int VTAPI_GetEngineInfoFieldEx(VTAPI_ENGINE_HANDLE hEngine, char *pszSpeaker, char *pszType,char *pszDllPath, char *pszDBPath, char *pszLang, char *pszGender, char *pszVersion, int *nSampling, int *nChannel, char *pszISOCode); + +VTAPI_decl int VTAPI_LoadEngine(VTAPI_ENGINE_HANDLE hEngine); +VTAPI_decl int VTAPI_UnloadEngine(VTAPI_ENGINE_HANDLE hEngine); +VTAPI_decl int VTAPI_GetEngineVersion(VTAPI_ENGINE_HANDLE hEngine, char output[100]); + +#if 1 //defined(USE_NEW_USERDICT_MANAGER) +VTAPI_decl VTAPI_USERDICT_HANDLE VTAPI_CreateUserDictHandle(char* pszDicFile); +VTAPI_decl void VTAPI_ReleaseUserDictHandle(VTAPI_USERDICT_HANDLE hUSERDICT); +VTAPI_decl int VTAPI_SetUserDictHandle(VTAPI_HANDLE hVTAPI, VTAPI_USERDICT_HANDLE hUSERDICT); +VTAPI_decl VTAPI_USERDICT_HANDLE VTAPI_GetUserDictHandle(VTAPI_HANDLE hVTAPI); +VTAPI_decl int VTAPI_GetUserDictInfo(VTAPI_USERDICT_HANDLE hUSERDICT, char* pszDicFile); +#endif + +#if defined(__cplusplus) +} +#endif + +#endif /* _VTAPI_H_ 
*/ diff --git a/3rdparty/voice_text/launch/voice_text.launch b/3rdparty/voice_text/launch/voice_text.launch index 95e7d781f..54413b5a5 100644 --- a/3rdparty/voice_text/launch/voice_text.launch +++ b/3rdparty/voice_text/launch/voice_text.launch @@ -14,7 +14,8 @@ machine="$(arg voice_text_machine)"> - # db_path: /usr/vt/sayaka/M16 # Commented out to support other speakers than SAYAKA + db_path: /usr/vt/sayaka/M16 # Default speaker is SAYAKA; change this to use another speaker + license_path: /usr/vt/sayaka/M16/data-common/verify/verification.txt pitch: 100 speed: 100 volume: 100 diff --git a/3rdparty/voice_text/package.xml b/3rdparty/voice_text/package.xml index 2fd9e303a..b9df97f21 100644 --- a/3rdparty/voice_text/package.xml +++ b/3rdparty/voice_text/package.xml @@ -1,14 +1,16 @@ voice_text 2.1.24 - voice_text (www.voicetext.jp) + The ROS package for VoiceText, ReadSpeaker (https://readspeaker.jp/) Kei Okada + Yoshiki Obinata HOYA License http://ros.org/wiki/voice_text Kei Okada + Yoshiki Obinata catkin roscpp diff --git a/3rdparty/voice_text/src/dummy/vt_dummy.cpp b/3rdparty/voice_text/src/dummy/vt_dummy.cpp deleted file mode 100644 index 0add52d66..000000000 --- a/3rdparty/voice_text/src/dummy/vt_dummy.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include "vt_dummy.h" -#include - -void VT_UNLOADTTS_JPN(int) {}; -int VT_LOADTTS_JPN(int, int, char*, char*) { - fprintf(stderr, "LOADING DUMMY VT_LOADTTS_JPN\n"); - fprintf(stderr, "You need to install voice_text application\n"); - fprintf(stderr, "exiting....\n"); - return -1; -}; -int VT_TextToFile_JPN(int, char *, char *, int, int, int, int, int, int, int) {}; - -void VT_GetTTSInfo_JPN(int , char *, void *, int) {}; diff --git a/3rdparty/voice_text/src/dummy/vt_dummy.h b/3rdparty/voice_text/src/dummy/vt_dummy.h deleted file mode 100644 index 3d7742967..000000000 --- a/3rdparty/voice_text/src/dummy/vt_dummy.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef __VT_DUMMY_H__ -#define __VT_DUMMY_H__ -extern "C" { - void VT_UNLOADTTS_JPN(int); - int 
VT_LOADTTS_JPN(int, int, char*, char*); - int VT_TextToFile_JPN(int, char *, char *, int, int, int, int, int, int, int); - void VT_GetTTSInfo_JPN(int , char *, void *, int); - int VT_LOADTTS_SUCCESS = 0; - int VT_FILE_API_SUCCESS = 0; - int VT_FILE_API_FMT_S16PCM_WAVE = 4; // https://pastebin.com/9LeCr2HN -} - -enum - { - VT_BUILD_DATE = 0, - VT_VERIFY_CODE = 1, - VT_MAX_CHANNEL = 2, - VT_DB_DIRECTORY = 3, - VT_LOAD_SUCCESS_CODE = 4, - VT_MAX_SPEAKER = 5, - VT_DEF_SPEAKER = 6, - VT_CODEPAGE = 7, - VT_DB_ACCESS_MODE = 8, - VT_FIXED_POINT_SUPPORT = 9, - VT_SAMPLING_FREQUENCY = 10, - VT_MAX_PITCH_RATE = 11, - VT_DEF_PITCH_RATE = 12, - VT_MIN_PITCH_RATE = 13, - VT_MAX_SPEED_RATE = 14, - VT_DEF_SPEED_RATE = 15, - VT_MIN_SPEED_RATE = 16, - VT_MAX_VOLUME = 17, - VT_DEF_VOLUME = 18, - VT_MIN_VOLUME = 19, - VT_MAX_SENT_PAUSE = 20, - VT_DEF_SENT_PAUSE = 21, - VT_MIN_SENT_PAUSE = 22, - VT_DB_BUILD_DATE = 23, - VT_MAX_COMMA_PAUSE = 24, - VT_DEF_COMMA_PAUSE = 25, - VT_MIN_COMMA_PAUSE = 26, - VT_MAX_SYMBOL_OPEN_PAUSE = 27, - VT_DEF_SYMBOL_OPEN_PAUSE = 28, - VT_MIN_SYMBOL_OPEN_PAUSE = 29, - VT_MAX_SYMBOL_CLOSE_PAUSE = 30, - VT_DEF_SYMBOL_CLOSE_PAUSE = 31, - VT_MIN_SYMBOL_CLOSE_PAUSE = 32, - }; - -#endif //__VT_DUMMY_H__ diff --git a/3rdparty/voice_text/src/voice_text.cpp b/3rdparty/voice_text/src/voice_text.cpp new file mode 100644 index 000000000..7a1ea5165 --- /dev/null +++ b/3rdparty/voice_text/src/voice_text.cpp @@ -0,0 +1,86 @@ +/* + * voice_text.cpp + * Author: Yuki Furuta , Yoshiki Obinata + */ + +#include + +#include +#include + +// ROS +#include +#include +#include +#include + +#include "vt_handler.h" + +namespace fs = boost::filesystem; + +class VoiceText { +public: + typedef voice_text::VoiceTextConfig Config; + + VoiceText() : nh_(), pnh_("~"), db_path_(""), license_path_(""), dyn_srv_(pnh_){ + pnh_.param("db_path", db_path_, ""); + pnh_.setParam("db_path", db_path_); // for backward compatibility (db_path is usually set previously) + pnh_.param("license_path", 
license_path_, ""); + + dynamic_reconfigure::Server::CallbackType f = + boost::bind(&VoiceText::config_callback, this, _1, _2); + dyn_srv_.setCallback(f); + + h_vt_ = (new VTHandler(license_path_, db_path_)); + + srv_ = nh_.advertiseService("text_to_speech", &VoiceText::text_to_speech, this); + ROS_INFO("Advertised service text_to_speech\n"); + } + + ~VoiceText(){ + delete h_vt_; + } + + void config_callback(Config &config, uint32_t level) { + boost::mutex::scoped_lock lock(mutex_); + config_ = config; + } + + bool text_to_speech(voice_text::TextToSpeech::Request &req, + voice_text::TextToSpeech::Response &res) { + boost::mutex::scoped_lock lock(mutex_); + // load text from file + if (!fs::exists(fs::path(req.text_path))) { + ROS_ERROR_STREAM("text file " << req.text_path << " not found"); + res.ok = false; + return true; + } + std::ifstream ifs(req.text_path.c_str()); + std::string text = "", line = ""; + while (ifs && std::getline(ifs, line)) { + text += line; + } + res.ok = h_vt_->VTH_TextToFile(config_.pitch, config_.speed, config_.volume, config_.pause, + text, req.wave_path); + return true; + } + + ros::NodeHandle nh_, pnh_; + boost::mutex mutex_; + dynamic_reconfigure::Server dyn_srv_; + Config config_; + ros::ServiceServer srv_; + bool initialized_; + std::string db_path_, license_path_; + VTHandler* h_vt_; +}; + +int main(int argc, char** argv) { + ros::init(argc, argv, "voice_text"); + + VoiceText vt; + + ros::spin(); + + return 0; +} diff --git a/3rdparty/voice_text/src/voice_text.cpp.in b/3rdparty/voice_text/src/voice_text.cpp.in deleted file mode 100644 index f52955098..000000000 --- a/3rdparty/voice_text/src/voice_text.cpp.in +++ /dev/null @@ -1,178 +0,0 @@ -/* - * voice_text_server.cpp - * Author: Yuki Furuta - */ - -#include -#include -#include -#include - -#include -#include - -// ROS -#include -#include -#include -#include - -// VoiceText -#ifdef USE_DUMMY_INCLUDE -#include "dummy/vt_dummy.h" -#else -#include "@VT_ROOT@/inc/vt_jpn.h" -#endif - 
-#define PATH_MAX 1024
-
-namespace fs = boost::filesystem;
-
-
-class VoiceText {
-public:
-  typedef voice_text::VoiceTextConfig Config;
-
-  VoiceText() : nh_(), pnh_("~"), db_path_(""), license_path_(""), dyn_srv_(pnh_) {
-    pnh_.param("db_path", db_path_, "@VT_ROOT@");
-    pnh_.setParam("db_path", db_path_); // for backward compatibility (db_path is usually set previously)
-    pnh_.param("license_path", license_path_, "");
-
-    dynamic_reconfigure::Server::CallbackType f =
-      boost::bind(&VoiceText::config_callback, this, _1, _2);
-    dyn_srv_.setCallback(f);
-  }
-
-  ~VoiceText() {
-    if (initialized_) {
-      VT_UNLOADTTS_JPN(-1);
-    }
-  }
-
-  void config_callback(Config &config, uint32_t level) {
-    boost::mutex::scoped_lock lock(mutex_);
-    config_ = config;
-  }
-
-  bool initialize() {
-    // initialize voice text
-    int ret = -1;
-    char* db_path_char = (char*)calloc(std::strlen(db_path_.c_str())+1, sizeof(char));
-    std::strcpy(db_path_char, db_path_.c_str());
-    char* license_path_char = NULL;
-    if (!license_path_.empty()) {
-      license_path_char = (char*)calloc(std::strlen(license_path_.c_str())+1, sizeof(char));
-      std::strcpy(license_path_char, license_path_.c_str());
-    }
-    ret = VT_LOADTTS_JPN((int)NULL, -1, db_path_char, license_path_char);
-    free(db_path_char);
-    if (!license_path_.empty()) free(license_path_char);
-    if (ret != VT_LOADTTS_SUCCESS) {
-      ROS_FATAL("Failed to load TTS engine (code %d)", ret);
-      if (ret == -1) {
-        ROS_FATAL("You must install voice_text library before building this library");
-      }
-      return false;
-    }
-    VT_GetTTSInfo_JPN(VT_VERIFY_CODE, NULL, &ret, sizeof(int));
-    if (ret != 0) {
-      ROS_FATAL_STREAM("Verification failed (VT_VERIFY_CODE " << ret << ")");
-      return false;
-    }
-
-    // Print voice text info
-    char szTmp[PATH_MAX];
-    char szTmp2[PATH_MAX];
-    VT_GetTTSInfo_JPN(VT_BUILD_DATE, NULL, szTmp, PATH_MAX);
-    sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_BUILD_DATE = %s\n", szTmp);
-    ROS_INFO_STREAM(szTmp2);
-    VT_GetTTSInfo_JPN(VT_VERIFY_CODE, NULL, &ret, sizeof(int));
-    sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_VERIFY_CODE = %d\n", ret);
-    ROS_INFO_STREAM(szTmp2);
-    VT_GetTTSInfo_JPN(VT_MAX_CHANNEL, NULL, &ret, sizeof(int));
-    sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_MAX_CHANNEL = %d\n", ret);
-    ROS_INFO_STREAM(szTmp2);
-    VT_GetTTSInfo_JPN(VT_DB_DIRECTORY, NULL, szTmp, PATH_MAX);
-    sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_DB_DIRECTORY = %s\n", szTmp);
-    ROS_INFO_STREAM(szTmp2);
-    VT_GetTTSInfo_JPN(VT_DEF_SPEAKER, NULL, &ret, sizeof(int));
-    sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_DEF_SPEAKER = %d\n", ret);
-    ROS_INFO_STREAM(szTmp2);
-    VT_GetTTSInfo_JPN(VT_CODEPAGE, NULL, &ret, sizeof(int));
-    sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_CODEPAGE = %d\n", ret);
-    ROS_INFO_STREAM(szTmp2);
-
-    // advertise service
-    srv_ = nh_.advertiseService("text_to_speech", &VoiceText::text_to_speech, this);
-
-    ROS_INFO_STREAM("Advertised service text_to_speech");
-
-    return true;
-  }
-
-  bool text_to_speech(voice_text::TextToSpeech::Request &req,
-                      voice_text::TextToSpeech::Response &res) {
-    boost::mutex::scoped_lock lock(mutex_);
-    // load text from file
-    if (!fs::exists(fs::path(req.text_path))) {
-      ROS_ERROR_STREAM("text file " << req.text_path << " not found");
-      res.ok = false;
-      return true;
-    }
-    std::ifstream ifs(req.text_path.c_str());
-    std::string text = "", line = "";
-    while (ifs && std::getline(ifs, line)) {
-      text += line;
-    }
-    char* text_char = (char*)calloc(std::strlen(text.c_str())+1, sizeof(char));
-    std::strcpy(text_char, text.c_str());
-
-    char* wave_char = (char*)calloc(std::strlen(req.wave_path.c_str())+1, sizeof(char));
-    std::strcpy(wave_char, req.wave_path.c_str());
-
-    int ret = VT_TextToFile_JPN(VT_FILE_API_FMT_S16PCM_WAVE,
-                                text_char,
-                                wave_char,
-                                -1,
-                                config_.pitch,
-                                config_.speed,
-                                config_.volume,
-                                config_.pause,
-                                -1, -1);
-
-    ROS_INFO_STREAM("voice text wave file is outputted to " << wave_char);
-
-    free(text_char);
-    free(wave_char);
-
-    if (ret != VT_FILE_API_SUCCESS) {
-      ROS_ERROR("Failed to execute tts: (code: %d)", ret);
-      res.ok = false;
-      return true;
-    }
-
-    res.ok = true;
-    return true;
-  }
-
-  ros::NodeHandle nh_, pnh_;
-  boost::mutex mutex_;
-  dynamic_reconfigure::Server dyn_srv_;
-  Config config_;
-  ros::ServiceServer srv_;
-  bool initialized_;
-  std::string db_path_, license_path_;
-};
-
-int main(int argc, char** argv) {
-  ros::init(argc, argv, "voice_text");
-
-  VoiceText vt;
-  if (!vt.initialize()) {
-    return 1;
-  };
-
-  ros::spin();
-
-  return 0;
-}
diff --git a/3rdparty/voice_text/src/vt_handler.cpp b/3rdparty/voice_text/src/vt_handler.cpp
new file mode 100644
index 000000000..9508cd439
--- /dev/null
+++ b/3rdparty/voice_text/src/vt_handler.cpp
@@ -0,0 +1,281 @@
+/*
+ * vt_handler.cpp
+ * Author: Yoshiki Obinata
+ */
+
+#include "vt_handler.h"
+
+#include <dlfcn.h>
+#include <glob.h>
+
+#include <cstring>
+#include <string>
+#include <vector>
+
+namespace {
+
+// Returns the first path matching the glob `pattern`, or an empty string when
+// nothing matches or glob() fails.
+std::string FirstGlobMatch(const char* pattern){
+  glob_t gbuf;
+  std::memset(&gbuf, 0, sizeof(gbuf));
+  std::string found;
+  if(glob(pattern, 0, NULL, &gbuf) == 0 && gbuf.gl_pathc > 0){
+    found = gbuf.gl_pathv[0];
+  }
+  globfree(&gbuf);
+  return found;
+}
+
+// Copies `s` into a writable, NUL-terminated buffer. The vendor entry points
+// are called with char* arguments, so the string's const storage is not used.
+std::vector<char> ToCBuffer(const std::string& s){
+  return std::vector<char>(s.c_str(), s.c_str() + s.size() + 1);
+}
+
+// Resolves every symbol named in `names` from `handle` and stores it in `table`.
+// On the first failure logs `what` followed by the dlerror() text and returns false.
+template <typename Names, typename Table>
+bool ResolveSymbols(void* handle, const Names& names, Table& table, const char* what){
+  for(const auto& name : names){
+    dlerror(); // clear stale state so the check below reflects this dlsym() only
+    table[name] = dlsym(handle, name);
+    const char* err = dlerror();
+    if(err != NULL){
+      ROS_FATAL_STREAM(what << err);
+      return false;
+    }
+  }
+  return true;
+}
+
+} // namespace
+
+// Locates an installed VoiceText SDK or ReadSpeaker API library under /usr/vt,
+// opens it with dlopen(), resolves its entry points through LoadSym() and
+// initializes the engine.
+//
+//   license_path  passed to VT_LOADTTS_JPN (SDK) or VTAPI_SetLicenseFolder (API);
+//                 must not be empty
+//   db_path       passed to VT_LOADTTS_JPN; kept for backward compatibility
+//
+// Every failure is logged and leaves vt_type == NO_VT, so a later
+// VTH_TextToFile() call reports failure instead of calling into an engine
+// that was never set up.
+VTHandler::VTHandler(const std::string license_path, const std::string db_path){
+  // Start from a defined "nothing loaded" state: the early returns below and
+  // the destructor both read these members.
+  this->vt_type = NO_VT;
+  this->dl_handle = NULL;
+
+  // Locate libraries
+  std::string sdk_old_lib, sdk_new_lib;
+#ifdef ENV64
+  sdk_old_lib = FirstGlobMatch("/usr/vt/*/*/bin/x86_64/RAMIO/libvt_jpn.so"); // e.g., /usr/vt/sayaka/M16/bin/x86_64/RAMIO/libvt_jpn.so
+  sdk_new_lib = FirstGlobMatch("/usr/vt/*/*/bin/LINUX64_GLIBC3/RAMIO/libvt_jpn.so"); // e.g., /usr/vt/risa/H16/bin/LINUX64_GLIBC3/RAMIO/libvt_jpn.so
+#elif ENV32
+  sdk_old_lib = FirstGlobMatch("/usr/vt/*/*/bin/x86_32/RAMIO/libvt_jpn.so"); // e.g., /usr/vt/sayaka/M16/bin/x86_32/RAMIO/libvt_jpn.so
+  sdk_new_lib = FirstGlobMatch("/usr/vt/*/*/bin/LINUX32_GLIBC3/RAMIO/libvt_jpn.so"); // e.g., /usr/vt/risa/H16/bin/LINUX32_GLIBC3/RAMIO/libvt_jpn.so
+#else
+  ROS_FATAL("Your architecture is not supported");
+  return;
+#endif
+  const std::string api_lib = FirstGlobMatch("/usr/vt/*/*/bin/libvtapi.so"); // e.g., /usr/vt/hikari/D16/bin/libvtapi.so
+
+  // Preference order: old SDK layout, new SDK layout, ReadSpeaker API
+  std::string lib_file;
+  if(!sdk_old_lib.empty()){
+    this->vt_type = VT_SDK;
+    lib_file = sdk_old_lib;
+  }else if(!sdk_new_lib.empty()){
+    this->vt_type = VT_SDK;
+    lib_file = sdk_new_lib;
+  }else if(!api_lib.empty()){
+    this->vt_type = VT_API;
+    lib_file = api_lib;
+  }else{
+    ROS_FATAL("No VoiceText or ReadSpeaker libraries have found");
+    return;
+  }
+
+  // Load libraries
+  ROS_INFO("Opening %s ", lib_file.c_str());
+  this->dl_handle = dlopen(lib_file.c_str(), RTLD_NOW);
+  if(this->dl_handle == NULL){
+    const char* dl_err = dlerror();
+    ROS_FATAL_STREAM("Error occured when opening VoiceText or ReadSpeaker libraries " <<
+                     (dl_err != NULL ? dl_err : "unknown error"));
+    this->vt_type = NO_VT;
+    return;
+  }
+
+  // Load symbols (LoadSym() closes the library itself when it fails)
+  if(!LoadSym()){
+    this->vt_type = NO_VT;
+    return;
+  }
+
+  // A license is mandatory for both back ends
+  if(license_path.empty()){
+    ROS_FATAL("Please set license file");
+    this->vt_type = NO_VT;
+    return;
+  }
+
+  // db_path is for backward compatibility
+  std::vector<char> db_path_buf = ToCBuffer(db_path);
+  std::vector<char> license_path_buf = ToCBuffer(license_path);
+  int ret = 0;
+
+  if(this->vt_type == VT_SDK){
+    ret = VT_LOADTTS_JPN(0, -1, db_path_buf.data(), license_path_buf.data());
+    if(ret != VT_LOADTTS_SUCCESS){
+      ROS_FATAL("[VoiceText SDK] Failed to load TTS engine (code %d)", ret);
+      this->vt_type = NO_VT;
+      return;
+    }
+    VT_GetTTSInfo_JPN(VT_VERIFY_CODE, NULL, &ret, sizeof(int));
+    if(ret != 0){
+      ROS_FATAL_STREAM("[VoiceText SDK] Verification failed (VT_VERIFY_CODE " << ret << ")");
+      this->vt_type = NO_VT;
+      return;
+    }
+  }else if(this->vt_type == VT_API){
+    // lib_file has the form /usr/vt/<speaker>/<type>/bin/libvtapi.so, so the
+    // path components are "/", "usr", "vt", <speaker>, <type>, "bin", file.
+    const fs::path lib(lib_file);
+    std::vector<std::string> elements;
+    for(const auto& part : lib){
+      elements.push_back(part.string());
+    }
+    std::vector<char> lib_dir_buf = ToCBuffer(lib.parent_path().string());
+    std::vector<char> speaker_buf = ToCBuffer(elements.at(3));
+    std::vector<char> type_buf = ToCBuffer(elements.at(4));
+
+    VTAPI_Init(lib_dir_buf.data());
+    this->hVTAPI = VTAPI_CreateHandle();
+    if(this->hVTAPI == 0){
+      ROS_FATAL("[ReadSpeaker API] Failed to create API handler. STATUS: %s", VTAPI_GetLastErrorInfo(0)->szMsg);
+      this->vt_type = NO_VT;
+      return;
+    }
+    VTAPI_SetLicenseFolder(license_path_buf.data());
+    // Load engine
+    this->hEngine = VTAPI_GetEngine(speaker_buf.data(), type_buf.data());
+    ret = VTAPI_SetEngineHandle(this->hVTAPI, this->hEngine);
+    if(ret < VTAPI_SUCCESS){
+      ROS_FATAL("[ReadSpeaker API] Failed to create engine handler. CODE: %d, MESSAGE: %s ",
+                ret, VTAPI_GetLastErrorInfo(this->hVTAPI)->szMsg);
+      this->vt_type = NO_VT;
+      return;
+    }
+  }
+}
+
+// Closes the vendor library if it is still open.
+VTHandler::~VTHandler(){
+  if(this->dl_handle != NULL){
+    // TODO release handle before close dl
+    dlclose(this->dl_handle);
+    this->dl_handle = NULL;
+  }
+}
+
+// Resolves the entry points of the library selected by vt_type from dl_handle
+// and stores them in the matching function-pointer members.
+// Returns false, after closing the library, when any symbol is missing.
+bool VTHandler::LoadSym(){
+  bool resolved = true;
+  if(vt_type == VT_SDK){
+    ROS_INFO("Found VoiceText SDK");
+    // load symbol
+    resolved = ResolveSymbols(this->dl_handle, VTSDK_func_, VTSDK_s_map_,
+                              "Failed to load VoiceText libraries. STATUS: ");
+    if(resolved){
+      // cast
+      VT_LOADTTS_JPN = reinterpret_cast<decltype(VT_LOADTTS_JPN)>(VTSDK_s_map_.at("VT_LOADTTS_JPN"));
+      VT_UNLOADTTS_JPN = reinterpret_cast<decltype(VT_UNLOADTTS_JPN)>(VTSDK_s_map_.at("VT_UNLOADTTS_JPN"));
+      VT_GetTTSInfo_JPN = reinterpret_cast<decltype(VT_GetTTSInfo_JPN)>(VTSDK_s_map_.at("VT_GetTTSInfo_JPN"));
+      VT_TextToFile_JPN = reinterpret_cast<decltype(VT_TextToFile_JPN)>(VTSDK_s_map_.at("VT_TextToFile_JPN"));
+    }
+  }else if(vt_type == VT_API){
+    ROS_INFO("Found ReadSpeaker API");
+    // load symbol
+    resolved = ResolveSymbols(this->dl_handle, VTAPI_func_, VTAPI_s_map_,
+                              "Failed to load ReadSpeaker libraries. STATUS: ");
+    if(resolved){
+      // cast
+      VTAPI_Init = reinterpret_cast<decltype(VTAPI_Init)>(VTAPI_s_map_.at("VTAPI_Init"));
+      VTAPI_CreateHandle = reinterpret_cast<decltype(VTAPI_CreateHandle)>(VTAPI_s_map_.at("VTAPI_CreateHandle"));
+      VTAPI_SetLicenseFolder = reinterpret_cast<decltype(VTAPI_SetLicenseFolder)>(VTAPI_s_map_.at("VTAPI_SetLicenseFolder"));
+      VTAPI_GetEngine = reinterpret_cast<decltype(VTAPI_GetEngine)>(VTAPI_s_map_.at("VTAPI_GetEngine"));
+      VTAPI_SetEngineHandle = reinterpret_cast<decltype(VTAPI_SetEngineHandle)>(VTAPI_s_map_.at("VTAPI_SetEngineHandle"));
+      VTAPI_SetAttr = reinterpret_cast<decltype(VTAPI_SetAttr)>(VTAPI_s_map_.at("VTAPI_SetAttr"));
+      VTAPI_SetOutputFile = reinterpret_cast<decltype(VTAPI_SetOutputFile)>(VTAPI_s_map_.at("VTAPI_SetOutputFile"));
+      VTAPI_TextToFile = reinterpret_cast<decltype(VTAPI_TextToFile)>(VTAPI_s_map_.at("VTAPI_TextToFile"));
+      VTAPI_GetLastErrorInfo = reinterpret_cast<decltype(VTAPI_GetLastErrorInfo)>(VTAPI_s_map_.at("VTAPI_GetLastErrorInfo"));
+      VTAPI_ReleaseHandle = reinterpret_cast<decltype(VTAPI_ReleaseHandle)>(VTAPI_s_map_.at("VTAPI_ReleaseHandle"));
+      VTAPI_UnloadEngine = reinterpret_cast<decltype(VTAPI_UnloadEngine)>(VTAPI_s_map_.at("VTAPI_UnloadEngine"));
+      VTAPI_Exit = reinterpret_cast<decltype(VTAPI_Exit)>(VTAPI_s_map_.at("VTAPI_Exit"));
+    }
+  }
+  if(!resolved){
+    // Reset the handle so the destructor does not dlclose() it a second time.
+    dlclose(this->dl_handle);
+    this->dl_handle = NULL;
+  }
+  return resolved;
+}
+
+// Synthesizes `text` into the file `wave_path` with the given pitch, speed,
+// volume and pause settings (output format constants:
+// VT_FILE_API_FMT_S16PCM_WAVE for the SDK, FORMAT_16PCM_WAV for the API).
+// Returns true on success; false on an engine error or when no engine is loaded.
+bool VTHandler::VTH_TextToFile(const int pitch, const int speed, const int volume, const int pause,
+                               const std::string text, const std::string wave_path){
+  // The vendor calls are given char*, so work on writable copies. The buffers
+  // release themselves on every return path.
+  std::vector<char> text_buf = ToCBuffer(text);
+  std::vector<char> wave_path_buf = ToCBuffer(wave_path);
+  bool success = true;
+  int ret = 0;
+
+  if(this->vt_type == VT_SDK){
+    ret = VT_TextToFile_JPN(VT_FILE_API_FMT_S16PCM_WAVE,
+                            text_buf.data(),
+                            wave_path_buf.data(),
+                            -1,
+                            pitch,
+                            speed,
+                            volume,
+                            pause,
+                            -1, -1);
+    if(ret != VT_FILE_API_SUCCESS){
+      ROS_ERROR("[VoiceText SDK] Failed to execute TTS (code %d)", ret);
+      success = false;
+    }
+  }else if(this->vt_type == VT_API){
+    VTAPI_SetAttr(this->hVTAPI, ATTR_PITCH, pitch);
+    VTAPI_SetAttr(this->hVTAPI, ATTR_SPEED, speed);
+    VTAPI_SetAttr(this->hVTAPI, ATTR_VOLUME, volume);
+    VTAPI_SetAttr(this->hVTAPI, ATTR_PAUSE, pause);
+    ret = VTAPI_SetOutputFile(this->hVTAPI, wave_path_buf.data(), FORMAT_16PCM_WAV);
+    if(ret != VTAPI_SUCCESS){
+      ROS_ERROR("[ReadSpeaker API] Failed to execute VTAPI_SetOutputFile. STATUS: %s",
+                VTAPI_GetLastErrorInfo(this->hVTAPI)->szMsg);
+      success = false;
+    }
+    ret = VTAPI_TextToFile(this->hVTAPI, text_buf.data(), -1, TEXT_FORMAT_DEFAULT);
+    if(ret != VTAPI_SUCCESS){
+      ROS_ERROR("[ReadSpeaker API] Failed to execute VTAPI_TextToFile. STATUS: %s",
+                VTAPI_GetLastErrorInfo(this->hVTAPI)->szMsg);
+      success = false;
+    }
+  }else{
+    // No engine was loaded (construction failed), so there is nothing to call.
+    ROS_ERROR("No VoiceText or ReadSpeaker engine is loaded");
+    success = false;
+  }
+
+  return success;
+}