diff --git a/3rdparty/voice_text/.gitignore b/3rdparty/voice_text/.gitignore
deleted file mode 100644
index 46bf7136a..000000000
--- a/3rdparty/voice_text/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-src/voice_text.cpp
diff --git a/3rdparty/voice_text/CMakeLists.txt b/3rdparty/voice_text/CMakeLists.txt
index ef07dcceb..74441c5a4 100644
--- a/3rdparty/voice_text/CMakeLists.txt
+++ b/3rdparty/voice_text/CMakeLists.txt
@@ -1,11 +1,16 @@
-cmake_minimum_required(VERSION 2.8.3)
+cmake_minimum_required(VERSION 3.5.1)
 project(voice_text)
+
+set(CMAKE_CXX_STANDARD 11)  # vt_handler.h uses C++11 brace-initialized class members
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-write-strings")  # string literals are stored/passed as char* (vt_handler.h, vendor headers)
+
 find_package(catkin REQUIRED COMPONENTS
   dynamic_reconfigure
   roscpp
   message_generation)
 
-find_package(Boost REQUIRED COMPONENTS filesystem)
+find_package(Boost REQUIRED COMPONENTS
+  filesystem)
 
 generate_dynamic_reconfigure_options(
   cfg/VoiceText.cfg
@@ -20,57 +25,19 @@ generate_messages()
 
 catkin_package(CATKIN_DEPENDS message_runtime)
 
-file(GLOB VT_ROOT /usr/vt/*/*)
-if(NOT VT_ROOT)
-  message(WARNING "VoiceText directory should be /usr/vt/*/* (e.g., /usr/vt/sayaka/M16) but is not found")
-  set(VT_ROOT /usr/vt/sayaka/M16)  # default value for following configure_file
-else()
-  if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "4")
-    set(VT_LIB_PATH_OLD ${VT_ROOT}/bin/x86_32/RAMIO/libvt_jpn.so)  # e.g., /usr/vt/sayaka/M16/bin/x86_32/RAMIO/libvt_jpn.so
-    set(VT_LIB_PATH_NEW ${VT_ROOT}/bin/LINUX32_GLIBC3/RAMIO/libvt_jpn.so)  # e.g., /usr/vt/risa/H16/bin/LINUX32_GLIBC3/RAMIO/libvt_jpn.so
-  elseif("${CMAKE_SIZEOF_VOID_P}" STREQUAL "8")
-    set(VT_LIB_PATH_OLD ${VT_ROOT}/bin/x86_64/RAMIO/libvt_jpn.so)  # e.g., /usr/vt/sayaka/M16/bin/x86_64/RAMIO/libvt_jpn.so
-    set(VT_LIB_PATH_NEW ${VT_ROOT}/bin/LINUX64_GLIBC3/RAMIO/libvt_jpn.so)  # e.g., /usr/vt/risa/H16/bin/LINUX64_GLIBC3/RAMIO/libvt_jpn.so
-  endif()
-  if(EXISTS ${VT_LIB_PATH_OLD})
-    set(VT_LIB_PATH ${VT_LIB_PATH_OLD})
-  else()
-    if(EXISTS ${VT_LIB_PATH_NEW})
-      set(VT_LIB_PATH ${VT_LIB_PATH_NEW})
-    endif()
-  endif()
-  if(VT_LIB_PATH)
-    message(WARNING "VoiceText library is found at ${VT_LIB_PATH}")
-  else()
-    message(WARNING "VoiceText library is not found at ${VT_LIB_PATH_OLD} or ${VT_LIB_PATH_NEW}")
-  endif()
-endif()
-configure_file(src/voice_text.cpp.in ${PROJECT_SOURCE_DIR}/src/voice_text.cpp)
-
-  include_directories(
-    ${Boost_INCLUDE_DIRS}
-    ${catkin_INCLUDE_DIRS}
-  )
-  add_executable(voice_text src/voice_text.cpp)
-  add_dependencies(voice_text ${PROJECT_NAME}_generate_messages_cpp ${PROJECT_NAME}_gencfg)
-  set_target_properties(voice_text PROPERTIES COMPILE_FLAGS -D_REENTRANT)
+include_directories(
+  include  # vt_handler.h plus the bundled vendor headers vt_jpn.h and vtapi.h
+  ${Boost_INCLUDE_DIRS}
+  ${catkin_INCLUDE_DIRS}
+)
 
-if(NOT VT_LIB_PATH)
-  message(WARNING "Building dummy library")
-  add_library(vt_dummy src/dummy/vt_dummy.cpp)
-  set_target_properties(vt_dummy PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR})
-  set_target_properties(vt_dummy PROPERTIES LIBRARY_OUTPUT_NAME vt_jpn)
-  set_target_properties(voice_text PROPERTIES COMPILE_FLAGS -DUSE_DUMMY_INCLUDE)
-  set(VT_LIB_PATH ${PROJECT_BINARY_DIR}/libvt_jpn.so)
-endif()
+add_executable(voice_text src/voice_text.cpp src/vt_handler.cpp)
+add_dependencies(voice_text ${PROJECT_NAME}_generate_messages_cpp ${PROJECT_NAME}_gencfg)  # generated service/config headers must exist before compiling
+target_compile_definitions(voice_text PRIVATE _REENTRANT)  # define _REENTRANT for this target only
 
-  target_link_libraries(voice_text
-    ${catkin_LIBRARIES}
-    ${VT_LIB_PATH} -lm -lpthread
-  )
-if(NOT EXISTS ${VT_LIB_PATH})
-  add_dependencies(voice_text vt_dummy)
-endif()
+target_link_libraries(voice_text  # Boost.Filesystem is used directly by the sources; vt_handler.h pulls in <dlfcn.h>
+    ${catkin_LIBRARIES} ${Boost_LIBRARIES} -lm -lpthread ${CMAKE_DL_LIBS}
+)
 
 install(TARGETS voice_text # do not install vt_dummy target, that should be installed from voice_text library
   ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
diff --git a/3rdparty/voice_text/README.md b/3rdparty/voice_text/README.md
index ed1159ecf..bea0daf47 100644
--- a/3rdparty/voice_text/README.md
+++ b/3rdparty/voice_text/README.md
@@ -5,9 +5,20 @@ ROS Interface for HOYA VoiceText Speech Synthesis Engine
 
 ## Installation
 
-1. Install VoiceText SDK
-2. Put license file
-3. Build this package
+### 1. Install VoiceText SDK
+#### If you have the VoiceText SDK installer, follow the official guide to install both the engine and the SDK.
+#### If you do not have the SDK installer but have the ReadSpeaker API binaries, follow the steps below.
+1. Install the VoiceText engine by following the official guide.
+2. Copy the VoiceText API binaries to the VoiceText binary directory.
+  The VoiceText API package includes binary libraries and a header file. Copy the binaries to the engine's binary directory by executing the following commands.
+  ```bash
+  cd /path_to_api_package_directory # e.g. cd ~/Downloads/RS_VTAPI_SDK_Linux_4.3.0.2/20201113_VTAPI4.3.0.2_LINUX
+  cd bin/x64 # Use the x86 directory instead if your system is 32-bit x86
+  # This assumes the VoiceText engine's talker is hikari and its type is D16; otherwise, adjust the destination directory.
+  sudo cp -a * /usr/vt/hikari/D16/bin # Keep -a so that symbolic links are preserved.
+  ```
+### 2. Put license file
+### 3. Build this package
 
 ```bash
 cd /path/to/catkin_workspace
diff --git a/3rdparty/voice_text/include/vt_handler.h b/3rdparty/voice_text/include/vt_handler.h
new file mode 100644
index 000000000..f52b20dd3
--- /dev/null
+++ b/3rdparty/voice_text/include/vt_handler.h
@@ -0,0 +1,107 @@
+/*
+ * vt_handler.h
+ * Author: Yoshiki Obinata <obinata@jsk.imi.i.u-tokyo.ac.jp>
+ */
+
+#ifndef VT_HANDLER_H_
+#define VT_HANDLER_H_
+
+#include <cstdlib>
+#include <dlfcn.h>
+#include <glob.h>
+#include <map>
+#include <string>
+#include <vector>
+#include <boost/filesystem.hpp>
+
+// logging
+#include <ros/ros.h>
+
+#include "vt_jpn.h"
+#include "vtapi.h"
+
+#if __x86_64__ || __ppc64__  // NOTE(review): other 64-bit targets (e.g. aarch64) fall through to ENV32 — confirm this is intended
+#define ENV64
+#else
+#define ENV32
+#endif
+
+typedef enum VT_TYPE{  // backend selector; stored in VTHandler::vt_type
+NO_VT,   // no backend (presumably: no usable library was found — confirm in vt_handler.cpp)
+VT_SDK,  // VoiceText SDK (VT_*_JPN functions)
+VT_API   // ReadSpeaker API (VTAPI_* functions)
+} VT_Types;
+
+namespace fs = boost::filesystem;
+
+class VTHandler{  // Holds function pointers for the VoiceText SDK / ReadSpeaker API entry points listed below.
+    public:
+        VTHandler(const std::string license_path, const std::string db_path);
+        ~VTHandler();
+        bool VTH_TextToFile(const int pitch, const int speed, const int volume, const int pause,
+                            const std::string text, const std::string wave_path);
+
+    private:
+        void* dl_handle = nullptr;  // library handle; null until assigned (presumably from dlopen() in vt_handler.cpp)
+        VT_Types vt_type = NO_VT;   // backend in use; NO_VT until one is selected
+
+        // Symbol loading (implemented outside this header)
+        bool LoadSym();
+
+        // Symbol names
+        // Related to VoiceText SDK
+        std::vector<char*> VTSDK_func_ = {
+        "VT_LOADTTS_JPN",
+        "VT_UNLOADTTS_JPN",
+        "VT_GetTTSInfo_JPN",
+        "VT_TextToFile_JPN"
+        };
+
+        // Related to ReadSpeaker API
+        std::vector<char*> VTAPI_func_ = {
+        "VTAPI_Init",
+        "VTAPI_CreateHandle",
+        "VTAPI_SetLicenseFolder",
+        "VTAPI_GetEngine",
+        "VTAPI_SetEngineHandle",
+        "VTAPI_SetAttr",
+        "VTAPI_SetOutputFile",
+        "VTAPI_TextToFile",
+        "VTAPI_GetLastErrorInfo",
+        "VTAPI_ReleaseHandle",
+        "VTAPI_UnloadEngine",
+        "VTAPI_Exit"
+        };
+
+        // symbol maps; NOTE(review): char* keys compare by pointer value, not by string content
+        std::map<char*, void*> VTSDK_s_map_;
+        std::map<char*, void*> VTAPI_s_map_;
+
+        // Function pointers (null until assigned, so an unloaded backend is detectable)
+        // Related to VoiceText SDK
+        short (*VT_LOADTTS_JPN)(HWND, int, char*, char*) = nullptr;
+        void (*VT_UNLOADTTS_JPN)(int) = nullptr;
+        int (*VT_GetTTSInfo_JPN)(int, char*, void*, int) = nullptr;
+        short (*VT_TextToFile_JPN)(int, char*, char*, int, int, int, int, int, int, int) = nullptr;
+
+        // Related to ReadSpeaker API
+        int (*VTAPI_Init)(char*) = nullptr;
+        VTAPI_HANDLE (*VTAPI_CreateHandle)() = nullptr;
+        void (*VTAPI_SetLicenseFolder)(char*) = nullptr;
+        VTAPI_ENGINE_HANDLE (*VTAPI_GetEngine)(char*, char*) = nullptr;
+        int (*VTAPI_SetEngineHandle)(VTAPI_HANDLE, VTAPI_ENGINE_HANDLE) = nullptr;
+        int (*VTAPI_SetAttr)(VTAPI_HANDLE, int, int) = nullptr;
+        int (*VTAPI_SetOutputFile)(VTAPI_HANDLE, char*, int) = nullptr;
+        int (*VTAPI_TextToFile)(VTAPI_HANDLE, void*, int, int) = nullptr;
+        VTAPI_ERRS_INFO* (*VTAPI_GetLastErrorInfo)(VTAPI_HANDLE) = nullptr;
+        void (*VTAPI_ReleaseHandle)(VTAPI_HANDLE) = nullptr;
+        int (*VTAPI_UnloadEngine)(VTAPI_ENGINE_HANDLE) = nullptr;
+        void (*VTAPI_Exit)() = nullptr;
+
+        // ReadSpeaker API handles (null until created)
+        VTAPI_HANDLE hVTAPI = nullptr;
+        VTAPI_ENGINE_HANDLE hEngine = nullptr;
+};
+
+
+#endif // VT_HANDLER_H_
diff --git a/3rdparty/voice_text/include/vt_jpn.h b/3rdparty/voice_text/include/vt_jpn.h
new file mode 100644
index 000000000..6a8000d23
--- /dev/null
+++ b/3rdparty/voice_text/include/vt_jpn.h
@@ -0,0 +1,294 @@
+/*
+* Copyright (c) 2004 Voiceware Co., Ltd., All rights reserved.
+*
+* VoiceText
+*/
+
+#ifndef VT_JPN_H
+#define VT_JPN_H
+
+#if defined(__cplusplus)
+	extern "C" {
+#endif
+
+#if !defined(VT_BASIC_DEFINE)
+	#if defined(WIN32)
+		#if !defined(_DllMode)
+			#define _DllMode(_type_)		__declspec( dllimport ) _type_
+		#endif
+	#else
+		#if !defined(_DllMode)
+			#define		_DllMode(_type_)		extern _type_
+		#endif
+		typedef		int						HWND;
+	#endif
+#endif
+
+
+
+
+/*===========================================================================*/
+/* Text format (used in texttype) */
+#if !defined(VT_BASIC_DEFINE)
+	#if !defined(VT_TEXT_FMT_PLAIN_TEXT)
+		#define	VT_TEXT_FMT_PLAIN_TEXT						0
+	#endif
+
+	#if !defined(VT_TEXT_FMT_JEITA)
+		#define	VT_TEXT_FMT_JEITA							4
+	#endif
+
+	#if !defined(VT_TEXT_FMT_JEITA_PLUS)
+		#define	VT_TEXT_FMT_JEITA_PLUS						6
+	#endif
+#endif
+
+
+
+/*===========================================================================*/
+/* LOAD & UNLOAD */
+#if !defined(VT_BASIC_DEFINE)
+	/* Return Value */
+	#define		VT_LOADTTS_SUCCESS							0
+	#define		VT_LOADTTS_ERROR_CONFLICT_DBPATH			1
+	#define		VT_LOADTTS_ERROR_TTS_STRUCTURE				2
+	#define		VT_LOADTTS_ERROR_TAGGER						3
+	#define		VT_LOADTTS_ERROR_BREAK_INDEX				4
+	#define		VT_LOADTTS_ERROR_TPP_DICT					5
+	#define		VT_LOADTTS_ERROR_TABLE						6
+	#define		VT_LOADTTS_ERROR_UNIT_INDEX					7
+	#define		VT_LOADTTS_ERROR_PROSODY_DB					8
+	#define		VT_LOADTTS_ERROR_PCM_DB						9
+	#define		VT_LOADTTS_ERROR_PM_DB						10
+	#define		VT_LOADTTS_ERROR_UNKNOWN					11
+#endif
+
+_DllMode(short)	VT_LOADTTS_JPN(HWND hWnd, int nSpeakerID, char *db_path, char *licensefile);
+_DllMode(void)	VT_UNLOADTTS_JPN(int nSpeakerID);
+
+
+
+/*===========================================================================*/
+/* Load/Unload UserDict API */
+#if !defined(VT_BASIC_DEFINE)
+	/* Return Value */
+	#define		VT_LOAD_USERDICT_SUCCESS					(1)
+	#define		VT_LOAD_USERDICT_ERROR_INVALID_INDEX		(-1)
+	#define		VT_LOAD_USERDICT_ERROR_INDEX_BUSY			(-2)
+	#define		VT_LOAD_USERDICT_ERROR_LOAD_FAIL			(-3)
+	#define		VT_LOAD_USERDICT_ERROR_UNKNOWN				(-4)
+
+	#define		VT_UNLOAD_USERDICT_SUCCESS					(1)
+	#define		VT_UNLOAD_USERDICT_ERROR_NULL_INDEX			(-1)
+	#define		VT_UNLOAD_USERDICT_ERROR_INVALID_INDEX		(-2)
+	#define		VT_UNLOAD_USERDICT_ERROR_UNKNOWN			(-3)
+#endif
+
+_DllMode(short) VT_LOAD_UserDict_JPN(int dictidx, char *filename);
+_DllMode(short) VT_UNLOAD_UserDict_JPN(int dictidx);
+
+
+
+/*===========================================================================*/
+/* SOUND CARD API */
+#if !defined(VT_BASIC_DEFINE)
+	/* Return Value */
+	#define		VT_PLAY_API_SUCCESS						(1)
+	#define		VT_PLAY_API_ERROR_CREATE_THREAD			(-1)
+	#define		VT_PLAY_API_ERROR_NULL_TEXT				(-2)
+	#define		VT_PLAY_API_ERROR_EMPTY_TEXT			(-3)
+	#define		VT_PLAY_API_ERROR_DB_NOT_LOADED			(-4)
+	#define		VT_PLAY_API_ERROR_INITPLAY				(-5)
+	#define		VT_PLAY_API_ERROR_UNKNOWN				(-6)
+#endif
+
+#if defined(WIN32)
+	_DllMode(short) VT_PLAYTTS_JPN(HWND hcaller, UINT umsg, char *text_buff, int nSpeakerID, int pitch, int speed, int volume, int pause, int dictidx, int texttype);
+	_DllMode(void)	VT_STOPTTS_JPN(void);
+	_DllMode(void)	VT_RESTARTTTS_JPN(void);
+	_DllMode(void)	VT_PAUSETTS_JPN(void);
+#endif
+
+
+
+/*===========================================================================*/
+/* FILE WRITE API */
+#if !defined(VT_BASIC_DEFINE)
+	/* Return Value */
+	#define		VT_FILE_API_SUCCESS						(1)
+	#define		VT_FILE_API_ERROR_INVALID_FORMAT		(-1)
+	#define		VT_FILE_API_ERROR_CREATE_THREAD			(-2)
+	#define		VT_FILE_API_ERROR_NULL_TEXT				(-3)
+	#define		VT_FILE_API_ERROR_EMPTY_TEXT			(-4)
+	#define		VT_FILE_API_ERROR_DB_NOT_LOADED			(-5)
+	#define		VT_FILE_API_ERROR_OUT_FILE_OPEN			(-6)
+	#define		VT_FILE_API_ERROR_UNKNOWN				(-7)
+
+	/* Audio Format */
+	enum {
+		VT_FILE_API_FMT_S16PCM		= 0,
+		VT_FILE_API_FMT_ALAW		= 1,
+		VT_FILE_API_FMT_MULAW		= 2,
+		VT_FILE_API_FMT_DADPCM		= 3,
+		VT_FILE_API_FMT_S16PCM_WAVE	= 4,
+		VT_FILE_API_FMT_U08PCM_WAVE	= 5,
+	//	VT_FILE_API_FMT_IMA_WAVE	= 6, /* not supported! */
+		VT_FILE_API_FMT_ALAW_WAVE	= 7,
+		VT_FILE_API_FMT_MULAW_WAVE	= 8,
+		VT_FILE_API_FMT_MULAW_AU	= 9,
+	};
+#endif
+
+_DllMode(short) VT_TextToFile_JPN(int fmt, char *tts_text, char *filename, int nSpeakerID, int pitch, int speed, int volume, int pause, int dictidx, int texttype);
+
+
+
+/*===========================================================================*/
+/* BUFFER I/O API */
+#if !defined(VT_BASIC_DEFINE)
+	/* Return Value */
+	#define		VT_BUFFER_API_PROCESSING					(0)
+	#define		VT_BUFFER_API_DONE							(1)
+	#define		VT_BUFFER_API_ERROR_INVALID_FORMAT			(-1)
+	#define		VT_BUFFER_API_ERROR_CREATE_THREAD			(-2)
+	#define		VT_BUFFER_API_ERROR_NULL_TEXT				(-3)
+	#define		VT_BUFFER_API_ERROR_EMPTY_TEXT				(-4)
+	#define		VT_BUFFER_API_ERROR_NULL_BUFFER				(-5)
+	#define		VT_BUFFER_API_ERROR_DB_NOT_LOADED			(-6)
+	#define		VT_BUFFER_API_ERROR_THREAD_BUSY				(-7)
+	#define		VT_BUFFER_API_ERROR_ABNORMAL_CONDITION		(-8)
+	#define		VT_BUFFER_API_ERROR_UNKNOWN					(-9)
+
+	/* Audio Format */
+	enum {
+		VT_BUFFER_API_FMT_S16PCM = VT_FILE_API_FMT_S16PCM,
+		VT_BUFFER_API_FMT_ALAW   = VT_FILE_API_FMT_ALAW,
+		VT_BUFFER_API_FMT_MULAW  = VT_FILE_API_FMT_MULAW,
+		VT_BUFFER_API_FMT_DADPCM = VT_FILE_API_FMT_DADPCM,
+	};
+#endif
+
+_DllMode(int) VT_TextToBuffer_JPN(int fmt, char *tts_text, char *output_buff, int *output_len, int flag, int nThreadID, int nSpeakerID, int pitch, int speed, int volume, int pause, int dictidx, int texttype);
+
+
+
+/*===========================================================================*/
+/* CONFIGURE API */
+_DllMode(void) VT_SetPitchSpeedVolumePause_JPN(int pitch, int speed, int volume, int pause, int nSpeakerID);
+_DllMode(void) VT_SetCommaPause_JPN(int pause, int nSpeakerID);
+
+
+
+/*===========================================================================
+SYNOPSIS
+	int VT_GetTTSInfo_JPN(int request, char *licensefile, void *value, int valuesize);
+
+PARAMETERS
+	request
+		VT_BUILD_DATE			 (char*): library build date
+		VT_VERIFY_CODE         	 (int *): verification result(licensefile is required)
+		VT_MAX_CHANNEL         	 (int *): max no. of possible channels(licensefile is required)
+		VT_DB_DIRECTORY        	 (char*): default root DB fold name
+		VT_LOAD_SUCCESS_CODE   	 (int *): return value, when db loading is success
+		VT_MAX_SPEAKER         	 (int *): max no. of speaker ( >= 0 )
+		VT_DEF_SPEAKER         	 (int *): default speaker id ( >= 0 && < max no. of speaker )
+		VT_CODEPAGE            	 (int *): supported ansi codepage (WIN32 only)
+		VT_DB_ACCESS_MODE      	 (int *): file or ram i/o ? (file:0, ram:1)
+		VT_FIXED_POINT_SUPPORT 	 (int *): fixed point simulated or not? (float:0, fixed:1)
+		VT_SAMPLING_FREQUENCY  	 (int *): current sampling frequency (8000, 11025, 16000 )
+		VT_MAX_PITCH_RATE      	 (int *): max value of pitch rate (%)
+		VT_DEF_PITCH_RATE      	 (int *): default value of pitch rate (%)
+		VT_MIN_PITCH_RATE      	 (int *): min value of pitch rate (%)
+		VT_MAX_SPEED_RATE      	 (int *): max value of speed rate (%)
+		VT_DEF_SPEED_RATE      	 (int *): default value of speed rate (%)
+		VT_MIN_SPEED_RATE      	 (int *): min value of speed rate (%)
+		VT_MAX_VOLUME          	 (int *): max value of volume (%)
+		VT_DEF_VOLUME          	 (int *): default value of volume (%)
+		VT_MIN_VOLUME          	 (int *): min value of volume (%)
+		VT_MAX_SENT_PAUSE		 (int *): max value of sentence pause (msec)
+		VT_DEF_SENT_PAUSE		 (int *): default value of sentence pause (msec)
+		VT_MIN_SENT_PAUSE		 (int *): min value of sentence pause (msec)
+		VT_DB_BUILD_DATE		 (char*): embedded db build date (for embedded engine only)
+		VT_MAX_COMMA_PAUSE		 (int *): max value of comma pause (msec)
+		VT_DEF_COMMA_PAUSE		 (int *): default value of comma pause (msec)
+		VT_MIN_COMMA_PAUSE		 (int *): min value of comma pause (msec)
+
+	licensefile
+		if NULL, use default licensefile.
+
+	value
+		VT_DB_DIRECTORY and VT_BUILD_DATE requests are (char *), and any other request is (int *)
+
+	valuesize
+		maximum length of value in characters
+
+RETURN VALUE
+	On success, zero(VT_INFO_SUCCESS) is returned.
+	On error, the return value depends on the operation:
+		VT_INFO_ERROR_NOT_SUPPORTED_REQUEST	(1)
+		VT_INFO_ERROR_INVALID_REQUEST		(2)
+		VT_INFO_ERROR_NULL_VALUE			(3)
+		VT_INFO_ERROR_SHORT_LENGTH_VALUE	(4)
+		VT_INFO_ERROR_UNKNOWN				(5)
+===========================================================================*/
+
+#if !defined(VT_BASIC_DEFINE)
+	/* Return Value */
+	#define	VT_INFO_SUCCESS						(0)
+	#define	VT_INFO_ERROR_NOT_SUPPORTED_REQUEST	(1)
+	#define	VT_INFO_ERROR_INVALID_REQUEST		(2)
+	#define VT_INFO_ERROR_NULL_VALUE			(3)
+	#define	VT_INFO_ERROR_SHORT_LENGTH_VALUE	(4)
+	#define	VT_INFO_ERROR_UNKNOWN				(5)
+
+	/* Request */
+	enum
+	{
+		VT_BUILD_DATE		   =  0,
+		VT_VERIFY_CODE         =  1,
+		VT_MAX_CHANNEL         =  2,
+		VT_DB_DIRECTORY        =  3,
+		VT_LOAD_SUCCESS_CODE   =  4,
+		VT_MAX_SPEAKER         =  5,
+		VT_DEF_SPEAKER         =  6,
+		VT_CODEPAGE            =  7,
+		VT_DB_ACCESS_MODE      =  8,
+		VT_FIXED_POINT_SUPPORT =  9,
+		VT_SAMPLING_FREQUENCY  = 10,
+		VT_MAX_PITCH_RATE      = 11,
+		VT_DEF_PITCH_RATE      = 12,
+		VT_MIN_PITCH_RATE      = 13,
+		VT_MAX_SPEED_RATE      = 14,
+		VT_DEF_SPEED_RATE      = 15,
+		VT_MIN_SPEED_RATE      = 16,
+		VT_MAX_VOLUME          = 17,
+		VT_DEF_VOLUME          = 18,
+		VT_MIN_VOLUME          = 19,
+		VT_MAX_SENT_PAUSE	   = 20,
+		VT_DEF_SENT_PAUSE	   = 21,
+		VT_MIN_SENT_PAUSE      = 22,
+		VT_DB_BUILD_DATE       = 23,
+		VT_MAX_COMMA_PAUSE	   = 24,
+		VT_DEF_COMMA_PAUSE	   = 25,
+		VT_MIN_COMMA_PAUSE	   = 26,
+		VT_MAX_SYMBOL_OPEN_PAUSE	   = 27,
+		VT_DEF_SYMBOL_OPEN_PAUSE	   = 28,
+		VT_MIN_SYMBOL_OPEN_PAUSE	   = 29,
+		VT_MAX_SYMBOL_CLOSE_PAUSE	   = 30,
+		VT_DEF_SYMBOL_CLOSE_PAUSE	   = 31,
+		VT_MIN_SYMBOL_CLOSE_PAUSE	   = 32,
+	};
+#endif
+
+_DllMode(int) VT_GetTTSInfo_JPN(int request, char *licensefile, void *value, int valuesize);
+
+
+#if !defined(VT_BASIC_DEFINE)
+	#define VT_BASIC_DEFINE
+#endif
+
+#if defined(__cplusplus)
+	}
+#endif
+
+#endif /* VT_JPN_H */
diff --git a/3rdparty/voice_text/include/vtapi.h b/3rdparty/voice_text/include/vtapi.h
new file mode 100644
index 000000000..9996df717
--- /dev/null
+++ b/3rdparty/voice_text/include/vtapi.h
@@ -0,0 +1,262 @@
+/*
+ * Copyright (c) 2000-2019 ReadSpeaker
+ * All Rights Reserved.
+ */
+
+#ifndef _VTAPI_H_
+#define _VTAPI_H_
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif
+
+
+#if !defined(VTAPI_decl)
+#	if defined(WIN32) || defined(WINCE)
+#		define VTAPI_decl       __declspec( dllexport )
+#		define _CRTDBG_MAP_ALLOC
+#		include <stdlib.h>
+#		if defined(WINCE)
+#		else
+#		include <crtdbg.h>
+#		endif
+	#else
+		#define VTAPI_decl       extern
+	#endif
+#endif
+
+
+#if defined(WIN32) || defined(WINCE)
+#pragma warning(disable:4996)
+#include <windows.h>
+#else
+#if !defined(HWND_DEFINE)
+#define HWND_DEFINE
+typedef int HWND;
+#endif
+#if !defined(DWORD_DEFINE)
+#define DWORD_DEFINE
+typedef unsigned long       DWORD;
+#endif
+#if !defined(UINT_DEFINE)
+#define UINT_DEFINE
+typedef unsigned int        UINT;
+#endif
+#endif
+
+typedef struct VOICE_INFO* VTAPI_HANDLE;
+typedef struct ENGINE_INFO* VTAPI_ENGINE_HANDLE;
+#if 1 //defined(USE_NEW_USERDICT_MANAGER)
+typedef struct USERDICT_INFO* VTAPI_USERDICT_HANDLE;
+#endif
+
+#define VTAPI_VERSION	"4.3.0.2"
+
+typedef enum OUTPUTFORMAT
+{
+	FORMAT_16PCM = 0,
+	FORMAT_8PCM = 1,
+	FORMAT_ALAW_PCM = 2,
+	FORMAT_MULAW_PCM = 3,
+	FORMAT_ADPCM_PCM = 4,
+	FORMAT_16PCM_WAV = 5,
+	FORMAT_8PCM_WAV = 6,
+	FORMAT_ALAW_WAV = 7,
+	FORMAT_MULAW_WAV = 8,
+
+	FORMAT_MAX
+} Output_Format;
+
+typedef enum TEXTTYPES
+{
+	TEXT_FORMAT_DEFAULT = 0, // multibyte
+	TEXT_FORMAT_UTF8 = 2
+} Text_Types;
+
+typedef enum AUDIOEVENTTYPES
+{
+	AUDIO_EVENT_NO_EVENTS = 0,
+	AUDIO_EVENT_START_INPUT_STREAM = 1,
+    AUDIO_EVENT_END_INPUT_STREAM = 2,
+    AUDIO_EVENT_VOICE_CHANGE = 4,
+    AUDIO_EVENT_TTS_MARK = 8,
+    AUDIO_EVENT_WORD_BOUNDARY = 16,
+	AUDIO_EVENT_SENTENCE_BOUNDARY = 32,
+	AUDIO_EVENT_BUFFER = 64,
+	AUDIO_EVENT_BUFFER_END = 128,
+	AUDIO_EVENT_FAILURE = 256,
+	AUDIO_EVENT_READY_PLAY = 512,
+	AUDIO_EVENT_ALL = 1023
+} Audio_Event_Types;
+
+typedef enum ATTRFLAGS
+{
+    ATTR_PITCH = 0,
+    ATTR_SPEED = 1,
+    ATTR_VOLUME = 2,
+	ATTR_PAUSE = 3,
+	ATTR_COMMAPAUSE = 5
+} Attr_Flags;
+
+typedef enum PRIORITYTYPES
+{
+	PRIOR_DEFAULT = 0,
+	PRIOR_OVERLAP = 1,
+	PRIOR_ALERT = 2,
+} Priority_Types;
+
+#define MAX_STR_LEN				256
+#define MAX_ERR_MSG				512
+
+typedef struct _audio_format
+{
+	int					nChannel;
+	int					nBits;
+	int					nSampling;
+} VTAPI_AUDIO_FORMAT;
+
+typedef struct _audio_info
+{
+	VTAPI_AUDIO_FORMAT	audiofmt;
+	int					nDevNo;
+} VTAPI_AUDIO_INFO;
+
+typedef struct
+{
+	int					nId;
+	int					nTextStartOffset;			// Text position of Word or Sentence or Mark
+	int					nTextEndOffset;				// Text position of Word or Sentence or Mark
+	char				szStrValue[MAX_STR_LEN];	// Name of Mark
+
+	int					nOffsetInStream;			// Buffer position of Mark
+	int					nOffsetInBuffer;			// Buffer position of Mark
+
+	int					nError;						// Error // VTAPI4 Version over 4.2.2.x
+
+	int					nBufferSize;				// Buffer size of Word or FrameBuffer
+	char*				pFrameBuffer;				// FrameBuffer
+} VTAPI_AUDIO_EVENT;
+
+
+typedef struct _vtapi_errs
+{
+	int nErr;
+	char szMsg[MAX_ERR_MSG];
+} VTAPI_ERRS_INFO;
+
+
+#define		VTAPI_SUCCESS						 0
+
+#define		VTAPI_INVALID_PARAM_ERROR			-1
+#define		VTAPI_INVALID_VALUE_ERROR			-2
+#define		VTAPI_INCORRECT_SET_ERROR			-3
+#define		VTAPI_INVALID_ID_OR_THREAD_ERROR	-4
+#define	    VTAPI_USERDICT_ERROR				-5
+#define		VTAPI_SSML_SYNTAX_ERROR				-6
+#define		VTAPI_MEMORY_ALLOC_ERROR			-7
+#define		VTAPI_NOT_OPENBUFFER_ERROR			-8
+#define		VTAPI_VTSSML_INVALID_ERROR			-9
+#define		VTAPI_INVALID_ENGINE_ERROR			-10
+#define		VTAPI_OVER_CHANNEL_ERROR			-11
+#define		VTAPI_NODATA_ERROR					-12
+#define		VTAPI_NOT_SUPPORTED_ERROR			-13
+
+#define		VTAPI_DLL_VTPLAY_NOT_LINKING		-21
+#define		VTAPI_DLL_VTSSML_NOT_LINKING		-22
+#define		VTAPI_DLL_VTSAVE_NOT_LINKING		-23
+#define		VTAPI_DLL_VTCONV_NOT_LINKING		-24
+#define		VTAPI_DLL_VTEFFECT_NOT_LINKING		-25
+
+
+
+// API for VTAPI
+
+// VTAPI_INIT Return Value : You can check by XOR the lower values.
+//                           If all Library links succeed, 0 or 0x11000
+#define VTAPI_LIB_VTPLAY_NOT_LINKING 0x1
+#define VTAPI_LIB_VTSAVE_NOT_LINKING 0x10
+#define VTAPI_LIB_VTCONV_NOT_LINKING 0x100
+#define VTAPI_LIB_VTSSML_NOT_LINKING 0x1000
+
+VTAPI_decl int VTAPI_Init(char *szWorkingDir); // It is automatically Initiated when using other functions.
+VTAPI_decl void VTAPI_Exit();
+VTAPI_decl void VTAPI_SetLicenseFolder(char *szPath);
+
+// API for VTAPI-Voice
+VTAPI_decl VTAPI_HANDLE VTAPI_CreateHandle();
+VTAPI_decl void VTAPI_ReleaseHandle(VTAPI_HANDLE hVTAPI);
+
+
+VTAPI_decl int VTAPI_SetEngineHandle(VTAPI_HANDLE hVTAPI, VTAPI_ENGINE_HANDLE hEngine);
+VTAPI_decl VTAPI_ENGINE_HANDLE VTAPI_GetEngineHandle(VTAPI_HANDLE hVTAPI);
+
+VTAPI_decl int VTAPI_SetOutputAudio(VTAPI_HANDLE hVTAPI, VTAPI_AUDIO_INFO stAudioInfo);
+VTAPI_decl int VTAPI_SetOutputFile(VTAPI_HANDLE hVTAPI, char *pszFileName, int nOutputFormat);
+VTAPI_decl int VTAPI_SetOutputBuffer(VTAPI_HANDLE hVTAPI, int nOutputFormat);
+
+VTAPI_decl int VTAPI_SetAttr(VTAPI_HANDLE hVTAPI, int nFlag, int nValue);
+VTAPI_decl int VTAPI_GetAttr(VTAPI_HANDLE hVTAPI, int nFlag, int *nValue);
+
+
+#if !defined(__VTAPI_DEFINED_CALLBACK_FUNCTION__)
+#define __VTAPI_DEFINED_CALLBACK_FUNCTION__
+typedef void  (*LPPEventProc)(VTAPI_HANDLE hVTAPI, int nEventType, void *pParam);
+#endif
+
+VTAPI_decl int VTAPI_SetEvent(VTAPI_HANDLE hVTAPI, void *pEventProc, int nEventTypes);
+
+#if defined(WIN32) || defined(WINCE)
+// API for Audio Output (VTAPI_Speak - File Output included)
+VTAPI_decl int VTAPI_Speak(VTAPI_HANDLE hVTAPI, void *pszText, int nTextLength, int nTextType);
+VTAPI_decl int VTAPI_Stop(VTAPI_HANDLE hVTAPI);
+VTAPI_decl int VTAPI_Pause(VTAPI_HANDLE hVTAPI);
+VTAPI_decl int VTAPI_Resume(VTAPI_HANDLE hVTAPI);
+VTAPI_decl int VTAPI_SetPriority(VTAPI_HANDLE hVTAPI, int nPrior);
+#endif
+
+// API for FileSave
+VTAPI_decl int VTAPI_TextToFile(VTAPI_HANDLE hVTAPI, void *pszText, int nTextLength, int nTextType);
+
+// API for Buffer
+typedef enum BUFSIZE_VALUE
+{
+    VAL_DEFAULT = -1,
+    VAL_ONEBUF = 0,
+} BUFSIZE_VALUE;
+VTAPI_decl int VTAPI_BufferOpen(VTAPI_HANDLE hVTAPI, void *pszText, int nTextLength, int nTextType, int nBufferSize);
+VTAPI_decl int VTAPI_GetBuffer(VTAPI_HANDLE hVTAPI, char** pFrameBuffer);
+VTAPI_decl void VTAPI_FreeBuffer(char* ptr);
+VTAPI_decl int VTAPI_BufferClose(VTAPI_HANDLE hVTAPI);
+
+VTAPI_decl VTAPI_ERRS_INFO *VTAPI_GetLastErrorInfo(VTAPI_HANDLE hVTAPI);
+
+VTAPI_decl int VTAPI_SetUserData(VTAPI_HANDLE hVTAPI, void *ptr);
+VTAPI_decl void *VTAPI_GetUserData(VTAPI_HANDLE hVTAPI);
+
+VTAPI_decl void VTAPI_GetVersion(char output[100]);
+
+// API for tts engine
+VTAPI_decl VTAPI_ENGINE_HANDLE VTAPI_GetEngine(char *pszSpeaker, char *pszType) ;
+// support for not installed engines
+VTAPI_decl VTAPI_ENGINE_HANDLE VTAPI_AddNewEngineInfo(char *pszSpeaker, char *pszType, int nSpeakerID, char *pszDBPath, char *pszLang, char *pszGender, int nCodePage, char *pszISOCode, char *pszVendor, int nSamplingRate);
+
+VTAPI_decl int VTAPI_GetEngineInfoFieldEx(VTAPI_ENGINE_HANDLE hEngine, char *pszSpeaker, char *pszType,char *pszDllPath, char *pszDBPath, char *pszLang, char *pszGender, char *pszVersion, int *nSampling, int *nChannel, char *pszISOCode);
+
+VTAPI_decl int VTAPI_LoadEngine(VTAPI_ENGINE_HANDLE hEngine);
+VTAPI_decl int VTAPI_UnloadEngine(VTAPI_ENGINE_HANDLE hEngine);
+VTAPI_decl int VTAPI_GetEngineVersion(VTAPI_ENGINE_HANDLE hEngine, char output[100]);
+
+#if 1 //defined(USE_NEW_USERDICT_MANAGER)
+VTAPI_decl VTAPI_USERDICT_HANDLE VTAPI_CreateUserDictHandle(char* pszDicFile);
+VTAPI_decl void VTAPI_ReleaseUserDictHandle(VTAPI_USERDICT_HANDLE hUSERDICT);
+VTAPI_decl int VTAPI_SetUserDictHandle(VTAPI_HANDLE hVTAPI, VTAPI_USERDICT_HANDLE hUSERDICT);
+VTAPI_decl VTAPI_USERDICT_HANDLE VTAPI_GetUserDictHandle(VTAPI_HANDLE hVTAPI);
+VTAPI_decl int VTAPI_GetUserDictInfo(VTAPI_USERDICT_HANDLE hUSERDICT, char* pszDicFile);
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _VTAPI_H_ */
diff --git a/3rdparty/voice_text/launch/voice_text.launch b/3rdparty/voice_text/launch/voice_text.launch
index 95e7d781f..54413b5a5 100644
--- a/3rdparty/voice_text/launch/voice_text.launch
+++ b/3rdparty/voice_text/launch/voice_text.launch
@@ -14,7 +14,8 @@
         machine="$(arg voice_text_machine)">
     <remap from="text_to_speech" to="voice_text/text_to_speech" />
     <rosparam>
-      # db_path: /usr/vt/sayaka/M16  # Commented out to support other speakers than SAYAKA
+      db_path: /usr/vt/sayaka/M16  # Default speaker database (SAYAKA); change together with license_path to use another speaker
+      license_path: /usr/vt/sayaka/M16/data-common/verify/verification.txt
       pitch: 100
       speed: 100
       volume: 100
diff --git a/3rdparty/voice_text/package.xml b/3rdparty/voice_text/package.xml
index 2fd9e303a..b9df97f21 100644
--- a/3rdparty/voice_text/package.xml
+++ b/3rdparty/voice_text/package.xml
@@ -1,14 +1,16 @@
 <package>
   <name>voice_text</name>
   <version>2.1.24</version>
-  <description>voice_text (www.voicetext.jp)</description>
+  <description>The ROS package for VoiceText, ReadSpeaker (https://readspeaker.jp/)</description>
   <maintainer email="k-okada@jsk.t.u-tokyo.ac.jp">Kei Okada</maintainer>
+  <maintainer email="obinata@jsk.imi.i.u-tokyo.ac.jp">Yoshiki Obinata</maintainer>
 
   <license>HOYA License</license>
 
   <url type="website">http://ros.org/wiki/voice_text</url>
 
   <author>Kei Okada</author>
+  <author>Yoshiki Obinata</author>
 
   <buildtool_depend>catkin</buildtool_depend>
   <build_depend>roscpp</build_depend>
diff --git a/3rdparty/voice_text/src/dummy/vt_dummy.cpp b/3rdparty/voice_text/src/dummy/vt_dummy.cpp
deleted file mode 100644
index 0add52d66..000000000
--- a/3rdparty/voice_text/src/dummy/vt_dummy.cpp
+++ /dev/null
@@ -1,13 +0,0 @@
-#include "vt_dummy.h"
-#include <stdio.h>
-
-void VT_UNLOADTTS_JPN(int) {};
-int VT_LOADTTS_JPN(int, int, char*, char*) {
-  fprintf(stderr, "LOADING DUMMY VT_LOADTTS_JPN\n");
-  fprintf(stderr, "You need to install voice_text application\n");
-  fprintf(stderr, "exiting....\n");
-  return -1;
-};
-int VT_TextToFile_JPN(int, char *, char *, int, int, int, int, int, int, int) {};
-
-void VT_GetTTSInfo_JPN(int , char *, void *, int) {};
diff --git a/3rdparty/voice_text/src/dummy/vt_dummy.h b/3rdparty/voice_text/src/dummy/vt_dummy.h
deleted file mode 100644
index 3d7742967..000000000
--- a/3rdparty/voice_text/src/dummy/vt_dummy.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#ifndef __VT_DUMMY_H__
-#define __VT_DUMMY_H__
-extern "C" {
-  void VT_UNLOADTTS_JPN(int);
-  int VT_LOADTTS_JPN(int, int, char*, char*);
-  int VT_TextToFile_JPN(int, char *, char *, int, int, int, int, int, int, int);
-  void VT_GetTTSInfo_JPN(int , char *, void *, int);
-  int VT_LOADTTS_SUCCESS = 0;
-  int VT_FILE_API_SUCCESS = 0;
-  int VT_FILE_API_FMT_S16PCM_WAVE = 4; // https://pastebin.com/9LeCr2HN
-}
-
-enum
-  {
-    VT_BUILD_DATE              =  0,
-    VT_VERIFY_CODE         =  1,
-    VT_MAX_CHANNEL         =  2,
-    VT_DB_DIRECTORY        =  3,
-    VT_LOAD_SUCCESS_CODE   =  4,
-    VT_MAX_SPEAKER         =  5,
-    VT_DEF_SPEAKER         =  6,
-    VT_CODEPAGE            =  7,
-    VT_DB_ACCESS_MODE      =  8,
-    VT_FIXED_POINT_SUPPORT =  9,
-    VT_SAMPLING_FREQUENCY  = 10,
-    VT_MAX_PITCH_RATE      = 11,
-    VT_DEF_PITCH_RATE      = 12,
-    VT_MIN_PITCH_RATE      = 13,
-    VT_MAX_SPEED_RATE      = 14,
-    VT_DEF_SPEED_RATE      = 15,
-    VT_MIN_SPEED_RATE      = 16,
-    VT_MAX_VOLUME          = 17,
-    VT_DEF_VOLUME          = 18,
-    VT_MIN_VOLUME          = 19,
-    VT_MAX_SENT_PAUSE          = 20,
-    VT_DEF_SENT_PAUSE          = 21,
-    VT_MIN_SENT_PAUSE      = 22,
-    VT_DB_BUILD_DATE       = 23,
-    VT_MAX_COMMA_PAUSE         = 24,
-    VT_DEF_COMMA_PAUSE         = 25,
-    VT_MIN_COMMA_PAUSE         = 26,
-    VT_MAX_SYMBOL_OPEN_PAUSE           = 27,
-    VT_DEF_SYMBOL_OPEN_PAUSE           = 28,
-    VT_MIN_SYMBOL_OPEN_PAUSE           = 29,
-    VT_MAX_SYMBOL_CLOSE_PAUSE          = 30,
-    VT_DEF_SYMBOL_CLOSE_PAUSE          = 31,
-    VT_MIN_SYMBOL_CLOSE_PAUSE          = 32,
-  };
-
-#endif //__VT_DUMMY_H__
diff --git a/3rdparty/voice_text/src/voice_text.cpp b/3rdparty/voice_text/src/voice_text.cpp
new file mode 100644
index 000000000..7a1ea5165
--- /dev/null
+++ b/3rdparty/voice_text/src/voice_text.cpp
@@ -0,0 +1,86 @@
+/*
+ * voice_text.cpp
+ * Author: Yuki Furuta <furushchev@jsk.imi.i.u-tokyo.ac.jp>, Yoshiki Obinata <obinata@jsk.imi.i.u-tokyo.ac.jp>
+ */
+
+#include <fstream>
+
+#include <boost/filesystem.hpp>
+#include <boost/thread.hpp>
+
+// ROS
+#include <ros/ros.h>
+#include <dynamic_reconfigure/server.h>
+#include <voice_text/VoiceTextConfig.h>
+#include <voice_text/TextToSpeech.h>
+
+#include "vt_handler.h"
+
+namespace fs = boost::filesystem;
+
+// ROS node object serving "text_to_speech": synthesises the text stored in a
+// file through VTHandler, using the latest dynamic_reconfigure settings.
+class VoiceText {
+public:
+  typedef voice_text::VoiceTextConfig Config;
+
+  // Sets the node up. Initialisers are listed in member declaration order (the
+  // order they run in); initialized_ and h_vt_ get defined starting values.
+  VoiceText() : nh_(), pnh_("~"), dyn_srv_(pnh_), initialized_(false),
+                db_path_(""), license_path_(""), h_vt_(NULL) {
+    pnh_.param<std::string>("db_path", db_path_, "");
+    pnh_.setParam("db_path", db_path_);  // for backward compatibility (db_path is usually set previously)
+    pnh_.param<std::string>("license_path", license_path_, "");
+    dynamic_reconfigure::Server<Config>::CallbackType f =
+      boost::bind(&VoiceText::config_callback, this, _1, _2);
+    dyn_srv_.setCallback(f);
+    h_vt_ = new VTHandler(license_path_, db_path_);
+    srv_ = nh_.advertiseService("text_to_speech", &VoiceText::text_to_speech, this);
+    ROS_INFO("Advertised service text_to_speech\n");
+  }
+
+  ~VoiceText() { delete h_vt_; }
+
+  // dynamic_reconfigure callback: stores the new settings under the mutex.
+  void config_callback(Config &config, uint32_t level) {
+    boost::mutex::scoped_lock lock(mutex_);
+    config_ = config;
+  }
+
+  // Service handler: joins the lines of req.text_path (line breaks dropped) and has
+  // the handler write speech to req.wave_path. Always returns true; see res.ok.
+  bool text_to_speech(voice_text::TextToSpeech::Request  &req,
+                      voice_text::TextToSpeech::Response &res) {
+    boost::mutex::scoped_lock lock(mutex_);
+    if (!fs::exists(fs::path(req.text_path))) {
+      ROS_ERROR_STREAM("text file " << req.text_path << " not found");
+      res.ok = false;
+      return true;
+    }
+    std::ifstream ifs(req.text_path.c_str());
+    std::string text = "", line = "";
+    while (ifs && std::getline(ifs, line)) text += line;
+    res.ok = h_vt_->VTH_TextToFile(config_.pitch, config_.speed, config_.volume, config_.pause,
+                                   text, req.wave_path);
+    return true;
+  }
+
+  ros::NodeHandle nh_, pnh_;
+  boost::mutex mutex_;
+  dynamic_reconfigure::Server<Config> dyn_srv_;
+  Config config_;
+  ros::ServiceServer srv_;
+  bool initialized_;  // not read anywhere in this class
+  std::string db_path_, license_path_;
+  VTHandler* h_vt_;
+};
+
+int main(int argc, char** argv) {
+  // Initialise the ROS client library under the node name "voice_text".
+  ros::init(argc, argv, "voice_text");
+  // The constructor reads the parameters and advertises the text_to_speech service.
+  VoiceText node;
+  // Process callbacks until the node is shut down.
+  ros::spin();
+  return 0;
+}
diff --git a/3rdparty/voice_text/src/voice_text.cpp.in b/3rdparty/voice_text/src/voice_text.cpp.in
deleted file mode 100644
index f52955098..000000000
--- a/3rdparty/voice_text/src/voice_text.cpp.in
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * voice_text_server.cpp
- * Author: Yuki Furuta <furushchev@jsk.imi.i.u-tokyo.ac.jp>
- */
-
-#include <fstream>
-#include <cstdlib>
-#include <sstream>
-#include <string>
-
-#include <boost/filesystem.hpp>
-#include <boost/thread.hpp>
-
-// ROS
-#include <ros/ros.h>
-#include <dynamic_reconfigure/server.h>
-#include <voice_text/VoiceTextConfig.h>
-#include <voice_text/TextToSpeech.h>
-
-// VoiceText
-#ifdef USE_DUMMY_INCLUDE
-#include "dummy/vt_dummy.h"
-#else
-#include "@VT_ROOT@/inc/vt_jpn.h"
-#endif
-
-#define PATH_MAX 1024
-
-namespace fs = boost::filesystem;
-
-
-class VoiceText {
-public:
-  typedef voice_text::VoiceTextConfig Config;
-
-  VoiceText() : nh_(), pnh_("~"), db_path_(""), license_path_(""), dyn_srv_(pnh_) {
-    pnh_.param<std::string>("db_path", db_path_, "@VT_ROOT@");
-    pnh_.setParam("db_path", db_path_);  // for backward compatibility (db_path is usually set previously)
-    pnh_.param<std::string>("license_path", license_path_, "");
-
-    dynamic_reconfigure::Server<Config>::CallbackType f =
-      boost::bind(&VoiceText::config_callback, this, _1, _2);
-    dyn_srv_.setCallback(f);
-  }
-
-  ~VoiceText() {
-    if (initialized_) {
-      VT_UNLOADTTS_JPN(-1);
-    }
-  }
-
-  void config_callback(Config &config, uint32_t level) {
-    boost::mutex::scoped_lock lock(mutex_);
-    config_ = config;
-  }
-
-  bool initialize() {
-    // initialize voice text
-    int ret = -1;
-    char* db_path_char = (char*)calloc(std::strlen(db_path_.c_str())+1, sizeof(char));
-    std::strcpy(db_path_char, db_path_.c_str());
-    char* license_path_char = NULL;
-    if (!license_path_.empty()) {
-      license_path_char = (char*)calloc(std::strlen(license_path_.c_str())+1, sizeof(char));
-      std::strcpy(license_path_char, license_path_.c_str());
-    }
-    ret = VT_LOADTTS_JPN((int)NULL, -1, db_path_char, license_path_char);
-    free(db_path_char);
-    if (!license_path_.empty()) free(license_path_char);
-    if (ret != VT_LOADTTS_SUCCESS) {
-      ROS_FATAL("Failed to load TTS engine (code %d)", ret);
-      if (ret == -1) {
-        ROS_FATAL("You must install voice_text library before building this library");
-      }
-      return false;
-    }
-    VT_GetTTSInfo_JPN(VT_VERIFY_CODE, NULL, &ret, sizeof(int));
-    if (ret != 0) {
-      ROS_FATAL_STREAM("Verification failed (VT_VERIFY_CODE " << ret << ")");
-      return false;
-    }
-
-    // Print voice text info
-    char szTmp[PATH_MAX];
-    char szTmp2[PATH_MAX];
-    VT_GetTTSInfo_JPN(VT_BUILD_DATE, NULL, szTmp, PATH_MAX);
-    sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_BUILD_DATE = %s\n", szTmp);
-    ROS_INFO_STREAM(szTmp2);
-    VT_GetTTSInfo_JPN(VT_VERIFY_CODE, NULL, &ret, sizeof(int));
-    sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_VERIFY_CODE = %d\n", ret);
-    ROS_INFO_STREAM(szTmp2);
-    VT_GetTTSInfo_JPN(VT_MAX_CHANNEL, NULL, &ret, sizeof(int));
-    sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_MAX_CHANNEL = %d\n", ret);
-    ROS_INFO_STREAM(szTmp2);
-    VT_GetTTSInfo_JPN(VT_DB_DIRECTORY, NULL, szTmp, PATH_MAX);
-    sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_DB_DIRECTORY = %s\n", szTmp);
-    ROS_INFO_STREAM(szTmp2);
-    VT_GetTTSInfo_JPN(VT_DEF_SPEAKER, NULL, &ret, sizeof(int));
-    sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_DEF_SPEAKER = %d\n", ret);
-    ROS_INFO_STREAM(szTmp2);
-    VT_GetTTSInfo_JPN(VT_CODEPAGE, NULL, &ret, sizeof(int));
-    sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_CODEPAGE = %d\n", ret);
-    ROS_INFO_STREAM(szTmp2);
-
-    // advertise service
-    srv_ = nh_.advertiseService("text_to_speech", &VoiceText::text_to_speech, this);
-
-    ROS_INFO_STREAM("Advertised service text_to_speech");
-
-    return true;
-  }
-
-  bool text_to_speech(voice_text::TextToSpeech::Request  &req,
-                      voice_text::TextToSpeech::Response &res) {
-    boost::mutex::scoped_lock lock(mutex_);
-    // load text from file
-    if (!fs::exists(fs::path(req.text_path))) {
-      ROS_ERROR_STREAM("text file " << req.text_path << " not found");
-      res.ok = false;
-      return true;
-    }
-    std::ifstream ifs(req.text_path.c_str());
-    std::string text = "", line = "";
-    while (ifs && std::getline(ifs, line)) {
-      text += line;
-    }
-    char* text_char = (char*)calloc(std::strlen(text.c_str())+1, sizeof(char));
-    std::strcpy(text_char, text.c_str());
-
-    char* wave_char = (char*)calloc(std::strlen(req.wave_path.c_str())+1, sizeof(char));
-    std::strcpy(wave_char, req.wave_path.c_str());
-
-    int ret = VT_TextToFile_JPN(VT_FILE_API_FMT_S16PCM_WAVE,
-                                text_char,
-                                wave_char,
-                                -1,
-                                config_.pitch,
-                                config_.speed,
-                                config_.volume,
-                                config_.pause,
-                                -1, -1);
-
-    ROS_INFO_STREAM("voice text wave file is outputted to " << wave_char);
-
-    free(text_char);
-    free(wave_char);
-
-    if (ret != VT_FILE_API_SUCCESS) {
-      ROS_ERROR("Failed to execute tts: (code: %d)", ret);
-      res.ok = false;
-      return true;
-    }
-
-    res.ok = true;
-    return true;
-  }
-
-  ros::NodeHandle nh_, pnh_;
-  boost::mutex mutex_;
-  dynamic_reconfigure::Server<Config> dyn_srv_;
-  Config config_;
-  ros::ServiceServer srv_;
-  bool initialized_;
-  std::string db_path_, license_path_;
-};
-
-int main(int argc, char** argv) {
-  ros::init(argc, argv, "voice_text");
-
-  VoiceText vt;
-  if (!vt.initialize()) {
-    return 1;
-  };
-
-  ros::spin();
-
-  return 0;
-}
diff --git a/3rdparty/voice_text/src/vt_handler.cpp b/3rdparty/voice_text/src/vt_handler.cpp
new file mode 100644
index 000000000..9508cd439
--- /dev/null
+++ b/3rdparty/voice_text/src/vt_handler.cpp
@@ -0,0 +1,248 @@
+/*
+ * vt_handler.cpp
+ * Author: Yoshiki Obinata <obinata@jsk.imi.i.u-tokyo.ac.jp>
+ */
+
+#include "vt_handler.h"
+
+// Finds a VoiceText SDK or ReadSpeaker API shared library under /usr/vt, opens
+// it with dlopen(), resolves its entry points through LoadSym() and initialises
+// the engine. `license_path` must be non-empty; `db_path` is only handed to the
+// SDK loader (kept for backward compatibility). Every failure is reported with
+// ROS_FATAL and makes the constructor return early; temporary string buffers
+// are released automatically on each of those paths.
+VTHandler::VTHandler(const std::string license_path, const std::string db_path){
+  glob_t sdk_old_gbuf_, sdk_new_gbuf_, api_gbuf_;
+  // Owning copy of the matched path: gl_pathv is released by globfree() below,
+  // and an uninitialised char* must never be used as a strcpy() destination.
+  std::string lib_file_;
+  char *dl_err_;
+  bool sym_status_;
+  int ret_;
+
+  // Writable, NUL-terminated copy of a string for the char*-taking vendor
+  // functions; the returned vector frees itself on every return path.
+  auto to_buffer_ = [](const std::string& s){
+    return std::vector<char>(s.c_str(), s.c_str() + s.size() + 1);
+  };
+
+  // Give the members a defined state for the early-return paths below.
+  this->vt_type = NO_VT;
+  this->dl_handle = NULL;
+
+  // Locate libraries
+#ifdef ENV64
+  glob(("/usr/vt/*/*/bin/x86_64/RAMIO/libvt_jpn.so"), 0, NULL, &sdk_old_gbuf_); // e.g., /usr/vt/sayaka/M16/bin/x86_64/RAMIO/libvt_jpn.so
+  glob(("/usr/vt/*/*/bin/LINUX64_GLIBC3/RAMIO/libvt_jpn.so"), 0, NULL, &sdk_new_gbuf_); // e.g., /usr/vt/risa/H16/bin/LINUX64_GLIBC3/RAMIO/libvt_jpn.so
+#elif ENV32
+  glob(("/usr/vt/*/*/bin/x86_32/RAMIO/libvt_jpn.so"), 0, NULL, &sdk_old_gbuf_); // e.g., /usr/vt/sayaka/M16/bin/x86_32/RAMIO/libvt_jpn.so
+  glob(("/usr/vt/*/*/bin/LINUX32_GLIBC3/RAMIO/libvt_jpn.so"), 0, NULL, &sdk_new_gbuf_); // e.g., /usr/vt/risa/H16/bin/LINUX32_GLIBC3/RAMIO/libvt_jpn.so
+#else
+  ROS_FATAL("Your architecture is not supported");
+  return;
+#endif
+  glob(("/usr/vt/*/*/bin/libvtapi.so"), 0, NULL, &api_gbuf_); // e.g., /usr/vt/hikari/D16/bin/libvtapi.so
+
+  // Preference order: old-layout SDK, then new-layout SDK, then the API library.
+  if(sdk_old_gbuf_.gl_pathc > 0){
+    this->vt_type = VT_SDK;
+    lib_file_ = sdk_old_gbuf_.gl_pathv[0];
+  }else if(sdk_new_gbuf_.gl_pathc > 0){
+    this->vt_type = VT_SDK;
+    lib_file_ = sdk_new_gbuf_.gl_pathv[0];
+  }else if(api_gbuf_.gl_pathc > 0){
+    this->vt_type = VT_API;
+    lib_file_ = api_gbuf_.gl_pathv[0];
+  }
+
+  globfree(&sdk_old_gbuf_);
+  globfree(&sdk_new_gbuf_);
+  globfree(&api_gbuf_);
+
+  // Load libraries
+  if(this->vt_type == NO_VT){
+    ROS_FATAL("No VoiceText or ReadSpeaker libraries have found");
+    return;
+  }
+
+  ROS_INFO("Opening %s ", lib_file_.c_str());
+  this->dl_handle = dlopen(lib_file_.c_str(), RTLD_NOW);
+  if(this->dl_handle == NULL){
+    dl_err_ = dlerror();
+    ROS_FATAL_STREAM("Error occured when opening VoiceText or ReadSpeaker libraries " <<
+                     dl_err_);
+    return;
+  }
+
+  // Load symbols
+  sym_status_ = LoadSym();
+  if(!sym_status_){
+    return;
+  }
+
+  // Load license file
+  if(license_path.empty()){
+    ROS_FATAL("Please set license file");
+    return;
+  }
+  // The vendor functions below take a non-const char*.
+  std::vector<char> license_path_buf_ = to_buffer_(license_path);
+  char *license_path_char_ = &license_path_buf_[0];
+
+  // Initialize VT Handler
+  if(this->vt_type == VT_SDK){
+    // db_path is for backward compatibility
+    std::vector<char> db_path_buf_ = to_buffer_(db_path);
+    ret_ = VT_LOADTTS_JPN((int)NULL, -1, &db_path_buf_[0], license_path_char_);
+    if(ret_ != VT_LOADTTS_SUCCESS){
+      ROS_FATAL("[VoiceText SDK] Failed to load TTS engine (code %d)", ret_);
+      return;
+    }
+    // A non-zero VT_VERIFY_CODE is treated as a failed verification.
+    VT_GetTTSInfo_JPN(VT_VERIFY_CODE, NULL, &ret_, sizeof(int));
+    if (ret_ != 0) {
+      ROS_FATAL_STREAM("[VoiceText SDK] Verification failed (VT_VERIFY_CODE " << ret_ << ")");
+      return;
+    }
+  }else if(this->vt_type == VT_API){
+    fs::path p_ = lib_file_;
+    std::vector<std::string> elements_;
+
+    // Get ReadSpeaker library directory
+    std::vector<char> lib_path_buf_ = to_buffer_(p_.parent_path().string());
+
+    // Get speaker and type: for /usr/vt/<speaker>/<type>/bin/libvtapi.so the
+    // path elements are "/", "usr", "vt", <speaker>, <type>, ...
+    for(auto& part_ : p_){
+      elements_.push_back(part_.string());
+    }
+    std::vector<char> speaker_buf_ = to_buffer_(elements_.at(3));
+    std::vector<char> type_buf_ = to_buffer_(elements_.at(4));
+
+    // VTAPI_Init() receives the directory that contains libvtapi.so.
+    VTAPI_Init(&lib_path_buf_[0]);
+    this->hVTAPI = VTAPI_CreateHandle();
+    if(this->hVTAPI == 0){
+      ROS_FATAL("[ReadSpeaker API] Failed to create API handler. STATUS: %s", VTAPI_GetLastErrorInfo(0)->szMsg);
+      return;
+    }
+    VTAPI_SetLicenseFolder(license_path_char_);
+    // Load engine
+    this->hEngine = VTAPI_GetEngine(&speaker_buf_[0], &type_buf_[0]);
+    ret_ = VTAPI_SetEngineHandle(this->hVTAPI, this->hEngine);
+    if(ret_ < VTAPI_SUCCESS){
+      ROS_FATAL("[ReadSpeaker API] Failed to create engine handler. CODE: %d, MESSAGE: %s ",
+                ret_, VTAPI_GetLastErrorInfo(this->hVTAPI)->szMsg);
+      return;
+    }
+  }
+}
+
+VTHandler::~VTHandler(){
+  // Nothing to close when no library handle is held.
+  if(this->dl_handle == NULL) return;
+  // TODO release handle before close dl
+  dlclose(this->dl_handle);
+}
+
+// Resolves the vendor entry points from the opened library into the function
+// pointer members, per vt_type. On a missing symbol: closes the library, returns false.
+bool VTHandler::LoadSym(){
+  const char* dl_err_;
+  if(vt_type == VT_SDK){
+    ROS_INFO("Found VoiceText SDK");
+    for(auto& itr: VTSDK_func_){
+      dlerror();  // clear any stale error so the check below concerns this dlsym()
+      VTSDK_s_map_[itr] = dlsym(this->dl_handle, itr);
+      dl_err_ = dlerror();
+      if(dl_err_ != NULL){
+        ROS_FATAL_STREAM("Failed to load VoiceText libraries. STATUS: " << dl_err_);
+        dlclose(this->dl_handle);
+        this->dl_handle = NULL;  // keeps the destructor from closing it a second time
+        return false;
+      }
+    }
+    // cast
+    VT_LOADTTS_JPN = reinterpret_cast<short(*)(HWND, int, char*, char*)>(VTSDK_s_map_.at("VT_LOADTTS_JPN"));
+    VT_UNLOADTTS_JPN = reinterpret_cast<void(*)(int)>(VTSDK_s_map_.at("VT_UNLOADTTS_JPN"));
+    VT_GetTTSInfo_JPN = reinterpret_cast<int(*)(int, char*, void*, int)>(VTSDK_s_map_.at("VT_GetTTSInfo_JPN"));
+    VT_TextToFile_JPN = reinterpret_cast<short(*)(int, char*, char*, int, int, int, int, int, int, int)>(VTSDK_s_map_.at("VT_TextToFile_JPN"));
+  }else if(vt_type == VT_API){
+    ROS_INFO("Found ReadSpeaker API");
+    for(auto& itr: VTAPI_func_){
+      dlerror();  // clear any stale error so the check below concerns this dlsym()
+      VTAPI_s_map_[itr] = dlsym(this->dl_handle, itr);
+      dl_err_ = dlerror();
+      if(dl_err_ != NULL){
+        ROS_FATAL_STREAM("Failed to load ReadSpeaker libraries. STATUS: " << dl_err_);
+        dlclose(this->dl_handle);
+        this->dl_handle = NULL;  // keeps the destructor from closing it a second time
+        return false;
+      }
+    }
+    // cast
+    VTAPI_Init = reinterpret_cast<int(*)(char*)>(VTAPI_s_map_.at("VTAPI_Init"));
+    VTAPI_CreateHandle = reinterpret_cast<VTAPI_HANDLE(*)()>(VTAPI_s_map_.at("VTAPI_CreateHandle"));
+    VTAPI_SetLicenseFolder = reinterpret_cast<void(*)(char*)>(VTAPI_s_map_.at("VTAPI_SetLicenseFolder"));
+    VTAPI_GetEngine = reinterpret_cast<VTAPI_ENGINE_HANDLE(*)(char*, char*)>(VTAPI_s_map_.at("VTAPI_GetEngine"));
+    VTAPI_SetEngineHandle = reinterpret_cast<int(*)(VTAPI_HANDLE, VTAPI_ENGINE_HANDLE)>(VTAPI_s_map_.at("VTAPI_SetEngineHandle"));
+    VTAPI_SetAttr = reinterpret_cast<int(*)(VTAPI_HANDLE, int, int)>(VTAPI_s_map_.at("VTAPI_SetAttr"));
+    VTAPI_SetOutputFile = reinterpret_cast<int(*)(VTAPI_HANDLE, char*, int)>(VTAPI_s_map_.at("VTAPI_SetOutputFile"));
+    VTAPI_TextToFile = reinterpret_cast<int(*)(VTAPI_HANDLE, void*, int, int)>(VTAPI_s_map_.at("VTAPI_TextToFile"));
+    VTAPI_GetLastErrorInfo = reinterpret_cast<VTAPI_ERRS_INFO*(*)(VTAPI_HANDLE)>(VTAPI_s_map_.at("VTAPI_GetLastErrorInfo"));
+    VTAPI_ReleaseHandle = reinterpret_cast<void(*)(VTAPI_HANDLE)>(VTAPI_s_map_.at("VTAPI_ReleaseHandle"));
+    VTAPI_UnloadEngine = reinterpret_cast<int(*)(VTAPI_ENGINE_HANDLE)>(VTAPI_s_map_.at("VTAPI_UnloadEngine"));
+    VTAPI_Exit = reinterpret_cast<void(*)()>(VTAPI_s_map_.at("VTAPI_Exit"));
+  }
+  return true;
+}
+
+// Synthesises `text` into a wave file at `wave_path` (16-bit PCM WAV, going by
+// the format constants) with the given pitch/speed/volume/pause, through the
+// backend selected by vt_type. Returns true on success and false when a
+// backend call fails or no library is loaded.
+bool VTHandler::VTH_TextToFile(const int pitch, const int speed, const int volume, const int pause,
+                               const std::string text, const std::string wave_path){
+  int ret_;
+  bool success_ = true;
+  // Without an opened library the function pointers are not usable.
+  if(this->vt_type == NO_VT || this->dl_handle == NULL){
+    ROS_ERROR("No VoiceText or ReadSpeaker library is loaded");
+    return false;
+  }
+
+  // The vendor functions take non-const char*, so pass writable NUL-terminated
+  // copies; the vectors release the memory on every path out of the function.
+  std::vector<char> text_buf_(text.c_str(), text.c_str() + text.size() + 1);
+  std::vector<char> wave_path_buf_(wave_path.c_str(), wave_path.c_str() + wave_path.size() + 1);
+  char *text_char_ = &text_buf_[0];
+  char *wave_path_char_ = &wave_path_buf_[0];
+
+  if(this->vt_type == VT_SDK){
+    ret_ = VT_TextToFile_JPN(VT_FILE_API_FMT_S16PCM_WAVE, text_char_, wave_path_char_,
+                             -1, pitch, speed, volume, pause, -1, -1);
+    if(ret_ != VT_FILE_API_SUCCESS){
+      ROS_ERROR("[VoiceText SDK] Failed to execute TTS (code %d)", ret_);
+      success_ = false;
+    }
+  }else if(this->vt_type == VT_API){
+    VTAPI_SetAttr(this->hVTAPI, ATTR_PITCH, pitch);
+    VTAPI_SetAttr(this->hVTAPI, ATTR_SPEED, speed);
+    VTAPI_SetAttr(this->hVTAPI, ATTR_VOLUME, volume);
+    VTAPI_SetAttr(this->hVTAPI, ATTR_PAUSE, pause);
+    ret_ = VTAPI_SetOutputFile(this->hVTAPI, wave_path_char_, FORMAT_16PCM_WAV);
+    if(ret_ != VTAPI_SUCCESS){
+      ROS_ERROR("[ReadSpeaker API] Failed to execute VTAPI_SetOutputFile. STATUS: %s",
+                VTAPI_GetLastErrorInfo(this->hVTAPI)->szMsg);
+      success_ = false;
+    }
+    ret_ = VTAPI_TextToFile(this->hVTAPI, text_char_, -1, TEXT_FORMAT_DEFAULT);
+    if(ret_ != VTAPI_SUCCESS){
+      ROS_ERROR("[ReadSpeaker API] Failed to execute VTAPI_TextToFile. STATUS: %s",
+                VTAPI_GetLastErrorInfo(this->hVTAPI)->szMsg);
+      success_ = false;
+    }
+  }
+  // Returned from function scope so every backend branch yields a result.
+  return success_;
+}