Merge pull request jsk-ros-pkg#343 from mqcmd196/PR/separate_dialogflow_task_executive

[dialogflow_task_executive] separate dialogflow API client and app execution from dialogflow_task_executive
knorth55 · Jun 9, 2022 · 8b1fc37 · 8b1fc37
2 parents f4f0648 + 6915c84
commit 8b1fc37
Show file tree

Hide file tree

Showing 7 changed files with 161 additions and 76 deletions.
diff --git a/dialogflow_task_executive/CMakeLists.txt b/dialogflow_task_executive/CMakeLists.txt
@@ -18,6 +18,7 @@ endif()
 find_package(catkin REQUIRED COMPONENTS
     message_generation
     std_msgs
+    actionlib_msgs
     catkin_virtualenv
 )
 
@@ -26,9 +27,15 @@ add_message_files(
   DialogResponse.msg
 )
 
+add_action_files(
+  FILES
+  DialogText.action
+)
+
 generate_messages(
   DEPENDENCIES
   std_msgs
+  actionlib_msgs
 )
 
 catkin_package(
@@ -50,6 +57,7 @@ else()
 endif()
 
 file(GLOB NODE_SCRIPTS_FILES node_scripts/*.py)
+
 catkin_install_python(
   PROGRAMS ${NODE_SCRIPTS_FILES}
   DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}

diff --git a/dialogflow_task_executive/action/DialogText.action b/dialogflow_task_executive/action/DialogText.action
@@ -0,0 +1,11 @@
+# Define the goal
+string query
+---
+# Define the result
+DialogResponse response
+string session
+bool done
+---
+# Define a feedback message
+string session
+string status
diff --git a/dialogflow_task_executive/launch/dialogflow_ros.launch b/dialogflow_task_executive/launch/dialogflow_ros.launch
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="utf-8"?>
+<launch>
+  <!-- Starts the dialogflow_client node on its own. -->
+  <!-- project_id and credential have no default and must be passed in by the caller. -->
+  <arg name="project_id" />
+  <arg name="credential" />
+  <arg name="enable_hotword" default="true" />
+
+  <!-- launch_dialogflow is declared but not referenced in this file. -->
+  <arg name="launch_dialogflow" default="true" />
+  <arg name="use_audio" default="false" />
+  <arg name="use_tts" default="true" />
+  <arg name="language" default="ja-JP" />
+  <arg name="soundplay_action_name" default="robotsound_jp" />
+  <arg name="volume" default="1.0" />
+
+  <node name="dialogflow_client"
+        pkg="dialogflow_task_executive" type="dialogflow_client.py"
+        output="screen">
+    <rosparam subst_value="true">
+      use_audio: $(arg use_audio)
+      use_tts: $(arg use_tts)
+      language: $(arg language)
+      soundplay_action_name: $(arg soundplay_action_name)
+      volume: $(arg volume)
+      project_id: $(arg project_id)
+      google_cloud_credentials_json: $(arg credential)
+      enable_hotword: $(arg enable_hotword)
+    </rosparam>
+    <rosparam command="load" ns="/dialogflow_client/hotword"
+              file="$(find dialogflow_task_executive)/config/dialogflow_hotword.yaml"/>
+  </node>
+
+</launch>
diff --git a/dialogflow_task_executive/launch/dialogflow_task_executive.launch b/dialogflow_task_executive/launch/dialogflow_task_executive.launch
@@ -6,6 +6,7 @@
   <arg name="enable_hotword" default="true" />
 
   <!-- options for dialogflow_client -->
+  <arg name="launch_dialogflow" default="true" />
   <arg name="use_audio" default="false" />
   <arg name="use_tts" default="true" />
   <arg name="language" default="ja-JP" />
@@ -18,22 +19,24 @@
     <remap from="mux" to="speech_to_text_mux"/>
   </node>
 
-  <node name="dialogflow_client"
-        pkg="dialogflow_task_executive" type="dialogflow_client.py"
-        output="screen">
-    <rosparam subst_value="true">
-      use_audio: $(arg use_audio)
-      use_tts: $(arg use_tts)
-      language: $(arg language)
-      soundplay_action_name: $(arg soundplay_action_name)
-      volume: $(arg volume)
-      project_id: $(arg project_id)
-      google_cloud_credentials_json: $(arg credential)
-      enable_hotword: $(arg enable_hotword)
-    </rosparam>
-    <rosparam command="load" ns="/dialogflow_client/hotword"
-        file="$(find dialogflow_task_executive)/config/dialogflow_hotword.yaml"/>
-  </node>
+  <group if="$(arg launch_dialogflow)">
+    <node name="dialogflow_client"
+          pkg="dialogflow_task_executive" type="dialogflow_client.py"
+          output="screen">
+      <rosparam subst_value="true">
+        use_audio: $(arg use_audio)
+        use_tts: $(arg use_tts)
+        language: $(arg language)
+        soundplay_action_name: $(arg soundplay_action_name)
+        volume: $(arg volume)
+        project_id: $(arg project_id)
+        google_cloud_credentials_json: $(arg credential)
+        enable_hotword: $(arg enable_hotword)
+      </rosparam>
+      <rosparam command="load" ns="/dialogflow_client/hotword"
+                file="$(find dialogflow_task_executive)/config/dialogflow_hotword.yaml"/>
+    </node>
+  </group>
 
   <node name="task_executive"
         pkg="dialogflow_task_executive" type="task_executive.py"

diff --git a/dialogflow_task_executive/node_scripts/dialogflow_client.py b/dialogflow_task_executive/node_scripts/dialogflow_client.py
@@ -17,6 +17,7 @@
 from sound_play.msg import SoundRequestGoal
 from speech_recognition_msgs.msg import SpeechRecognitionCandidates
 from std_msgs.msg import String
+from dialogflow_task_executive.msg import DialogTextAction, DialogTextGoal, DialogTextResult, DialogTextFeedback
 
 from dialogflow_task_executive.msg import DialogResponse
 
@@ -59,9 +60,137 @@ def __ne__(self, state):
         return not self.__eq__(state)
 
 
-class DialogflowClient(object):
+class DialogflowBase(object):
+    """Common Dialogflow setup shared by the text and audio clients.
+
+    Reads the ``~language``, ``~google_cloud_credentials_json`` and
+    ``~project_id`` private parameters, creates the ``SessionsClient``,
+    and offers helpers to send a text query and to convert a query result
+    into a ``DialogResponse`` message.
+    """
 
     def __init__(self):
+        """Read the parameters and create the Dialogflow session client."""
+        # No Dialogflow session has been created yet.
+        self.session_id = None
+        self.language = rospy.get_param("~language", "ja-JP")
+        credentials_json = rospy.get_param(
+            '~google_cloud_credentials_json', None)
+        if credentials_json is None:
+            rospy.loginfo("Loading credential json from env")
+            # project id for google cloud service
+            self.project_id = rospy.get_param("~project_id", None)
+            self.session_client = df.SessionsClient()
+        else:
+            rospy.loginfo("Loading credential json from rosparam")
+            credentials = Credentials.from_service_account_file(
+                credentials_json
+            )
+            self.project_id = credentials.project_id
+            self.session_client = df.SessionsClient(
+                credentials=credentials
+            )
+        if self.project_id is None:
+            rospy.logerr('project ID is not set')
+
+    def detect_intent_text(self, data, session):
+        """Send a text query to Dialogflow.
+
+        Args:
+            data: text of the query.
+            session: session path handed to ``detect_intent``.
+
+        Returns:
+            The ``query_result`` field of the ``detect_intent`` response.
+        """
+        query = df.types.QueryInput(
+            text=df.types.TextInput(
+                text=data, language_code=self.language))
+        return self.session_client.detect_intent(
+            session=session, query_input=query).query_result
+
+    def make_dialog_msg(self, result):
+        """Convert a Dialogflow query result into a ``DialogResponse``.
+
+        Args:
+            result: query result, e.g. from ``detect_intent_text``.
+
+        Returns:
+            DialogResponse stamped with the current ROS time.
+        """
+        msg = DialogResponse()
+        msg.header.stamp = rospy.Time.now()
+        # 'input.unknown' is the action reported by Dialogflow's default
+        # fallback intent (nothing matched), so warn only in that case.
+        if result.action == 'input.unknown':
+            rospy.logwarn("Unknown action")
+        msg.action = result.action
+
+        if self.language == 'ja-JP':
+            # For ja-JP the text fields are stored UTF-8 encoded.
+            msg.query = result.query_text.encode("utf-8")
+            msg.response = result.fulfillment_text.encode("utf-8")
+        else:
+            msg.query = result.query_text
+            msg.response = result.fulfillment_text
+        msg.fulfilled = result.all_required_params_present
+        msg.parameters = MessageToJson(result.parameters)
+        msg.speech_score = result.speech_recognition_confidence
+        msg.intent_score = result.intent_detection_confidence
+        return msg
+
+
+class DialogflowTextClient(DialogflowBase):
+    """Action server that answers text queries through Dialogflow.
+
+    Serves ``DialogTextAction`` goals on ``~text_action``.
+    """
+
+    def __init__(self):
+        """Initialise the base class and start the action server."""
+        super(DialogflowTextClient, self).__init__()
+        self._as = actionlib.SimpleActionServer("~text_action", DialogTextAction,
+                                                execute_cb=self.cb, auto_start=False)
+        self._as.start()
+
+    def cb(self, goal):
+        """Handle one ``DialogText`` goal.
+
+        Sends ``goal.query`` to Dialogflow, creating ``self.session_id`` on
+        first use. The goal is always finished with ``set_succeeded``;
+        ``result.done`` reports whether the query worked, and on failure the
+        exception text is placed in ``feedback.status``.
+        """
+        feedback = DialogTextFeedback()
+        result = DialogTextResult()
+        success = False
+        try:
+            if self.session_id is None:
+                self.session_id = str(uuid.uuid1())
+                rospy.loginfo(
+                    "Created new session: {}".format(self.session_id))
+            session = self.session_client.session_path(
+                self.project_id, self.session_id
+            )
+            df_result = self.detect_intent_text(goal.query, session)
+            result.session = session
+            result.response = self.make_dialog_msg(df_result)
+            success = True
+        except Exception as e:
+            rospy.logerr(str(e))
+            feedback.status = str(e)
+            success = False
+        finally:
+            # Feedback is published once per goal, success or failure.
+            self._as.publish_feedback(feedback)
+            result.done = success
+            self._as.set_succeeded(result)
+
+
+class DialogflowAudioClient(DialogflowBase):
+
+    def __init__(self):
+        super(DialogflowAudioClient, self).__init__()
         # language for dialogflow
         self.language = rospy.get_param("~language", "ja-JP")
 
@@ -83,28 +170,8 @@ def __init__(self):
                             for hotword in hotwords ]
 
         self.state = State()
-        self.session_id = None
         self.queue = Queue.Queue()
 
-        credentials_json = rospy.get_param(
-            '~google_cloud_credentials_json', None)
-        if credentials_json is None:
-            rospy.loginfo("Loading credential json from env")
-            # project id for google cloud service
-            self.project_id = rospy.get_param("~project_id", None)
-            self.session_client = df.SessionsClient()
-        else:
-            rospy.loginfo("Loading credential json from rosparam")
-            credentials = Credentials.from_service_account_file(
-                credentials_json
-            )
-            self.project_id = credentials.project_id
-            self.session_client = df.SessionsClient(
-                credentials=credentials
-            )
-        if self.project_id is None:
-            rospy.logerr('project ID is not set')
-
         if self.use_tts:
             soundplay_action_name = rospy.get_param(
                 '~soundplay_action_name', 'robotsound_jp')
@@ -135,8 +202,6 @@ def __init__(self):
             self.sub_speech = rospy.Subscriber(
                 "speech_to_text", SpeechRecognitionCandidates,
                 self.input_cb)
-            self.sub_text = rospy.Subscriber(
-                "text", String, self.input_cb)
 
         self.df_thread = threading.Thread(target=self.df_run)
         self.df_thread.daemon = True
@@ -154,20 +219,13 @@ def hotword_cb(self, msg):
             rospy.loginfo("Hotword received")
             self.state.set(State.LISTENING)
 
-    def text_cb(self, msg):
-        self.queue.put(msg)
-        rospy.loginfo("Recieved input")
-
     def input_cb(self, msg):
         if not self.enable_hotword:
             self.state.set(State.LISTENING)
         elif not self.use_audio:
             # catch hotword from string
             if isinstance(msg, SpeechRecognitionCandidates):
                 self.hotword_cb(String(data=msg.transcript[0]))
-            # if std_msgs/String was subscribed
-            elif isinstance(msg, String):
-                self.text_cb(msg)
             else:
                 rospy.logerr("Unsupported data class {}".format(msg))
 
@@ -177,13 +235,6 @@ def input_cb(self, msg):
         else:
             rospy.logdebug("Received input but ignored")
 
-    def detect_intent_text(self, data, session):
-        query = df.types.QueryInput(
-            text=df.types.TextInput(
-                text=data, language_code=self.language))
-        return self.session_client.detect_intent(
-            session=session, query_input=query).query_result
-
     def detect_intent_audio(self, data, session):
         query = df.types.QueryInput(audio_config=self.audio_config)
         return self.session_client.detect_intent(
@@ -194,22 +245,7 @@ def print_result(self, result):
         rospy.loginfo(pprint.pformat(result))
 
     def publish_result(self, result):
-        msg = DialogResponse()
-        msg.header.stamp = rospy.Time.now()
-        if result.action != 'input.unknown':
-            rospy.logwarn("Unknown action")
-        msg.action = result.action
-
-        if self.language == 'ja-JP':
-            msg.query = result.query_text.encode("utf-8")
-            msg.response = result.fulfillment_text.encode("utf-8")
-        else:
-            msg.query = result.query_text
-            msg.response = result.fulfillment_text
-        msg.fulfilled = result.all_required_params_present
-        msg.parameters = MessageToJson(result.parameters)
-        msg.speech_score = result.speech_recognition_confidence
-        msg.intent_score = result.intent_detection_confidence
+        msg = self.make_dialog_msg(result)
         self.pub_res.publish(msg)
 
     def speak_result(self, result):
@@ -250,9 +286,6 @@ def df_run(self):
                 elif isinstance(msg, SpeechRecognitionCandidates):
                     result = self.detect_intent_text(
                         msg.transcript[0], session)
-                elif isinstance(msg, String):
-                    result = self.detect_intent_text(
-                        msg.data, session)
                 else:
                     raise RuntimeError("Invalid data")
                 self.print_result(result)
@@ -269,5 +302,6 @@ def df_run(self):
 
 if __name__ == '__main__':
     rospy.init_node("dialogflow_client")
-    dfc = DialogflowClient()
+    dftc = DialogflowTextClient()
+    dfac = DialogflowAudioClient()
     rospy.spin()
diff --git a/dialogflow_task_executive/package.xml b/dialogflow_task_executive/package.xml
@@ -1,12 +1,15 @@
-<?xml version="1.0"?>
+<?xml version="1.0" encoding="utf-8"?>
 <package format="1">
   <name>dialogflow_task_executive</name>
   <version>2.1.24</version>
-  <description>A ROS package for Task execution with dialogflow</description>
+  <description>A ROS package for Google Dialogflow and launching apps via Dialogflow</description>
 
   <maintainer email="[email protected]">Shingo Kitagawa</maintainer>
+  <maintainer email="[email protected]">Kei Okada</maintainer>
 
   <license>BSD</license>
+  <author email="[email protected]">Yoshiki Obinata</author>
+  <author email="[email protected]">Shingo Kitagawa</author>
   <author email="[email protected]">Yuki Furuta</author>
   <url type="website">http://ros.org/wiki/dialogflow_task_executive</url>
   <url type="repository">https://github.com/jsk-ros-pkg/jsk_3rdparty</url>

diff --git a/dialogflow_task_executive/requirements.in b/dialogflow_task_executive/requirements.in
@@ -1,2 +1 @@
 dialogflow==1.1.1
-