-
Notifications
You must be signed in to change notification settings - Fork 1.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
refactor: optimize the display of STT nodes #1874
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -70,7 +70,7 @@ def save_image(image_list): | |
# 回到文件头 | ||
buffer.seek(0) | ||
file_content = split_handle.get_content(buffer, save_image) | ||
content.append('## ' + doc['name'] + '\n' + file_content) | ||
content.append('### ' + doc['name'] + '\n' + file_content) | ||
break | ||
|
||
return NodeResult({'content': splitter.join(content)}, {}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The code is mostly correct but it can be improved for more readability and possibly for better error handling. Here's a revised version: def save_image(image_list):
buffer.seek(0)
content = []
for doc in image_list:
try:
file_content = split_handle.get_content(buffer, save_image)
content.append(f"### {doc['name']}\n{file_content}")
except Exception as e:
print(f"Error processing document {doc['name']}:", e)
return NodeResult({'content': splitter.join(content)}, {}) Corrections and Suggestions:
These changes make the code cleaner and more robust. |
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,25 +37,34 @@ def process_audio_item(audio_item, model): | |
temp_mp3_path = temp_amr_file.name | ||
any_to_mp3(temp_file_path, temp_mp3_path) | ||
try: | ||
return split_and_transcribe(temp_mp3_path, model) | ||
transcription = split_and_transcribe(temp_mp3_path, model) | ||
return {file.file_name: transcription} | ||
finally: | ||
os.remove(temp_file_path) | ||
os.remove(temp_mp3_path) | ||
|
||
def process_audio_items(audio_list, model): | ||
with ThreadPoolExecutor(max_workers=5) as executor: | ||
results = list(executor.map(lambda item: process_audio_item(item, model), audio_list)) | ||
return '\n\n'.join(results) | ||
return results | ||
|
||
result = process_audio_items(audio_list, stt_model) | ||
return NodeResult({'answer': result, 'result': result}, {}) | ||
content = [] | ||
result_content = [] | ||
for item in result: | ||
for key, value in item.items(): | ||
content.append(f'### {key}\n{value}') | ||
result_content.append(value) | ||
return NodeResult({'answer': '\n'.join(result_content), 'result': '\n'.join(result_content), | ||
'content': content}, {}) | ||
|
||
def get_details(self, index: int, **kwargs): | ||
return { | ||
'name': self.node.properties.get('stepName'), | ||
"index": index, | ||
'run_time': self.context.get('run_time'), | ||
'answer': self.context.get('answer'), | ||
'content': self.context.get('content'), | ||
'type': self.node.type, | ||
'status': self.status, | ||
'err_message': self.err_message, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The provided code has several issues and could be improved: Issues:
Improvements:
Here’s an updated version incorporating some of these improvements: from concurrent.futures import ThreadPoolExecutor
from typing import Dict
from .models import File
from setting.models_provider.tools import get_model_instance_by_model_user_id
import os
# Assuming this is the correct model instance for speech recognition
stt_model = get_model_instance_by_model_user_id(user_id)
def process_audio_item(audio_item):
temp_mp3_path = f"/tmp/temp_{audio_item.file.filename}.mp3"
try:
any_to_mp3(audio_item.file.path, temp_mp3_path)
transcription_result = split_and_transcribe(temp_mp3_path, stt_model)
        transcriptions = [{file.file_name: entries} for file, entries in transcription_result.items()]
return {'file': audio_item.file.filename, 'entries': transcriptions}
finally:
os.remove(temp_mp3_path)
class BaseSpeechToTextNode(ISpeechToTextNode):
...
def process_audio_items(self, audio_list):
# Use dictionary comprehension to store results directly
results_dicts = []
with ThreadPoolExecutor(max_workers=5) as executor:
futures = [executor.submit(process_audio_item, audio_item) for audio_item in audio_list]
for future in futures:
audio_data_dict = future.result()
results_dicts.extend(audio_data_dict['entries'])
flattened_results = [{'file': d['file'], 'entry': e} for sub_d in results_dicts for e in sub_d['entries']]
return NodeResult({'flattenedResults': flattened_results})
... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The code has a few issues that need addressing to improve its correctness and maintainability:
Here's an improved version of the code incorporating these suggestions: def process_audio_item(audio_item, model):
try:
temp_mp3_path = generate_temporary_file()
any_to_mp3(audio_item.path, temp_mp3_path)
transcription = split_and_transcribe(temp_mp3_path, model)
os.remove(temp_mp3_path)
return {audio_item.id: transcription}
except Exception as e:
        # Handle exceptions and log/raise appropriately
print(f"Error processing audio item {audio_item.id}: {e}")
return None
def process_audio_items(audio_list, model):
with ThreadPoolExecutor(max_workers=5) as executor:
future_results = [executor.submit(process_audio_item, item, model) for item in audio_list]
results = [future.result() for future in concurrent.futures.as_completed(future_results)]
# Combine and filter out None values (failed items)
combined_results = [{item_id: transcr} for res in results if res for item_id, transcr in res.items()]
# Flatten the list into individual transcription strings
result_content = ''.join([transcription for _id, transcription in combined_results])
return {'answer': result_content, 'result': result_content}
def get_details(self, index: int, **kwargs):
response_data = {
'name': self.node.properties.get('stepName', ''),
'index': index,
'run_time': self.context.get('run_time', {}),
'answer': self.context.get('answer', ''),
+ 'content': self.context.get('content', []),
'type': self.node.type,
'status': self.status,
'err_message': self.err_message
}
response_data['items'] = [{'text': t} for _, t in combined_results]
return response_data In this revision:
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -293,15 +293,22 @@ | |
<div class="card-never border-r-4"> | ||
<h5 class="p-8-12">参数输出</h5> | ||
<div class="p-8-12 border-t-dashed lighter"> | ||
<p class="mb-8 color-secondary">文本内容:</p> | ||
<div v-if="item.answer"> | ||
<el-card | ||
shadow="never" | ||
style="--el-card-padding: 8px" | ||
v-for="(file_content, index) in item.content" | ||
:key="index" | ||
class="mb-8" | ||
> | ||
<MdPreview | ||
v-if="file_content" | ||
ref="editorRef" | ||
editorId="preview-only" | ||
:modelValue="item.answer" | ||
:modelValue="file_content" | ||
style="background: none" | ||
/> | ||
</div> | ||
<template v-else> -</template> | ||
</el-card> | ||
</div> | ||
</div> | ||
</template> | ||
|
@@ -544,7 +551,7 @@ | |
:modelValue="item.answer" | ||
style="background: none" | ||
/> | ||
<template v-else> - </template> | ||
<template v-else> -</template> | ||
</div> | ||
</div> | ||
</template> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The code provided has some minor style and performance improvement opportunities, as well as a small bug fix related to conditional rendering. Regularities: The card component is using `el-card`. Potential Issues: There might not be any direct functional issues with this code snippet based on its current structure, but ensuring consistent styling across components would improve user experience. Optimization Suggestions:
These changes will make the code more coherent and maintainable while improving its overall aesthetic appeal. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The code provided appears to be part of an HTML template using Vue.js with Element Plus. Several areas can be improved:
Here is a cleaned up version based on these suggestions: <div class="card-never border-r-4">
<h5 class="p-8-12">参数输出</h5>
<div class="p-8-12 border-t-dashed lighter" v-if="item.content && item.content.length > 0">
<template v-for="(file_content, index) in item.content" :key="index">
<el-card shadow="never" style="--el-card-padding: 8px" class="mb-8">
<MdPreview v-if="file_content" ref="editorRef" editorId="preview-only" :modelValue="file_content" style="background: none"/>
<template v-else>-</template>
</el-card>
</template>
</div>
</div>
<!-- Similar changes will apply to the other similar sections --> Additional Comments:
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is a minor error in the code that needs correction:
Replace this line:
content.append('## ' + doc['name'] + '\n' + file_content)
With this line:
content.append(f"### {doc['name']}\n{file_content}")
Specifically, you should use an f-string to escape the double quotes properly within your string interpolation. This will ensure proper formatting when adding each document's name and content.
Also, while there is no significant issue with the rest of the function (such as improper handling of exceptions or inefficiencies), it doesn't require any major optimizations for its current implementation. However, make sure that the variable names (
buffer
,split_handle
,splitter
) are appropriate for their usage context and adhere to common coding conventions for clarity and maintainability.