Skip to content

Commit

Permalink
[feat] add ai (#22)
Browse files Browse the repository at this point in the history
Co-authored-by: Juyeonnn <[email protected]>
  • Loading branch information
juyeonnn and juyeonnn authored Feb 18, 2024
1 parent 1831dbb commit c4bc962
Show file tree
Hide file tree
Showing 18 changed files with 3,267 additions and 0 deletions.
90 changes: 90 additions & 0 deletions ai/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
## Data Generation ##

**1) 문장 구사** : ```fluentify.ConDataGen(iterate_num=30)```


```data/con-data.json``` : 문장구사평가를 위한 데이터 (```Gemini-pro-vision```)

```data/con-img-pool.json``` : 문장구사평가 데이터 생성에 사용된 [이미지 데이터](https://huggingface.co/datasets/ehristoforu/dalle-3-images) (```Dalle-3``` )

Example
```js

[
{
"context": "Let's imagine that you are a brave captain of a big ship. You are sailing on the high seas. Suddenly, you see a beautiful sunset. Look at this picture and tell me...",
"question": "What colors can you see in the sky?",
"answer": "I see red, orange, yellow, and blue.",
"img": "1070.jpg"
},
...
]
```
**2) 발음** : ```fluentify.ProDataGen(iterate_num=30)```


```data/pro-data.json``` : 발음평가를 위한 데이터 (```Gemini-pro```)

```data/pro-topic-pool.json``` : 발음평가 데이터 생성에 사용된 주제 데이터 (```Gemini-pro```)

Example
```js
[
{
"practice-sentence": "I love to mix baking soda and vinegar together to create a fizzy experiment.",
"tip": "Remember to say 'mix' with your lips together and 'fizzy' with a big smile."
},
...
]
```



## Feedback Generation ##

**1) 문장 구사** : ```fluentify.ConFeedback(con_input)```


Input
```js
{
"user-answer": "Hmm.. a ship? maybe yellow? I may no",
"context": "Let's imagine that you are a brave captain of a big ship. You are sailing on the high seas. Suddenly, you see a beautiful sunset. Look at this picture and tell me...",
"question": "What colors can you see in the sky?",
"answer": "I see red, orange, yellow, and blue.",
"img": "1070.jpg"
}
```
Output
```js
{
'positive-feedback': 'You are very creative! I like your imagination.',
'negative-feedback': "Let's try to describe what we see in the picture. First, look at the sky. What colors can you see there?",
'enhanced-answer': 'In the sky, I can see yellow, orange, pink, and blue.'
}
```


**2) 발음** : ```fluentify.ProFeedback(pro_input)```

Input
```js
{
"user-audio" : "example1.m4a",
"practice-sentence": "It's autumn now, and the leaves are turning beautiful colors.",
"tip": "Say 'aw-tum,' not 'ay-tum.'"
}
```
Output
```js
{
'transcription': 'ITS AUTUMN NOW AND THE LEAVES ARE TURNING BEAUTIFUL COLORS',
'wrong_idx': {'minor': [2, 9], 'major': []},
'pronunciation_score': 0.7,
'decibel': 46.90759735625882,
'speech_rate': 2.347417840375587,
'positive-feedback': 'Pronunciation is correct. Keep up the good work!',
'negative-feedback': ' '
}
```
----
133 changes: 133 additions & 0 deletions ai/data-gen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import os
import requests
import yaml
import json
import vertexai
import numpy as np
from vertexai.preview.generative_models import GenerativeModel, Part
import random
import ast

import torch
from transformers import AutoTokenizer, AutoFeatureExtractor, AutoModelForCTC
import torch
from jiwer import wer
import os
import json
import math
import torch
import librosa
from evaluate import load
from jiwer import compute_measures
import numpy as np
from utils.word_process import get_wrong_idx


# gcloud auth application-default login
# gcloud auth application-default set-quota-project fluentify-412312

class DataGeneration:
    """Generate practice data for sentence-construction and pronunciation drills.

    Two Gemini models are used through Vertex AI: ``gemini-pro-vision`` for
    everything that looks at an image (``ImgFilter``, ``GenQA``) and
    ``gemini-pro`` for text-only sentence generation (``GenSent``).

    Generated records are appended to JSON files under ``<module dir>/data``
    and written back after every successful item, so an interrupted run keeps
    whatever it has produced so far.
    """

    def __init__(self):
        """Initialise Vertex AI, load the prompt templates and speech models.

        Requires application-default Google Cloud credentials (see the
        ``gcloud auth`` commands noted at the top of the file) and a
        ``data/prompt.yaml`` file next to this module.
        """
        vertexai.init(project="fluentify-412312", location="asia-northeast3")
        self.multimodal_model = GenerativeModel("gemini-pro-vision")
        self.lang_model = GenerativeModel("gemini-pro")
        self.current_path = os.path.dirname(__file__)
        self.gcs_path = "gs://fluentify-412312.appspot.com"
        with open(os.path.join(self.current_path,'data/prompt.yaml'), 'r', encoding='UTF-8') as file:
            self.prompt = yaml.load(file, Loader=yaml.FullLoader)

        # The speech-recognition objects and thresholds below are not used by
        # any method of this class; they are kept because other code may read
        # these attributes.
        self.audio_path = "./data/audio"
        self.tokenizer = AutoTokenizer.from_pretrained("facebook/wav2vec2-base-960h")
        self.model = AutoModelForCTC.from_pretrained("facebook/wav2vec2-base-960h")
        self.feature_extractor = AutoFeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")
        self.wer = load("wer")

        self.speech_rate_threshol_h = 2.5
        self.speech_rate_threshol_l = 1.0
        self.decibel_threshold_h = 95
        self.decibel_threshold_l = 45

    @staticmethod
    def _format_topic(topic):
        """Return *topic* as prompt text, joining sequences with single spaces.

        Lists, tuples and NumPy arrays are joined; anything else (normally a
        plain string) is returned unchanged.
        """
        if isinstance(topic, (list, tuple, np.ndarray)):
            return " ".join(str(item) for item in topic)
        return topic

    @staticmethod
    def _parse_model_output(text):
        """Parse a model reply into a Python object, or return ``None``.

        Markdown code fences are removed first. The text is then read as a
        Python literal; if that fails it is retried as JSON so that replies
        containing ``true``/``false``/``null`` are still accepted.
        """
        cleaned = text.replace("```json", "").replace("```", "").strip()
        try:
            return ast.literal_eval(cleaned)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            pass
        try:
            return json.loads(cleaned)
        except ValueError as err:  # json.JSONDecodeError subclasses ValueError
            print(f"Could not parse model response: {err}")
            return None

    @staticmethod
    def _load_json(path):
        """Read and return the JSON document stored at *path*."""
        with open(path, encoding="utf-8") as f:
            return json.load(f)

    @staticmethod
    def _dump_json(path, obj):
        """Overwrite *path* with *obj* serialised as indented JSON."""
        with open(path, "w", encoding="utf-8") as f:
            json.dump(obj, f, indent=4)

    def _image_part(self, img):
        """Build the model input part for image file *img* in the GCS bucket."""
        return Part.from_uri(f"{self.gcs_path}/img/{img}", mime_type="image/jpeg")

    def GenSent(self, topic):
        """Ask the language model for practice sentences about *topic*.

        Args:
            topic: A topic string, or a list/tuple/NumPy array of topics that
                is joined with spaces before being put into the prompt.

        Returns:
            The parsed model reply, or ``None`` if it could not be parsed.
        """
        prompt = self.prompt['gen-sent'].format(topic=self._format_topic(topic))
        response = self.lang_model.generate_content(prompt)
        return self._parse_model_output(response.text)

    def ImgFilter(self, img):
        """Return whether the vision model accepts *img* for context questions.

        The image is rejected when the model's reply contains the substring
        ``"No"`` anywhere.
        """
        # NOTE(review): this is a case-sensitive substring test, so replies
        # containing e.g. "Not" or "Nothing" also reject the image - confirm
        # that the 'img-filter' prompt constrains the reply to Yes/No.
        prompt = self.prompt['img-filter']
        response = self.multimodal_model.generate_content([prompt, self._image_part(img)])
        return "No" not in response.text

    def GenQA(self, img):
        """Ask the vision model for a context/question/answer record for *img*.

        Returns:
            The parsed model reply, or ``None`` if the reply had no readable
            text or could not be parsed.
        """
        prompt = self.prompt['gen-qa']
        response = self.multimodal_model.generate_content([prompt, self._image_part(img)])
        try:
            text = response.text
        except Exception as err:
            # The original treated a failing ``.text`` access as "no result";
            # keep that best-effort behaviour but report it instead of hiding it.
            print(f"No usable text in model response for image {img}: {err}")
            return None
        return self._parse_model_output(text)

    def ConDataGen(self, iterate_num):
        """Generate up to *iterate_num* question/answer records from pooled images.

        Each iteration removes one random image from ``con-img-pool.json``
        (persisted immediately, so an image is never drawn twice across runs),
        filters it with ``ImgFilter`` and, if accepted, asks ``GenQA`` for a
        record that is appended to ``con-data.json``.

        Args:
            iterate_num: Maximum number of images to draw. Generation stops
                early when the image pool is exhausted.

        Returns:
            The full list of records, including those loaded from disk.
        """
        data_dir = os.path.join(self.current_path, "data")
        data_path = os.path.join(data_dir, "con-data.json")
        img_pool_path = os.path.join(data_dir, "con-img-pool.json")
        img_pool = self._load_json(img_pool_path)
        con_data = self._load_json(data_path)

        for _ in range(iterate_num):
            if not img_pool:
                print("Image pool is empty; stopping context data generation.")
                break

            img = random.choice(img_pool)
            img_pool.remove(img)
            self._dump_json(img_pool_path, img_pool)

            if not self.ImgFilter(img):
                print(f"Image {img} is not suitable for the context evaluation.")
                continue

            qa = self.GenQA(img)
            # Only a non-empty dict is a usable record; anything else means
            # the reply was missing or not in the expected shape.
            if not isinstance(qa, dict) or not qa:
                continue

            qa["img"] = img
            print(qa)
            con_data.append(qa)
            self._dump_json(data_path, con_data)
        return con_data

    def ProDataGen(self, iterate_num):
        """Generate practice sentences and tips from randomly chosen topics.

        Each iteration draws up to five distinct topics from
        ``pro-topic-pool.json``, asks ``GenSent`` for sentences and appends
        the result to ``pro-data.json``.

        Args:
            iterate_num: Number of generation rounds. Generation stops early
                when the topic pool is empty.

        Returns:
            The full list of records, including those loaded from disk.
        """
        data_dir = os.path.join(self.current_path, "data")
        data_path = os.path.join(data_dir, "pro-data.json")
        topic_path = os.path.join(data_dir, "pro-topic-pool.json")
        topic_pool = self._load_json(topic_path)
        pro_data = self._load_json(data_path)

        for _ in range(iterate_num):
            if not topic_pool:
                print("Topic pool is empty; stopping pronunciation data generation.")
                break

            # Sampling without replacement cannot ask for more than the pool holds.
            sample_size = min(5, len(topic_pool))
            topic = np.random.choice(topic_pool, size=sample_size, replace=False).tolist()

            sent = self.GenSent(topic)
            if not sent:
                continue

            print(sent)
            if isinstance(sent, dict):
                # A single record: extending with a dict would add its keys.
                pro_data.append(sent)
            elif isinstance(sent, (list, tuple)):
                pro_data.extend(sent)
            else:
                print(f"Unexpected response shape {type(sent).__name__}; skipped.")
                continue
            self._dump_json(data_path, pro_data)
        return pro_data

Binary file added ai/data/audio/example1.m4a
Binary file not shown.
Binary file added ai/data/audio/example2.m4a
Binary file not shown.
Binary file added ai/data/character/close-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added ai/data/character/open-0.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added ai/data/character/open-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added ai/data/character/temp.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
110 changes: 110 additions & 0 deletions ai/data/con-data.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
[
{
"context": "Let's imagine that you are a brave captain of a big ship. You are sailing on the high seas. Suddenly, you see a beautiful sunset. Look at this picture and tell me...",
"question": "What colors can you see in the sky?",
"answer": "I see red, orange, yellow, and blue.",
"img": "1070.jpg"
},
{
"context": "Look at this picture. This is a woman. She is wearing a red turtleneck blouse and black pants. She has curly blond hair and brown eyes. She is standing in front of a window.",
"question": "What is she wearing?",
"answer": "She is wearing a red turtleneck blouse and black pants.",
"img": "486.jpg"
},
{
"context": "Look at this picture. What do you see?",
"question": "Can you describe what is happening in the picture?",
"answer": "I see a robot walking through a forest. The sky is a beautiful color. The trees are tall and green.",
"img": "185.jpg"
},
{
"context": "Look at this picture. This is a girl. Her name is Jane. She is sitting on a rock. She is wearing a purple outfit and brown shoes. Her hair is long and brown. She looks very peaceful.",
"question": "What is Jane doing?",
"answer": "Jane is sitting on a rock.",
"img": "1629.jpg"
},
{
"context": "Look at this cute kitty! What do you see in the picture?",
"question": "What is the kitty doing?",
"answer": "The kitty is sitting on a log.",
"img": "201.jpg"
},
{
"context": "Look at this beautiful landscape. Imagine you are standing on that rock. What can you see?",
"question": "What is she standing on?",
"answer": "She is standing on a rock.",
"img": "1116.jpg"
},
{
"context": "Let's imagine that you are a famous car designer and you have been asked to design a car for a superhero. This superhero has the power to control metal with his mind. What would be the most important thing to keep in mind when designing this car?",
"question": "What would be the most important thing to keep in mind when designing this car?",
"answer": "The most important thing to keep in mind when designing this car would be to make sure that it is made of a metal that the superhero can control with his mind.",
"img": "218.jpg"
},
{
"context": "Look at this picture. It's raining outside and the girl is wearing a black jacket. She has long brown hair and it's tied in a ponytail. She is standing in the woods and looks very peaceful.",
"question": "What do you think she is thinking about?",
"answer": "She is thinking about her family.",
"img": "506.jpg"
},
{
"context": "Look at this picture. This is a girl. She has beautiful blue eyes and freckles on her face. She is looking at us with a curious expression. What do you think she is thinking?",
"question": "What is the girl thinking?",
"answer": "She is thinking about something interesting.",
"img": "1483.jpg"
},
{
"context": "Look at this beautiful tree! It has so many branches and leaves. It's like a whole world in one tree. Do you see the little house in the middle? That's where the tree people live. They are very friendly and love to play. Let's make up a story about them. What do you think they are doing right now?",
"question": "What do you see in the picture?",
"answer": "I see a tree with a house in the middle. There are also some mountains and a river.",
"img": "284.jpg"
},
{
"context": "Look at this picture. This is Tarzan. He is a fictional character created by Edgar Rice Burroughs. Tarzan was raised by apes in the African jungle. He is a very strong and agile man. He can swing from trees and fight off wild animals. Tarzan is a very interesting character. He is kind and gentle, but he is also very brave and strong.",
"question": "What is Tarzan wearing?",
"answer": "He is wearing a loincloth.",
"img": "1206.jpg"
},
{
"context": "Look at this cozy bedroom! It's so peaceful and quiet. The perfect place to relax and read a book.",
"question": "What do you think is the best thing about this bedroom?",
"answer": "The best thing about this bedroom is the view of the city outside the window.",
"img": "1616.jpg"
},
{
"context": "A boy is standing on a cliff. He is looking at a beautiful landscape. There are mountains, trees, and a lake. The sky is blue, and there are two moons in the sky. The boy is amazed by the beauty of the landscape.",
"question": "What is the boy doing?",
"answer": "The boy is standing on a cliff and looking at a beautiful landscape.",
"img": "926.jpg"
},
{
"context": "Look at this beautiful picture. What do you see?",
"question": "Can you describe what is happening in the picture?",
"answer": "The picture shows a mountain valley with a river running through it. The river is wide and shallow, and it flows over rocks and stones. The valley is green and lush, and there are trees and bushes on the banks of the river. There are mountains in the background, and the sun is shining.",
"img": "1316.jpg"
},
{
"context": "Look at this picture. This is a silhouette of a girl. She looks just like you! She has a head, two arms, and two legs. What do you think she is doing?",
"question": "What do you think she is looking at?",
"answer": "She is looking at a beautiful sunset.",
"img": "502.jpg"
},
{
"context": "Look at this beautiful picture. This is a portrait of a young woman. She has long brown hair and brown eyes. She is wearing a white dress and a gold earring.",
"question": "What is the woman wearing?",
"answer": "The woman is wearing a white dress and a gold earring.",
"img": "1047.jpg"
},
{
"context": "Hi there! I'm an education specialist, and I'm here to help you with your language skills. I'm going to show you a picture, and then I'm going to ask you a question about it. Are you ready?",
"question": "What do you see in the picture?",
"answer": "I see a motorcycle.",
"img": "205.jpg"
},
{
"context": "Look at this beautiful picture! What do you see?",
"question": "Can you describe what you see in the picture?",
"answer": "I see a lot of stars and clouds in the picture. The colors are very bright and pretty.",
"img": "1583.jpg"
}
]
Loading

0 comments on commit c4bc962

Please sign in to comment.