SeeNear sentiment analysis

Our model was developed using PyTorch and KoBERT.

Test sentiment analysis

For one conversation input, this model returns the two most likely emotions (out of six) together with the percentage assigned to each. The six emotions are happy, embarrassed, anger, anxiety, heartbroken, and sad.

If you want to use it, you can use the model as follows.

from flask import Flask, request

app = Flask(__name__)

import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import gluonnlp as nlp
import numpy as np
import pandas as pd

#KoBERT
from kobert.utils import get_tokenizer
from kobert.pytorch_kobert import get_pytorch_kobert_model
#transformer
from transformers import AdamW
from transformers.optimization import get_cosine_schedule_with_warmup
from sentiment_repository.kobertm import BERTDataset
from sentiment_repository.kobertm import BERTClassifier

## Setting parameters
# max_len and batch_size are used by the prediction route below; the remaining
# values look like training hyperparameters and are not referenced by the code
# visible in this file -- kept so existing importers still find them.
max_len = 64
batch_size = 64
warmup_ratio = 0.1
num_epochs = 5
max_grad_norm = 1
log_interval = 200
learning_rate = 5e-5

# Inference device; the model and its saved weights are both mapped here.
device = torch.device("cpu")

# KoBERT model and its vocabulary
bertmodel, vocab = get_pytorch_kobert_model()

model = BERTClassifier(bertmodel).to(device)
# map_location follows `device` so the target device is configured in one place.
model.load_state_dict(torch.load('model.pt', map_location=device))
# This service only performs inference, so switch to eval mode right after loading.
model.eval()

# Tokenizer shared by all requests. Building it does not depend on the request,
# so it is created once on first use instead of on every call.
_tokenizer_cache = None


def _get_bert_tokenizer():
    """Return the shared KoBERT SentencePiece tokenizer, creating it on first use."""
    global _tokenizer_cache
    if _tokenizer_cache is None:
        _tokenizer_cache = nlp.data.BERTSPTokenizer(get_tokenizer(), vocab, lower=False)
    return _tokenizer_cache


def _softmax_percent(scores):
    """Turn raw class scores into softmax percentages rounded to 3 decimals."""
    scores = np.asarray(scores)
    # Subtract the maximum before exponentiating to avoid overflow.
    exp_scores = np.exp(scores - np.max(scores))
    return np.round(exp_scores / np.sum(exp_scores) * 100, 3)


def _top_two_indices(percentages):
    """Return the indices of the two largest values, largest first.

    A stable sort keeps the lower index first on ties, matching ``np.argmax``.
    Unlike zeroing the winner and calling ``argmax`` again, this can never
    return the same index twice.
    """
    order = np.argsort(-np.asarray(percentages), kind="stable")
    return [int(i) for i in order[:2]]


@app.route("/predict/<arg>")
def predict(arg):
    """Classify the emotion of one sentence taken from the URL path.

    Args:
        arg: The sentence to analyse.

    Returns:
        The string ``'null'`` when the top emotion's percentage is 60.0 or
        lower; otherwise a dict with ``'result_emotion'`` (the two most likely
        class labels as strings, most likely first) and ``'percent'`` (their
        percentages, rounded to 3 decimals).

    Emotion labels: '0' happy, '1' embarrassed, '2' anger, '3' anxiety,
    '4' heartbroken, '5' sad.
    """
    # One-row dataset: [sentence, dummy label]. The 0 and 1 arguments are
    # presumably the sentence/label column indices -- confirm against BERTDataset.
    rows = [[arg, '0']]
    dataset = BERTDataset(rows, 0, 1, _get_bert_tokenizer(), max_len, True, False)
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=0)

    model.eval()
    # Gradients are never needed for inference; skipping them saves memory and time.
    with torch.no_grad():
        # The dataset holds exactly one sample, so there is exactly one batch.
        token_ids, valid_length, segment_ids, _label = next(iter(loader))
        token_ids = token_ids.long().to(device)
        segment_ids = segment_ids.long().to(device)
        out = model(token_ids, valid_length, segment_ids)

    # Scores of the single sample in the batch.
    scores = out[0].detach().cpu().numpy()
    percentages = _softmax_percent(scores).tolist()
    probability = [float(np.round(p, 3)) for p in percentages]

    top_two = _top_two_indices(percentages)
    result_emotion = [str(i) for i in top_two]
    percent = [probability[i] for i in top_two]

    # Low-confidence predictions are reported as 'null'.
    if percent[0] <= 60.0:
        return 'null'

    return {'result_emotion': result_emotion, 'percent': percent}

Reference

SKT-Brain KOBERT model https://github.com/SKTBrain/KoBERT
Dataset
Korean emotional conversation corpus
Continuous Conversation Dataset with Korean Sentiment Information

Name		Name	Last commit message	Last commit date
Latest commit History 7 Commits
README.md		README.md
flask-kobert.py		flask-kobert.py
kobertm.py		kobertm.py
requirement.txt		requirement.txt

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

SeeNear sentiment analysis

Test sentiment analysis

Reference

About

Releases

Packages

Languages

GDSC-seeNear/sentiment_repository

Folders and files

Latest commit

History

Repository files navigation

SeeNear sentiment analysis

Test sentiment analysis

Reference

About

Resources

Stars

Watchers

Forks

Releases

Packages 0

Languages

Packages