main.py
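"""
Math-with-gestures demo: draw a math problem in the air with your index finger.
The hand is tracked with cvzone/OpenCV, the strokes are overlaid on the webcam
feed in a Streamlit app, and the drawing is sent to Google's Gemini model when
the "send" gesture (all fingers up except the pinky) is shown.
"""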
import cvzone
import cv2
from cvzone.HandTrackingModule import HandDetector
import numpy as np
import google.generativeai as genai
from dotenv import load_dotenv
from PIL import Image
import streamlit as st
import os
st.set_page_config(layout="wide")
st.image("images.jpeg")
col1, col2 = st.columns([1,1])
with col1:
    run = st.checkbox("Run", value=True)
    FRAME_WINDOW = st.image([])
with col2:
    st.title("Answer")
    output_text_area = st.subheader("")
load_dotenv()
genai.configure(api_key=os.environ["API_KEY"])
model = genai.GenerativeModel('gemini-1.5-flash')
# Initialize the webcam to capture video
cap = cv2.VideoCapture(0)
cap.set(3, 1200)  # frame width
cap.set(4, 720)   # frame height
# Initialize the HandDetector class with the given parameters
detector = HandDetector(staticMode=False, maxHands=1, modelComplexity=1, detectionCon=0.7, minTrackCon=0.5)
def getHandInfo(img):
    # Find hands in the current frame
    # The 'draw' parameter draws landmarks and hand outlines on the image if set to True
    # The 'flipType' parameter flips the image, making it easier for some detections
    hands, img = detector.findHands(img, draw=True, flipType=True)

    # Check if any hands are detected
    if hands:
        hand = hands[0]  # Information for the first hand detected
        lmList = hand["lmList"]  # List of 21 landmarks for the first hand
        # Count the number of fingers up for the first hand
        fingers = detector.fingersUp(hand)
        print(fingers)
        return fingers, lmList
    else:
        return None
def draw(info, previous_pos, canvas):
    fingers, lmList = info
    current_pos = None
    if fingers == [0, 1, 0, 0, 0]:  # only the index finger is up: draw
        current_pos = lmList[8][0:2]  # tip of the index finger
        if previous_pos is None:
            previous_pos = current_pos
        cv2.line(canvas, current_pos, previous_pos, (255, 0, 255), 10)
    elif fingers == [1, 0, 0, 0, 0]:  # only the thumb is up: clear the canvas
        canvas = np.zeros_like(canvas)
    return current_pos, canvas
def sendToAI(model, canvas, fingers):
    # Send the drawing to Gemini when all fingers except the pinky are up
    if fingers == [1, 1, 1, 1, 0]:
        pil_image = Image.fromarray(canvas)
        response = model.generate_content(["Solve this Math Problem", pil_image])
        return response.text
    return None
previous_pos = None
canvas = None
image_combined = None
output_text = ""
# Continuously get frames from the webcam
while True:
    # Capture each frame from the webcam
    # 'success' will be True if the frame is successfully captured, 'img' will contain the frame
    success, img = cap.read()
    if not success:
        break
    img = cv2.flip(img, 1)  # mirror the frame so drawing feels natural

    if canvas is None:
        canvas = np.zeros_like(img)

    info = getHandInfo(img)
    if info:
        fingers, lmList = info
        previous_pos, canvas = draw(info, previous_pos, canvas)
        response = sendToAI(model, canvas, fingers)
        if response:  # only update the answer when Gemini actually returned one
            output_text = response

    # Overlay the drawing canvas on the camera frame and show it in the app
    image_combined = cv2.addWeighted(img, 0.7, canvas, 0.3, 0)
    FRAME_WINDOW.image(image_combined, channels="BGR")
    output_text_area.text(output_text)

    # Keep the window open and update it for each frame; wait for 1 millisecond between frames
    cv2.waitKey(1)
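# Release the webcam once the loop exits (e.g., after a failed frame read).
cap.release()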