Marco committed
Commit 3bc9036 · 1 Parent(s): 1e51b6c

initial commit
.gitignore ADDED
@@ -0,0 +1,16 @@
+ # Ignore Python build artifacts
+ src/__pycache__/
+
+ # Ignore virtual environment
+ venv/
+
+ # Ignore VS Code files
+ .vscode/
+
+ # Ignore Streamlit secrets
+ .streamlit/secrets.toml
+
+ images/
+
+ test.py
+
LICENSE.txt ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 Marco Lee
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
README.md CHANGED
@@ -1,13 +1,30 @@
- ---
- title: OCR
- emoji: 😻
- colorFrom: red
- colorTo: blue
- sdk: streamlit
- sdk_version: 1.37.1
- app_file: app.py
- pinned: false
- license: apache-2.0
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # streamlit_ocr
+
+ ## Streamlit OCR Application
+ This project is a Streamlit-based Optical Character Recognition (OCR) application that extracts text from a live camera feed using EasyOCR and lets users ask a chatbot about the most recently detected text. See the install steps below the task lists for running it locally.
+
+ ## Features
+ * Live camera feed with OCR support
+ * On-screen display of extracted text
+ * Adjustable confidence threshold for displaying extracted text
+ * Freeze button to run chatbot inference on the most recent text
+
+ ## Deployment
+ [Streamlit OCR + chatbot app](https://st-ocr.streamlit.app/)
+
+ ### TODOs
+ - [ ] Less clunky freezing
+ - [ ] Fix bugs
+
+ ### Completed tasks ✓
+ - [x] Implement OCR capability
+ - [x] Implement freeze function
+ - [x] Implement chatbot API call
+ - [x] Integrate code with Streamlit
+ - [x] Faster OCR
+ - [x] Multilingual support
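+
+ ## Running locally
+ A minimal sketch, assuming Python 3.10+ (packages.txt supplies the OpenCV system library for apt-based hosts):
+ 1. `pip install -r requirements.txt`
+ 2. `streamlit run main.py`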
main.py ADDED
@@ -0,0 +1,69 @@
+ import streamlit as st
+ from streamlit_webrtc import webrtc_streamer
+ from src.system_initializer import SystemInitializer
+ from src.video_processor import VideoProcessor
+ from src.utilities import Utilities
+
+ import warnings
+ warnings.filterwarnings("ignore")  # Suppress warnings for cleaner output
+
+ class OCRChatbotApp:
+     def __init__(self):
+         # Initialize the system components
+         self.system_initializer = SystemInitializer()
+         self.utilities = Utilities()
+         self.queues = self.system_initializer.initialize_system()  # Initialize queues and start the OCR thread
+         self.conf_thresh = 50  # Default confidence threshold for OCR
+         self.n = 5  # Process every n-th frame
+         self.k = 30  # Number of frames to keep annotations on screen
+
+     def run(self):
+         st.title('OCR and Chatbot Application')  # Set the title of the Streamlit app
+
+         # Initialize session state variables if they don't exist
+         if "camera_frozen" not in st.session_state:
+             st.session_state.update({"camera_frozen": False, "latest": [], "likely_text": ""})
+
+         # Sliders for adjusting the confidence threshold and the frame-processing interval
+         self.conf_thresh = st.slider('Confidence Threshold', 0, 100, 50)
+         self.n = st.slider('Process every n frames', 1, 30, 5)
+
+         # Button to freeze or resume the camera
+         if st.button("Freeze" if not st.session_state.camera_frozen else "Resume"):
+             st.session_state.camera_frozen = not st.session_state.camera_frozen
+             if st.session_state.camera_frozen:
+                 st.session_state.likely_text = self.utilities.fetch_likely_text()  # Fetch likely text when the camera is frozen
+
+         # Constraints for higher-resolution video capture
+         constraints = {
+             "video": {
+                 "width": {"ideal": 1280},
+                 "height": {"ideal": 720},
+                 "frameRate": {"ideal": 30}
+             },
+             "audio": False
+         }
+
+         # Initialize the WebRTC streamer with the specified constraints and video processor
+         webrtc_ctx = webrtc_streamer(
+             key="example",
+             video_processor_factory=lambda: VideoProcessor(self.queues, self.conf_thresh, self.n, self.k),
+             media_stream_constraints=constraints,
+             async_processing=True,
+         )
+
+         # Display the likely text if the camera is frozen
+         if st.session_state.camera_frozen and st.session_state.likely_text:
+             st.write(st.session_state.likely_text)
+         else:
+             st.write("No text found")
+
+         # Run the chatbot
+         self.system_initializer.run_chatbot()
+
+ if __name__ == '__main__':
+     app = OCRChatbotApp()  # Create an instance of the OCRChatbotApp
+     app.run()  # Run the app
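+
+ # Usage note: this entry point is meant to be served by Streamlit,
+ # e.g. `streamlit run main.py` (plain `python main.py` will not start the UI).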
packages.txt ADDED
@@ -0,0 +1 @@
+ libgl1-mesa-glx
requirements.txt ADDED
@@ -0,0 +1,79 @@
+ aioice==0.9.0
+ aiortc==1.9.0
+ altair==5.3.0
+ attrs==23.2.0
+ av==12.3.0
+ blinker==1.8.2
+ cachetools==5.4.0
+ certifi==2024.7.4
+ cffi==1.16.0
+ charset-normalizer==3.3.2
+ click==8.1.7
+ cryptography==43.0.0
+ deskew==1.5.1
+ dnspython==2.6.1
+ easyocr==1.7.1
+ filelock==3.15.4
+ fsspec==2024.6.1
+ gitdb==4.0.11
+ GitPython==3.1.43
+ google-crc32c==1.5.0
+ idna==3.7
+ ifaddr==0.2.0
+ imageio==2.34.2
+ inexactsearch==1.0.2
+ Jinja2==3.1.4
+ jsonschema==4.23.0
+ jsonschema-specifications==2023.12.1
+ lazy_loader==0.4
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ mdurl==0.1.2
+ mpmath==1.3.0
+ networkx==3.3
+ ninja==1.11.1.1
+ numpy==2.0.1
+ opencv-python==4.10.0.84
+ opencv-python-headless==4.10.0.84
+ packaging==24.1
+ pandas==2.2.2
+ pillow==10.4.0
+ protobuf==5.27.2
+ pyarrow==17.0.0
+ pyclipper==1.3.0.post5
+ pycparser==2.22
+ pydeck==0.9.1
+ pyee==11.1.0
+ Pygments==2.18.0
+ pylibsrtp==0.10.0
+ pyOpenSSL==24.2.1
+ python-bidi==0.5.1
+ python-dateutil==2.9.0.post0
+ pytz==2024.1
+ PyYAML==6.0.1
+ referencing==0.35.1
+ requests==2.32.3
+ rich==13.7.1
+ rpds-py==0.19.0
+ scikit-image==0.24.0
+ scipy==1.14.0
+ setuptools==71.1.0
+ shapely==2.0.5
+ silpa_common==0.3
+ six==1.16.0
+ smmap==5.0.1
+ soundex==1.1.3
+ spellchecker==0.4
+ streamlit==1.36.0
+ streamlit-webrtc==0.47.7
+ sympy==1.13.1
+ tenacity==8.5.0
+ tifffile==2024.7.21
+ toml==0.10.2
+ toolz==0.12.1
+ torch==2.3.1
+ torchvision==0.18.1
+ tornado==6.4.1
+ typing_extensions==4.12.2
+ tzdata==2024.1
+ urllib3==2.2.2
src/__init__.py ADDED
@@ -0,0 +1 @@
+ # __init__.py
src/chatbot.py ADDED
@@ -0,0 +1,53 @@
+ import streamlit as st
+
+ import sys
+ import os
+
+ # Add the directory containing this script to the Python path
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+ def chatbot_say(message):
+     with st.chat_message("assistant", avatar="🔮"):
+         st.markdown(message)
+
+     # Add the chatbot response to the chat history
+     st.session_state.chat_history.append(("assistant", message))
+
+ def run_chatbot():
+     from perplexity_api import chat_completion
+
+     # Initialise the chat history in session state
+     if 'chat_history' not in st.session_state:
+         st.session_state.chat_history = []
+
+     # Display the chat history
+     for role, content in st.session_state.chat_history:
+         with st.chat_message(role):
+             st.markdown(content)
+
+     # If the user has entered a message, add it to the chat history and get a chatbot response
+     if prompt := st.chat_input("Say something: "):
+         # Display the user message
+         with st.chat_message("user"):
+             st.markdown(prompt)
+
+         # Add the user message to the chat history
+         st.session_state.chat_history.append(("user", prompt))
+
+         # Pass the latest OCR text to the chatbot as context, if any
+         info = "none"
+         if st.session_state.latest:
+             info = st.session_state.latest
+
+         response = chat_completion(prompt, info, mode="normal")
+
+         # Display the chatbot response
+         with st.chat_message("assistant", avatar="🔮"):
+             st.markdown(response)
+
+         # Add the chatbot response to the chat history
+         st.session_state.chat_history.append(("assistant", response))
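+
+ # Usage sketch (hypothetical): push a scripted assistant message into the chat,
+ # e.g. after the OCR thread settles on a result:
+ #     chatbot_say("OCR scanned text: hello world")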
src/ocr.py ADDED
@@ -0,0 +1,25 @@
+ # Perform OCR in a separate thread
+ def ocr_thread(frame_queue, text_queue):
+     from easyocr import Reader
+     reader = Reader(lang_list=['en'])
+
+     while True:
+         frame = frame_queue.get()
+
+         # A None sentinel signals shutdown: exit the loop
+         if frame is None:
+             break
+
+         texts = reader.readtext(frame)
+         text_queue.put(texts)
+         print(texts)
+
+ if __name__ == "__main__":
+     import sys
+     print(sys.path)
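+
+ # Standalone usage sketch (hypothetical names): feed frames in, read detections out.
+ #     import queue, threading
+ #     frames, texts = queue.Queue(maxsize=1), queue.Queue(maxsize=1)
+ #     threading.Thread(target=ocr_thread, args=(frames, texts), daemon=True).start()
+ #     frames.put(some_bgr_ndarray); print(texts.get())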
src/perplexity_api.py ADDED
@@ -0,0 +1,64 @@
+ import copy
+ import requests
+ import streamlit as st
+
+ # Load the API key from Streamlit secrets rather than hardcoding it in the source
+ API_KEY = st.secrets["API_KEY"]
+
+ url = "https://api.perplexity.ai/chat/completions"
+
+ ppx_prompt = '''Given a Python list named latest_ocr_values, such as ['I am a Python', 'I a pthon',
+ 'I python', 'I am a python', 'a'], directly provide the text that is most consistently detected by the OCR.
+ If multiple answers are possible, choose the most likely one only. If there is no clear answer, state 'None'.
+ The output must follow the format: 'OCR scanned text: (your_answer)'. Do not explain yourself afterwards, do not include
+ multiple valid outputs. Do not include any other information.'''
+
+ normal_prompt = '''Be a good assistant and answer my question, using information from the following prompt or relating to it, as well as
+ knowledge you have about this prompt. If no information is given or if the question is not relevant to the information given,
+ simply answer as normal, using any knowledge you have.'''
+
+ headers = {
+     "Authorization": "Bearer " + API_KEY,
+     "accept": "application/json",
+     "content-type": "application/json"
+ }
+
+ payload = {
+     "model": "mistral-7b-instruct",
+     "messages": [
+         {
+             "role": "system",
+             "content": ""
+         },
+         {
+             "role": "user",
+             "content": ""
+         }
+     ]
+ }
+
+ def chat_completion(prompt, info="", mode="normal"):
+     """Performs a single chat completion against the Perplexity API."""
+     if not prompt:
+         return "Error"
+
+     # Deep-copy the payload template so its nested messages are not mutated between calls
+     pl = copy.deepcopy(payload)
+     if mode == "ocr":
+         pl["messages"][0]["content"] = ppx_prompt
+     elif mode == "normal":
+         pl["messages"][0]["content"] = normal_prompt
+
+     pl["messages"][1]["content"] = f"Prompt: {prompt}. Information: {info}."
+
+     # Perform the chat completion and return the model's reply
+     response = requests.post(url, json=pl, headers=headers)
+
+     if response.status_code == 200:
+         response_data = response.json()
+         return response_data["choices"][0]["message"]["content"]
+     return "Error"
+
+
+ if __name__ == "__main__":
+     print(chat_completion("latest_ocr_values = ['I am a cat', 'I a cat', 'I cat', 'I am cat', 'a']", mode="ocr"))
src/st_context.py ADDED
@@ -0,0 +1,37 @@
+ import threading
+ from typing import Any, TypeVar, cast
+ from streamlit.errors import NoSessionContext
+ from streamlit.runtime.scriptrunner.script_run_context import SCRIPT_RUN_CONTEXT_ATTR_NAME, get_script_run_ctx
+
+ T = TypeVar("T")
+
+ def with_streamlit_context(fn: T) -> T:
+     """Fix a Streamlit bug that raises streamlit.errors.NoSessionContext."""
+     ctx = get_script_run_ctx()
+     if ctx is None:
+         raise NoSessionContext(
+             "with_streamlit_context must be called inside a context; "
+             "construct your function on the fly, not earlier."
+         )
+
+     def _cb(*args: Any, **kwargs: Any) -> Any:
+         """Call fn with the script-run context attached to the current thread."""
+         thread = threading.current_thread()
+         do_nothing = hasattr(thread, SCRIPT_RUN_CONTEXT_ATTR_NAME) and (
+             getattr(thread, SCRIPT_RUN_CONTEXT_ATTR_NAME) == ctx
+         )
+         if not do_nothing:
+             setattr(thread, SCRIPT_RUN_CONTEXT_ATTR_NAME, ctx)
+         # Call the wrapped function
+         ret = fn(*args, **kwargs)
+         if not do_nothing:
+             # Remove the attribute afterwards: the same thread may serve
+             # different users' tasks at different times
+             delattr(thread, SCRIPT_RUN_CONTEXT_ATTR_NAME)
+         return ret
+
+     return cast(T, _cb)
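+
+ # Usage sketch (hypothetical): build the wrapper inside a Streamlit run, then hand
+ # it to a worker thread so it can touch st.session_state without NoSessionContext:
+ #     safe_update = with_streamlit_context(lambda text: st.session_state.update(latest=text))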
src/system_initializer.py ADDED
@@ -0,0 +1,27 @@
+ import queue
+ import threading
+ from streamlit.runtime.scriptrunner import add_script_run_ctx
+ from src.ocr import ocr_thread
+ from src.chatbot import run_chatbot
+
+ class SystemInitializer:
+     def initialize_system(self):
+         """Initializes the queues and starts the OCR thread."""
+         queues = {
+             'frame_queue': queue.Queue(maxsize=1),
+             'text_queue': queue.Queue(maxsize=1),
+             'annotation_queue': queue.Queue(maxsize=1),
+             'prompt_queue': queue.Queue(maxsize=1),
+             'ppx_queue': queue.Queue(maxsize=1)
+         }
+         # Daemonize the worker so it does not keep the process alive on exit
+         ocr_thread_with_ctx = threading.Thread(target=ocr_thread, args=(queues['frame_queue'], queues['text_queue']), daemon=True)
+         add_script_run_ctx(ocr_thread_with_ctx)
+         ocr_thread_with_ctx.start()
+         return queues
+
+     def run_chatbot(self):
+         run_chatbot()
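+
+ # Shutdown sketch (hypothetical): the OCR worker exits when a None sentinel is queued:
+ #     queues['frame_queue'].put(None)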
src/utilities.py ADDED
@@ -0,0 +1,134 @@
+ import cv2
+ from deskew import determine_skew
+ from spellchecker import SpellChecker
+ import streamlit as st
+
+ from src.perplexity_api import chat_completion
+ from src.st_context import with_streamlit_context
+
+ class Utilities:
+     def __init__(self):
+         # Initialize the spell checker
+         self.spell = SpellChecker()
+
+     def fetch_likely_text(self):
+         """Fetches the most likely text from the latest OCR values."""
+         # Ask the chatbot (in OCR mode) which candidate was detected most consistently
+         return chat_completion(f"latest_ocr_values = {st.session_state['latest']}", mode="ocr")
+
+     @with_streamlit_context
+     def detect_annotations(self, frame, text_queue, conf_thresh):
+         """Detects annotations for a single video frame."""
+         # If the text queue is empty, return an empty list
+         if text_queue.empty():
+             return []
+
+         # Get detections from the text queue
+         detections = text_queue.get()
+         annotations = []
+         for (box, text, confidence) in detections:
+             # Only consider detections with confidence above the threshold
+             if confidence > conf_thresh / 100.0:
+                 # Correct the spelling of the detected text
+                 corrected_text = self.correct_spelling(text)
+                 # Append the bounding box and corrected text to the annotations
+                 annotations.append((box, corrected_text))
+         return annotations
+
+     @with_streamlit_context
+     def draw_annotations(self, frame, annotations):
+         """Draws annotations on the frame."""
+         for (box, text) in annotations:
+             try:
+                 # Calculate the size of the text box
+                 font = cv2.FONT_HERSHEY_SIMPLEX
+                 font_scale = 1
+                 thickness = 2
+                 text_size, baseline = cv2.getTextSize(text, font, font_scale, thickness)
+                 text_width, text_height = text_size
+
+                 # Calculate the position of the rectangle and text
+                 p1 = (int(box[0][0]), int(box[0][1]))
+                 p2 = (p1[0] + text_width, p1[1] - text_height - baseline)
+
+                 # Draw a filled rectangle with transparency
+                 overlay = frame.copy()
+                 cv2.rectangle(overlay, p1, p2, (0, 255, 0), -1)
+                 alpha = 0.4  # Transparency factor
+                 cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame)
+
+                 # Put the text on top of the rectangle
+                 cv2.putText(frame, text, (p1[0], p1[1] - baseline), font, font_scale, (0, 0, 0), thickness)
+             except Exception as e:
+                 # Log an error message if annotation fails
+                 st.error(f"Failed to annotate frame: {e}")
+         return frame
+
+     def _grayscale(self, image):
+         """Converts the image to grayscale."""
+         if len(image.shape) == 3:
+             gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+         else:
+             gray = image
+         return gray
+
+     def _remove_noise(self, image):
+         """Removes noise from the image using Non-Local Means denoising."""
+         return cv2.fastNlMeansDenoising(image, None, 10, 7, 21)
+
+     def _enhance_contrast(self, image):
+         """Enhances the contrast of the image using CLAHE."""
+         clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
+         enhanced = clahe.apply(image)
+         return enhanced
+
+     def _deskew(self, image):
+         """Deskews the image, assuming the text should be horizontal."""
+         angle = determine_skew(image)
+         # determine_skew returns None when no skew angle can be estimated
+         if angle is None:
+             return image
+         (h, w) = image.shape[:2]
+         center = (w // 2, h // 2)
+
+         # Get the rotation matrix
+         M = cv2.getRotationMatrix2D(center, angle, 1.0)
+
+         # Perform the actual rotation and return the image
+         deskewed = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
+         return deskewed
+
+     def _binarize(self, image):
+         """Converts the image to a binary image using Otsu's thresholding."""
+         _, binary = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+         return binary
+
+     def preprocess_image(self, image):
+         """Preprocesses the image: grayscale, denoise, enhance contrast, deskew, binarize."""
+         gray = self._grayscale(image)
+         denoised = self._remove_noise(gray)
+         enhanced = self._enhance_contrast(denoised)
+         deskewed = self._deskew(enhanced)
+         binary = self._binarize(deskewed)
+         return binary
+
+     def correct_spelling(self, text):
+         """Corrects the spelling of the given text."""
+         corrected_text = []
+         for word in text.split():
+             corrected_word = self.spell.correction(word)
+             if corrected_word:
+                 corrected_text.append(corrected_word)
+
+         return ' '.join(corrected_text)
+
+     def overlay_annotations(self, frame, annotated_frame):
+         """Overlays annotations from the annotated frame onto the current frame."""
+         alpha = 0.4  # Transparency factor
+         cv2.addWeighted(annotated_frame, alpha, frame, 1 - alpha, 0, frame)
+         return frame
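+
+ # Usage sketch (hypothetical): prepare a captured BGR frame for the OCR thread:
+ #     util = Utilities()
+ #     binary = util.preprocess_image(frame_bgr)
+ #     frame_queue.put(binary)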
src/video_processor.py ADDED
@@ -0,0 +1,39 @@
+ from streamlit_webrtc import VideoProcessorBase
+ from src.utilities import Utilities
+ from src.st_context import with_streamlit_context
+
+ class VideoProcessor(VideoProcessorBase):
+     def __init__(self, queues, conf_thresh, n, k):
+         self.queues = queues
+         self.conf_thresh = conf_thresh
+         self.n = n  # Process every n-th frame
+         self.k = k  # Number of frames to keep annotations on screen
+         self.frame_counter = 0
+         self.utilities = Utilities()
+         self.annotation_counter = 0
+
+     @with_streamlit_context
+     def transform(self, frame):
+         img = frame.to_ndarray(format="bgr24")
+         self.frame_counter += 1
+
+         if self.frame_counter % self.n == 0:
+             # Preprocess the frame
+             preprocessed_frame = self.utilities.preprocess_image(img)
+
+             # Add the frame to the queue if the queue is empty
+             if self.queues['frame_queue'].empty():
+                 self.queues['frame_queue'].put(preprocessed_frame)
+
+             annotations = self.utilities.detect_annotations(preprocessed_frame, self.queues['text_queue'], self.conf_thresh)
+             if annotations:
+                 self.queues['annotation_queue'].put((annotations, self.k))  # Store the annotations with their display counter
+                 self.annotation_counter = self.k  # Reset the counter
+
+         # Draw annotations from the queue
+         if not self.queues['annotation_queue'].empty() and self.annotation_counter > 0:
+             annotations, _ = self.queues['annotation_queue'].get()
+             img = self.utilities.draw_annotations(img, annotations)
+             self.annotation_counter -= 1
+
+         return img