Marco
committed on
Commit
·
3bc9036
1
Parent(s):
1e51b6c
initial commit
Browse files- .gitignore +16 -0
- LICENSE.txt +21 -0
- README.md +25 -13
- main.py +66 -0
- packages.txt +1 -0
- requirements.txt +79 -0
- src/__init__.py +1 -0
- src/chatbot.py +50 -0
- src/ocr.py +19 -0
- src/perplexity_api.py +67 -0
- src/st_context.py +33 -0
- src/system_initializer.py +23 -0
- src/utilities.py +127 -0
- src/video_processor.py +41 -0
.gitignore
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Ignore python build artifacts
|
2 |
+
src/__pycache__/
|
3 |
+
|
4 |
+
# Ignore virtual environment
|
5 |
+
venv/
|
6 |
+
|
7 |
+
# Ignore vscode files
|
8 |
+
.vscode/
|
9 |
+
|
10 |
+
# Ignore streamlit secrets
|
11 |
+
.streamlit/secrets.toml
|
12 |
+
|
13 |
+
images/
|
14 |
+
|
15 |
+
test.py
|
16 |
+
|
LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2024 Marco Lee
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
CHANGED
@@ -1,13 +1,25 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# streamlit_ocr
|
2 |
+
|
3 |
+
## Streamlit OCR Application
|
4 |
+
This project is a Streamlit-based Optical Character Recognition (OCR) application that allows users to extract text from images using various OCR engines.
|
5 |
+
|
6 |
+
## Features
|
7 |
+
* live camera feed with OCR support
|
8 |
+
* display of extracted text on screen
|
9 |
+
* can change confidence score required for displaying extracted text
|
10 |
+
* freeze button to perform chatbot inference on most recent text
|
11 |
+
|
12 |
+
## Deployment
|
13 |
+
<a href="https://st-ocr.streamlit.app/">Streamlit OCR + chatbot app</a>
|
14 |
+
|
15 |
+
### TODOS
|
16 |
+
- [ ] Less clunky freezing
|
17 |
+
- [ ] Fix bugs
|
18 |
+
|
19 |
+
### Completed tasks ✓
|
20 |
+
- [x] Implement OCR capability
|
21 |
+
- [x] Implement freeze function
|
22 |
+
- [x] Implement chatbot API call
|
23 |
+
- [x] Integrate code with streamlit
|
24 |
+
- [x] Faster OCR
|
25 |
+
- [x] Multilingual support
|
main.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from streamlit_webrtc import webrtc_streamer, WebRtcMode
|
3 |
+
from src.system_initializer import SystemInitializer
|
4 |
+
from src.video_processor import VideoProcessor
|
5 |
+
from src.utilities import Utilities
|
6 |
+
|
7 |
+
import warnings
|
8 |
+
warnings.filterwarnings("ignore") # Ignore warnings for a cleaner output
|
9 |
+
|
10 |
+
class OCRChatbotApp:
    """Streamlit page that combines the webcam OCR stream with the chatbot.

    The object is rebuilt on every Streamlit script run, so anything that must
    survive a rerun (the queues and the OCR worker started with them) is kept
    in ``st.session_state`` rather than only on the instance.
    """

    def __init__(self):
        # Initialize the system components
        self.system_initializer = SystemInitializer()
        self.utilities = Utilities()

        # Streamlit re-executes the script on each widget interaction. Calling
        # initialize_system() unconditionally would start one more OCR thread
        # (and allocate a fresh set of queues) per rerun, so do it once per
        # session and reuse the result afterwards.
        if "queues" not in st.session_state:
            st.session_state["queues"] = self.system_initializer.initialize_system()
        self.queues = st.session_state["queues"]

        self.conf_thresh = 50  # Default confidence threshold for OCR
        self.n = 5  # Process every n frames
        self.k = 30  # Number of frames to keep annotations

    def run(self):
        """Render the page: controls, video stream, detected text and chatbot."""
        st.title('OCR and Chatbot Application')  # Set the title of the Streamlit app

        # Initialize session state variables if they don't exist
        if "camera_frozen" not in st.session_state:
            st.session_state.update({"camera_frozen": False, "latest": [], "likely_text": ""})

        # Create sliders for adjusting confidence threshold and frame processing interval
        self.conf_thresh = st.slider('Confidence Threshold', 0, 100, 50)
        self.n = st.slider('Process every n frames', 1, 30, 5)

        # Button to freeze or resume the camera
        if st.button("Freeze" if not st.session_state.camera_frozen else "Resume"):
            st.session_state.camera_frozen = not st.session_state.camera_frozen
            if st.session_state.camera_frozen:
                # Fetch likely text when camera is frozen
                # NOTE(review): nothing visible in this commit writes to
                # st.session_state["latest"]; confirm where it is populated.
                st.session_state.likely_text = self.utilities.fetch_likely_text()

        # Define constraints for higher resolution video capture
        constraints = {
            "video": {
                "width": {"ideal": 1280},
                "height": {"ideal": 720},
                "frameRate": {"ideal": 30},
            },
            "audio": False,
        }

        # Initialize the WebRTC streamer with the specified constraints and video processor
        webrtc_ctx = webrtc_streamer(
            key="example",
            video_processor_factory=lambda: VideoProcessor(self.queues, self.conf_thresh, self.n, self.k),
            media_stream_constraints=constraints,
            async_processing=True,
        )

        # Display the likely text if the camera is frozen
        if st.session_state.camera_frozen and st.session_state.likely_text:
            st.write(st.session_state.likely_text)
        else:
            st.write("No text found")

        # Run the chatbot
        self.system_initializer.run_chatbot()
|
63 |
+
|
64 |
+
if __name__ == '__main__':
    # Build the application object and render the page in a single step.
    OCRChatbotApp().run()
|
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
libgl1-mesa-glx
|
requirements.txt
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aioice==0.9.0
|
2 |
+
aiortc==1.9.0
|
3 |
+
altair==5.3.0
|
4 |
+
attrs==23.2.0
|
5 |
+
av==12.3.0
|
6 |
+
blinker==1.8.2
|
7 |
+
cachetools==5.4.0
|
8 |
+
certifi==2024.7.4
|
9 |
+
cffi==1.16.0
|
10 |
+
charset-normalizer==3.3.2
|
11 |
+
click==8.1.7
|
12 |
+
cryptography==43.0.0
|
13 |
+
deskew==1.5.1
|
14 |
+
dnspython==2.6.1
|
15 |
+
easyocr==1.7.1
|
16 |
+
filelock==3.15.4
|
17 |
+
fsspec==2024.6.1
|
18 |
+
gitdb==4.0.11
|
19 |
+
GitPython==3.1.43
|
20 |
+
google-crc32c==1.5.0
|
21 |
+
idna==3.7
|
22 |
+
ifaddr==0.2.0
|
23 |
+
imageio==2.34.2
|
24 |
+
inexactsearch==1.0.2
|
25 |
+
Jinja2==3.1.4
|
26 |
+
jsonschema==4.23.0
|
27 |
+
jsonschema-specifications==2023.12.1
|
28 |
+
lazy_loader==0.4
|
29 |
+
markdown-it-py==3.0.0
|
30 |
+
MarkupSafe==2.1.5
|
31 |
+
mdurl==0.1.2
|
32 |
+
mpmath==1.3.0
|
33 |
+
networkx==3.3
|
34 |
+
ninja==1.11.1.1
|
35 |
+
numpy==2.0.1
|
36 |
+
opencv-python==4.10.0.84
|
37 |
+
opencv-python-headless==4.10.0.84
|
38 |
+
packaging==24.1
|
39 |
+
pandas==2.2.2
|
40 |
+
pillow==10.4.0
|
41 |
+
protobuf==5.27.2
|
42 |
+
pyarrow==17.0.0
|
43 |
+
pyclipper==1.3.0.post5
|
44 |
+
pycparser==2.22
|
45 |
+
pydeck==0.9.1
|
46 |
+
pyee==11.1.0
|
47 |
+
Pygments==2.18.0
|
48 |
+
pylibsrtp==0.10.0
|
49 |
+
pyOpenSSL==24.2.1
|
50 |
+
python-bidi==0.5.1
|
51 |
+
python-dateutil==2.9.0.post0
|
52 |
+
pytz==2024.1
|
53 |
+
PyYAML==6.0.1
|
54 |
+
referencing==0.35.1
|
55 |
+
requests==2.32.3
|
56 |
+
rich==13.7.1
|
57 |
+
rpds-py==0.19.0
|
58 |
+
scikit-image==0.24.0
|
59 |
+
scipy==1.14.0
|
60 |
+
setuptools==71.1.0
|
61 |
+
shapely==2.0.5
|
62 |
+
silpa_common==0.3
|
63 |
+
six==1.16.0
|
64 |
+
smmap==5.0.1
|
65 |
+
soundex==1.1.3
|
66 |
+
spellchecker==0.4
|
67 |
+
streamlit==1.36.0
|
68 |
+
streamlit-webrtc==0.47.7
|
69 |
+
sympy==1.13.1
|
70 |
+
tenacity==8.5.0
|
71 |
+
tifffile==2024.7.21
|
72 |
+
toml==0.10.2
|
73 |
+
toolz==0.12.1
|
74 |
+
torch==2.3.1
|
75 |
+
torchvision==0.18.1
|
76 |
+
tornado==6.4.1
|
77 |
+
typing_extensions==4.12.2
|
78 |
+
tzdata==2024.1
|
79 |
+
urllib3==2.2.2
|
src/__init__.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# __init__.py
|
src/chatbot.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
|
3 |
+
import sys
|
4 |
+
import os
|
5 |
+
|
6 |
+
# Add the directory containing this script to the Python path
|
7 |
+
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
8 |
+
|
9 |
+
def chatbot_say(message):
    """Show ``message`` as an assistant chat bubble and append it to the chat history.

    Args:
        message: Markdown text to display on behalf of the assistant.
    """
    with st.chat_message("assistant", avatar="🔮"):
        st.markdown(message)

    # The history list may not exist yet if this helper runs before
    # run_chatbot() has initialised it; create it instead of raising.
    if 'chat_history' not in st.session_state:
        st.session_state.chat_history = []

    # Add chatbot response to chat history
    st.session_state.chat_history.append(("assistant", message))
|
15 |
+
|
16 |
+
def run_chatbot():
    """Render the chat UI: replay the history, read a prompt, show the reply.

    The reply is obtained from ``chat_completion`` in "normal" mode, with the
    session's ``latest`` value passed as context (or the string "none" when it
    is missing or empty).
    """
    from perplexity_api import chat_completion

    # Initialise chat history in session state
    if 'chat_history' not in st.session_state:
        st.session_state.chat_history = []

    # Display chat history. Assistant messages get the same avatar that is
    # used when a reply is first shown, so replayed messages look identical.
    for role, content in st.session_state.chat_history:
        avatar = "🔮" if role == "assistant" else None
        with st.chat_message(role, avatar=avatar):
            st.markdown(content)

    # If user has entered a message, add it to chat history and get chatbot response
    if prompt := st.chat_input("Say something: "):
        # Display user message
        with st.chat_message("user"):
            st.markdown(prompt)

        # Add user message to chat history
        st.session_state.chat_history.append(("user", prompt))

        # .get() avoids an AttributeError when "latest" was never initialised.
        info = st.session_state.get("latest") or "none"

        response = chat_completion(prompt, info, mode="normal")

        # Display chatbot response
        with st.chat_message("assistant", avatar="🔮"):
            st.markdown(response)

        # Add chatbot response to chat history
        st.session_state.chat_history.append(("assistant", response))
|
49 |
+
|
50 |
+
|
src/ocr.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Worker loop meant to be used as a thread target: OCR each frame it receives.
def ocr_thread(frame_queue, text_queue):
    """Read frames from ``frame_queue`` and publish EasyOCR results to ``text_queue``.

    Args:
        frame_queue: Queue of frames to recognise; a ``None`` item stops the loop.
        text_queue: Queue that receives the ``readtext`` result for each frame.
    """
    from easyocr import Reader

    ocr_reader = Reader(lang_list=['en'])

    # get() blocks until an item is available; ``None`` is the stop sentinel
    # (it does not mean the queue is empty).
    while (frame := frame_queue.get()) is not None:
        detections = ocr_reader.readtext(frame)
        text_queue.put(detections)
        print(detections)
|
16 |
+
|
17 |
+
if __name__ == "__main__":
    # Debug aid: print the module search path when this file is run directly.
    import sys

    search_path = sys.path
    print(search_path)
|
src/perplexity_api.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import streamlit as st
|
3 |
+
|
4 |
+
# Read the key from Streamlit's secrets store so it is never committed with the
# source. This raises at import time if the "API_KEY" secret is not configured.
# NOTE(review): a literal key was previously hard-coded on this line; treat that
# key as leaked and rotate it.
API_KEY = st.secrets["API_KEY"]

# Chat-completions endpoint that chat_completion() posts to.
url = "https://api.perplexity.ai/chat/completions"

# System prompt for "ocr" mode: pick the single most consistent OCR reading.
ppx_prompt = '''Given a Python list named latest_ocr_values, such as ['I am a Python', 'I a pthon',
'I python', 'I am a python', 'a'], directly provide the text that is most consistently detected by the OCR.
If multiple answers are possible, choose the most likely one only. If there is no clear answer, state 'None'.
The output must follow the format: 'OCR scanned text: (your_answer)'. Do not explain yourself afterwards, do not include
multiple valid outputs. Do not include any other information.'''

# System prompt for "normal" mode: general question answering with optional context.
normal_prompt = '''Be a good assistant and answer my question, using information from the following prompt or relating to it, as well as
knowledge you have about this prompt. If no information is given or if the question is not relevant to the information given,
simply answer as normal, using any knowledge you have.'''

headers = {
    "Authorization": "Bearer " + API_KEY,
    "accept": "application/json",
    "content-type": "application/json"
}

# Request template; the two message contents are filled in per call.
payload = {
    "model": "mistral-7b-instruct",
    "messages": [
        {
            "role": "system",
            "content": ""
        },
        {
            "role": "user",
            "content": ""
        }
    ]
}
|
38 |
+
|
39 |
+
# Send one prompt to the chat-completions endpoint and return the reply text.
def chat_completion(prompt, info="", mode="normal"):
    """Request a single chat completion.

    Args:
        prompt: User prompt. When empty or ``None`` no request is made.
        info: Extra context appended to the user message.
        mode: ``"ocr"`` selects ``ppx_prompt`` as the system prompt and
            ``"normal"`` selects ``normal_prompt``; any other value sends an
            empty system prompt.

    Returns:
        The reply text on success, ``"Error"`` when the request fails or the
        response cannot be parsed, or ``None`` when ``prompt`` is empty.
    """
    if not prompt:
        return None

    system_prompts = {"ocr": ppx_prompt, "normal": normal_prompt}

    # Build a fresh "messages" list for every call. A shallow copy of the
    # module-level template would share its nested dicts, so writing into them
    # would leak one call's prompts into the next.
    request_body = {
        **payload,
        "messages": [
            {"role": "system", "content": system_prompts.get(mode, "")},
            {"role": "user", "content": f"Prompt: {prompt}. Information: {info}."},
        ],
    }

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.post(url, json=request_body, headers=headers, timeout=30)
    except requests.RequestException:
        return "Error"

    if response.status_code != 200:
        return "Error"

    try:
        return response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        # Body was not JSON or did not have the expected structure.
        return "Error"
|
64 |
+
|
65 |
+
|
66 |
+
if __name__ == "__main__":
    # Manual smoke test. The sample input is a list of OCR readings, so ask for
    # the OCR system prompt explicitly; the default mode is "normal".
    print(chat_completion("latest_ocr_values = ['I am a cat', 'I a cat', 'I cat', 'I am cat', 'a']", mode="ocr"))
|
src/st_context.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import threading
|
2 |
+
from typing import Any, TypeVar, cast
|
3 |
+
from streamlit.errors import NoSessionContext
|
4 |
+
from streamlit.runtime.scriptrunner.script_run_context import SCRIPT_RUN_CONTEXT_ATTR_NAME, get_script_run_ctx
|
5 |
+
|
6 |
+
T = TypeVar("T")
|
7 |
+
|
8 |
+
def with_streamlit_context(fn: T) -> T:
    """Wrap ``fn`` so it runs with the current Streamlit script-run context.

    The context is captured once, when this function is called, and attached
    to whichever thread later executes the wrapper. This works around
    ``streamlit.errors.NoSessionContext`` being raised when ``fn`` runs on a
    thread that has no context of its own.

    Args:
        fn: Callable to wrap.

    Returns:
        A callable with the same signature that forwards to ``fn``.

    Raises:
        NoSessionContext: If no script-run context is active at wrap time.
    """
    import functools

    ctx = get_script_run_ctx()
    if ctx is None:
        raise NoSessionContext(
            "with_streamlit_context must be called inside a context; "
            "construct your function on the fly, not earlier."
        )

    @functools.wraps(fn)
    def _cb(*args: Any, **kwargs: Any) -> Any:
        """Run ``fn`` with the captured context attached to this thread."""
        thread = threading.current_thread()
        do_nothing = hasattr(thread, SCRIPT_RUN_CONTEXT_ATTR_NAME) and (
            getattr(thread, SCRIPT_RUN_CONTEXT_ATTR_NAME) == ctx
        )
        if do_nothing:
            # The thread already carries this exact context.
            return fn(*args, **kwargs)

        setattr(thread, SCRIPT_RUN_CONTEXT_ATTR_NAME, ctx)
        try:
            # Call the callback.
            return fn(*args, **kwargs)
        finally:
            # Why delattr? Because tasks for different users may be done by
            # the same thread at different times. Doing it in ``finally``
            # guarantees the context is removed even when ``fn`` raises.
            delattr(thread, SCRIPT_RUN_CONTEXT_ATTR_NAME)

    return cast(T, _cb)
|
src/system_initializer.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import queue
|
2 |
+
import threading
|
3 |
+
from streamlit.runtime.scriptrunner import add_script_run_ctx
|
4 |
+
from src.ocr import ocr_thread
|
5 |
+
from src.chatbot import run_chatbot
|
6 |
+
|
7 |
+
class SystemInitializer:
    """Creates the inter-thread queues, starts the OCR worker and runs the chatbot."""

    def initialize_system(self):
        """Initializes queues and starts OCR thread.

        Returns:
            A dict of single-slot queues keyed by ``'frame_queue'``,
            ``'text_queue'``, ``'annotation_queue'``, ``'prompt_queue'`` and
            ``'ppx_queue'``.
        """
        queues = {
            'frame_queue': queue.Queue(maxsize=1),
            'text_queue': queue.Queue(maxsize=1),
            'annotation_queue': queue.Queue(maxsize=1),
            'prompt_queue': queue.Queue(maxsize=1),
            'ppx_queue': queue.Queue(maxsize=1),
        }
        # The worker loops until it receives a ``None`` frame. Mark it as a
        # daemon so that a worker still waiting on its queue cannot keep the
        # process alive at shutdown.
        ocr_thread_with_ctx = threading.Thread(
            target=ocr_thread,
            args=(queues['frame_queue'], queues['text_queue']),
            daemon=True,
        )
        add_script_run_ctx(ocr_thread_with_ctx)
        ocr_thread_with_ctx.start()
        return queues

    def run_chatbot(self):
        """Render the chatbot UI by delegating to the module-level ``run_chatbot``."""
        run_chatbot()
|
src/utilities.py
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
import numpy as np
|
3 |
+
from deskew import determine_skew
|
4 |
+
from spellchecker import SpellChecker
|
5 |
+
import streamlit as st
|
6 |
+
|
7 |
+
from src.perplexity_api import chat_completion
|
8 |
+
from src.st_context import with_streamlit_context
|
9 |
+
|
10 |
+
class Utilities:
    """Helpers for OCR post-processing, frame annotation and image preprocessing."""

    def __init__(self):
        # Initialize the spell checker
        self.spell = SpellChecker()

    def fetch_likely_text(self):
        """Fetches likely text based on latest OCR values.

        Sends the session's ``latest`` list to ``chat_completion`` and returns
        its reply.
        """
        # mode="ocr" selects the OCR-consolidation system prompt; the default
        # mode would treat the list as an ordinary chat question.
        return chat_completion(f"latest_ocr_values = {st.session_state['latest']}", mode="ocr")

    @with_streamlit_context
    def detect_annotations(self, frame, text_queue, conf_thresh):
        """Detects annotations for a single video frame.

        Args:
            frame: Not used by this method.
            text_queue: Queue holding lists of ``(box, text, confidence)`` detections.
            conf_thresh: Minimum confidence as a percentage (0-100).

        Returns:
            A list of ``(box, corrected_text)`` pairs; empty when the queue has
            no result waiting.
        """
        # If the text queue is empty, return an empty list
        if text_queue.empty():
            return []

        # Get detections from the text queue
        detections = text_queue.get()
        annotations = []
        for (box, text, confidence) in detections:
            # Only consider detections with confidence above the threshold
            # (the threshold is a percentage, the confidence a fraction).
            if confidence > conf_thresh / 100.0:
                # Correct the spelling of the detected text
                corrected_text = self.correct_spelling(text)
                # Append the bounding box and corrected text to annotations
                annotations.append((box, corrected_text))
        return annotations

    @with_streamlit_context
    def draw_annotations(self, frame, annotations):
        """Draws annotations on the frame.

        Each text is drawn on a semi-transparent green label whose bottom-left
        corner is the first point of its box. The frame is modified in place
        and also returned.
        """
        for (box, text) in annotations:
            try:
                # Calculate the size of the text box
                font = cv2.FONT_HERSHEY_SIMPLEX
                font_scale = 1
                thickness = 2
                text_size, baseline = cv2.getTextSize(text, font, font_scale, thickness)
                text_width, text_height = text_size

                # Calculate the position for the rectangle and text
                p1 = (int(box[0][0]), int(box[0][1]))
                p2 = (p1[0] + text_width, p1[1] - text_height - baseline)

                # Draw a filled rectangle with transparency
                overlay = frame.copy()
                cv2.rectangle(overlay, p1, p2, (0, 255, 0), -1)
                alpha = 0.4  # Transparency factor
                cv2.addWeighted(overlay, alpha, frame, 1 - alpha, 0, frame)

                # Put the text on top of the rectangle
                cv2.putText(frame, text, (p1[0], p1[1] - baseline), font, font_scale, (0, 0, 0), thickness)
            except Exception as e:
                # Report the failure and continue with the remaining annotations
                st.error(f"Failed to annotate frame: {e}")
        return frame

    def _grayscale(self, image):
        """Converts the image to grayscale."""
        # A 3-dimensional array is treated as BGR colour; anything else is
        # passed through unchanged.
        if len(image.shape) == 3:
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            gray = image
        return gray

    def _remove_noise(self, image):
        """Removes noise from the image using Non-Local Means Denoising."""
        return cv2.fastNlMeansDenoising(image, None, 10, 7, 21)

    def _enhance_contrast(self, image):
        """Enhances the contrast of the image using CLAHE."""
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(image)
        return enhanced

    def _deskew(self, image):
        """Deskews the image assuming the text is horizontal."""
        angle = determine_skew(image)
        if angle is None:
            # No skew angle could be determined; rotating by None would raise.
            return image

        (h, w) = image.shape[:2]
        center = (w // 2, h // 2)

        # Get the rotation matrix
        M = cv2.getRotationMatrix2D(center, angle, 1.0)

        # Perform the actual rotation and return the image
        deskewed = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
        return deskewed

    def _binarize(self, image):
        """Converts the image to a binary image using Otsu's binarization."""
        _, binary = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return binary

    def preprocess_image(self, image):
        """Preprocesses the image: grayscale, denoise, enhance contrast, deskew, binarize."""
        gray = self._grayscale(image)
        denoised = self._remove_noise(gray)
        enhanced = self._enhance_contrast(denoised)
        deskewed = self._deskew(enhanced)
        binary = self._binarize(deskewed)
        return binary

    def correct_spelling(self, text):
        """Corrects the spelling of the given text, word by word."""
        corrected_text = []
        for word in text.split():
            corrected_word = self.spell.correction(word)
            # Keep the original word when the checker offers no correction, so
            # that unknown words are not silently removed from the OCR text.
            corrected_text.append(corrected_word or word)

        return ' '.join(corrected_text)

    def overlay_annotations(self, frame, annotated_frame):
        """Overlay annotations from the annotated frame onto the current frame."""
        alpha = 0.4  # Transparency factor
        # Blend in place: the result is written back into ``frame``.
        cv2.addWeighted(annotated_frame, alpha, frame, 1 - alpha, 0, frame)
        return frame
|
src/video_processor.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
from collections import deque
|
3 |
+
from streamlit_webrtc import VideoProcessorBase
|
4 |
+
from src.utilities import Utilities
|
5 |
+
from src.st_context import with_streamlit_context
|
6 |
+
|
7 |
+
class VideoProcessor(VideoProcessorBase):
    """Video processor that feeds frames to the OCR worker and draws its results."""

    def __init__(self, queues, conf_thresh, n, k):
        """Store the processing settings.

        Args:
            queues: Dict of queues; ``'frame_queue'`` and ``'text_queue'`` are used here.
            conf_thresh: Minimum OCR confidence (percentage) for a detection to be drawn.
            n: Submit every n-th frame for OCR.
            k: Number of frames for which a set of annotations stays on screen.
        """
        self.queues = queues
        self.conf_thresh = conf_thresh
        self.n = n
        self.k = k
        self.frame_counter = 0
        self.utilities = Utilities()
        self.annotation_counter = 0
        # Most recent annotations, redrawn while annotation_counter > 0.
        self.current_annotations = []

    @with_streamlit_context
    def transform(self, frame):
        """Return the frame as a BGR array with the current annotations drawn on it."""
        img = frame.to_ndarray(format="bgr24")
        self.frame_counter += 1

        # NOTE(review): nesting reconstructed from control flow — the queue
        # submission and detection lookup are assumed to belong to this branch
        # because they use ``preprocessed_frame``, which is only set here.
        if self.frame_counter % self.n == 0:
            # Preprocess the frame
            preprocessed_frame = self.utilities.preprocess_image(img)

            # Add frame to queue if it is empty
            if self.queues['frame_queue'].empty():
                self.queues['frame_queue'].put(preprocessed_frame)

            annotations = self.utilities.detect_annotations(preprocessed_frame, self.queues['text_queue'], self.conf_thresh)
            if annotations:
                # Keep the annotations on the instance rather than in a queue:
                # taking them out of a queue on the first draw would show them
                # for a single frame instead of the intended k frames.
                self.current_annotations = annotations
                self.annotation_counter = self.k  # Reset the counter

        # Redraw the latest annotations until their frame budget is used up
        if self.current_annotations and self.annotation_counter > 0:
            img = self.utilities.draw_annotations(img, self.current_annotations)
            self.annotation_counter -= 1

        return img
|