# sythenticdata / app.py
# mgbam's picture
# Update app.py
# fe72195 verified
# raw
# history blame
# 2.62 kB
import streamlit as st
import pdfplumber
import pytesseract
from PIL import Image
import json
import pandas as pd
from io import BytesIO
import time
from openai import OpenAI
import groq
class SyntheticDataFactory:
    """Provider registry plus Streamlit session-state bootstrap.

    ``PROVIDER_CONFIG`` maps a provider's display name to a dict with:

    * ``"client"``    -- callable taking an API key and returning a client
      object for that provider,
    * ``"models"``    -- list of model names offered for that provider,
    * ``"key_label"`` -- label text for that provider's API-key field.
    """

    PROVIDER_CONFIG = {
        "Deepseek": {
            # Deepseek is reached through the OpenAI client pointed at a
            # different base URL.
            "client": lambda key: OpenAI(
                base_url="https://api.deepseek.com/v1",
                api_key=key,
            ),
            "models": ["deepseek-chat"],
            "key_label": "Deepseek API Key",
        },
        "OpenAI": {
            "client": lambda key: OpenAI(api_key=key),
            "models": ["gpt-4-turbo"],
            "key_label": "OpenAI API Key",
        },
        "Groq": {
            "client": lambda key: groq.Groq(api_key=key),
            "models": ["mixtral-8x7b-32768", "llama2-70b-4096"],
            "key_label": "Groq API Key",
        },
    }

    def __init__(self):
        """Make sure the session-state entries exist as soon as we are built."""
        self.init_session_state()

    def init_session_state(self):
        """Create the ``qa_data`` and ``processing`` session entries if absent.

        Entries that already exist in ``st.session_state`` are left untouched,
        so values survive Streamlit reruns.
        """
        # Built fresh on every call so no mutable default is ever shared.
        initial_state = {
            "qa_data": {
                "pairs": [],
                "metadata": {},
                "exports": {},
            },
            "processing": {
                "stage": "idle",
                "errors": [],
            },
        }
        for entry_name, entry_default in initial_state.items():
            if entry_name not in st.session_state:
                st.session_state[entry_name] = entry_default

    # TODO: add the remaining class methods from the previous implementation
    # (process_pdf, generate_qa, etc.).
def setup_sidebar():
    """Render the provider settings in the sidebar and return the selections.

    Inside ``st.sidebar`` this draws, in order: a provider select box filled
    from ``SyntheticDataFactory.PROVIDER_CONFIG``, a password-masked text
    input labelled with the chosen provider's ``key_label``, a model select
    box limited to that provider's ``models``, and a temperature slider
    (range 0.0 to 1.0, initial value 0.3).

    Returns:
        A ``(provider, api_key, model, temp)`` tuple with the current value
        of each widget.
    """
    with st.sidebar:
        # The gear emoji is written as real Unicode; the previous literal was
        # a mis-decoded byte sequence that rendered as garbage in the UI.
        st.header("⚙️ AI Configuration")
        provider = st.selectbox(
            "Provider",
            list(SyntheticDataFactory.PROVIDER_CONFIG),
        )
        # The key label and model list both depend on the chosen provider.
        config = SyntheticDataFactory.PROVIDER_CONFIG[provider]
        api_key = st.text_input(config["key_label"], type="password")
        model = st.selectbox("Model", config["models"])
        temp = st.slider("Temperature", 0.0, 1.0, 0.3)
    return provider, api_key, model, temp
def main():
    """Build the page: page config, sidebar, title, and the PDF upload flow.

    The "Start Synthetic Generation" button is only rendered once a PDF has
    been uploaded and an API key has been entered. The processing step behind
    the button is still a placeholder.
    """
    st.set_page_config(
        page_title="Enterprise Data Factory",
        page_icon="🏭",
        layout="wide",
    )

    # Constructing the factory ensures its session-state entries exist.
    factory = SyntheticDataFactory()

    # provider/model/temp are collected here for the (not yet written)
    # generation step below.
    provider, api_key, model, temp = setup_sidebar()

    # The rocket emoji is written as real Unicode; the previous literal was a
    # mis-decoded byte sequence that rendered as garbage in the UI.
    st.title("🚀 Enterprise Synthetic Data Factory")

    uploaded_file = st.file_uploader("Upload Financial PDF", type=["pdf"])
    if uploaded_file and api_key:
        if st.button("Start Synthetic Generation"):
            # TODO: process the document and generate Q&A pairs.
            pass
# Script entry point: build the page only when this file is run directly.
if __name__ == "__main__":
    main()