# Spaces: Sleeping (status banner from the hosting page captured with the source; not part of the program)
import streamlit as st | |
import pdfplumber | |
import pytesseract | |
from PIL import Image | |
import json | |
import pandas as pd | |
from io import BytesIO | |
import time | |
from openai import OpenAI | |
import groq | |
class SyntheticDataFactory:
    """Provider catalogue plus Streamlit session-state bootstrap.

    ``PROVIDER_CONFIG`` maps a provider display name to a dict with:

    * ``client`` -- one-argument callable building the API client from a key,
    * ``models`` -- list of model identifiers available for that provider,
    * ``key_label`` -- text label associated with the provider's API key.
    """

    PROVIDER_CONFIG = {
        "Deepseek": {
            # Deepseek is reached through the OpenAI client with a custom base URL.
            "client": lambda key: OpenAI(
                base_url="https://api.deepseek.com/v1",
                api_key=key,
            ),
            "models": ["deepseek-chat"],
            "key_label": "Deepseek API Key",
        },
        "OpenAI": {
            "client": lambda key: OpenAI(api_key=key),
            "models": ["gpt-4-turbo"],
            "key_label": "OpenAI API Key",
        },
        "Groq": {
            "client": lambda key: groq.Groq(api_key=key),
            "models": ["mixtral-8x7b-32768", "llama2-70b-4096"],
            "key_label": "Groq API Key",
        },
    }

    def __init__(self):
        """Seed the session state as soon as the factory is created."""
        self.init_session_state()

    def init_session_state(self):
        """Create the ``qa_data`` and ``processing`` session entries if absent.

        Entries that already exist in ``st.session_state`` are left untouched,
        so calling this on every script rerun does not reset stored values.
        """
        initial_entries = {
            "qa_data": {
                "pairs": [],
                "metadata": {},
                "exports": {},
            },
            "processing": {
                "stage": "idle",
                "errors": [],
            },
        }
        for entry_name, entry_value in initial_entries.items():
            if entry_name not in st.session_state:
                st.session_state[entry_name] = entry_value

    # TODO: add the remaining class methods from the previous implementation
    # (process_pdf, generate_qa, etc.)
def setup_sidebar():
    """Render the provider settings in the sidebar and return the selections.

    The provider choice drives the other widgets: the API-key input is
    labelled with that provider's ``key_label`` and the model dropdown lists
    that provider's ``models`` from ``SyntheticDataFactory.PROVIDER_CONFIG``.

    Returns:
        A 4-tuple ``(provider, api_key, model, temp)``: the selected provider
        name, the text entered in the password field, the selected model
        identifier, and the temperature slider value (range 0.0-1.0,
        initial value 0.3).
    """
    with st.sidebar:
        # The header emoji was mis-decoded in the stored source; restored to
        # the gear glyph so the heading renders as readable text.
        st.header("⚙️ AI Configuration")

        provider = st.selectbox(
            "Provider",
            list(SyntheticDataFactory.PROVIDER_CONFIG.keys()),
        )
        config = SyntheticDataFactory.PROVIDER_CONFIG[provider]

        # type="password" masks the key while it is being typed.
        api_key = st.text_input(config["key_label"], type="password")
        model = st.selectbox("Model", config["models"])
        temp = st.slider("Temperature", 0.0, 1.0, 0.3)

    return provider, api_key, model, temp
def main():
    """Build the Streamlit page: config, sidebar, title and PDF upload flow.

    The "Start Synthetic Generation" button is only shown once a PDF has been
    uploaded and an API key has been entered in the sidebar. The button
    handler itself is still a placeholder.
    """
    # NOTE(review): the icon/title emoji was mis-decoded in the stored source
    # (only a stray Greek letter survived). The factory emoji is used as the
    # most plausible original -- confirm against the intended design.
    st.set_page_config(
        page_title="Enterprise Data Factory",
        page_icon="🏭",
        layout="wide",
    )

    # Constructing the factory seeds st.session_state with its default
    # entries; the instance is kept for the processing step to use.
    factory = SyntheticDataFactory()

    # Sidebar widgets yield the provider settings for this run.
    provider, api_key, model, temp = setup_sidebar()

    st.title("🏭 Enterprise Synthetic Data Factory")

    uploaded_file = st.file_uploader("Upload Financial PDF", type=["pdf"])
    if uploaded_file and api_key:
        if st.button("Start Synthetic Generation"):
            # TODO: process the document and generate Q&A pairs.
            pass
# Script entry point: build the app only when this file is run directly.
if __name__ == "__main__":
    main()