# Page-extraction header -- Spaces status: Sleeping; file size: 2,623 bytes.
import streamlit as st
import pdfplumber
import pytesseract
from PIL import Image
import json
import pandas as pd
from io import BytesIO
import time
from openai import OpenAI
import groq
class SyntheticDataFactory:
    """Provider registry plus Streamlit session-state bootstrap.

    ``PROVIDER_CONFIG`` maps each provider's display name to a dict with:

    * ``"client"``: a callable that takes an API key and builds the
      provider's client object,
    * ``"models"``: the list of model names offered for that provider,
    * ``"key_label"``: the label text for that provider's API-key field.
    """

    PROVIDER_CONFIG = {
        "Deepseek": {
            # Deepseek is reached through the OpenAI client pointed at a
            # different base URL.
            "client": lambda key: OpenAI(
                base_url="https://api.deepseek.com/v1",
                api_key=key,
            ),
            "models": ["deepseek-chat"],
            "key_label": "Deepseek API Key",
        },
        "OpenAI": {
            "client": lambda key: OpenAI(api_key=key),
            "models": ["gpt-4-turbo"],
            "key_label": "OpenAI API Key",
        },
        "Groq": {
            "client": lambda key: groq.Groq(api_key=key),
            "models": ["mixtral-8x7b-32768", "llama2-70b-4096"],
            "key_label": "Groq API Key",
        },
    }

    def __init__(self):
        """Make sure the session-state entries exist as soon as we are built."""
        self.init_session_state()

    def init_session_state(self):
        """Create the ``qa_data`` and ``processing`` session entries if absent.

        Entries that are already present in ``st.session_state`` are left
        untouched, so values from earlier script runs are preserved.
        """
        initial_entries = {
            'qa_data': {
                'pairs': [],
                'metadata': {},
                'exports': {},
            },
            'processing': {
                'stage': 'idle',
                'errors': [],
            },
        }
        for entry_name, initial_value in initial_entries.items():
            if entry_name not in st.session_state:
                st.session_state[entry_name] = initial_value

    # Add remaining class methods from previous implementation
    # (process_pdf, generate_qa, etc.)
def setup_sidebar():
    """Render the sidebar controls and return the user's selections.

    Widgets are drawn in this order: header, provider selector, API-key
    field (labelled per the chosen provider), model selector, temperature
    slider.

    Returns:
        A ``(provider, api_key, model, temp)`` tuple holding the values
        returned by the corresponding widgets.
    """
    registry = SyntheticDataFactory.PROVIDER_CONFIG
    with st.sidebar:
        st.header("⚙️ AI Configuration")
        provider = st.selectbox("Provider", list(registry))
        # The key label and the model list both depend on the chosen provider.
        selected = registry[provider]
        api_key = st.text_input(selected["key_label"], type="password")
        model = st.selectbox("Model", selected["models"])
        temp = st.slider(
            "Temperature",
            min_value=0.0,
            max_value=1.0,
            value=0.3,
        )
    return provider, api_key, model, temp
def main():
    """Build the page: config, session state, sidebar, title, and uploader.

    The "Start Synthetic Generation" button is only rendered once a PDF has
    been uploaded and an API key has been entered; its action is still a
    placeholder.
    """
    st.set_page_config(page_title="Enterprise Data Factory", page_icon="🏭", layout="wide")

    # Constructing the factory initializes the session-state entries.
    factory = SyntheticDataFactory()

    # Sidebar selections (provider, key, model, temperature).
    provider, api_key, model, temp = setup_sidebar()

    st.title("🚀 Enterprise Synthetic Data Factory")

    # File upload and processing logic
    uploaded_file = st.file_uploader("Upload Financial PDF", type=["pdf"])
    if not (uploaded_file and api_key):
        # Nothing to act on until both a file and a key are available.
        return

    if st.button("Start Synthetic Generation"):
        # Process document and generate Q&A pairs
        pass  # Add processing logic here
# Script entry point: run the app only when this file is executed directly.
if __name__ == "__main__":
    main()