File size: 2,623 Bytes
253afd8
 
 
fe72195
2d8777b
253afd8
 
 
fe72195
9f48d45
253afd8
fe72195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f48d45
fe72195
9f48d45
fe72195
 
 
 
 
 
 
 
 
 
9f48d45
 
 
fe72195
9f48d45
 
fe72195
 
9f48d45
fe72195
 
 
 
 
 
2d8777b
fe72195
 
 
2d8777b
fe72195
9f48d45
fe72195
9f48d45
fe72195
9f48d45
fe72195
9f48d45
 
fe72195
 
9f48d45
fe72195
 
253afd8
fe72195
218d2f0
fe72195
 
218d2f0
fe72195
 
 
 
253afd8
 
9f48d45
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import streamlit as st
import pdfplumber
import pytesseract
from PIL import Image
import json
import pandas as pd
from io import BytesIO
import time
from openai import OpenAI
import groq

class SyntheticDataFactory:
    """Provider registry plus Streamlit session-state bootstrap.

    ``PROVIDER_CONFIG`` maps a provider display name to a dict with:

    * ``client``    - callable that takes an API key and builds the SDK client
    * ``models``    - list of model identifiers offered for the provider
    * ``key_label`` - label text for the provider's API-key input
    """

    PROVIDER_CONFIG = {
        "Deepseek": {
            # Built with the OpenAI client class, pointed at Deepseek's base URL.
            "client": lambda key: OpenAI(
                api_key=key,
                base_url="https://api.deepseek.com/v1",
            ),
            "models": ["deepseek-chat"],
            "key_label": "Deepseek API Key",
        },
        "OpenAI": {
            "client": lambda key: OpenAI(api_key=key),
            "models": ["gpt-4-turbo"],
            "key_label": "OpenAI API Key",
        },
        "Groq": {
            "client": lambda key: groq.Groq(api_key=key),
            "models": ["mixtral-8x7b-32768", "llama2-70b-4096"],
            "key_label": "Groq API Key",
        },
    }

    def __init__(self):
        """Seed the session-state slots as soon as the factory is created."""
        self.init_session_state()

    def init_session_state(self):
        """Create the ``qa_data`` and ``processing`` session slots if absent.

        Slots that already exist in ``st.session_state`` are left untouched.
        """
        initial_slots = {
            'qa_data': {
                'pairs': [],
                'metadata': {},
                'exports': {},
            },
            'processing': {
                'stage': 'idle',
                'errors': [],
            },
        }
        for slot_name, initial_value in initial_slots.items():
            if slot_name not in st.session_state:
                st.session_state[slot_name] = initial_value

    # TODO: the remaining methods from the previous implementation
    # (process_pdf, generate_qa, etc.) still need to be added here.

def setup_sidebar():
    """Render the sidebar controls and return the chosen AI settings.

    Returns:
        A ``(provider, api_key, model, temp)`` tuple: the selected provider
        name, the text entered in the API-key field, the selected model and
        the temperature slider value.
    """
    registry = SyntheticDataFactory.PROVIDER_CONFIG

    with st.sidebar:
        st.header("⚙️ AI Configuration")

        chosen_provider = st.selectbox("Provider", list(registry))
        provider_settings = registry[chosen_provider]

        # The key field's label and the model choices follow the provider.
        entered_key = st.text_input(provider_settings["key_label"], type="password")
        chosen_model = st.selectbox("Model", provider_settings["models"])
        temperature = st.slider(
            "Temperature",
            min_value=0.0,
            max_value=1.0,
            value=0.3,
        )

    return chosen_provider, entered_key, chosen_model, temperature

def main():
    """Build the page: config, sidebar, title, uploader and start button."""
    st.set_page_config(
        layout="wide",
        page_title="Enterprise Data Factory",
        page_icon="🏭",
    )

    # Constructing the factory seeds st.session_state via its __init__.
    factory = SyntheticDataFactory()

    provider, api_key, model, temp = setup_sidebar()

    st.title("🚀 Enterprise Synthetic Data Factory")

    uploaded_file = st.file_uploader("Upload Financial PDF", type=["pdf"])

    # The start button is only rendered once both a PDF and an API key exist.
    if not (uploaded_file and api_key):
        return

    if st.button("Start Synthetic Generation"):
        # TODO: process the document and generate Q&A pairs here.
        pass

# Script entry point: call main() only when this module is executed as the
# main script, not when it is imported by another module.
if __name__ == "__main__":
    main()