Spaces:

alx-d
/

scout

Sleeping

App Files Files Community

alxd committed on Mar 26

Commit

c59b529

1 Parent(s): 320f15a

added openai and max_tokens

Browse files

Files changed (2) hide show

requirements.txt +3 -1
scoutLLM.py +106 -40

requirements.txt CHANGED Viewed

@@ -1,7 +1,7 @@
 gradio==3.40.0
 langchain-community==0.0.19
 langchain_core==0.1.22
-langchain-openai==0.0.5
 faiss-cpu==1.7.3
 huggingface-hub==0.24.7
 google-generativeai==0.3.2
@@ -56,3 +56,5 @@ google-auth-oauthlib
 google-auth-httplib2
 pyperclip

 gradio==3.40.0
 langchain-community==0.0.19
 langchain_core==0.1.22
+#langchain-openai==0.0.5
 faiss-cpu==1.7.3
 huggingface-hub==0.24.7
 google-generativeai==0.3.2
 google-auth-httplib2
 pyperclip
+openai==0.28

scoutLLM.py CHANGED Viewed

@@ -20,7 +20,8 @@ from googleapiclient.discovery import build
 import base64
 from google.oauth2.credentials import Credentials
 from google.auth.transport.requests import Request
 # ------------------------------
 # Helper functions and globals
@@ -28,6 +29,7 @@ from google.auth.transport.requests import Request
 sheet_data = None
 file_name = None
 sheet = None
 def debug_print(message: str):
     """Print *message* to stdout, prefixed with the current ISO-8601 timestamp.

     The timestamp comes from ``datetime.datetime.now()`` and the stream is
     flushed on every call so lines show up immediately in the logs.
     """
     timestamp = datetime.datetime.now().isoformat()
     print(f"[{timestamp}] {message}", flush=True)
@@ -49,40 +51,95 @@ def count_tokens(text: str) -> int:
             return len(text.split())
     return len(text.split())
-def generate_response(prompt: str, model_name: str, sheet_data: str) -> str:
-    full_prompt = f"{prompt}\n\nSheet Data:\n{sheet_data}"  # Append sheet data to prompt
-    if "Mistral" in model_name:
-        mistral_api_key = os.getenv("MISTRAL_API_KEY")
-        if not mistral_api_key:
-            raise ValueError("MISTRAL_API_KEY environment variable not set.")
-        mistral_client = Mistral(api_key=mistral_api_key)
-        response = mistral_client.chat.complete(
-            model="mistral-small-latest",
-            messages=[{"role": "user", "content": full_prompt}],
-            temperature=0.7,
-            top_p=0.95
-        )
-        return response.choices[0].message.content
-    elif "Meta-Llama" in model_name:
-        hf_api_token = os.getenv("HF_API_TOKEN")
-        if not hf_api_token:
-            raise ValueError("HF_API_TOKEN environment variable not set.")
-        client = InferenceClient(token=hf_api_token)
-        response = client.text_generation(
-            full_prompt,
-            model="meta-llama/Meta-Llama-3-8B-Instruct",
-            temperature=0.7,
-            top_p=0.95,
-            max_new_tokens=512
-        )
-        return response
-    else:
-        raise ValueError("Invalid model selection. Please choose either 'Mistral-API' or 'Meta-Llama-3'.")
 def process_query(prompt: str, model_name: str):
     global sheet_data
@@ -103,9 +160,6 @@ def process_query(prompt: str, model_name: str):
     # Return the response along with token counts
     return response, f"Input tokens: {input_tokens}", f"Output tokens: {output_tokens}"
-def ui_process_query(prompt, model_name):
-    return process_query(prompt, model_name)
 # ------------------------------
 # Global variables for background jobs
 # ------------------------------
@@ -182,10 +236,12 @@ def process_in_background(job_id, func, args):
     debug_print(f"Job {job_id} finished processing in background.")
-def submit_query_async(query, model_choice=None):
     """Asynchronous version of submit_query_updated to prevent timeouts."""
     global last_job_id
     global sheet_data
     if not query:
         return ("Please enter a non-empty query", "", "Input tokens: 0", "Output tokens: 0", "", "", get_job_list())
@@ -197,6 +253,7 @@ def submit_query_async(query, model_choice=None):
     if sheet_data is None:
         sheet_data = get_sheet_data()
     query = f"{query}\n\nSheet Data:\n{sheet_data}"  # Append sheet data to prompt
     # Start background thread to process the query
@@ -510,11 +567,21 @@ with gr.Blocks() as app:
         with gr.Column(scale=1):
             gr.Markdown("### 🚀 Submit Query")
             gr.Markdown("Enter your prompt below and choose a model. Your query will be processed in the background.")
             model_dropdown = gr.Dropdown(
-                choices=["🇺🇸 Remote Meta-Llama-3", "🇪🇺 Mistral-API"],
-                value="🇪🇺 Mistral-API",  # Default model set to Mistral
                 label="Select Model"
             )
             prompt_input = gr.Textbox(label="Enter your prompt", value=default_prompt, lines=6)
             with gr.Row():
                 auto_refresh_checkbox = gr.Checkbox(
@@ -562,7 +629,6 @@ with gr.Blocks() as app:
     def load_file(file, sheet_name):
         global sheet_data
         global file_name
-        global sheet
         file_name = file
         sheet = sheet_name
@@ -585,7 +651,7 @@ with gr.Blocks() as app:
     # When submitting a query asynchronously
     submit_button.click(
         fn=submit_query_async,
-        inputs=[prompt_input, model_dropdown],
         outputs=[
             response_output, token_info,
             input_tokens_display, output_tokens_display,

 import base64
 from google.oauth2.credentials import Credentials
 from google.auth.transport.requests import Request
+import openai  # Correct OpenAI import
+from openai.error import RateLimitError  # Import rate limit error handling
 # ------------------------------
 # Helper functions and globals
 sheet_data = None
 file_name = None
 sheet = None
+slider_max_tokens = None
 def debug_print(message: str):
     """Print *message* to stdout, prefixed with the current ISO-8601 timestamp.

     The timestamp comes from ``datetime.datetime.now()`` and the stream is
     flushed on every call so lines show up immediately in the logs.
     """
     timestamp = datetime.datetime.now().isoformat()
     print(f"[{timestamp}] {message}", flush=True)
             return len(text.split())
     return len(text.split())
def get_model_max_tokens(model_name: str) -> int:
    """Return the max context length (in tokens) for the selected model.

    Args:
        model_name: The model label as shown in the UI dropdown. It is
            matched by substring against the known model keys, so any
            decoration around the key (flags, "Remote", ...) is ignored.

    Returns:
        The token limit registered for the first matching key, or 4096 when
        no key matches.
    """
    # This function used to be defined twice in a row with different
    # Mistral limits; only the second definition was ever effective, so its
    # values are the ones kept in this single definition.
    model_token_limits = {
        "GPT-3.5": 16385,
        "GPT-4o mini": 128000,
        "GPT-4o": 128000,
        "Meta-Llama-3": 4096,
        "Mistral-API": 4096,  # Adjust based on actual limits
    }
    # Check the longest keys first so a label such as "GPT-4o mini" is
    # resolved by its own entry instead of the shorter "GPT-4o" prefix.
    for key in sorted(model_token_limits, key=len, reverse=True):
        if key in model_name:
            return model_token_limits[key]
    return 4096  # Default safety limit
# Completion budget used while the UI slider value has not been stored yet.
_DEFAULT_MAX_TOKENS = 512

# UI label fragment -> OpenAI model id.
_OPENAI_MODEL_IDS = {
    "GPT-3.5": "gpt-3.5-turbo",
    "GPT-4o mini": "gpt-4o-mini",
    "GPT-4o": "gpt-4o",
}


def _resolve_openai_model(model_name: str):
    """Return the OpenAI model id for a UI label, or None if none matches."""
    # Longest label first: "GPT-4o" is a substring of "GPT-4o mini", so a
    # plain in-order scan would send every "mini" request to gpt-4o.
    for label in sorted(_OPENAI_MODEL_IDS, key=len, reverse=True):
        if label in model_name:
            return _OPENAI_MODEL_IDS[label]
    return None


def generate_response(prompt: str, model_name: str, sheet_data: str = "") -> str:
    """Send the prompt to the selected model and return its answer as text.

    Args:
        prompt: The user prompt.
        model_name: The UI label of the model; routed by substring to the
            Mistral, Hugging Face (Meta-Llama) or OpenAI backend.
        sheet_data: Optional sheet contents appended to the prompt.

    Returns:
        ``"[Model: <model_name>]"`` followed by the model's answer. On any
        failure (missing API key, unknown model, backend error) the error is
        logged and ``"[Model: <model_name>][Error] <message>"`` is returned
        instead, so the caller always receives a string.
    """
    full_prompt = f"{prompt}\n\nSheet Data:\n{sheet_data}" if sheet_data else prompt
    try:
        # The slider value lives in a module-level global that stays None
        # until the first submission stores it; fall back to a default
        # instead of letting min() fail on None.
        requested = slider_max_tokens if slider_max_tokens is not None else _DEFAULT_MAX_TOKENS
        # int(): the value is used as a slice bound and as an API token
        # count, both of which require an integer.
        max_tokens = min(int(requested), get_model_max_tokens(model_name))

        # NOTE(review): the prompt is cut to `max_tokens` *characters* below,
        # i.e. the output-token budget is reused as a character limit for the
        # input. Kept as-is to preserve current behavior — confirm whether a
        # token-based limit on the context window was intended.
        truncated_prompt = full_prompt[:max_tokens]

        if "Mistral" in model_name:
            mistral_api_key = os.getenv("MISTRAL_API_KEY")
            if not mistral_api_key:
                raise ValueError("MISTRAL_API_KEY environment variable not set.")
            mistral_client = Mistral(api_key=mistral_api_key)
            response = mistral_client.chat.complete(
                model="mistral-small-latest",
                messages=[{"role": "user", "content": truncated_prompt}],
                temperature=0.7,
                top_p=0.95
            )
            return f"[Model: {model_name}]" + response.choices[0].message.content

        if "Meta-Llama" in model_name:
            hf_api_token = os.getenv("HF_API_TOKEN")
            if not hf_api_token:
                raise ValueError("HF_API_TOKEN environment variable not set.")
            client = InferenceClient(token=hf_api_token)
            response = client.text_generation(
                truncated_prompt,
                model="meta-llama/Meta-Llama-3-8B-Instruct",
                temperature=0.7,
                top_p=0.95,
                max_new_tokens=max_tokens
            )
            return f"[Model: {model_name}]" + response

        openai_model = _resolve_openai_model(model_name)
        if openai_model is None:
            # Previously an unrecognised label fell off the end of the
            # function and returned None; report it like any other error.
            raise ValueError(f"Unsupported model: {model_name}")
        response = openai.ChatCompletion.create(
            model=openai_model,
            messages=[{"role": "user", "content": truncated_prompt}],
            temperature=0.7,
            max_tokens=max_tokens
        )
        return f"[Model: {model_name}]" + response["choices"][0]["message"]["content"]
    except Exception as e:
        # Top-level boundary for the background job: log and hand the error
        # text back to the UI rather than crashing the worker thread.
        debug_print(f"❌ Error generating response: {str(e)}")
        return f"[Model: {model_name}][Error] {str(e)}"
 def process_query(prompt: str, model_name: str):
     global sheet_data
     # Return the response along with token counts
     return response, f"Input tokens: {input_tokens}", f"Output tokens: {output_tokens}"
 # ------------------------------
 # Global variables for background jobs
 # ------------------------------
     debug_print(f"Job {job_id} finished processing in background.")
+def submit_query_async(query, model_choice, max_tokens_slider):
     """Asynchronous version of submit_query_updated to prevent timeouts."""
     global last_job_id
     global sheet_data
+    global slider_max_tokens
+    slider_max_tokens = max_tokens_slider
     if not query:
         return ("Please enter a non-empty query", "", "Input tokens: 0", "Output tokens: 0", "", "", get_job_list())
     if sheet_data is None:
         sheet_data = get_sheet_data()
     query = f"{query}\n\nSheet Data:\n{sheet_data}"  # Append sheet data to prompt
     # Start background thread to process the query
         with gr.Column(scale=1):
             gr.Markdown("### 🚀 Submit Query")
             gr.Markdown("Enter your prompt below and choose a model. Your query will be processed in the background.")
+            # Model dropdown: OpenAI (GPT), Meta-Llama and Mistral options
             model_dropdown = gr.Dropdown(
+                choices=[
+                    "🇺🇸 GPT-3.5",
+                    "🇺🇸 GPT-4o",
+                    "🇺🇸 GPT-4o mini",
+                    "🇺🇸 Remote Meta-Llama-3",
+                    "🇪🇺 Mistral-API",
+                ],
+                value="🇺🇸 GPT-4o mini",  # Default model set to GPT-4o mini
                 label="Select Model"
             )
+            max_tokens_slider = gr.Slider(minimum=50, maximum=4096, value=512, label="🔢 Max Tokens", step=50)
             prompt_input = gr.Textbox(label="Enter your prompt", value=default_prompt, lines=6)
             with gr.Row():
                 auto_refresh_checkbox = gr.Checkbox(
     def load_file(file, sheet_name):
         global sheet_data
         global file_name
         file_name = file
         sheet = sheet_name
     # When submitting a query asynchronously
     submit_button.click(
         fn=submit_query_async,
+        inputs=[prompt_input, model_dropdown, max_tokens_slider],
         outputs=[
             response_output, token_info,
             input_tokens_display, output_tokens_display,