File size: 2,728 Bytes
6ddf684
7443aec
 
d59cc6b
20a8d5c
d59cc6b
137c3a1
d59cc6b
 
 
137c3a1
d59cc6b
20a8d5c
137c3a1
d59cc6b
 
 
137c3a1
20a8d5c
137c3a1
d59cc6b
20a8d5c
d59cc6b
137c3a1
d59cc6b
ca2c2ed
 
6ddf684
 
 
 
 
 
 
 
ca2c2ed
 
 
d59cc6b
 
ca2c2ed
 
 
 
137c3a1
 
 
 
 
ca2c2ed
137c3a1
 
ca2c2ed
137c3a1
 
ca2c2ed
137c3a1
ca2c2ed
137c3a1
 
 
d59cc6b
 
 
ca2c2ed
d59cc6b
 
 
 
 
ca2c2ed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import subprocess
from playwright.async_api import async_playwright
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Define model and inference parameters
# Checkpoint identifier handed to `from_pretrained` for both the tokenizer
# and the causal language model in generate_actions().
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
# Forwarded as `max_length` to the text-generation call.
MAX_LENGTH = 512
# Forwarded as `num_beams` to the text-generation call.
NUM_BEAMS = 5
# Forwarded as `temperature` to the text-generation call.
# NOTE(review): no `do_sample` flag is passed alongside it; confirm against the
# transformers generation docs whether temperature has any effect here.
TEMPERATURE = 0.7

# Lazily-built text-generation pipeline shared by every call, so the model
# weights are loaded once per process instead of once per request.
_generator = None


def _get_generator():
    """Return the shared text-generation pipeline, building it on first use."""
    global _generator
    if _generator is None:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
        _generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
    return _generator


# Function to generate actions using Zephyr-7b-beta model
def generate_actions(input_text):
    """Generate a list of textual actions for *input_text*.

    The prompt is run through the text-generation pipeline for MODEL_NAME and
    the generated text is split into lines, one action per line.

    Args:
        input_text: The user's request, used verbatim as the prompt.

    Returns:
        A list of non-empty action strings with surrounding whitespace
        removed. Blank lines in the model output are dropped so callers do
        not have to treat them as (unrecognized) actions.
    """
    generator = _get_generator()
    outputs = generator(
        input_text,
        max_length=MAX_LENGTH,
        num_beams=NUM_BEAMS,
        temperature=TEMPERATURE,
    )

    response = outputs[0]['generated_text']
    # NOTE(review): by default the text-generation pipeline appears to return
    # the prompt followed by the continuation, so the prompt's own lines are
    # parsed as actions too -- confirm whether that is intended.
    stripped_lines = (line.strip() for line in response.splitlines())
    return [line for line in stripped_lines if line]

# Function to initialize browser and page (now asynchronous)
async def initialize_browser(playwright=None):
    """Launch Chromium and open a blank page.

    The handles are returned to the caller, so the Playwright driver must
    still be running after this function returns. Wrapping the launch in
    ``async with async_playwright()`` here would stop the driver on exit and
    hand back a browser/page that can no longer be used.

    Args:
        playwright: Optional already-started Playwright instance (for example
            the object bound by ``async with async_playwright() as p``). When
            given, the browser is launched from it and the caller keeps full
            control of the driver's lifetime. When omitted, a dedicated driver
            is started here; it is not stopped by this function, so the caller
            should at least ``await browser.close()`` when finished.

    Returns:
        A ``(browser, page)`` tuple.

    Raises:
        Whatever Playwright raises if the browser cannot be launched. Any
        driver or browser started by this call is shut down before the error
        propagates.
    """
    import shutil

    # Advisory check only. shutil.which avoids spawning a `which` subprocess,
    # which raises FileNotFoundError on systems that have no such command.
    if shutil.which("chromium") is None:
        print("Chromium not found. Attempting to download...")
        # Note: Directly executing 'playwright install' in Python is not supported.
        # You would need to ensure the environment is pre-configured or use a different method to install the browser.

    owns_driver = playwright is None
    if owns_driver:
        playwright = await async_playwright().start()

    browser = None
    try:
        browser = await playwright.chromium.launch()
        page = await browser.new_page()
    except Exception:
        # Do not leak a half-initialized browser or a driver we started.
        if browser is not None:
            await browser.close()
        if owns_driver:
            await playwright.stop()
        raise
    return browser, page

async def _perform_action(page, action):
    """Carry out one non-empty textual action on *page*."""
    # The last whitespace-separated token is the action's argument
    # (URL, selector, or text to type).
    target = action.split()[-1]
    if "open website" in action:
        await page.goto(target)
    elif "click" in action:
        await page.click(target)
    elif "type" in action:
        # page.type() needs a selector as well as the text; with only the text
        # available, send it to whichever element currently has focus.
        await page.keyboard.type(target)
    elif "submit" in action:
        # page.press() likewise needs a selector; press Enter on the focused
        # element instead.
        await page.keyboard.press("Enter")
    else:
        print(f"Action not recognized: {action}")


# Gradio interface (now using the asynchronous function)
async def run_agent(input_text):
    """Generate actions for *input_text* and replay them in a browser.

    Args:
        input_text: The user's request; passed to generate_actions().

    Returns:
        A confirmation message that echoes *input_text*.

    Raises:
        Any Playwright error raised while launching the browser or executing
        an action. The browser is closed before the error propagates.
    """
    # Generate first so no browser is held open while the model runs.
    actions = generate_actions(input_text)

    async with async_playwright() as p:
        # Launch from this context's own Playwright instance: the page is only
        # usable while the instance that created it is still running.
        browser = await p.chromium.launch()
        try:
            page = await browser.new_page()
            for raw_action in actions:
                action = raw_action.strip()
                if not action:
                    # Blank lines in the model output are not actions.
                    continue
                await _perform_action(page, action)
        finally:
            await browser.close()

    return f"Successfully executed actions based on: {input_text}"

# Gradio UI: a single text box in, a single text box out, backed by run_agent.
request_box = gr.Textbox(label="Enter your request")
response_box = gr.Textbox(label="Response")

iface = gr.Interface(
    fn=run_agent,  # the coroutine function is handed over as-is
    inputs=request_box,
    outputs=response_box,
    title="Automated Agent",
    description="Enter a task or instruction for the agent to perform.",
)

# Start the app at import time with link sharing turned on.
iface.launch(share=True)