Data_Generation_LabelingCopy

Running

App Files Files

Wedyan2023 commited on May 14

Commit

ac47e7a

verified ·

1 Parent(s): d9f4633

Delete app100.py

Browse files

Files changed (1) hide show

app100.py +0 -1247

app100.py DELETED Viewed

@@ -1,1247 +0,0 @@
-import streamlit as st
-import pandas as pd
-import os
-import json
-import base64
-import random
-from streamlit_pdf_viewer import pdf_viewer
-from langchain.prompts import PromptTemplate
-from datetime import datetime
-from pathlib import Path
-from openai import OpenAI
-from dotenv import load_dotenv
-import warnings
-warnings.filterwarnings('ignore')
-os.getenv("OAUTH_CLIENT_ID")
-# Load environment variables and initialize the OpenAI client to use Hugging Face Inference API.
-load_dotenv()
-client = OpenAI(
-    base_url="https://api-inference.huggingface.co/v1",
-    api_key=os.environ.get('TOKEN2') # Hugging Face API token
-)
-# Create necessary directories
-for dir_name in ['data', 'feedback']:
-    if not os.path.exists(dir_name):
-        os.makedirs(dir_name)
-# Custom CSS
-st.markdown("""
-<style>
-    .stButton > button {
-        width: 100%;
-        margin-bottom: 10px;
-        background-color: #4CAF50;
-        color: white;
-        border: none;
-        padding: 10px;
-        border-radius: 5px;
-    }
-    .task-button {
-        background-color: #2196F3 !important;
-    }
-    .stSelectbox {
-        margin-bottom: 20px;
-    }
-    .output-container {
-        padding: 20px;
-        border-radius: 5px;
-        border: 1px solid #ddd;
-        margin: 10px 0;
-    }
-    .status-container {
-        padding: 10px;
-        border-radius: 5px;
-        margin: 10px 0;
-    }
-    .sidebar-info {
-        padding: 10px;
-        background-color: #f0f2f6;
-        border-radius: 5px;
-        margin: 10px 0;
-    }
-    .feedback-button {
-        background-color: #ff9800 !important;
-    }
-    .feedback-container {
-        padding: 15px;
-        background-color: #f5f5f5;
-        border-radius: 5px;
-        margin: 15px 0;
-    }
-</style>
-""", unsafe_allow_html=True)
-# Helper functions
-def read_csv_with_encoding(file):
-    encodings = ['utf-8', 'latin1', 'iso-8859-1', 'cp1252']
-    for encoding in encodings:
-        try:
-            return pd.read_csv(file, encoding=encoding)
-        except UnicodeDecodeError:
-            continue
-    raise UnicodeDecodeError("Failed to read file with any supported encoding")
-#def save_feedback(feedback_data):
-    #feedback_file = 'feedback/user_feedback.csv'
-    #feedback_df = pd.DataFrame([feedback_data])
-    #if os.path.exists(feedback_file):
-        #feedback_df.to_csv(feedback_file, mode='a', header=False, index=False)
-    #else:
-        #feedback_df.to_csv(feedback_file, index=False)
-def reset_conversation():
-    st.session_state.conversation = []
-    st.session_state.messages = []
-    if 'task_choice' in st.session_state:
-        del st.session_state.task_choice
-    return None
-    #new 24 March
-    #user_input = st.text_input("Enter your prompt:")
-###########33
-# Initialize session state variables
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-if "examples_to_classify" not in st.session_state:
-    st.session_state.examples_to_classify = []
-if "system_role" not in st.session_state:
-    st.session_state.system_role = ""
-# Main app title
-st.title("🤖🦙 Text Data Labeling and Generation App")
-# def embed_pdf_sidebar(pdf_path):
-#     with open(pdf_path, "rb") as f:
-#         base64_pdf = base64.b64encode(f.read()).decode('utf-8')
-#     pdf_display = f"""
-#         <iframe src="data:application/pdf;base64,{base64_pdf}"
-#         width="100%" height="400" type="application/pdf"></iframe>
-#     """
-#     st.markdown(pdf_display, unsafe_allow_html=True)
-#
-# Sidebar settings
-with st.sidebar:
-    st.title("⚙️ Settings")
-    # Add PDF upload section
-    #
-    # if st.button("📘 Show Instructions"):
-    #     # This should be a path to a local file
-    #     pdf_path = os.path.join("Streamlit.pdf")
-    #     pdf_viewer(
-    #         pdf_path,
-    #         width="100%",
-    #         height=300,
-    #         render_text=True
-       # )
-    # with st.sidebar:
-    #     with st.expander("📘 View Instructions"):
-    #         pdf_viewer("Streamlit.pdf", width="100%", height=300, render_text=True)
-    #
-    ###4
-    # with st.sidebar:
-    #     st.markdown("### 📘 Instructions")
-    #     st.markdown("[📄 Open Instructions PDF](/file/instructions.pdf)")
-#
-    ####2
-# #with st.sidebar:
-#     st.markdown("### 📘 Instructions")
-#     # PDF served from Space's file system
-#     pdf_url = "/file/instructions.pdf"
-#     st.markdown(f"""
-#         <a href="{pdf_url}" target="_blank">
-#             <button style='padding:10px;width:100%;font-size:16px;'>📄 Open Instructions PDF</button>
-#         </a>
-#     """, unsafe_allow_html=True)
-# ###3 working code
-# with st.sidebar:
-#     with open("instructions.pdf", "rb") as f:
-#         st.sidebar.download_button(
-#         label="📄 Download Instructions PDF",
-#         data=f,
-#         file_name="instructions.pdf",
-#         mime="application/pdf"
-#     )
-###6
-#this last code works
-with st.sidebar:
-    st.markdown("### 📘Data Generation and Labeling Instructions")
-    #st.markdown("<h4 style='color: #4A90E2;'>📘    Instructions</h4>", unsafe_allow_html=True)
-    with open("User instructions.pdf", "rb") as f:
-        st.download_button(
-            label="📄 Download Instructions PDF",
-            data=f,
-            #file_name="instructions.pdf",
-            file_name="User instructions.pdf",
-            mime="application/pdf"
-        )
-#works with blu color text
-# with st.sidebar:
-#     # Stylish "Instructions" label
-#     st.markdown("<h4 style='color: #4A90E2;'>📘 Instructions</h4>", unsafe_allow_html=True)
-#     # PDF download button
-#     with open("instructions.pdf", "rb") as f:
-#         st.download_button(
-#             label="📄 Download Instructions PDF",
-#             data=f,
-#             file_name="instructions.pdf",
-#             mime="application/pdf"
-#         )
-        ###5
-#with st.sidebar:
-    # st.markdown("### 📘 Instructions")
-    # # PDF served from Space's file system
-    # pdf_url = "/file/instructions.pdf"
-    # st.markdown(f"""
-    #     <a href="{pdf_url}" target="_blank">
-    #         <button style='padding:15px;width:100%;font-size:16px;'>  📄 Open Instructions PDF</button>
-    #     </a>
-    # """, unsafe_allow_html=True)
-    selected_model = st.selectbox(
-        "Select Model",
-        ["meta-llama/Llama-3.3-70B-Instruct", "meta-llama/Llama-3.2-3B-Instruct","meta-llama/Llama-4-Scout-17B-16E-Instruct", "meta-llama/Meta-Llama-3-8B-Instruct",
-         "meta-llama/Llama-3.1-70B-Instruct"],
-        key='model_select'
-    )
-    temperature = st.slider(
-        "Temperature",
-        0.0, 1.0, 0.7,
-        help="Controls randomness in generation"
-    )
-    st.button("🔄 New Conversation", on_click=reset_conversation)
-    # st.markdown("### 📘 Instructions")
-    # embed_pdf_sidebar("Streamlit.pdf")
-    #Add PDF Instructions
-    # with st.expander("📚 Instructions"):
-    #     st.write("View or download instruction guides:")
-    #     # Option 1: Using st.download_button for PDFs stored in your app
-    #     with open("file:///C:/Users/hp/Downloads/Streamlit.pdf", "rb") as file:
-    #         first_pdf = file.read()
-    #     st.download_button(
-    #         label="Download Guide 1",
-    #         data=first_pdf,
-    #         file_name="user_guide.pdf",
-    #         mime="application/pdf"
-    #     )
-    #     #with open("https://huggingface.co/spaces/Wedyan2023/COPY/blob/main/Streamlit.pdf", "rb") as file:
-    #     with open("file:///C:/Users/hp/Downloads/Streamlit.pdf", "rb") as file:
-    #         second_pdf = file.read()
-    #     st.download_button(
-    #         label="Download Guide 2",
-    #         data=second_pdf,
-    #         file_name="technical_guide.pdf",
-    #         mime="application/pdf"
-    #     )
-    with st.container():
-        st.markdown(f"""
-           <div class="sidebar-info">
-               <h4>Current Model: {selected_model}</h4>
-               <p><em>Note: Generated content may be inaccurate or false. Check important info.</em></p>
-           </div>
-            """, unsafe_allow_html=True)
-# with st.sidebar:
-#     st.markdown("### 📘 Instructions")
-#     if pdf_file := st.file_uploader("Upload Instruction PDF", type="pdf"):
-#         embed_pdf(pdf_file)
-    feedback_url = "https://docs.google.com/forms/d/e/1FAIpQLSdZ_5mwW-pjqXHgxR0xriyVeRhqdQKgb5c-foXlYAV55Rilsg/viewform?usp=header"
-    st.sidebar.markdown(
-        f'<a href="{feedback_url}" target="_blank"><button style="width: 100%;">Feedback Form</button></a>',
-        unsafe_allow_html=True
-    )
-# Display conversation
-for message in st.session_state.messages:
-    with st.chat_message(message["role"]):
-        st.markdown(message["content"])
-# Main content
-if 'task_choice' not in st.session_state:
-    col1, col2 = st.columns(2)
-    with col1:
-        if st.button("📝 Data Generation", key="gen_button", help="Generate new data"):
-            st.session_state.task_choice = "Data Generation"
-    with col2:
-        if st.button("🏷️ Data Labeling", key="label_button", help="Label existing data"):
-            st.session_state.task_choice = "Data Labeling"
-if "task_choice" in st.session_state:
-    if st.session_state.task_choice == "Data Generation":
-        st.header("📝 Data Generation")
-       # 1. Domain selection
-        domain_selection = st.selectbox("Domain", [
-            "Restaurant reviews", "E-Commerce reviews", "News", "AG News", "Tourism", "Custom"
-        ])
-        # 2. Handle custom domain input
-        custom_domain_valid = True  # Assume valid until proven otherwise
-        if domain_selection == "Custom":
-            domain = st.text_input("Specify custom domain")
-            if not domain.strip():
-                st.error("Please specify a domain name.")
-                custom_domain_valid = False
-        else:
-            domain = domain_selection
-        # Classification type selection
-        classification_type = st.selectbox(
-            "Classification Type",
-            ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
-        )
-#system role before
-####
-        # Labels setup based on classification type
-        #labels = []
-        labels = []
-        labels_valid = False
-        errors = []
-        def validate_binary_labels(labels):
-            errors = []
-            normalized = [label.strip().lower() for label in labels]
-            if not labels[0].strip():
-                errors.append("First class name is required.")
-            if not labels[1].strip():
-                errors.append("Second class name is required.")
-            if normalized[0] == normalized[1] and all(normalized):
-                errors.append("Class names must be different.")
-            return errors
-        if classification_type == "Sentiment Analysis":
-            st.write("### Sentiment Analysis Labels (Fixed)")
-            col1, col2, col3 = st.columns(3)
-            with col1:
-                st.text_input("First class", "Positive", disabled=True)
-            with col2:
-                st.text_input("Second class", "Negative", disabled=True)
-            with col3:
-                st.text_input("Third class", "Neutral", disabled=True)
-            labels = ["Positive", "Negative", "Neutral"]
-        elif classification_type == "Binary Classification":
-            st.write("### Binary Classification Labels")
-            col1, col2 = st.columns(2)
-            with col1:
-                label_1 = st.text_input("First class", "Positive")
-            with col2:
-                label_2 = st.text_input("Second class", "Negative")
-            labels = [label_1, label_2]
-            errors = validate_binary_labels(labels)
-            if errors:
-                st.error("\n".join(errors))
-            else:
-                st.success("Binary class names are valid and unique!")
-        # if classification_type == "Sentiment Analysis":
-        #     st.write("### Sentiment Analysis Labels (Fixed)")
-        #     col1, col2, col3 = st.columns(3)
-        #     with col1:
-        #         label_1 = st.text_input("First class", "Positive", disabled=True)
-        #     with col2:
-        #         label_2 = st.text_input("Second class", "Negative", disabled=True)
-        #     with col3:
-        #         label_3 = st.text_input("Third class", "Neutral", disabled=True)
-        #     labels = ["Positive", "Negative", "Neutral"]
-        # elif classification_type == "Binary Classification":
-        #     st.write("### Binary Classification Labels")
-        #     col1, col2 = st.columns(2)
-        #     with col1:
-        #         label_1 = st.text_input("First class", "Positive")
-        #     with col2:
-        #         label_2 = st.text_input("Second class", "Negative")
-        #     errors = []
-        #     labels = [label_1.strip(), label_2.strip()]
-        #     # Check for empty class names
-        #     if not labels[0]:
-        #         errors.append("First class name is required.")
-        #     if not labels[1]:
-        #         errors.append("Second class name is required.")
-        #     # Check for duplicates
-        #     if labels[0].lower() == labels[1].lower():
-        #         errors.append("Class names must be different.")
-        #     # Show errors or success
-        #     if errors:
-        #         for error in errors:
-        #             st.error(error)
-        #     else:
-        #         st.success("Binary class names are valid and unique!")
-        #########
-        elif classification_type == "Multi-Class Classification":
-            st.write("### Multi-Class Classification Labels")
-            default_labels_by_domain = {
-                "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
-                "AG News": ["World", "Sports", "Business", "Sci/Tech"],
-                "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
-                            "Food & Dining", "Local Experience", "Adventure Activities",
-                            "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
-                            "Luxury Tourism"],
-                "Restaurant reviews": ["Italian", "French", "American"],
-                "E-Commerce reviews": ["Mobile Phones & Accessories", "Laptops & Computers","Kitchen & Dining",
-                                       "Beauty & Personal Care", "Home & Furniture", "Clothing & Fashion",
-                                      "Shoes & Handbags", "Health & Wellness", "Electronics & Gadgets",
-                                       "Books & Stationery","Toys & Games", "Sports & Fitness",
-                                       "Grocery & Gourmet Food","Watches & Accessories", "Baby Products"]
-            }
-            num_classes = st.slider("Number of classes", 3, 15, 3)
-            # Get defaults for selected domain, or empty list
-            defaults = default_labels_by_domain.get(domain, [])
-            labels = []
-            errors = []
-            cols = st.columns(3)
-            for i in range(num_classes):
-                with cols[i % 3]:
-                    default_value = defaults[i] if i < len(defaults) else ""
-                    label_input = st.text_input(f"Class {i+1}", default_value)
-                    normalized_label = label_input.strip().title()
-                    if not normalized_label:
-                        errors.append(f"Class {i+1} name is required.")
-                    else:
-                        labels.append(normalized_label)
-            # Check for duplicates (case-insensitive)
-            if len(labels) != len(set(labels)):
-                errors.append("Labels names must be unique (case-insensitive, normalized to Title Case).")
-            # Show validation results
-            if errors:
-                for error in errors:
-                    st.error(error)
-            else:
-                st.success("All Labels names are valid and unique!")
-            labels_valid = not errors  # Will be True only if there are no label errors
-                    ##############
-        # Generation parameters
-        col1, col2 = st.columns(2)
-        with col1:
-            min_words = st.number_input("Min words", 1, 100, 20)
-        with col2:
-            max_words = st.number_input("Max words", min_words, 100, 50)
-        # Few-shot examples
-        use_few_shot = st.toggle("Use few-shot examples")
-        few_shot_examples = []
-        if use_few_shot:
-            num_examples = st.slider("Number of few-shot examples", 1, 10, 1)
-            for i in range(num_examples):
-                with st.expander(f"Example {i+1}"):
-                    content = st.text_area(f"Content", key=f"few_shot_content_{i}")
-                    label = st.selectbox(f"Label", labels, key=f"few_shot_label_{i}")
-                    if content and label:
-                        few_shot_examples.append({"content": content, "label": label})
-        num_to_generate = st.number_input("Number of examples", 1, 200, 10)
-        #sytem role after
-         # System role customization
-        default_system_role = f"You are a professional {classification_type} expert, your role is to generate text examples for {domain} domain. Always generate unique diverse examples and do not repeat the generated data. The generated text should be between {min_words} to {max_words} words long."
-        system_role = st.text_area("Modify System Role (optional)",
-                                value=default_system_role,
-                                key="system_role_input")
-        st.session_state['system_role'] = system_role if system_role else default_system_role
-        # Labels initialization
-        #labels = []
-        user_prompt = st.text_area("User Prompt (optional)")
-        # Updated prompt template including system role
-        prompt_template = PromptTemplate(
-            input_variables=["system_role", "classification_type", "domain", "num_examples",
-                           "min_words", "max_words", "labels", "user_prompt", "few_shot_examples"],
-            template=(
-                "{system_role}\n"
-                "- Use the following parameters:\n"
-                "- Generate {num_examples} examples\n"
-                "- Each example should be between {min_words} to {max_words} words long\n"
-                #"- Word range: {min_words} - {max_words} words\n "
-                "- Use these labels: {labels}.\n"
-                "- Generate the examples in this format: 'Example text. Label: label'\n"
-                "- Do not include word counts or any additional information\n"
-                "- Always use your creativity and intelligence to generate unique and diverse text data\n"
-                "- Write unique examples every time.\n"
-                "- DO NOT REPEAT your gnerated text. \n"
-                "- For each Output, describe it once and move to the next.\n"
-                "- List each Output only once, and avoid repeating details.\n"
-                "- Additional instructions: {user_prompt}\n\n"
-                "- Use the following examples as a reference in the generation process\n\n {few_shot_examples}. \n"
-                "- Think step by step, generate numbered examples, and check each newly generated example to ensure it has not been generated before. If it has, modify it"
-                #"- Think step by step, generate numbered examples and check every new generated example if it is generated before and change it."
-            )
-        )
-        # Generate system prompt
-        system_prompt = prompt_template.format(
-            system_role=st.session_state['system_role'],
-            classification_type=classification_type,
-            domain=domain,
-            num_examples=num_to_generate,
-            min_words=min_words,
-            max_words=max_words,
-            labels=", ".join(labels),
-            user_prompt=user_prompt,
-            few_shot_examples="\n".join([f"{ex['content']}\nLabel: {ex['label']}" for ex in few_shot_examples]) if few_shot_examples else ""
-        )
-        # Store system prompt in session state
-        st.session_state['system_prompt'] = system_prompt
-        # Display system prompt
-        st.write("System Prompt:")
-        st.text_area("Current System Prompt", value=st.session_state['system_prompt'],
-                    height=400, disabled=True)
-        if st.button("🎯 Generate Examples"):
-            #
-            errors = []
-            if domain_selection == "Custom" and not domain.strip():
-                st.warning("Custom domain name is required.")
-            elif len(labels) != len(set(labels)):
-                st.warning("Class names must be unique.")
-            elif any(not lbl.strip() for lbl in labels):
-                st.warning("All class labels must be filled in.")
-            #else:
-                #st.success("Generating examples for domain: {domain}")
-            #if not custom_domain_valid:
-                #st.warning("Custom domain name is required.")
-            #elif not labels_valid:
-                #st.warning("Please fix the label errors before generating examples.")
-            #else:
-                # Proceed to generate examples
-                #st.success(f"Generating examples for domain: {domain}")
-            with st.spinner("Generating examples..."):
-                try:
-                    stream = client.chat.completions.create(
-                        model=selected_model,
-                        messages=[{"role": "system", "content": st.session_state['system_prompt']}],
-                        temperature=temperature,
-                        stream=True,
-                        max_tokens=80000,
-                        top_p=0.9,
-                       # repetition_penalty=1.2,
-                        #frequency_penalty=0.5,      # Discourages frequent words
-                        #presence_penalty=0.6,
-                    )
- #st.session_state['system_prompt'] = system_prompt
-                    #new 24 march
-                    st.session_state.messages.append({"role": "user", "content": system_prompt})
-                 # # ####################
-                    response = st.write_stream(stream)
-                    st.session_state.messages.append({"role": "assistant", "content": response})
-                        # Initialize session state variables if they don't exist
-                    if 'system_prompt' not in st.session_state:
-                        st.session_state.system_prompt = system_prompt
-                    if 'response' not in st.session_state:
-                        st.session_state.response = response
-                    if 'generated_examples' not in st.session_state:
-                        st.session_state.generated_examples = []
-                    if 'generated_examples_csv' not in st.session_state:
-                        st.session_state.generated_examples_csv = None
-                    if 'generated_examples_json' not in st.session_state:
-                        st.session_state.generated_examples_json = None
-                    # Parse response and generate examples list
-                    examples_list = []
-                    for line in response.split('\n'):
-                        if line.strip():
-                            parts = line.rsplit('Label:', 1)
-                            if len(parts) == 2:
-                                text = parts[0].strip()
-                                label = parts[1].strip()
-                                if text and label:
-                                    examples_list.append({
-                                        'text': text,
-                                        'label': label,
-                                        'system_prompt': st.session_state.system_prompt,
-                                        'system_role': st.session_state.system_role,
-                                        'task_type': 'Data Generation',
-                                        'Use few-shot example?': 'Yes' if use_few_shot else 'No',
-                                    })
-                    if examples_list:
-                        # Update session state with new data
-                        st.session_state.generated_examples = examples_list
-                        # Generate CSV and JSON data
-                        df = pd.DataFrame(examples_list)
-                        st.session_state.generated_examples_csv = df.to_csv(index=False).encode('utf-8')
-                        st.session_state.generated_examples_json = json.dumps(examples_list, indent=2).encode('utf-8')
-                       # Vertical layout with centered "or" between buttons
-                        st.download_button(
-                            "📥 Download Generated Examples (CSV)",
-                            st.session_state.generated_examples_csv,
-                            "generated_examples.csv",
-                            "text/csv",
-                            key='download-csv-persistent'
-                        )
-                        # Add space and center the "or"
-                        st.markdown("""
-                        <div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . .         or</div>
-                        """, unsafe_allow_html=True)
-                        st.download_button(
-                            "📥 Download Generated Examples (JSON)",
-                            st.session_state.generated_examples_json,
-                            "generated_examples.json",
-                            "application/json",
-                            key='download-json-persistent'
-                        )
-                        # # Display the labeled examples
-                        # st.markdown("##### 📋 Labeled Examples Preview")
-                        # st.dataframe(df, use_container_width=True)
-                    if st.button("Continue"):
-                        if follow_up == "Generate more examples":
-                            st.experimental_rerun()
-                        elif follow_up == "Data Labeling":
-                            st.session_state.task_choice = "Data Labeling"
-                            st.experimental_rerun()
-                except Exception as e:
-                    st.error("An error occurred during generation.")
-                    st.error(f"Details: {e}")
-# Lableing Process
-    elif st.session_state.task_choice == "Data Labeling":
-        st.header("🏷️ Data Labeling")
-        #new new new
-        # 1. Domain selection
-        # 1. Domain selection
-        domain_selection = st.selectbox("Domain", ["Restaurant reviews", "E-Commerce reviews", "News", "AG News", "Tourism", "Custom"])
-            # 2. Handle custom domain input
-        custom_domain_valid = True  # Assume valid until proven otherwise
-        if domain_selection == "Custom":
-            domain = st.text_input("Specify custom domain")
-            if not domain.strip():
-                st.error("Please specify a domain name.")
-                custom_domain_valid = False
-        else:
-            domain = domain_selection
-        # # Classification type selection
-        # classification_type = st.selectbox(
-        #     "Classification Type",
-        #     ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"]
-        # )
-    #NNew edit
-        # classification_type = st.selectbox(
-        #     "Classification Type",
-        #     #["Sentiment Analysis", "Binary Classification", "Multi-Class Classification", "Named Entity Recognition (NER)"],
-        #     ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification"],
-        #     key="label_class_type"
-        # )
-        # Classification type selection
-        classification_type = st.selectbox(
-            "Classification Type",
-            ["Sentiment Analysis", "Binary Classification", "Multi-Class Classification", "Named Entity Recognition (NER)"]
-        )
-#NNew edit
-        # Labels setup based on classification type
-        labels = []
-        labels_valid = False
-        errors = []
-        if classification_type == "Sentiment Analysis":
-            st.write("### Sentiment Analysis Labels (Fixed)")
-            col1, col2, col3 = st.columns(3)
-            with col1:
-                label_1 = st.text_input("First class", "Positive", disabled=True)
-            with col2:
-                label_2 = st.text_input("Second class", "Negative", disabled=True)
-            with col3:
-                label_3 = st.text_input("Third class", "Neutral", disabled=True)
-            labels = ["Positive", "Negative", "Neutral"]
-        elif classification_type == "Binary Classification":
-            st.write("### Binary Classification Labels")
-            col1, col2 = st.columns(2)
-            with col1:
-                label_1 = st.text_input("First class", "Positive")
-            with col2:
-                label_2 = st.text_input("Second class", "Negative")
-            errors = []
-            labels = [label_1.strip(), label_2.strip()]
-            # Strip and lower-case labels for validation
-            label_1 = labels[0].strip()
-            label_2 = labels[1].strip()
-            # Check for empty class names
-            if not label_1:
-                errors.append("First class name is required.")
-            if not label_2:
-                errors.append("Second class name is required.")
-            # Check for duplicates (case insensitive)
-            if label_1.lower() == label_2.lower() and label_1 and label_2:
-                errors.append("Class names must be different.")
-            # Show errors or success
-            if errors:
-                for error in errors:
-                    st.error(error)
-            else:
-                st.success("Binary class names are valid and unique!")
-        elif classification_type == "Multi-Class Classification":
-                st.write("### Multi-Class Classification Labels")
-                default_labels_by_domain = {
-                    "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
-                    "AG News": ["World", "Sports", "Business", "Sci/Tech"],
-                    "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
-                                "Food & Dining", "Local Experience", "Adventure Activities",
-                                "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
-                                "Luxury Tourism"],
-                    "Restaurant reviews": ["Italian", "French", "American"],
-                    "E-Commerce reviews": ["Mobile Phones & Accessories", "Laptops & Computers","Kitchen & Dining",
-                                       "Beauty & Personal Care", "Home & Furniture", "Clothing & Fashion",
-                                      "Shoes & Handbags", "Health & Wellness", "Electronics & Gadgets",
-                                       "Books & Stationery","Toys & Games", "Sports & Fitness",
-                                       "Grocery & Gourmet Food","Watches & Accessories", "Baby Products"]
-            }
-                # Ask user how many classes they want to define
-                num_classes = st.slider("Select the number of classes (labels)", min_value=3, max_value=10, value=3)
-                # Use default labels based on selected domain, if available
-                defaults = default_labels_by_domain.get(domain, [])
-                labels = []
-                errors = []
-                cols = st.columns(3)  # For nicely arranged label inputs
-                for i in range(num_classes):
-                    with cols[i % 3]:  # Distribute inputs across columns
-                        default_value = defaults[i] if i < len(defaults) else ""
-                        label_input = st.text_input(f"Label {i + 1}", default_value)
-                        normalized_label = label_input.strip().title()
-                        if not normalized_label:
-                            errors.append(f"Label {i + 1} is required.")
-                        else:
-                            labels.append(normalized_label)
-                # Check for duplicates (case-insensitive)
-                normalized_set = {label.lower() for label in labels}
-                if len(labels) != len(normalized_set):
-                    errors.append("Label names must be unique (case-insensitive).")
-                # Show validation results
-                if errors:
-                    for error in errors:
-                        st.error(error)
-                else:
-                    st.success("All label names are valid and unique!")
-                labels_valid = not errors  # True if no validation errors
-        elif classification_type == "Named Entity Recognition (NER)":
-            # NER entity options
-            ner_entities = [
-                "PERSON - Names of people, fictional characters, historical figures",
-                "ORG - Companies, institutions, agencies, teams",
-                "LOC - Physical locations (mountains, oceans, etc.)",
-                "GPE - Countries, cities, states, political regions",
-                "DATE - Calendar dates, years, centuries",
-                "TIME - Times, durations",
-                "MONEY - Monetary values with currency"
-            ]
-            selected_entities = st.multiselect(
-                 "Select entities to recognize",
-                ner_entities,
-                default=["PERSON - Names of people, fictional characters, historical figures",
-                         "ORG - Companies, institutions, agencies, teams",
-                        "LOC - Physical locations (mountains, oceans, etc.)",
-                        "GPE - Countries, cities, states, political regions",
-                        "DATE - Calendar dates, years, centuries",
-                        "TIME - Times, durations",
-                        "MONEY - Monetary values with currency"],
-                        key="ner_entity_selection"
-            )
-            # Extract just the entity type (before the dash)
-            labels = [entity.split(" - ")[0] for entity in selected_entities]
-            if not labels:
-                st.warning("Please select at least one entity type")
-                labels = ["PERSON"]  # Default if nothing selected
-    #NNew edit
-            # elif classification_type == "Multi-Class Classification":
-            #     st.write("### Multi-Class Classification Labels")
-            #     default_labels_by_domain = {
-            #         "News": ["Political", "Sports", "Entertainment", "Technology", "Business"],
-            #         "AG News": ["World", "Sports", "Business", "Sci/Tech"],
-            #         "Tourism": ["Accommodation", "Transportation", "Tourist Attractions",
-            #                     "Food & Dining", "Local Experience", "Adventure Activities",
-            #                     "Wellness & Spa", "Eco-Friendly Practices", "Family-Friendly",
-            #                     "Luxury Tourism"],
-            #         "Restaurant reviews": ["Italian", "French", "American"]
-            #     }
-            #     num_classes = st.slider("Number of classes", 3, 10, 3)
-            #     # Get defaults for selected domain, or empty list
-            #     defaults = default_labels_by_domain.get(domain, [])
-            #     labels = []
-            #     errors = []
-            #     cols = st.columns(3)
-            #     for i in range(num_classes):
-            #         with cols[i % 3]:
-            #             default_value = defaults[i] if i < len(defaults) else ""
-            #             label_input = st.text_input(f"Class {i+1}", default_value)
-            #             normalized_label = label_input.strip().title()
-            #             if not normalized_label:
-            #                 errors.append(f"Class {i+1} name is required.")
-            #             else:
-            #                 labels.append(normalized_label)
-            #     # Check for duplicates (case-insensitive)
-            #     if len(labels) != len(set(labels)):
-                #     errors.append("Labels names must be unique (case-insensitive, normalized to Title Case).")
-                # # Show validation results
-                # if errors:
-                #     for error in errors:
-                #         st.error(error)
-                # else:
-                #     st.success("All Labels names are valid and unique!")
-                # labels_valid = not errors  # Will be True only if there are no label errors
-        # else:
-        #     num_classes = st.slider("Number of classes", 3, 23, 3, key="label_num_classes")
-        #     labels = []
-        #     cols = st.columns(3)
-        #     for i in range(num_classes):
-        #         with cols[i % 3]:
-        #             label = st.text_input(f"Class {i+1}", f"Class_{i+1}", key=f"label_class_{i}")
-        #             labels.append(label)
-        use_few_shot = st.toggle("Use few-shot examples for labeling")
-        few_shot_examples = []
-        if use_few_shot:
-            num_few_shot = st.slider("Number of few-shot examples", 1, 10, 1)
-            for i in range(num_few_shot):
-                with st.expander(f"Few-shot Example {i+1}"):
-                    content = st.text_area(f"Content", key=f"label_few_shot_content_{i}")
-                    label = st.selectbox(f"Label", labels, key=f"label_few_shot_label_{i}")
-                    if content and label:
-                        few_shot_examples.append(f"{content}\nLabel: {label}")
-        num_examples = st.number_input("Number of examples to classify", 1, 100, 1)
-        examples_to_classify = []
-        if num_examples <= 20:
-            for i in range(num_examples):
-                example = st.text_area(f"Example {i+1}", key=f"example_{i}")
-                if example:
-                    examples_to_classify.append(example)
-        else:
-            examples_text = st.text_area(
-                "Enter examples (one per line)",
-                height=300,
-                help="Enter each example on a new line"
-            )
-            if examples_text:
-                examples_to_classify = [ex.strip() for ex in examples_text.split('\n') if ex.strip()]
-                if len(examples_to_classify) > num_examples:
-                    examples_to_classify = examples_to_classify[:num_examples]
-        #New Wedyan
-        default_system_role = f"You are a professional {classification_type} expert, your role is to classify the provided text examples for {domain} domain."
-        system_role = st.text_area("Modify System Role (optional)",
-                                value=default_system_role,
-                                key="system_role_input")
-        st.session_state['system_role'] = system_role if system_role else default_system_role
-        # Labels initialization
-        #labels = []
-        ####
-        user_prompt = st.text_area("User prompt (optional)", key="label_instructions")
-        few_shot_text = "\n\n".join(few_shot_examples) if few_shot_examples else ""
-        examples_text = "\n".join([f"{i+1}. {ex}" for i, ex in enumerate(examples_to_classify)])
-        # Customize prompt template based on classification type
-        if classification_type == "Named Entity Recognition (NER)":
-            label_prompt_template = PromptTemplate(
-                input_variables=["system_role", "labels", "few_shot_examples", "examples", "domain", "user_prompt"],
-                template=(
-                    "{system_role}\n"
-                    #"- You are a professional Named Entity Recognition (NER) expert in {domain} domain. Your role is to identify and extract the following entity types: {labels}.\n"
-                    "- For each text example provided, identify all entities of the requested types.\n"
-                    "- Use the following entities: {labels}.\n"
-                    "- Return each example followed by the entities you found in this format: 'Example text.\n Entities: [ENTITY_TYPE: entity text\n, ENTITY_TYPE: entity text\n, ...] or [No entities found]'\n"
-                    "- If no entities of the requested types are found, indicate 'No entities found' in this text.\n"
-                    "- Be precise about entity boundaries - don't include unnecessary words.\n"
-                    "- Do not provide any additional information or explanations.\n"
-                    "- Additional instructions:\n {user_prompt}\n\n"
-                    "- Use user few-shot examples as guidance if provided:\n{few_shot_examples}\n\n"
-                    "- Examples to analyze:\n{examples}\n\n"
-                    "Output:\n"
-                )
-            )
-        else:
-                label_prompt_template = PromptTemplate(
-                input_variables=["system_role", "classification_type", "labels", "few_shot_examples", "examples","domain", "user_prompt"],
-                template=(
-                    #"- Let'\s think step by step:"
-                    "{system_role}\n"
-                   # "- You are a professional {classification_type} expert in {domain} domain. Your role is to classify the following examples using these labels: {labels}.\n"
-                    "- Use the following instructions:\n"
-                    "- Use the following labels: {labels}.\n"
-                    "- Return the classified text followed by the label in this format: 'text. Label: [label]'\n"
-                    "- Do not provide any additional information or explanations\n"
-                    "- User prompt:\n {user_prompt}\n\n"
-                    "- Use user provided examples as guidence in the classification process:\n\n {few_shot_examples}\n"
-                    "- Examples to classify:\n{examples}\n\n"
-                    "- Think step by step then classify the examples"
-                    #"Output:\n"
-                ))
-        # Check if few_shot_examples is already a formatted string
-           # Check if few_shot_examples is already a formatted string
-        if isinstance(few_shot_examples, str):
-            formatted_few_shot = few_shot_examples
-        # If it's a list of already formatted strings
-        elif isinstance(few_shot_examples, list) and all(isinstance(ex, str) for ex in few_shot_examples):
-            formatted_few_shot = "\n".join(few_shot_examples)
-        # If it's a list of dictionaries with 'content' and 'label' keys
-        elif isinstance(few_shot_examples, list) and all(isinstance(ex, dict) and 'content' in ex and 'label' in ex for ex in few_shot_examples):
-            formatted_few_shot = "\n".join([f"{ex['content']}\nLabel: {ex['label']}" for ex in few_shot_examples])
-        else:
-            formatted_few_shot = ""
-        system_prompt = label_prompt_template.format(
-            system_role=st.session_state['system_role'],
-            classification_type=classification_type,
-            domain=domain,
-            examples="\n".join(examples_to_classify),
-            labels=", ".join(labels),
-            user_prompt=user_prompt,
-            few_shot_examples=formatted_few_shot
-        )
-        # Step 2: Store the system_prompt in st.session_state
-        st.session_state['system_prompt'] = system_prompt
-#::contentReference[oaicite:0]{index=0}
-        st.write("System Prompt:")
-        #st.code(system_prompt)
-        #st.code(st.session_state['system_prompt'])
-        st.text_area("System Prompt", value=st.session_state['system_prompt'], height=300, max_chars=None, key=None, help=None, disabled=True)
-        if st.button("🏷️ Label Data"):
-            if examples_to_classify:
-                with st.spinner("Labeling data..."):
-                    # Generate the system prompt based on classification type
-                    if classification_type == "Named Entity Recognition (NER)":
-                        system_prompt = label_prompt_template.format(
-                            system_role=st.session_state['system_role'],
-                            labels=", ".join(labels),
-                            domain = domain,
-                            few_shot_examples=few_shot_text,
-                            examples=examples_text,
-                            user_prompt=user_prompt
-                        )
-                    else:
-                        system_prompt = label_prompt_template.format(
-                            classification_type=classification_type,
-                            system_role=st.session_state['system_role'],
-                            domain = domain,
-                            labels=", ".join(labels),
-                            few_shot_examples=few_shot_text,
-                            examples=examples_text,
-                            user_prompt=user_prompt
-                        )
-                    try:
-                        stream = client.chat.completions.create(
-                            model=selected_model,
-                            messages=[{"role": "system", "content": system_prompt}],
-                            temperature=temperature,
-                            stream=True,
-                            max_tokens=20000,
-                            top_p = 0.9,
-                        )
-                        #new 24 March
-                        # Append user message
-                        st.session_state.messages.append({"role": "user", "content": system_prompt})
-                        #################
-                        response = st.write_stream(stream)
-                        st.session_state.messages.append({"role": "assistant", "content": response})
-                         # Display the labeled examples
-                        #    # Optional: If you want to add it as a chat-style message log
-                        # preview_str = st.session_state.labeled_preview.to_markdown(index=False)
-                        # st.session_state.messages.append({"role": "assistant", "content": f"Here is a preview of the labeled examples:\n\n{preview_str}"})
-                        # # Stream response and append assistant message
-                        # #14/4/2024
-                        # response = st.write_stream(stream)
-                        # st.session_state.messages.append({"role": "assistant", "content": response})
-                          # Initialize session state variables if they don't exist
-                        if 'system_prompt' not in st.session_state:
-                            st.session_state.system_prompt = system_prompt
-                        if 'response' not in st.session_state:
-                            st.session_state.response = response
-                        if 'generated_examples' not in st.session_state:
-                            st.session_state.generated_examples = []
-                        if 'generated_examples_csv' not in st.session_state:
-                            st.session_state.generated_examples_csv = None
-                        if 'generated_examples_json' not in st.session_state:
-                            st.session_state.generated_examples_json = None
-                        # Save labeled examples to CSV
-                        #new 14/4/2025
-                        labeled_examples = []
-                        if classification_type == "Named Entity Recognition (NER)":
-                            labeled_examples = []
-                            for line in response.split('\n'):
-                                if line.strip():
-                                    parts = line.rsplit('Entities:', 1)
-                                    if len(parts) == 2:
-                                        text = parts[0].strip()
-                                        entities = parts[1].strip()
-                                        if text and entities:
-                                            labeled_examples.append({
-                                                'text': text,
-                                                'entities': entities,
-                                                'system_prompt': st.session_state.system_prompt,
-                                                'system_role': st.session_state.system_role,
-                                                'task_type': 'Named Entity Recognition (NER)',
-                                                'Use few-shot example?': 'Yes' if use_few_shot else 'No',
-                                            })
-                        else:
-                            labeled_examples = []
-                            for line in response.split('\n'):
-                                if line.strip():
-                                    parts = line.rsplit('Label:', 1)
-                                    if len(parts) == 2:
-                                        text = parts[0].strip()
-                                        label = parts[1].strip()
-                                        if text and label:
-                                            labeled_examples.append({
-                                                'text': text,
-                                                'label': label,
-                                                'system_prompt': st.session_state.system_prompt,
-                                                'system_role': st.session_state.system_role,
-                                                'task_type': 'Data Labeling',
-                                                'Use few-shot example?': 'Yes' if use_few_shot else 'No',
-                                            })
-                       # Save and provide download options
-                        if labeled_examples:
-                            # Update session state
-                            st.session_state.labeled_examples = labeled_examples
-                            # Convert to CSV and JSON
-                            df = pd.DataFrame(labeled_examples)
-                            st.session_state.labeled_examples_csv = df.to_csv(index=False).encode('utf-8')
-                            st.session_state.labeled_examples_json = json.dumps(labeled_examples, indent=2).encode('utf-8')
-                            # Download buttons
-                            st.download_button(
-                                "📥 Download Labeled Examples (CSV)",
-                                st.session_state.labeled_examples_csv,
-                                "labeled_examples.csv",
-                                "text/csv",
-                                key='download-labeled-csv'
-                            )
-                            st.markdown("""
-                            <div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . .         or</div>
-                            """, unsafe_allow_html=True)
-                            st.download_button(
-                                "📥 Download Labeled Examples (JSON)",
-                                st.session_state.labeled_examples_json,
-                                "labeled_examples.json",
-                                "application/json",
-                                key='download-labeled-json'
-                            )
-                            # Display the labeled examples
-                            st.markdown("##### 📋 Labeled Examples Preview")
-                            st.dataframe(df, use_container_width=True)
-                            # Display section
-                            #st.markdown("### 📋 Labeled Examples Preview")
-                            #st.dataframe(st.session_state.labeled_preview, use_container_width=True)
-                        # if labeled_examples:
-                        #     df = pd.DataFrame(labeled_examples)
-                        #     csv = df.to_csv(index=False).encode('utf-8')
-                        #     st.download_button(
-                        #         "📥 Download Labeled Examples",
-                        #         csv,
-                        #         "labeled_examples.csv",
-                        #         "text/csv",
-                        #         key='download-labeled-csv'
-                        #     )
-                        # # Add space and center the "or"
-                        # st.markdown("""
-                        # <div style='text-align: left; margin:15px 0; font-weight: 600; color: #666;'>. . . . . .         or</div>
-                        # """, unsafe_allow_html=True)
-                        # if labeled_examples:
-                        #     df = pd.DataFrame(labeled_examples)
-                        #     csv = df.to_csv(index=False).encode('utf-8')
-                        #     st.download_button(
-                        #         "📥 Download Labeled Examples",
-                        #         csv,
-                        #         "labeled_examples.json",
-                        #         "text/json",
-                        #         key='download-labeled-JSON'
-                        #     )
-                        # Add follow-up interaction options
-                        #st.markdown("---")
-                        #follow_up = st.radio(
-                            #"What would you like to do next?",
-                             #["Label more data", "Data Generation"],
-                           # key="labeling_follow_up"
-                      #  )
-                        if st.button("Continue"):
-                            if follow_up == "Label more data":
-                                st.session_state.examples_to_classify = []
-                                st.experimental_rerun()
-                            elif follow_up == "Data Generation":
-                                st.session_state.task_choice = "Data Labeling"
-                                st.experimental_rerun()
-                    except Exception as e:
-                        st.error("An error occurred during labeling.")
-                        st.error(f"Details: {e}")
-            else:
-                st.warning("Please enter at least one example to classify.")
-    #st.session_state.messages.append({"role": "assistant", "content": response})
-# Footer
-st.markdown("---")
-st.markdown(
-    """
-    <div style='text-align: center'>
-        <p>Made with ❤️ by Wedyan AlSakran 2025</p>
-    </div>
-    """,
-    unsafe_allow_html=True
-)