File size: 4,755 Bytes
5a359a3
 
 
 
 
 
 
 
 
 
0ee26ac
5a359a3
 
0ee26ac
5a359a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# -*- coding: utf-8 -*-
"""Skill Transformation Journey.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/11XAXUP2fzy553V9v0x-gxJXcXL3uHJcw
"""

# Commented out IPython magic to ensure Python compatibility.
%pip install openai

# Commented out IPython magic to ensure Python compatibility.
%pip install gradio

import os
import re

import gradio as gr
import openai
from openai import OpenAI
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# OpenAI client used by recommend_courses().
# SECURITY FIX: the API key was hard-coded here. A key committed to source
# control is public and must be rotated immediately; read it from the
# environment instead (OpenAI() would do this by default, but we keep the
# explicit call so the configuration point stays visible).
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

# Path to the course-catalogue workbook and the sheet holding the catalogue.
# NOTE(review): this path only exists inside Colab with Google Drive mounted —
# running the file anywhere else raises FileNotFoundError here.
excel_file_path = "/content/drive/MyDrive/Skill Transformation Journey/AI Integrations in SWAYAM_V1.0.xlsx"
sheet_name = "Shortlisted Courses"  # Replace with the actual sheet name

# Read the Excel file into a Pandas DataFrame; recommend_courses() and the
# TF-IDF fit below both use courses_df (expects a 'Course Name' column, and
# 'Course Link' for the recommendation summary).
courses_df = pd.read_excel(excel_file_path, sheet_name=sheet_name)

# Function to recommend courses based on user input using GPT and TF-IDF
def recommend_courses(user_skill, ed_qual):
    """Recommend courses for a user, combining TF-IDF retrieval with GPT.

    The user's skill and qualification are matched against the course
    catalogue with cosine similarity over the module-level TF-IDF index
    (``tfidf_vectorizer`` / ``tfidf_matrix``); the five closest courses are
    then handed to the chat model for refinement, and the model's answer is
    rendered to an HTML table via ``html_coversion``.

    Args:
        user_skill: Free-text description of the skill the user wants.
        ed_qual: The user's highest educational qualification.

    Returns:
        An HTML string containing the refined recommendations table.
    """
    # Single query string fed to both the TF-IDF index and the GPT prompt.
    query_text = f"{user_skill} {ed_qual}"

    # Score every catalogue entry against the query.
    query_vector = tfidf_vectorizer.transform([query_text])
    similarity_scores = linear_kernel(query_vector, tfidf_matrix)

    scored_courses = courses_df.copy()
    scored_courses['Similarity'] = similarity_scores[0]

    # Keep only the five most similar courses as the candidate shortlist.
    shortlist = scored_courses.sort_values(by='Similarity', ascending=False).head(5)

    # Plain-text rendering of the shortlist for the model to work from.
    shortlist_text = shortlist[['Course Name', 'Course Link']].to_string(index=False)

    # Assemble the refinement prompt: query first, then the candidates.
    gpt_prompt = (
        "Refine the following course recommendations based on user input:\n\n"
        f"{query_text}\n\n"
        "Initial Recommendations:\n\n"
        f"{shortlist_text}"
    )

    # Ask the chat model to refine the shortlist.
    gpt_response = client.chat.completions.create(
        messages=[{"role": "system", "content": gpt_prompt}],
        model="gpt-3.5-turbo",
        max_tokens=1000,
    )

    # The model is expected to answer in the markdown-ish list format that
    # html_coversion() parses into an HTML table.
    refined_text = gpt_response.choices[0].message.content
    return html_coversion(refined_text)

# Build the TF-IDF index over course names once at module load;
# recommend_courses() reads tfidf_vectorizer and tfidf_matrix as globals.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(courses_df['Course Name'].fillna(''))

# Ad-hoc smoke test left over from the notebook.
user_skill = "psychology"

ed_qual = "B.Tech/B.Sc"

# NOTE(review): this call runs before html_coversion() is defined (below), so
# executing this file top-to-bottom raises NameError inside recommend_courses.
# It only worked in the notebook because cells were executed out of order —
# move this below the function definitions or delete it.
html = recommend_courses(user_skill, ed_qual)

# Bare expression: displays the HTML in a notebook cell; no effect as a script.
html

def html_coversion(gpt_content):
    """Convert GPT's markdown-style course list into an HTML table.

    Expects *gpt_content* to contain numbered entries of the form::

        1. Course Name:
           - Course Link: [link text](url)
           - Description: one-line description

    Args:
        gpt_content: The raw text returned by the chat model.

    Returns:
        The parsed recommendations rendered with ``DataFrame.to_html``
        (``escape=False`` so any markup in the cells survives).
    """
    # Each match yields (number, name, link text, link URL, description).
    pattern = r'(\d+)\. (.*?):\n\s*- Course Link: \[([^\]]+)\]\(([^)]+)\)\n\s*- Description: ([^\n]+)'
    courses = re.findall(pattern, gpt_content)

    # BUG FIX: the old code searched each tuple for a *second occurrence* of
    # the bracketed link text with tuple.index(link, first_index + 1), which
    # raised ValueError whenever the markdown text differed from the URL.
    # Dropping the duplicate URL field by position is equivalent when the two
    # match and robust when they do not.
    processed_courses = [
        (number, name, link_text, description)
        for number, name, link_text, _url, description in courses
    ]

    df = pd.DataFrame(
        processed_courses,
        columns=['Sr No', 'Course Name', 'Course Link', 'Description'],
    )

    # escape=False keeps links/markup in the cells intact.
    return df.to_html(index=False, escape=False)

# Gradio Interface with dynamically generated dropdown options
# Gradio UI wired to recommend_courses(skill, qualification) -> HTML.
iface = gr.Interface(
    fn=recommend_courses,
    inputs=[
        # BUG FIX: the first positional argument of gr.Textbox is the initial
        # *value*, so gr.Textbox("text", ...) pre-filled the box with the
        # literal string "text" (a leftover from the old type-string API).
        # Pass only the label.
        gr.Textbox(label="Enter expected skill"),
        gr.Dropdown(["B.Tech/B.Sc", "M.Tech/M.Sc", "Management"], label="Highest Educational Qualification"),
    ],
    outputs="html",
    live=True,  # re-run on every input change rather than on a submit button
)

# Launch the interface; share=True publishes a temporary public URL.
iface.launch(share=True)