# -*- coding: utf-8 -*-
"""Skill Transformation Journey.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/11XAXUP2fzy553V9v0x-gxJXcXL3uHJcw
"""
# Commented out IPython magic to ensure Python compatibility.
# %pip install openai
# Commented out IPython magic to ensure Python compatibility.
# %pip install gradio
import os
import re

import gradio as gr
import pandas as pd
from openai import OpenAI
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
# The API key is read from the OPENAI_API_KEY environment variable rather than
# being hard-coded in the source.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)
# Specify the sheet name in the Excel file
excel_file_path = "/content/drive/MyDrive/Skill Transformation Journey/AI Integrations in SWAYAM_V1.0.xlsx"
sheet_name = "Shortlisted Courses" # Replace with the actual sheet name
# Read the Excel file into a Pandas DataFrame
courses_df = pd.read_excel(excel_file_path, sheet_name=sheet_name)
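# Optional sanity check (an added sketch, not part of the original flow): the code
# below assumes the sheet provides at least these columns; the names are taken
# from how the DataFrame is used further down.
_required_columns = {"Course Name", "Course Link"}
_missing_columns = _required_columns - set(courses_df.columns)
if _missing_columns:
    print(f"Warning: expected columns missing from the sheet: {_missing_columns}")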
# Function to recommend courses based on user input using TF-IDF and GPT
def recommend_courses(user_skill, ed_qual):
    # Combine the user's input into a single string for TF-IDF
    user_input = f"{user_skill} {ed_qual}"

    # Use TF-IDF and cosine similarity for the initial course recommendations
    user_vector = tfidf_vectorizer.transform([user_input])
    cosine_similarities = linear_kernel(user_vector, tfidf_matrix)

    # Attach similarity scores to the course catalogue
    recommendations = courses_df.copy()
    recommendations['Similarity'] = cosine_similarities[0]

    # Sort by similarity and keep the top recommendations
    top_recommendations = recommendations.sort_values(by='Similarity', ascending=False).head(5)

    # Generate a text summary of the initial recommendations
    initial_recommendations_text = top_recommendations[['Course Name', 'Course Link']].to_string(index=False)

    # Combine the user input and initial recommendations into a GPT prompt
    gpt_prompt = f"Refine the following course recommendations based on user input:\n\n"\
                 f"{user_input}\n\n"\
                 f"Initial Recommendations:\n\n"\
                 f"{initial_recommendations_text}"

    # Use GPT to generate refined recommendations
    gpt_response = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": gpt_prompt,
            }
        ],
        model="gpt-3.5-turbo",
        max_tokens=1000
    )
    gpt_content = gpt_response.choices[0].message.content

    # Convert the GPT output (expected as a numbered Markdown list) into an HTML table
    final_recommendations_html = html_conversion(gpt_content)

    return final_recommendations_html
# Create a TF-IDF vectorizer
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(courses_df['Course Name'].fillna(''))
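# Illustrative sketch (not used by the app): how the TF-IDF + linear_kernel
# ranking used above behaves on a tiny toy corpus. The _demo_* names are
# hypothetical and exist only for this example.
_demo_vectorizer = TfidfVectorizer(stop_words='english')
_demo_matrix = _demo_vectorizer.fit_transform([
    "Introduction to Psychology",
    "Data Structures and Algorithms",
    "Organizational Behaviour",
])
_demo_scores = linear_kernel(_demo_vectorizer.transform(["psychology"]), _demo_matrix)[0]
# _demo_scores peaks for "Introduction to Psychology" because it is the only
# course title that shares the term "psychology" with the query.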
def html_conversion(gpt_content):
    # GPT is expected to return a numbered list such as:
    # 1. Course Name:
    #    - Course Link: [https://...](https://...)
    #    - Description: ...
    # Extract course details with a regular expression
    courses = re.findall(
        r'(\d+)\. (.*?):\n\s*- Course Link: \[([^\]]+)\]\(([^)]+)\)\n\s*- Description: ([^\n]+)',
        gpt_content
    )

    # The regex captures the link twice (link text and URL); keep only the URL
    processed_courses = [
        (sr_no, name, link_url, description)
        for sr_no, name, _link_text, link_url, description in courses
    ]

    # Convert the processed list of tuples into a DataFrame
    df = pd.DataFrame(processed_courses, columns=['Sr No', 'Course Name', 'Course Link', 'Description'])

    # Convert the DataFrame to an HTML table
    html_table = df.to_html(index=False, escape=False)
    return html_table
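# Example: run the pipeline once for a sample profile. This makes a live OpenAI
# API call, so OPENAI_API_KEY must be set; the values below are illustrative.
user_skill = "psychology"
ed_qual = "B.Tech/B.Sc"
html = recommend_courses(user_skill, ed_qual)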
# Gradio interface for the recommender; live=True re-runs the function whenever an input changes
iface = gr.Interface(
    fn=recommend_courses,
    inputs=[
        gr.Textbox(label="Enter expected skill"),
        gr.Dropdown(["B.Tech/B.Sc", "M.Tech/M.Sc", "Management"], label="Highest Educational Qualification"),
    ],
    outputs="html",
    live=True
)

# Launch the Gradio interface
iface.launch(share=True)