File size: 4,755 Bytes
5a359a3
 
 
 
 
 
 
 
 
 
0ee26ac
5a359a3
 
0ee26ac
5a359a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# -*- coding: utf-8 -*-
"""Skill Transformation Journey.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/11XAXUP2fzy553V9v0x-gxJXcXL3uHJcw
"""

# Commented out IPython magic to ensure Python compatibility.
%pip install openai

# Commented out IPython magic to ensure Python compatibility.
%pip install gradio

import os
import re

import gradio as gr
import openai
from openai import OpenAI
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# OpenAI client used by recommend_courses().
# SECURITY FIX: the API key was hard-coded here. A key committed to source
# control is public and must be rotated immediately; read it from the
# environment instead (OpenAI() would do this by default, but we keep the
# explicit call so the configuration point stays visible).
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

# Path to the course-catalogue workbook and the sheet holding the catalogue.
# NOTE(review): this path only exists inside Colab with Google Drive mounted —
# running the file anywhere else raises FileNotFoundError here.
excel_file_path = "/content/drive/MyDrive/Skill Transformation Journey/AI Integrations in SWAYAM_V1.0.xlsx"
sheet_name = "Shortlisted Courses"  # Replace with the actual sheet name

# Read the Excel file into a Pandas DataFrame; recommend_courses() and the
# TF-IDF fit below both use courses_df (expects a 'Course Name' column, and
# 'Course Link' for the recommendation summary).
courses_df = pd.read_excel(excel_file_path, sheet_name=sheet_name)

# Function to recommend courses based on user input using GPT and TF-IDF
def recommend_courses(user_skill, ed_qual):
    """Recommend courses for a user, combining TF-IDF retrieval with GPT.

    The user's skill and qualification are matched against the course
    catalogue with cosine similarity over the module-level TF-IDF index
    (``tfidf_vectorizer`` / ``tfidf_matrix``); the five closest courses are
    then handed to the chat model for refinement, and the model's answer is
    rendered to an HTML table via ``html_coversion``.

    Args:
        user_skill: Free-text description of the skill the user wants.
        ed_qual: The user's highest educational qualification.

    Returns:
        An HTML string containing the refined recommendations table.
    """
    # Single query string fed to both the TF-IDF index and the GPT prompt.
    query_text = f"{user_skill} {ed_qual}"

    # Score every catalogue entry against the query.
    query_vector = tfidf_vectorizer.transform([query_text])
    similarity_scores = linear_kernel(query_vector, tfidf_matrix)

    scored_courses = courses_df.copy()
    scored_courses['Similarity'] = similarity_scores[0]

    # Keep only the five most similar courses as the candidate shortlist.
    shortlist = scored_courses.sort_values(by='Similarity', ascending=False).head(5)

    # Plain-text rendering of the shortlist for the model to work from.
    shortlist_text = shortlist[['Course Name', 'Course Link']].to_string(index=False)

    # Assemble the refinement prompt: query first, then the candidates.
    gpt_prompt = (
        "Refine the following course recommendations based on user input:\n\n"
        f"{query_text}\n\n"
        "Initial Recommendations:\n\n"
        f"{shortlist_text}"
    )

    # Ask the chat model to refine the shortlist.
    gpt_response = client.chat.completions.create(
        messages=[{"role": "system", "content": gpt_prompt}],
        model="gpt-3.5-turbo",
        max_tokens=1000,
    )

    # The model is expected to answer in the markdown-ish list format that
    # html_coversion() parses into an HTML table.
    refined_text = gpt_response.choices[0].message.content
    return html_coversion(refined_text)

# Build the TF-IDF index over course names once at module load;
# recommend_courses() reads tfidf_vectorizer and tfidf_matrix as globals.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(courses_df['Course Name'].fillna(''))

# Ad-hoc smoke test left over from the notebook.
user_skill = "psychology"

ed_qual = "B.Tech/B.Sc"

# NOTE(review): this call runs before html_coversion() is defined (below), so
# executing this file top-to-bottom raises NameError inside recommend_courses.
# It only worked in the notebook because cells were executed out of order —
# move this below the function definitions or delete it.
html = recommend_courses(user_skill, ed_qual)

# Bare expression: displays the HTML in a notebook cell; no effect as a script.
html

def html_coversion(gpt_content):
    """Convert GPT's markdown-style course list into an HTML table.

    Expects *gpt_content* to contain numbered entries of the form::

        1. Course Name:
           - Course Link: [link text](url)
           - Description: one-line description

    Args:
        gpt_content: The raw text returned by the chat model.

    Returns:
        The parsed recommendations rendered with ``DataFrame.to_html``
        (``escape=False`` so any markup in the cells survives).
    """
    # Each match yields (number, name, link text, link URL, description).
    pattern = r'(\d+)\. (.*?):\n\s*- Course Link: \[([^\]]+)\]\(([^)]+)\)\n\s*- Description: ([^\n]+)'
    courses = re.findall(pattern, gpt_content)

    # BUG FIX: the old code searched each tuple for a *second occurrence* of
    # the bracketed link text with tuple.index(link, first_index + 1), which
    # raised ValueError whenever the markdown text differed from the URL.
    # Dropping the duplicate URL field by position is equivalent when the two
    # match and robust when they do not.
    processed_courses = [
        (number, name, link_text, description)
        for number, name, link_text, _url, description in courses
    ]

    df = pd.DataFrame(
        processed_courses,
        columns=['Sr No', 'Course Name', 'Course Link', 'Description'],
    )

    # escape=False keeps links/markup in the cells intact.
    return df.to_html(index=False, escape=False)

# Gradio Interface with dynamically generated dropdown options
# Gradio UI wired to recommend_courses(skill, qualification) -> HTML.
iface = gr.Interface(
    fn=recommend_courses,
    inputs=[
        # BUG FIX: the first positional argument of gr.Textbox is the initial
        # *value*, so gr.Textbox("text", ...) pre-filled the box with the
        # literal string "text" (a leftover from the old type-string API).
        # Pass only the label.
        gr.Textbox(label="Enter expected skill"),
        gr.Dropdown(["B.Tech/B.Sc", "M.Tech/M.Sc", "Management"], label="Highest Educational Qualification"),
    ],
    outputs="html",
    live=True,  # re-run on every input change rather than on a submit button
)

# Launch the interface; share=True publishes a temporary public URL.
iface.launch(share=True)