# File size: 10,170 Bytes
# Revisions: cb817b6, 4ac2b6b, 3d5ae02, f9ab1fa
import streamlit as st
from datasets import load_dataset
import pandas as pd
from transformers import pipeline
import time

# External page with worldwide university rankings that the app links to.
universities_url: str = "https://www.4icu.org/top-universities-world/"

# Dataset loading is decorated with st.cache_resource so the loaded objects
# are cached instead of being reloaded each time the loader is called.
@st.cache_resource
def load_datasets():
    """Load the two Hugging Face datasets and the three local CSV files.

    Returns:
        A 5-tuple ``(ds_jobs, ds_courses, ds_custom_courses, ds_custom_jobs,
        ds_custom_universities)``. The first two items are the results of
        ``load_dataset``; the last three are pandas DataFrames read from CSV
        files addressed by relative path.
    """
    hub_jobs = load_dataset("lukebarousse/data_jobs")
    hub_courses = load_dataset("azrai99/coursera-course-dataset")

    # Read order matches the order of the returned tuple's DataFrame slots
    # as far as courses/jobs/universities are concerned.
    courses_frame, jobs_frame, universities_frame = (
        pd.read_csv(csv_name)
        for csv_name in (
            "final_cleaned_merged_coursera_courses.csv",
            "merged_data_science_jobs.csv",
            "merged_university_data_cleaned (1).csv",
        )
    )
    return hub_jobs, hub_courses, courses_frame, jobs_frame, universities_frame


(
    ds_jobs,
    ds_courses,
    ds_custom_courses,
    ds_custom_jobs,
    ds_custom_universities,
) = load_datasets()

# The text2text-generation pipeline ('google/flan-t5-large') is built behind
# st.cache_resource so the created pipeline object is cached.
@st.cache_resource
def load_pipeline():
    """Create the ``text2text-generation`` pipeline for ``google/flan-t5-large``."""
    task = "text2text-generation"
    checkpoint = "google/flan-t5-large"
    return pipeline(task, model=checkpoint)


# NOTE(review): qa_pipeline is not referenced anywhere else in the visible part
# of this script - confirm it is still needed before paying the model load cost.
qa_pipeline = load_pipeline()

# Page heading
st.title("Career Counseling Application")
st.subheader("Build Your Profile and Discover Tailored Career Recommendations")

# Profile form: every widget created inside this context is placed in the sidebar
with st.sidebar:
    st.header("Profile Setup")
    educational_background = st.text_input("Educational Background (e.g., Degree, Major)")
    interests = st.text_input("Interests (e.g., AI, Data Science, Engineering)")
    tech_skills = st.text_area("Technical Skills (e.g., Python, SQL, Machine Learning)")
    soft_skills = st.text_area("Soft Skills (e.g., Communication, Teamwork)")

# Store the profile in the session and (re)initialise the questionnaire state
if st.sidebar.button("Save Profile"):
    with st.spinner('Saving your profile...'):
        time.sleep(2)  # Simulate processing time

        # Snapshot of the sidebar inputs at the moment the button was pressed
        saved_profile = dict(
            educational_background=educational_background,
            interests=interests,
            tech_skills=tech_skills,
            soft_skills=soft_skills,
        )
        st.session_state["profile_data"] = saved_profile
        st.session_state["question_index"] = 0  # Start at the first question
        st.session_state["answers"] = {}  # Answers collected so far
        st.session_state["submitted"] = False  # Whether an answer was just submitted
        st.sidebar.success("Profile saved successfully!")

        st.write("To provide more personalized job and course recommendations, please answer the following questions one by one:")

# Follow-up questions, in the order they are asked, used to tailor the
# recommendations beyond the basic profile fields.
additional_questions = [
    (
        "What industry do you prefer working in "
        "(e.g., healthcare, finance, tech)?"
    ),
    (
        "What type of job role are you most interested in "
        "(e.g., research, management, development)?"
    ),
    "Are you looking for remote, hybrid, or on-site opportunities?",
    "Do you have any certifications or licenses related to your field?",
    (
        "What level of experience do you have "
        "(e.g., entry-level, mid-level, senior)?"
    ),
    "What languages are you proficient in, apart from English (if any)?",
    "Do you prefer working for startups, mid-sized companies, or large corporations?",
    (
        "What is your preferred learning style for courses "
        "(e.g., video tutorials, interactive projects, reading material)?"
    ),
    "Are you open to relocation? If yes, to which cities or regions?",
    "Do you have a preference for job roles in specific countries or regions?",
]

# Questionnaire flow and recommendation rendering.
#
# The private helpers below live next to the flow that uses them. They read
# the module-level datasets (ds_jobs, ds_courses, ds_custom_jobs,
# ds_custom_courses) that are loaded earlier in this script.

_MAX_RECOMMENDATIONS = 5  # How many jobs / courses are shown to the user


def _as_text(value):
    """Return *value* when it is a string, otherwise ``""``.

    Dataset cells can be ``None`` and pandas cells can be ``NaN``. ``NaN`` is a
    truthy float, so the ``value or ""`` idiom does not neutralise it and a
    later ``.lower()`` call would raise ``AttributeError``.
    """
    return value if isinstance(value, str) else ""


def _split_terms(raw):
    """Split a comma-separated profile field into clean, lower-cased terms.

    Surrounding whitespace is stripped and empty entries are dropped: an empty
    term is a substring of every text and would make every row match.
    """
    pieces = (piece.strip().lower() for piece in _as_text(raw).split(","))
    return [piece for piece in pieces if piece]


def _mentions_any(text, terms):
    """Return True when *text* contains at least one of *terms*, ignoring case.

    *terms* are expected to be lower-cased already (see ``_split_terms``).
    """
    haystack = _as_text(text).lower()
    return any(term in haystack for term in terms)


def _dataframe_rows(frame):
    """Yield the rows of a pandas DataFrame as Series objects."""
    for _, row in frame.iterrows():
        yield row


def _take_unique(candidates, limit):
    """Return up to *limit* distinct items from *candidates*, first-seen order.

    Iteration stops as soon as *limit* items have been collected, so lazily
    produced candidates are not scanned further than necessary.
    """
    if limit <= 0:
        return []
    picked = {}  # dict keys keep insertion order and give O(1) duplicate checks
    for candidate in candidates:
        picked.setdefault(candidate, None)
        if len(picked) >= limit:
            break
    return list(picked)


def _job_title_candidates(skills):
    """Yield titles of jobs whose skills text mentions one of *skills*.

    The Hugging Face jobs dataset is scanned first, then the custom CSV jobs.
    Nothing is yielded (and nothing is scanned) when *skills* is empty.
    """
    if not skills:
        return
    sources = (
        (ds_jobs["train"], "job_title_short", "job_skills"),
        (_dataframe_rows(ds_custom_jobs), "job_title", "skills"),
    )
    for rows, title_key, skills_key in sources:
        for row in rows:
            if _mentions_any(row.get(skills_key), skills):
                yield _as_text(row.get(title_key)) or "Unknown Job Title"


def _course_candidates(terms):
    """Yield ``(name, url)`` for courses whose name mentions one of *terms*.

    The Hugging Face course dataset is scanned first, then the custom CSV
    courses. Nothing is yielded (and nothing is scanned) when *terms* is empty.
    """
    if not terms:
        return
    for rows in (ds_courses["train"], _dataframe_rows(ds_custom_courses)):
        for row in rows:
            name = _as_text(row.get("Course Name"))
            if _mentions_any(name, terms):
                yield name, _as_text(row.get("Links")) or "#"


def _recommended_courses(interests, skills):
    """Return up to ``_MAX_RECOMMENDATIONS`` distinct ``(name, url)`` courses.

    Courses matching *interests* come first; courses matching *skills* are only
    looked up when the interest matches do not fill the list.
    """
    def candidates():
        yield from _course_candidates(interests)
        yield from _course_candidates(skills)

    return _take_unique(candidates(), _MAX_RECOMMENDATIONS)


# Display questions one by one after the profile is saved
if "profile_data" in st.session_state and "question_index" in st.session_state:
    total_questions = len(additional_questions)

    # Show progress bar
    progress = (st.session_state.question_index / total_questions) * 100
    st.progress(int(progress))
    st.write(f"Progress: {st.session_state.question_index} out of {total_questions} questions answered")

    if st.session_state.question_index < total_questions:
        question = additional_questions[st.session_state.question_index]
        answer = st.text_input(question, key=f"q{st.session_state.question_index}")

        # Submit button for each question
        if st.button("Submit Answer", key=f"submit{st.session_state.question_index}"):
            cleaned_answer = answer.strip()
            if cleaned_answer:
                st.session_state.answers[question] = cleaned_answer
                st.session_state.question_index += 1
                st.session_state.updated = True  # Indicate that a change has occurred
                # Rerun immediately so the next question (and the updated
                # progress bar) is drawn right away. Assigning a dict to
                # `st.query_params` only rebinds the module attribute; it does
                # not refresh the page.
                st.rerun()
            else:
                st.warning("Please enter an answer before submitting.")

    # Check if all questions are answered and show the "Generate Response" button
    if st.session_state.question_index == total_questions:
        st.success("All questions have been answered. Click below to generate your recommendations.")
        if st.button("Generate Response"):
            # Save all answers in the profile data
            st.session_state.profile_data.update(st.session_state.answers)
            profile = st.session_state.profile_data

            # Parse the comma-separated profile fields once, not once per row
            skill_terms = _split_terms(profile.get("tech_skills"))
            interest_terms = _split_terms(profile.get("interests"))

            # Career and Job Recommendations Section
            st.header("Job Recommendations")
            with st.spinner('Generating job recommendations...'):
                time.sleep(2)  # Simulate processing time
                job_recommendations = _take_unique(
                    _job_title_candidates(skill_terms), _MAX_RECOMMENDATIONS
                )

                if job_recommendations:
                    st.subheader("Based on your profile, here are some potential job roles:")
                    for job in job_recommendations:
                        st.write("- ", job)
                else:
                    st.write("No specific job recommendations found matching your profile. Here are some general recommendations:")
                    for job in ["Data Analyst", "Software Engineer", "Project Manager", "Research Scientist", "Business Analyst"][:_MAX_RECOMMENDATIONS]:
                        st.write("- ", job)

            # Course Suggestions Section
            st.header("Recommended Courses")
            with st.spinner('Finding courses related to your profile...'):
                time.sleep(2)  # Simulate processing time
                course_recommendations = _recommended_courses(interest_terms, skill_terms)

                if course_recommendations:
                    st.write("Here are the top 5 courses related to your interests:")
                    for course_name, course_url in course_recommendations:
                        st.write(f"- [{course_name}]({course_url})")
                else:
                    st.write("No courses matching your profile were found.")

# Closing section: university rankings link followed by the sign-off message
st.header("Top Universities")
closing_lines = (
    "For further education, you can explore the top universities worldwide:",
    "[View Top Universities Rankings]({})".format(universities_url),
    "Thank you for using the Career Counseling Application!",
)
for closing_line in closing_lines:
    st.write(closing_line)