File size: 3,117 Bytes
351b233
 
 
 
 
 
 
 
 
 
 
 
6e14a9e
 
351b233
 
 
 
6e14a9e
 
351b233
 
 
 
 
 
 
 
 
6e14a9e
351b233
 
 
 
 
 
 
 
 
 
 
 
 
6e14a9e
 
351b233
 
 
6e14a9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
351b233
 
 
 
 
 
 
6e14a9e
351b233
 
6e14a9e
 
351b233
 
 
 
 
 
 
 
 
 
 
6e14a9e
 
 
351b233
6e14a9e
 
8245559
6e14a9e
 
351b233
a04aaa9
 
8245559
a04aaa9
75028e0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import gradio as gr

# Load the survey data; the CSV is expected in the working directory.
df = pd.read_csv("mexican_medical_students_mental_health_data.csv")

# Regression target: row-wise sum of the columns at positions 19..26.
# It is computed before imputation, so missing items are skipped by sum().
# NOTE(review): presumably these positions hold the GAD questionnaire items;
# confirm against the CSV header.
target = df.iloc[:, 19:27].sum(axis=1)
df.insert(43, "gad_total", target)

# Imputation plan: numeric body measurements get the column mean, every other
# listed column gets its most frequent value.
mean_filled = ["height", "weight", "age"]
mode_filled = ["gender", "reported_sleep_hours", "nap_duration"]
mode_filled += [f"phq{i}" for i in range(1, 10)]

# All fill values are derived from the un-imputed data before anything is
# written back.
fill_values = {col: df[col].mean() for col in mean_filled}
fill_values.update({col: df[col].mode()[0] for col in mode_filled})

# Assign the filled column back instead of calling fillna(inplace=True) on
# df[col]: the chained in-place form acts on a temporary object and does not
# update df when pandas Copy-on-Write is active.
for col, value in fill_values.items():
    df[col] = df[col].fillna(value)

import datetime
def _hhmm_to_hours(value):
    """Convert an ``H:MM`` clock value to a decimal number of hours.

    Args:
        value: Anything whose ``str()`` form matches the ``%H:%M`` format.

    Returns:
        Hours plus minutes/60 as a float (e.g. ``"7:30"`` -> ``7.5``).

    Raises:
        ValueError: If the text does not match ``%H:%M``.
    """
    parsed = datetime.datetime.strptime(str(value), "%H:%M")
    return float(parsed.hour) + parsed.minute / 60


# Iterate over the column values directly rather than df[col][i]; the latter
# is a label lookup and only works while the index is the default 0..n-1.
df.insert(
    44,
    "reported_sleep_in_hours",
    [_hhmm_to_hours(value) for value in df["reported_sleep_hours"]],
)
df.insert(
    45,
    "nap_duration_hours",
    [_hhmm_to_hours(value) for value in df["nap_duration"]],
)

from sklearn import preprocessing  
# Replace the gender labels with the integer codes assigned by LabelEncoder.
# The fitted encoder stays available as `le` for mapping codes back to labels.
le = preprocessing.LabelEncoder()
le.fit(df["gender"])
df["gender"] = le.transform(df["gender"])


# In[22]:


# Feature columns, in the exact order the model is fitted on. greet() feeds
# user-supplied values to the model positionally, so this order matters.
X = df[
    [
        "age", "gender", "height", "weight",
        "phq1", "phq2", "phq3", "phq4", "phq5", "phq6", "phq7", "phq8", "phq9",
        "reported_sleep_in_hours", "nap_duration_hours",
    ]
]
y = df["gad_total"]

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=21
)

model = LinearRegression()
model.fit(X_train, y_train)
print("Training complete.")

# score() on the held-out rows, printed as a percentage.
r2_score = model.score(X_test, y_test)
print(r2_score*100,"%")

# Predict once and reuse the result for the error metric.
y_pred = model.predict(X_test)
print('Coefficients: \n', model.coef_)
print("Mean squared error: %.2f" % np.mean((y_pred - y_test) ** 2))

def greet(input):
    """Predict the target for one comma-separated row of feature values.

    Args:
        input: Text holding at least 15 comma-separated numbers, in the same
            column order the model was fitted on. Values after the 15th are
            ignored. (The name shadows the builtin; it is kept so the
            function signature stays unchanged.)

    Returns:
        The string form of the array returned by ``model.predict``.

    Raises:
        ValueError: If fewer than 15 values are supplied or a value is not
            a valid number.
    """
    n_features = 15
    fields = input.split(",")
    if len(fields) < n_features:
        raise ValueError(
            f"Expected {n_features} comma-separated values, got {len(fields)}."
        )
    # split() yields strings; convert explicitly because scikit-learn's input
    # validation does not accept string arrays as numeric data.
    row = [float(field) for field in fields[:n_features]]
    y = model.predict([row])
    return str(y)

# Input box: greet() needs one line with 15 comma-separated numbers in the
# order of the training columns, so the label and placeholder spell that out.
# Gender must be entered as the numeric code produced by the label encoder.
textbox_x = gr.Textbox(
    label="Features (15 comma-separated values):",
    placeholder="age,gender_code,height,weight,phq1,...,phq9,sleep_hours,nap_hours",
    lines=1,
)
# Output box showing the string returned by greet().
textbox_y = gr.Textbox(label="Variable Y:", placeholder ="11", lines=1)
grIn = gr.Interface(title = "Lineare Regression",fn=greet, inputs=textbox_x, outputs=textbox_y)

# Start the web UI when the script runs.
grIn.launch()