In [2]:
#1.LR
#2.Score
#3.Lasso
#4.Score
#5.Ridge
#6.Score
In [94]:
import pandas as pd
from sklearn.linear_model import LinearRegression ,Lasso,Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
In [95]:
df=pd.read_csv("patient_health_data.csv")
df.head()
Out[95]:
| age | bmi | blood_pressure | cholesterol | glucose | insulin | heart_rate | activity_level | diet_quality | smoking_status | alcohol_intake | health_risk_score | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 58 | 24.865215 | 122.347094 | 165.730375 | 149.289441 | 22.306844 | 75.866391 | 1.180237 | 7.675409 | No | 0.824123 | 150.547752 |
| 1 | 71 | 19.103168 | 136.852028 | 260.610781 | 158.584646 | 13.869817 | 69.481114 | 7.634622 | 8.933057 | No | 0.852910 | 160.320350 |
| 2 | 48 | 22.316562 | 137.592457 | 177.342582 | 178.760166 | 22.849816 | 69.386962 | 7.917398 | 3.501119 | Yes | 4.740542 | 187.487398 |
| 3 | 34 | 22.196893 | 153.164775 | 234.594764 | 136.351714 | 15.140336 | 95.348387 | 3.192910 | 2.745585 | No | 2.226231 | 148.773138 |
| 4 | 62 | 29.837173 | 92.768973 | 276.106498 | 158.753516 | 17.228576 | 77.680975 | 7.044026 | 8.918348 | No | 3.944011 | 170.609655 |
In [96]:
df["smoking_status"]=df["smoking_status"].replace({"No":0,"Yes":1})
C:\Users\dell\AppData\Local\Temp\ipykernel_8168\2923759407.py:1: FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
df["smoking_status"]=df["smoking_status"].replace({"No":0,"Yes":1})
In [97]:
df["smoking_status"]
Out[97]:
0 0
1 0
2 1
3 0
4 0
..
245 0
246 0
247 0
248 0
249 1
Name: smoking_status, Length: 250, dtype: int64
In [98]:
df[["smoking_status"]].dtypes
Out[98]:
smoking_status int64 dtype: object
In [99]:
X=df.drop(columns=["health_risk_score"])
y=df[["health_risk_score"]]
In [100]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
In [101]:
lm=LinearRegression()
In [102]:
lm.fit(X_train,y_train)
Out[102]:
LinearRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Parameters
In [103]:
y_test.head()
Out[103]:
| health_risk_score | |
|---|---|
| 142 | 142.378299 |
| 6 | 180.270293 |
| 97 | 124.210564 |
| 60 | 105.188048 |
| 112 | 128.443865 |
In [104]:
lm.predict(X_test.head())
Out[104]:
array([[157.87195261],
[172.62188521],
[127.52072532],
[111.53729062],
[134.27687977]])
In [105]:
y_test.head().values.flatten() - lm.predict(X_test.head()).flatten()
Out[105]:
array([-15.49365311, 7.64840809, -3.31016102, -6.34924232,
-5.83301507])
In [106]:
lm.score(X_test,y_test)
Out[106]:
0.7552011116606276
In [124]:
# Predictions
y_predict = lm.predict(X_test.head())
# Agar y_test bhi available hai
y_actual = y_test.head()
plt.figure()
plt.plot(y_actual.values)
plt.plot(y_predict)
plt.xlabel("Observations")
plt.ylabel("Target Value")
plt.title("Actual vs Predicted Values")
plt.show()
In [108]:
lsm = Lasso()
In [109]:
lsm.fit(X_train,y_train)
Out[109]:
Lasso()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Parameters
In [110]:
y_test.head()
Out[110]:
| health_risk_score | |
|---|---|
| 142 | 142.378299 |
| 6 | 180.270293 |
| 97 | 124.210564 |
| 60 | 105.188048 |
| 112 | 128.443865 |
In [111]:
lsm.predict(X_test.head())
Out[111]:
array([160.11053281, 172.29794385, 128.91242291, 111.1273273 ,
132.56074787])
In [112]:
y_test.head().values.flatten() - lsm.predict(X_test.head()).flatten()
Out[112]:
array([-17.73223331, 7.97234945, -4.70185861, -5.939279 ,
-4.11688317])
In [113]:
abs(y_test.values.flatten() - lsm.predict(X_test).flatten()).mean()
Out[113]:
np.float64(8.49163379591994)
In [114]:
lsm.score(X_test,y_test)
Out[114]:
0.7711217595735915
In [122]:
# Predictions
y_predict = lsm.predict(X_test.head())
# Agar y_test bhi available hai
y_actual = y_test.head()
plt.figure()
plt.plot(y_actual.values)
plt.plot(y_predict)
plt.xlabel("Observations")
plt.ylabel("Target Value")
plt.title("Actual vs Predicted Values")
plt.show()
In [115]:
lsm.score(X_train, y_train)
Out[115]:
0.8661955368140437
In [116]:
rid=Ridge()
In [117]:
rid.fit(X_train,y_train)
Out[117]:
Ridge()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
Parameters
In [118]:
y_test.head()
Out[118]:
| health_risk_score | |
|---|---|
| 142 | 142.378299 |
| 6 | 180.270293 |
| 97 | 124.210564 |
| 60 | 105.188048 |
| 112 | 128.443865 |
In [73]:
rid.predict(X_test.head())
Out[73]:
array([157.87616582, 172.61576099, 127.52236415, 111.5386635 ,
134.26888828])
In [123]:
# Predictions
y_predict = rid.predict(X_test.head())
# Agar y_test bhi available hai
y_actual = y_test.head()
plt.figure()
plt.plot(y_actual.values)
plt.plot(y_predict)
plt.xlabel("Observations")
plt.ylabel("Target Value")
plt.title("Actual vs Predicted Values")
plt.show()
In [74]:
rid.score(X_train, y_train)
Out[74]:
0.8679267298654003
In [121]:
rid.score(X_test,y_test)
Out[121]:
0.7552806598840451
In [ ]: