import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the possum dataset and take a first look at its contents.
data = pd.read_csv('possum.csv')
print(data.head())
print(data.shape)

# Keep only the two columns studied below.
feature_columns = ['footlgth', 'earconch']
sub_data = data[feature_columns]

# Summary statistics, then the number of missing values per column.
print(sub_data.describe())
missing_per_column = sub_data.isnull().sum()
print(missing_per_column)

# Drop every row that has a missing value in either column.
data_clean = sub_data.dropna()

# Scatter plot of 'earconch' against 'footlgth' on the cleaned rows.
plt.figure(figsize=(8, 6))
plt.scatter(x='footlgth', y='earconch', data=data_clean)
plt.xlabel("Longueur d'empreinte")
plt.ylabel("Taille du pavillon auditif")
plt.show()
def predict_model(alpha: float, beta: float, x):
    """Evaluate the linear model ``alpha * x + beta``.

    Args:
        alpha: Slope applied to ``x``.
        beta: Intercept added to the scaled input.
        x: Input value(s). Anything supporting ``*`` and ``+`` with a
            number works (a scalar, a NumPy array, a pandas Series); the
            result has the same kind/shape as ``x``.

    Returns:
        The model prediction(s) ``alpha * x + beta``.
    """
    return alpha * x + beta
def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared differences between two inputs.

    Args:
        y_true: Observed values. Must support elementwise subtraction
            with ``y_pred`` (scalars, NumPy arrays, pandas Series); plain
            Python lists do not support ``-`` and will raise ``TypeError``.
        y_pred: Predicted values, compatible with ``y_true``.

    Returns:
        ``np.mean((y_true - y_pred) ** 2)``.
    """
    squared_errors = (y_true - y_pred) ** 2
    return np.mean(squared_errors)
# Input and target columns taken from the cleaned data.
X = data_clean['footlgth']
y = data_clean['earconch']

# Evaluate the linear model with fixed, hard-coded parameters and measure
# how far its predictions are from the observed values.
y_pred = predict_model(alpha=0.25, beta=0.5, x=X)
mse = mean_squared_error(y_true=y, y_pred=y_pred)

# Report the error; the original bare print() emitted only an empty line
# and left the computed value unused.
print(f"MSE: {mse}")