MLP models are deep learning models that perform well on exceptionally large data sets; however, they require extensive computational resources and cannot handle missing values. As black-box models, MLPs also lack a straightforward interpretation. In contrast, Random Forest models in MSS can ingest missing values, and as traditional machine learning models their algorithmic structure lends itself to simpler interpretation (a short Random Forest sketch for comparison appears after the MLP example below).
from sklearn.neural_network import MLPRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import pandas
import numpy
#Read from the prepared Ames data.
AmesData = pandas.read_csv("AmesHousingTestData.csv")
#List out variables and pull the subset of data from the csv.
headers = ["Sale Price","Lot Area","Total Basement SF","1st Floor SF","2nd Floor SF","Garage Area SF","Total Rooms","Year Built","Year Remod/Add","Zoning","Type","Heating Quality","Kitchen Quality","Garage Quality","Exterior Quality","Sample_Id","prediction-score"]
AmesData = AmesData[AmesData.columns.intersection(headers)]
#Missing values in the prepared file are flagged with '*'; drop every row that contains one.
stacked = AmesData.stack()
missingCells = stacked[stacked.eq('*')].index
missingIndex = numpy.unique(missingCells.get_level_values(level=0))
missingIndex
AmesData = AmesData.drop(index=missingIndex)
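#An alternative one-liner (an assumption, not part of the original workflow; it presumes
#missing values appear only as '*'):
#AmesData = AmesData.replace('*', numpy.nan).dropna()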
#subset into test and train
testData = AmesData.loc[AmesData['Sample_Id'] == "Test"]
trainData = AmesData.loc[AmesData['Sample_Id'] == "Training"]
xTest = testData.drop(columns=['Sale Price','Sample_Id'])
xTrain = trainData.drop(columns=['Sale Price','Sample_Id'])
yTest = testData['Sale Price']
yTrain = trainData['Sale Price']
#Number of observations in the training and test sets
trainSize = xTrain.shape[0]
testSize = xTest.shape[0]
#Create and fit the MLP model
MLPReg = MLPRegressor(random_state=1, max_iter=200, learning_rate_init=0.2, beta_1=0.8).fit(xTrain, yTrain)
#Generate predictions on the holdout (test) data
pyPred = MLPReg.predict(xTest)
#Obtain summary statistics about current performance
#R-squared
R2Value_Python = MLPReg.score(xTest, yTest)
#MAD calculation, also known as Mean Absolute Error
MADValue_Python = mean_absolute_error(yTest, pyPred)
R2Value_Python
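To make the interpretability contrast above concrete, the following is a minimal sketch, assuming the xTrain/xTest split created earlier is still in memory. scikit-learn's RandomForestRegressor stands in for the MSS Random Forest here, so the hyperparameter values (n_estimators, random_state) are illustrative assumptions rather than settings taken from MSS.
from sklearn.ensemble import RandomForestRegressor
#Fit a Random Forest on the same training split used for the MLP above.
RFReg = RandomForestRegressor(n_estimators=100, random_state=1).fit(xTrain, yTrain)
#Score the holdout data with the same metrics reported for the MLP.
rfPred = RFReg.predict(xTest)
R2Value_RF = RFReg.score(xTest, yTest)
MADValue_RF = mean_absolute_error(yTest, rfPred)
#A simple interpretation aid: per-feature importances from the fitted forest.
importances = pandas.Series(RFReg.feature_importances_, index=xTrain.columns).sort_values(ascending=False)
importances
Because this sketch reuses the cleaned split from above, it says nothing about the missing-value handling available in MSS; it is only meant to show the simpler, feature-importance style of interpretation mentioned earlier.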