Our approach to this model is to discover key variables that can help accurately determine the sale price of a home in King County. Our first task is to identify which business questions can be answered with the raw data at hand. Second, we will clean the data, eliminating features that do not help answer those business questions and adding features that give better insight into what drives price. Once we have an initial model, we will tweak features and fit the model to our training split, then evaluate it on the held-out test data for more accurate predictions. Our end goal is to minimize error as much as possible, provide insights on what affects price, and develop a strategy for answering the business questions posed.
# import modules needed for data analysis and get them ready for use in the notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.preprocessing import OneHotEncoder
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import sklearn.metrics as metrics
from geopy import distance
import math
from sklearn.linear_model import LassoCV
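# Instantiate the scaler, linear regression, and cross-validated lasso reused throughout the notebook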
ss = StandardScaler()
lr = LinearRegression()
lscv = LassoCV(max_iter=150000)
raw_data = pd.read_csv("kc_house_data.csv")
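# Drop columns we will not use, remove the 33-bedroom outlier record, and fill remaining missing values with 0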
drop_raw = raw_data.drop(['id', 'date', 'condition',
'sqft_above', 'sqft_basement', 'yr_renovated',
'sqft_living15', 'sqft_lot15'], axis=1)
clean_data = drop_raw[drop_raw['bedrooms'] != 33].copy()
clean_data = clean_data.fillna(0)
# Make a new column of (latitude, longitude) pairs to help compute distances to points of interest
clean_data['lat_and_long'] = list(zip(clean_data['lat'], clean_data['long']))
finder = clean_data[clean_data['zipcode']==98005]
# geopy expects coordinates as (lat, long), i.e. (y, x)
avg_lat_98005 = np.mean(finder['lat'])
avg_long_98005 = np.mean(finder['long'])
(avg_lat_98005, avg_long_98005)
#center of Bellevue based on zipcodes
#Denote points of interest based on lat and long.
mid_of_bellevue = (avg_lat_98005, avg_long_98005)
seattle_lat_long = (47.6062, -122.3321)
airport_lat_long = (47.4502, -122.3088)
snoq_falls_lat_long = (47.5417, -121.8377)
vancouver_lat_long = (49.2827, -123.1207)
mt_rain_lat_long = (46.8523, -121.7603)
oly_lat_long = (47.8021, -123.6044)
tacoma_lat_long = (47.2529, -122.4443)
stevens_lat_long = (47.7448, -121.0890)
# Compute the distance of each sold house from each point of interest
distances_col = [distance.distance(elem, mid_of_bellevue).miles for elem in clean_data['lat_and_long']]
seattle_distances_col = [distance.distance(elem, seattle_lat_long).miles for elem in clean_data['lat_and_long']]
seatac_distances_col = [distance.distance(elem, airport_lat_long).miles for elem in clean_data['lat_and_long']]
snoq_falls_dist_col = [distance.distance(elem, snoq_falls_lat_long).miles for elem in clean_data['lat_and_long']]
vanc_dist_col = [distance.distance(elem, vancouver_lat_long).miles for elem in clean_data['lat_and_long']]
mt_rain_dist_col = [distance.distance(elem, mt_rain_lat_long).miles for elem in clean_data['lat_and_long']]
oly_dist_col = [distance.distance(elem, oly_lat_long).miles for elem in clean_data['lat_and_long']]
tacoma_dist_col = [distance.distance(elem, tacoma_lat_long).miles for elem in clean_data['lat_and_long']]
stevens_dist_col = [distance.distance(elem, stevens_lat_long).miles for elem in clean_data['lat_and_long']]
#Create new columns based on the distances developed above
clean_data['dist_from_bellevue'] = distances_col
clean_data['dist_from_seattle'] = seattle_distances_col
clean_data['dist_from_seatac'] = seatac_distances_col
clean_data['dist_from_snoq_falls'] = snoq_falls_dist_col
clean_data['dist_from_vancouver'] = vanc_dist_col
clean_data['dist_from_mt_rain'] = mt_rain_dist_col
clean_data['dist_from_oly'] = oly_dist_col
clean_data['dist_from_tacoma'] = tacoma_dist_col
clean_data['dist_from_stevens'] = stevens_dist_col
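# Engineer additional features; note that sqft_living is overwritten with its natural log below,
# so the derived features that follow are built from that log value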
clean_data['sqft_living'] = np.log(clean_data['sqft_living'])
clean_data['beds_and_baths'] = np.log(clean_data['bedrooms'] * clean_data['bathrooms'])
clean_data['sqft_living_div_floors_div_sqft_lot'] = (clean_data['sqft_living']/clean_data['floors'])/clean_data['sqft_lot']
clean_data['squared_living'] = np.log(np.square(clean_data['sqft_living']))
clean_data['squared_lot'] = np.square(clean_data['sqft_lot'])
clean_data['sqft_divby_bedroom'] = np.log(clean_data['sqft_living']/clean_data['bedrooms'])
clean_data['dist_seatac_seattle'] = (clean_data['dist_from_seatac'] + clean_data['dist_from_seattle'])/2
clean_data['dist_seatac_bellevue'] = (clean_data['dist_from_seatac'] + clean_data['dist_from_bellevue'])/2
clean_data['square_dist_seatac'] = np.square(clean_data['dist_from_seatac'])
clean_data['square_seatac_bellevue'] = np.square(clean_data['dist_seatac_bellevue'])
clean_data['sqft_times_grade'] = np.log(clean_data['sqft_living'] * clean_data['grade'])
clean_data['age'] = 2019 - clean_data['yr_built']
clean_data['sq_age'] = np.square(clean_data['age'])
clean_data['water_weight'] = np.log((1+clean_data['waterfront']) * clean_data['sqft_living'])
clean_data['view_weight'] = np.log((1+clean_data['view']) * clean_data['sqft_living'])
clean_data_dist = clean_data.drop(['lat', 'long', 'lat_and_long'], axis=1).copy()
clean_data_dist.drop('zipcode', axis=1).corr().loc[['price']].T
# See the correlation of price with every column
ohe = OneHotEncoder(drop='first', categories='auto')
price_zip_trans = ohe.fit_transform(clean_data_dist['zipcode'].values.reshape(-1,1))
zip_sparse = pd.DataFrame(price_zip_trans.todense(), columns=ohe.get_feature_names())
# One-hot encode zipcode so each individual zipcode becomes its own indicator column
clean_data_dist_no_zip = clean_data_dist.drop(['zipcode'], axis=1).copy()
# Remove the original zipcode column now that each zipcode is represented by its own indicator column
clean_data_dist_no_zip['log_price'] = np.log(clean_data_dist_no_zip['price'])
clean_data_dist_no_zip = clean_data_dist_no_zip.drop(['price', 'waterfront', 'sqft_living_div_floors_div_sqft_lot','squared_lot', 'age', 'sq_age'], axis=1)
# Reset the index so rows line up with zip_sparse (one record was dropped above, leaving a gap in the index)
model_data = zip_sparse.join(clean_data_dist_no_zip.reset_index(drop=True), how='inner')
#model_data.head()
# Define the data we will model on by joining the cleaned features with the zipcode indicator matrix,
# so every variable is in one place.
X = model_data.drop('log_price', axis=1)
y = model_data['log_price']
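# Fit an OLS model with statsmodels (adding a constant term) to inspect the full coefficient summary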
predictors = sm.add_constant(X)
model_stats = sm.OLS(y, predictors).fit()
model_stats.summary()
X_train, X_test, y_train, y_test = train_test_split(X,
y,
random_state=10)
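# Scale features using statistics from the training split only, then fit and score the linear regression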
ss.fit(X_train)
X_train_sc = ss.transform(X_train)
X_test_sc = ss.transform(X_test)
lr.fit(X_train_sc, y_train)
lr.score(X_test_sc, y_test)
#list(zip(lr.coef_, X_train.columns))
price_predict = lr.predict(X_test_sc)
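# Convert the log-price predictions and targets back to dollars, then compute MSE and RMSE in dollars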
y_test_non_log = np.exp(y_test)
price_predict_non_log = np.exp(price_predict)
metrics.mean_squared_error(y_test_non_log, price_predict_non_log)
math.sqrt(metrics.mean_squared_error(y_test_non_log, price_predict_non_log))
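# Fit a cross-validated lasso on the same scaled training data and compare its score, RMSE, and chosen alpha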
lscv.fit(X_train_sc, y_train)
lscv.score(X_test_sc, y_test)
price_predict2 = lscv.predict(X_test_sc)
price_predict2_non_log = np.exp(price_predict2)
math.sqrt(metrics.mean_squared_error(y_test_non_log, price_predict2_non_log))
lscv.alpha_
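# Recover the non-log prices and build the list of columns (zipcode dummies plus the price columns) to exclude from the scatter plots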
non_log_prices = np.exp(model_data['log_price'])
non_log_prices
compiled_data = model_data.copy()
compiled_data['nonlog_prices'] = non_log_prices
colus=ohe.get_feature_names()
to_exclude = list(colus)
to_exclude.append('log_price')
to_exclude.append('nonlog_prices')
#to_exclude
for feature in compiled_data.drop(to_exclude, axis=1).columns:
    compiled_data.plot(x=feature, y='log_price', kind='scatter', title=f"{feature}", figsize=(12,10));
    plt.savefig(f"{feature} with log(price).png")
#multiple plots to visualize our log and actual prices versus our various variables
for feature in compiled_data.drop(to_exclude, axis=1).columns:
    compiled_data.plot(x=feature, y='nonlog_prices', kind='scatter', title=f"{feature}", figsize=(12,10));
    plt.savefig(f"{feature} with price.png")
lscv.coef_
list(zip(lscv.coef_, X_train.columns))
The distances from both SeaTac Airport and Vancouver have high coefficients. This could imply that people need to be close to those locations for work or pleasure; alternatively, it could simply reflect a higher density of houses near them. We would need to explore the correlation between those distance features further. Unsurprisingly, the square footage of a home is a major factor in its price, and coupling it with other features measured on a scale, such as grade, helped the model learn.
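As a first step toward that follow-up, here is a minimal sketch of how we might inspect the relationship between the distance features, using the clean_data_dist frame built above:
# Correlation matrix of selected distance features to check for multicollinearity
dist_cols = ['dist_from_seatac', 'dist_from_vancouver', 'dist_from_seattle', 'dist_from_bellevue']
clean_data_dist[dist_cols].corr()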
Our model, though not perfect, helps show the various factors that can affect house price in King County. The root mean squared error on price is just over $130k for both the linear regression and the lasso model, meaning we can expect predictions of a home's sale price to be off by roughly that amount on average. Using data that spans more years, including more records from the rural zipcodes of King County, and/or grouping zipcodes into bins so that price reflects a broader area rather than arbitrary zipcode boundaries may all improve the model. Our presentation mentions further next steps we would take given the time and tools, but we feel that, even in its current iteration, the model can support some business decisions based on our findings.
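To illustrate the zipcode-binning idea, a minimal sketch is below; the region_map grouping is hypothetical and would need to come from local knowledge or clustering rather than from this notebook:
# Hypothetical example: map individual zipcodes to coarser regions before one-hot encoding
# (this mapping is illustrative only, not a vetted grouping)
region_map = {98004: 'bellevue', 98005: 'bellevue', 98103: 'seattle_core', 98115: 'seattle_core'}
clean_data['region'] = clean_data['zipcode'].map(region_map).fillna('other')
clean_data.groupby('region')['price'].median()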