# 第一個線性回歸程式(基於 Jupyter)

import pandas as pd

import seaborn as sns

# Configure seaborn's global plot defaults: "notebook" context, white-grid style, dark palette.
sns.set(context="notebook", style="whitegrid", palette="dark")

import matplotlib.pyplot as plt

import tensorflow as tf

import numpy as np

# Load the training set; `names=` supplies the two column names
# (presumably because the file has no header row -- confirm against the data file).
df = pd.read_csv('ex1data1.txt', names=['population', 'profit'])

# Scatter plot of the raw data with the regression line switched off.
# Keyword arguments are used because current seaborn releases reject positional
# x/y/data arguments, and `height` is the current name of the former `size`
# parameter (renamed in seaborn 0.9).
sns.lmplot(x='population', y='profit', data=df, height=6, fit_reg=False)


def get_x(df):
    """Build the design matrix from a feature/label DataFrame.

    A column of ones (the intercept term) is placed in front of the columns
    of ``df``; the last column of the result, taken to be the label, is
    dropped.

    Args:
        df: DataFrame whose last column is the label and whose remaining
            columns are the features.

    Returns:
        A NumPy ndarray with ``len(df)`` rows: the ones column followed by
        every column of ``df`` except the last.
    """
    # Reuse df's index so that concat pairs the rows one-to-one even when df
    # does not carry the default 0..m-1 index.
    ones = pd.DataFrame({'ones': np.ones(len(df))}, index=df.index)
    data = pd.concat([ones, df], axis=1)  # column-wise concatenation
    # `.values` yields an ndarray (not a matrix); DataFrame.as_matrix() no
    # longer exists in current pandas.
    return data.iloc[:, :-1].values

def get_y(df):
    """Return the label vector: the last column of ``df`` as a 1-D ndarray.

    Args:
        df: DataFrame whose last column holds the labels.

    Returns:
        A NumPy ndarray with one entry per row of ``df``.
    """
    return np.array(df.iloc[:, -1])

def linear_regression(x_data, y_data, alpha, epoch, optimizer=tf.train.GradientDescentOptimizer):
    """Fit a linear model with an optimizer from the TensorFlow 1.x graph API.

    The source comment credits the original version of this function to
    Lucas Shen.

    NOTE(review): the training loop body and the returned dict were
    reconstructed, because the copy of this function being edited was
    truncated at those points -- confirm against the original notebook.
    Under TensorFlow 2.x the symbols used here live in ``tf.compat.v1``.

    Args:
        x_data: 2-D array of shape (m, n) holding the design matrix.
        y_data: labels; any shape with m elements, reshaped to (m, 1) here.
        alpha: learning rate handed to the optimizer.
        epoch: maximum number of optimisation steps.
        optimizer: optimizer class, instantiated as
            ``optimizer(learning_rate=alpha)``.

    Returns:
        A dict with ``'loss'`` (list of scalar loss values, one per executed
        step) and ``'parameters'`` (the (n, 1) weight array fetched on the
        last executed step, or the initial weights when ``epoch`` is 0).
    """
    x_data = np.asarray(x_data)
    # A 1-D label vector would broadcast (m, 1) - (m,) into an (m, m) matrix
    # inside the loss, so the labels are forced into a column first.
    y_data = np.asarray(y_data).reshape(-1, 1)

    try:
        # Placeholders for the graph inputs.
        x = tf.placeholder(tf.float32, shape=x_data.shape)
        y = tf.placeholder(tf.float32, shape=y_data.shape)

        # Construct the graph.
        with tf.variable_scope('linear-regression'):
            w = tf.get_variable("weights",
                                (x_data.shape[1], 1),
                                initializer=tf.constant_initializer())  # n*1

            y_pred = tf.matmul(x, w)  # m*n @ n*1 -> m*1
            residual = y_pred - y
            # (m*1).T @ m*1 = 1*1
            loss = 1 / (2 * len(x_data)) * tf.matmul(residual, residual, transpose_a=True)

        opt = optimizer(learning_rate=alpha)
        opt_operation = opt.minimize(loss)

        # Run the session.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            loss_data = []
            w_val = sess.run(w)  # defined even if the loop never runs

            for _ in range(epoch):
                _, loss_val, w_val = sess.run([opt_operation, loss, w],
                                              feed_dict={x: x_data, y: y_data})
                loss_data.append(loss_val[0, 0])  # loss_val is a 1*1 ndarray

                # Early break once the loss has stopped changing.
                if len(loss_data) > 1 and np.abs(loss_data[-1] - loss_data[-2]) < 10 ** -9:
                    break
    finally:
        # Clear the graph even on failure; otherwise the next call would hit
        # the already-existing "weights" variable.
        tf.reset_default_graph()

    return {'loss': loss_data, 'parameters': w_val}

# Read the data set and name its two columns.
data = pd.read_csv('ex1data1.txt', names=['population', 'profit'])

# Feature matrix from get_x and label vector from get_y; report the shape and
# type of each, x first.
x, y = get_x(data), get_y(data)
for _arr in (x, y):
    print(_arr.shape, type(_arr))

# Zero-initialised parameter vector with one entry per column of x.
theta = np.zeros(x.shape[1])

def lr_cost(theta, x, y):
    """Return the linear-regression cost: squared residuals summed, over 2m.

    Args:
        theta: array of shape (n,) -- the linear-regression parameters.
        x: array of shape (m, n) -- m samples, n features.
        y: array of shape (m,) -- the targets.

    Returns:
        ``sum((x @ theta - y) ** 2) / (2 * m)``.
    """
    m = x.shape[0]  # number of samples

    inner = x @ theta - y  # residuals; x @ theta is equivalent to x.dot(theta)

    # In matrix terms this is 1*m @ m*1 = 1*1. NumPy's .T is a no-op on a 1-D
    # array, so here it is simply the inner product of the residual vector
    # with itself.
    square_sum = inner.T @ inner

    return square_sum / (2 * m)

lr_cost(theta, x, y)  # cost at the initial theta; the value is not stored (the call returns the cost, not theta)

def gradient(theta, x, y):
    """Return the gradient of the linear-regression cost with respect to theta.

    Args:
        theta: parameter vector with one entry per column of ``x``.
        x: array whose rows are samples and whose columns are features.
        y: target vector with one entry per row of ``x``.

    Returns:
        ``x.T @ (x @ theta - y) / m`` where ``m = x.shape[0]``.
    """
    m = x.shape[0]

    # (m, n).T @ (m,) -> (n,); x @ theta is equivalent to x.dot(theta)
    inner = x.T @ (x @ theta - y)

    return inner / m

def batch_gradient_decent(theta, x, y, epoch, alpha=0.01):
    """Fit linear regression by batch gradient descent.

    Args:
        theta: initial parameter vector; it is copied and never modified.
        x: feature matrix, passed through to ``lr_cost`` and ``gradient``.
        y: target vector, passed through to ``lr_cost`` and ``gradient``.
        epoch: number of full-batch update steps to perform.
        alpha: learning rate (step size) applied to each gradient.

    Returns:
        A tuple ``(final_theta, cost_data)``. ``cost_data`` is a list of
        ``epoch + 1`` costs: the cost at the initial theta followed by the
        cost after each update.
    """
    _theta = theta.copy()  # work on a copy so the caller's theta is untouched
    cost_data = [lr_cost(_theta, x, y)]

    for _ in range(epoch):
        _theta = _theta - alpha * gradient(_theta, x, y)
        cost_data.append(lr_cost(_theta, x, y))

    return _theta, cost_data


epoch = 500

final_theta, cost_data = batch_gradient_decent(theta, x, y, epoch)

# Cost at the fitted parameters; the value is not stored, so it is visible
# only as notebook cell output.
lr_cost(final_theta, x, y)

# Learning curve: cost against update step. Drawn with matplotlib because
# seaborn's tsplot is no longer available in current seaborn releases. The
# x axis is sized from cost_data itself so the two lengths always agree.
fig, ax = plt.subplots()
ax.plot(np.arange(len(cost_data)), cost_data)
ax.set_xlabel('epoch')
ax.set_ylabel('cost')
plt.show()

b = final_theta[0]  # intercept: where the fitted line crosses the y axis
m = final_theta[1]  # slope of the fitted line

# Training data with the fitted line on top.
plt.scatter(data.population, data.profit, label="training data")
plt.plot(data.population, data.population * m + b, label="prediction")
plt.legend(loc=2)  # without a legend the labels above are never displayed
plt.show()




