python大賽對名資料探勘消除隊名的影響

# Predict whether the home team wins an NBA game with a decision tree.
#
# Two feature sets are evaluated with cross-validation:
#   1. whether each team won its previous game;
#   2. the above plus whether the home team ranks higher in the standings.
from collections import defaultdict

import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier

# --- Load the data set -----------------------------------------------------
data_filename = "nba15_16_dataset/basketball.csv"
dataset = pd.read_csv(data_filename, encoding="utf-8")

# --- Clean the data --------------------------------------------------------
# 1. Give the columns explicit names (the file is expected to have exactly
#    these ten columns, in this order).
dataset.columns = [
    "date", "start(et)", "visitor team", "visitorpts", "home team",
    "homepts", "ot?", "score type", "attend.", "notes",
]
# 2. Parse the date column after renaming, so parsing does not depend on how
#    the header is spelled in the raw file.
dataset["date"] = pd.to_datetime(dataset["date"])

# --- Extract the target: did the home team win? ----------------------------
dataset["homewin"] = dataset["visitorpts"] < dataset["homepts"]
y_true = dataset["homewin"].values

# --- Feature: did each team win its previous game? -------------------------
# NOTE(review): this walks the rows in file order and therefore assumes the
# CSV is already sorted chronologically -- confirm against the data file.
won_last = defaultdict(int)  # team -> 1 if it won its last game, else 0
home_last_win = []
visitor_last_win = []
for home_team, visitor_team, home_win in zip(
    dataset["home team"], dataset["visitor team"], dataset["homewin"]
):
    # Record the state *before* this game, then update it with the result.
    home_last_win.append(won_last[home_team])
    visitor_last_win.append(won_last[visitor_team])
    won_last[home_team] = int(home_win)
    won_last[visitor_team] = 1 - int(home_win)
dataset["homelastwin"] = home_last_win
dataset["visitorlastwin"] = visitor_last_win

# --- Decision tree on the previous-win features ----------------------------
clf = DecisionTreeClassifier(random_state=14)
x_previouswins = dataset[["homelastwin", "visitorlastwin"]].values
scores = cross_val_score(clf, x_previouswins, y_true, scoring="accuracy")
print(scores)
print("accuracy: {0:.1f}%".format(np.mean(scores) * 100))

# --- Feature: does the home team rank higher in the standings? -------------
standings_filename = "nba15_16_dataset/standings.csv"
standings = pd.read_csv(standings_filename, skiprows=0, encoding="utf-8")
# NOTE(review): the column names "team" and "rk" are kept exactly as found;
# verify their capitalisation against the header of standings.csv.
rank_by_team = {}
for team, rank in zip(standings["team"], standings["rk"]):
    # Keep the first row for a team, matching a `.values[0]` lookup.
    rank_by_team.setdefault(team, rank)

# A team missing from the standings raises KeyError instead of being scored
# silently. A smaller "rk" value is treated as the higher rank.
dataset["hometeamrankshigher"] = [
    int(rank_by_team[home_team] < rank_by_team[visitor_team])
    for home_team, visitor_team in zip(
        dataset["home team"], dataset["visitor team"]
    )
]
x_homehigher = dataset[
    ["hometeamrankshigher", "homelastwin", "visitorlastwin"]
].values

# --- Decision tree on ranking + previous-win features ----------------------
clf = DecisionTreeClassifier(random_state=14)
scores = cross_val_score(clf, x_homehigher, y_true, scoring="accuracy")
print("accuracy: {0:.1f}%".format(np.mean(scores) * 100))

python大賽對名資料探勘消除隊名的影響

python資料探勘面試字節跳動資料探勘面試總結

python資料分析與挖掘實戰資料探勘基礎

資料探勘技術對ERP的影響

python大賽對名 資料探勘 消除隊名的影響

python資料探勘面試 字節跳動資料探勘面試總結

python資料分析與挖掘實戰 資料探勘基礎

資料探勘技術對ERP的影響

相關推薦

python大賽對名資料探勘消除隊名的影響

python資料探勘面試字節跳動資料探勘面試總結

python資料分析與挖掘實戰資料探勘基礎