# Data Collection - Pt.1
# Import the first dataset, found here: https://www.kaggle.com/umutalpaydn/nba-20202021-season-player-stats?select=nba2021_per_game.csv

from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from sklearn import linear_model

# pd.set_option("display.max_rows", None, "display.max_columns", None)
# Load the 2020-21 per-game player stats; the CSV path is relative to the working directory.
nba2021_df = pd.read_csv('nba2021_per_game.csv')

# Preview the first rows (a bare expression is only displayed when run as a notebook cell).
nba2021_df.head()


# Data Parsing - Pt.1
# Clean the table by making sure all player entries are unique

# Group rows by player so anyone who appears more than once can be spotted.
players = nba2021_df.groupby('Player')

# Names of players that have more than one row in the table.
mult_teams = [player for player, player_df in players if len(player_df.index) > 1]

# if a player played for multiple teams, we only want to keep their total stats
# (the row whose Tm is 'TOT'). A single boolean mask replaces dropping rows one
# at a time while iterating, which rebuilt the whole frame on every drop.
is_partial_row = nba2021_df['Player'].isin(mult_teams) & (nba2021_df['Tm'] != 'TOT')
nba2021_df = nba2021_df[~is_partial_row]

for player in mult_teams:
    # confirming that every player who played for multiple teams now only has one row (uniqueness)
    print(player, nba2021_df[nba2021_df.Player == player].shape[0])

Alex Len 1
Dennis Smith Jr. 1
Derrick Rose 1
James Harden 1
Jarrett Allen 1
Rodions Kurucs 1
Taurean Prince 1
Victor Oladipo 1


# Data Collection - Pt.2
# Now that we have a fully unique dataset of current players, let's study HOF players (scrape data using Requests + BS4)

URL = 'https://www.basketball-reference.com/awards/hof.html'

# Fail fast on network problems instead of hanging or parsing an error page.
req = requests.get(URL, timeout=30)
req.raise_for_status()
soup = bs(req.content, 'html.parser')

# soup.find returns None when the page has no <table>; report that clearly
# instead of failing with an AttributeError on .prettify().
hof_table = soup.find('table')
if hof_table is None:
    raise ValueError('No <table> element found at ' + URL)

# read_html takes a file-like object; passing a literal HTML string is
# deprecated in recent pandas releases, so wrap the markup in StringIO.
table = hof_table.prettify()
hof_df = pd.read_html(StringIO(table), flavor = 'bs4')[0]

hof_df.head()


# Data Parsing - Pt.2

# The website has data as a multi-index, but we don't need the top level
# The website has data as a multi-index, but we don't need the top level
hof_df.columns = hof_df.columns.droplevel()

# The table also contains non-player data, this isn't helpful to us:
# keep only the rows whose Category is exactly 'Player'.
hof_df = hof_df[hof_df['Category'] == 'Player']

# Drop WNBA players since we're only studying NBA. regex=False does a plain
# substring test; na=False keeps rows with a missing name instead of raising.
# .copy() makes the result an independent frame so later column assignments
# do not act on a slice of the scraped table.
is_wnba = hof_df['Name'].str.contains('WNBA', regex=False, na=False)
hof_df = hof_df[~is_wnba].copy()

# The Name column has some extra text after the player's name; trim it down to just the name.
# NOTE: we could do this with a one-line lambda, but nicknames and multi-word names need special handling.
def fix_name(name):
    """Strip the trailing 'Player ...' annotation from a scraped name.

    A name made of exactly two whitespace-separated words is returned
    untouched. Otherwise the words that come before the first 'Player'
    token (or all words, if there is none) are joined with single spaces.
    """
    words = name.split()
    if len(words) == 2:
        # already in the right format
        return name
    if 'Player' in words:
        words = words[:words.index('Player')]
    return ' '.join(words)

# Normalise every entry of the Name column with fix_name
hof_df['Name'] = hof_df['Name'].apply(fix_name)

# Remove the columns that the rest of the analysis never reads
unused_cols = ['Unnamed: 14_level_1', 'G', 'W', 'L', 'W/L%']
hof_df = hof_df.drop(columns=unused_cols)

hof_df = hof_df.sort_values(by=['Year'])

# Some non-NBA players are still in the Hall of Fame data frame.
# Rows with no PTS value are removed, which (per the original author) drops all non-NBA players.
hof_df = hof_df.dropna(subset=['PTS'])

# Convert the main categories from strings to numbers (to prep data for analysis)
for numeric_col in ["Year", "PTS", "TRB", "AST", "BLK", "STL"]:
    hof_df[numeric_col] = pd.to_numeric(hof_df[numeric_col])

hof_df.head()


# Show a visual of how stats of hall of famers have changed through the years
# One scatter plot per stat, each plotted against the HOF table's Year column.
trend_panels = [
    ("PTS", "HOF PTS vs Year"),
    ("AST", "HOF AST vs Year"),
    ("TRB", "HOF REBS vs Year"),
]
for panel_no, (stat_col, heading) in enumerate(trend_panels):
    if panel_no > 0:
        # every plot after the first is drawn on a newly created figure
        plt.figure()
    plt.scatter(hof_df["Year"], hof_df[stat_col])
    plt.title(heading)
    plt.ylim(0)

(0.0, 23.955)


# PTS vs Year again, this time with a least-squares straight line on top.
plt.figure()
hof_years = hof_df["Year"]
hof_ppg = hof_df["PTS"]
plt.scatter(hof_years, hof_ppg)
plt.title("HOF PTS vs Year")
plt.ylim(0)
# degree-1 polyfit returns the slope first, then the intercept
m, b = np.polyfit(hof_years, hof_ppg, 1)
plt.plot(hof_years, m*hof_years + b)

[<matplotlib.lines.Line2D at 0x7fb03fff8fd0>]


# plot residuals 
# Residual = observed PTS minus the value given by the line Year*m + b
# (m and b must already hold the slope and intercept of the PTS-vs-Year fit).
hof_df['pts_rsd'] = hof_df['PTS'] - (hof_df['Year']*m + b)
# Scatter the residuals against Year.
hof_df.plot.scatter('Year', 'pts_rsd', color='grey', title='HOF PTS Residuals Vs. Year')

<AxesSubplot:title={'center':'HOF PTS Residuals Vs. Year'}, xlabel='Year', ylabel='pts_rsd'>


# build a footprint of the "average nba player today" by finding the mean points, rebounds, assists, steals, and blocks scored today
# Column order used for both footprints: points, assists, rebounds, blocks, steals
footprint_cols = ["PTS", "AST", "TRB", "BLK", "STL"]

# footprint of the "average nba player today": the mean of each column in the 2021 table
curr_points, curr_assists, curr_rebounds, curr_blocks, curr_steals = [
    np.mean(nba2021_df[col]) for col in footprint_cols
]

# footprint of the average hall of famer, computed the same way from hof_df
hof_points, hof_assists, hof_rebounds, hof_blocks, hof_steals = [
    np.mean(hof_df[col]) for col in footprint_cols
]


# compare the average player today with the average hall of famer across all categories

# Five side-by-side bar charts: average hall of famer vs. average 2021 player.
plt.figure(figsize=(25, 7))

colors = ['crimson', 'blue']

footprint_panels = [
    ("Points", hof_points, curr_points),
    ("Assists", hof_assists, curr_assists),
    ("Rebounds", hof_rebounds, curr_rebounds),
    ("Blocks", hof_blocks, curr_blocks),
    ("Steals", hof_steals, curr_steals),
]
for slot, (heading, hof_value, curr_value) in enumerate(footprint_panels, start=1):
    plt.subplot(1, 5, slot)
    plt.bar(["Average Hall of Famer", "Average NBA Player"], [hof_value, curr_value], color=colors)
    plt.title(heading)

Text(0.5, 1.0, 'Steals')


# Method to calculate average of a category given DataFrame and category name
def calc_category_average(hof_df, category):
    """Return the mean of one stat column, ignoring missing (NaN) entries.

    Args:
        hof_df: Table indexable by column name. Despite the parameter name,
            any such table works; this file passes both hof_df and nba2021_df.
        category: Name of the column to average.

    Returns:
        The arithmetic mean of the values that are not NaN, or NaN when the
        column is empty or holds only NaN (previously a ZeroDivisionError).

    Raises:
        ValueError or TypeError: if a value cannot be converted with float().
    """
    values = [float(stat) for stat in hof_df[category]]
    present = [value for value in values if not np.isnan(value)]
    if not present:
        # nothing to average: report "no data" as NaN, as np.mean does
        return float('nan')
    return sum(present) / float(len(present))

# Per-category hall-of-fame averages computed with the NaN-skipping helper.
avg_pts = calc_category_average(hof_df, 'PTS')
avg_trb = calc_category_average(hof_df, 'TRB')
avg_ast = calc_category_average(hof_df, 'AST')
avg_stl = calc_category_average(hof_df, 'STL')
avg_blk = calc_category_average(hof_df, 'BLK')

# The list used for per-player comparisons is built from the hof_* means, in
# the order PTS, TRB, AST, STL, BLK. (An earlier assignment of avg_stats from
# the avg_* values was immediately overwritten by this one and has been removed.)
avg_stats = [hof_points, hof_rebounds, hof_assists, hof_steals, hof_blocks]

def get_player_stats(hof_df, player_name):
    """Look up one player's five headline stats.

    Scans the rows of hof_df in order and, for the first row whose 'Name'
    equals player_name, returns [PTS, TRB, AST, STL, BLK] as floats.
    Returns None when no row matches.
    """
    wanted_cols = ('PTS', 'TRB', 'AST', 'STL', 'BLK')
    for _, record in hof_df.iterrows():
        if record['Name'] == player_name:
            return [float(record[col]) for col in wanted_cols]
    return None


# Back to comparisons with today's players
# Shared x-axis labels and bar colours for the two comparison charts below
player_types = ['Average HOF', 'Average Modern Player']
type_colors = ['crimson', 'blue']

# Chart 1: three-point percentage
avg_3pt_hof = calc_category_average(hof_df, '3P%')
avg_3pt_curr = calc_category_average(nba2021_df, '3P%')
fig = plt.figure(figsize=(4, 4))
plt.bar(player_types, [avg_3pt_hof, avg_3pt_curr], color=type_colors, width=0.8)
plt.xlabel("Player Type")
plt.ylabel("3P Percentage")
plt.title("Avg. HOF Player vs. Avg. Modern Player (3P%) ")
plt.show()


# Chart 2: blocks per game
avg_blk_hof = calc_category_average(hof_df, 'BLK')
avg_blk_curr = calc_category_average(nba2021_df, 'BLK')
fig = plt.figure(figsize=(4, 4))
plt.bar(player_types, [avg_blk_hof, avg_blk_curr], color=type_colors, width=0.8)
plt.xlabel("Player Type")
plt.ylabel("Blocks per game")
plt.title("Avg. HOF Player vs. Avg. Modern Player (Blocks)")
plt.show()


# --- Hall of famers: fit assists as a linear function of points ---
pts = np.array(hof_df['PTS']).reshape(-1,1)
ast = np.array(hof_df['AST']).reshape(-1,1)
model = linear_model.LinearRegression()
model.fit(pts, ast)
expected_ast = model.predict(pts)
plt.scatter(pts, ast, label='HOF stats',color='crimson')
plt.plot(pts, expected_ast, color = 'maroon', label='HOF player')
# 2x2 correlation matrix; the off-diagonal entry is the PTS/AST correlation
r1 = np.corrcoef(hof_df['PTS'], hof_df['AST'])

# --- 2021 NBA players ---
# Reject nba players with less than 10 PPG because those aren't players that we
# should consider against HOF players (<10 PPG is low scoring). Comparisons with
# NaN are False, so rows with a missing PTS or AST value are rejected as well.
# One vectorised mask replaces the row-by-row iterrows filter.
qualifies = (nba2021_df['AST'] >= 0.0) & (nba2021_df['PTS'] >= 10.0)
scorers_2021 = nba2021_df.loc[qualifies, ['PTS', 'AST']].astype(float)

r2 = np.corrcoef(scorers_2021['PTS'], scorers_2021['AST'])
pts = scorers_2021['PTS'].to_numpy().reshape(-1,1)
ast = scorers_2021['AST'].to_numpy().reshape(-1,1)
model = linear_model.LinearRegression()
model.fit(pts, ast)
expected_ast = model.predict(pts)
plt.scatter(pts, ast, label='2021 stats')
plt.plot(pts, expected_ast, color = 'navy', label='2021 Player')

plt.title('PPG and APG Relationship among HOF and 2021 players')
plt.xlabel('Points Per Game')
plt.ylabel('Assists Per Game')
plt.legend()

<matplotlib.legend.Legend at 0x7fb03feb06d0>


print(r1) # R is about 0.11 (off-diagonal entry), indicating a weak correlation between PPG and APG among HOF players
print(r2) # R is about 0.58 (off-diagonal entry), indicating a stronger correlation between PPG and APG among current nba players

[[1.         0.10611409]
 [0.10611409 1.        ]]
[[1.         0.57572122]
 [0.57572122 1.        ]]


# Kobe Bryant's [PTS, TRB, AST, STL, BLK], in the same order as avg_stats
kobe_stats = get_player_stats(hof_df, 'Kobe Bryant')
categories = ['PTS', 'TRB', 'AST', 'STL', 'BLK']

# One small bar chart per category: HOF average next to Kobe's value
for i, category in enumerate(categories):
    fig = plt.figure(figsize=(3, 3))
    bar_heights = [avg_stats[i], kobe_stats[i]]
    plt.bar(['Average', 'Kobe'], bar_heights, color=['#fdb927', '#552583'], width=0.6)

    plt.xlabel("Player")
    plt.ylabel("Value")
    plt.title("Average HOF Player vs. Kobe Bryant in " + str(category))
    plt.show()


# Load the per-season player table; the code that follows reads its
# player_name, season, draft_year and per-season stat columns.
nba_df = pd.read_csv('all_seasons.csv')
nba_df.head()


# first we have to average stats for players who played multiple seasons, since the data is per season

# some stats are permanent (e.g. height, name, etc.). these on the other hand, vary by season:
# Goal: the table has one row per player per season; reduce it to one row per
# player whose season-varying stats are that player's career averages.

# some stats are permanent (e.g. height, name, etc.). these on the other hand, vary by season:
stats = ['gp', 'pts', 'reb', 'ast', 'net_rating', 'oreb_pct', 'dreb_pct', 'usg_pct', 'ts_pct', 'ast_pct']

# each group is effectively one player's career stats, tabulated
players = nba_df.groupby('player_name')

# Retained from the earlier implementation in case later cells read them:
# players with more than one season row, and each player's first listed season.
seasons_played = players.size()
mult_seasons = list(seasons_played[seasons_played > 1].index)
first_season = players['season'].first().to_dict()

# Per-player means, aligned row-for-row with nba_df. This replaces the
# iterrows loop that re-filtered the whole frame for every player and rebuilt
# it on every dropped row. Note: pandas' mean skips NaN values, and every stat
# column (including gp) ends up as float.
career_means = players[stats].transform('mean')

# Keep only each player's first row in file order, so the permanent columns
# come from the same row the original loop kept.
is_first_row = players.cumcount() == 0

nba_df = nba_df.copy()
nba_df[stats] = career_means
nba_df = nba_df[is_first_row]

nba_df = nba_df.drop(['season'], axis=1)
nba_df.head()


# now that it's only unique players, let's see if they're in the HOF or not

# Flag every player whose name appears in hof_df's Name column. isin does one
# hashed membership test per name instead of rescanning hof_df for every row.
# Matching is by exact name string only.
nba_df['hof'] = nba_df['player_name'].isin(hof_df['Name'])

# rename some of the columns so that they are consistent with hof_df. This is important for our methods to work
non_hof_df = nba_df[~nba_df['hof']]
non_hof_df = non_hof_df.rename(columns={'player_name': 'Name', 'pts': "PTS", 'reb':"TRB", "ast": "AST"})


non_hof_df.head()


# Original author's counts at this point: 2,201 non hall of famers in non_hof_df
# and 134 hall of famers in hof_df. These depend on the scraped data; the
# classification listing printed later in this file has 140 hall-of-fame rows,
# so re-check with len() rather than relying on these numbers.
jose = non_hof_df[non_hof_df["Name"] == "Jose Calderon"] #store Jose Calderon's data for later


# Cleaning and preparing the non_hof_df

# Remove undrafted players from data set. We do this because we need ways of filtering players based upon their playing status.

# Rows whose draft_year is the literal string "Undrafted" are removed so that
# the column can be converted to numbers and used as a filter below.
index_names = non_hof_df.index[non_hof_df['draft_year'] == "Undrafted"]
non_hof_df = non_hof_df.drop(index_names)

# convert draft year values to numeric
non_hof_df["draft_year"] = pd.to_numeric(non_hof_df["draft_year"])

# Since there could be potential hall of famers still playing today, the non
# hall of fame set should only hold players who can't possibly be playing
# today. Per the original author, Vince Carter was the oldest player in the nba
# last year and he was drafted in 1998, so only players drafted before '98 are
# kept in non_hof_df, just to be sure.
#
# The original author counted 427 players left after this filter; the accuracy
# percentages printed later in this file correspond to 425, so re-check with
# len(non_hof_df).
draft_years = non_hof_df["draft_year"]

# players drafted in 1998 or later go into a separate data set, used at the end
recent_players_df = non_hof_df[draft_years >= 1998]

non_hof_df = non_hof_df[draft_years < 1998]


# Variant of get_player_stats that returns only points, rebounds and assists, because our non-HOF dataset doesn't contain data for steals or blocks
def get_player_stats_v2(hof_df, player_name):
    """Look up one player's points, rebounds and assists.

    Walks the rows of hof_df in order; for the first row whose 'Name' equals
    player_name it returns [PTS, TRB, AST] as floats. Returns None when the
    name is not present.
    """
    for _, record in hof_df.iterrows():
        if record['Name'] != player_name:
            continue
        points = float(record['PTS'])
        rebounds = float(record['TRB'])
        assists = float(record['AST'])
        return [points, rebounds, assists]
    return None


# Input: The DF, name of player, and a threshold representing the number of categories a player must be better than the average hall of famer at.
# Output: Returns True if player_name is adequate enough to be inducted into the hall of fame
def hofOrNah(player_name, threshold, df):
    """Decide whether a player clears the hall-of-fame rule of thumb.

    The player's PTS, TRB and AST (looked up with get_player_stats_v2) are
    compared with the module-level hall-of-fame means hof_points,
    hof_rebounds and hof_assists. The player's score is the number of
    categories in which he is strictly above the mean.

    Args:
        player_name: Name to look up in df's 'Name' column.
        threshold: Minimum number of categories (0-3) the player must win.
        df: Frame with 'Name', 'PTS', 'TRB' and 'AST' columns.

    Returns:
        True if the score reaches the threshold. False otherwise, including
        when the player is not found in df, so False alone does not tell the
        caller whether the player was actually evaluated.
    """
    playerStats = get_player_stats_v2(df, player_name)
    if playerStats is None:
        return False

    # pair each stat with its hall-of-fame mean, in the [PTS, TRB, AST] order
    hof_means = (hof_points, hof_rebounds, hof_assists)
    score = sum(1 for stat, hof_mean in zip(playerStats, hof_means) if stat > hof_mean)

    # if they excel in enough categories, they qualify
    return score >= threshold


# Runs the threshold classifier on the hof and non-hof datasets and returns an accuracy percentage for both classifications
# Input: The number of categories for the threshold (num_categories). Setting print_hof_results / print_non_hof_results to True prints the verdict for every player in that set.
# Output: A tuple containing two floating point values representing the accuracies of hof and non_hof, respectively.
def _count_hof_verdicts(names, df, num_categories, verbose):
    """Count how many of names hofOrNah accepts, optionally printing each verdict."""
    accepted = 0
    for name in names:
        if hofOrNah(name, num_categories, df):
            accepted += 1
            if verbose:
                print("YES", name, "should be a hall of famer")
        elif verbose:
            print("NO",name, "should not be a hall of famer")
    return accepted


def train_model(num_categories, print_hof_results = False, print_non_hof_results = False):
    """Score the threshold rule against both labelled player sets.

    Despite the name, nothing is fitted: every name in the module-level
    non_hof_df and hof_df is run through hofOrNah with the given threshold and
    the share of correct verdicts is reported.

    Args:
        num_categories: Threshold passed to hofOrNah.
        print_hof_results: Print the verdict for every hall of famer.
        print_non_hof_results: Print the verdict for every non hall of famer.

    Returns:
        (hof_accuracy, non_hof_accuracy), both in percent.

    Raises:
        ZeroDivisionError: if either data set is empty.
    """
    non_hof_names = list(non_hof_df["Name"])
    hof_names = list(hof_df["Name"])

    # Non hall of famers: every "YES" verdict is a misclassification.
    if print_non_hof_results:
        print("\nHall of Fame Classifications for non Hall of Famers:")
    hof_count = _count_hof_verdicts(non_hof_names, non_hof_df, num_categories, print_non_hof_results)
    total = len(non_hof_names)
    accuracy = hof_count/total
    non_hof_accuracy = 100 - (accuracy * 100)
    print("Accuracy of classifying non hall of famers:", non_hof_accuracy, "%")

    # Hall of famers: every "YES" verdict is correct.
    if print_hof_results:
        print("\nHall of Fame Classifications for Actual Hall of Famers:")
    hof_count = _count_hof_verdicts(hof_names, hof_df, num_categories, print_hof_results)
    total = len(hof_names)
    accuracy = hof_count/total
    hof_accuracy = accuracy * 100
    print("Accuracy of classifying hall of famers:", hof_accuracy, "%")

    return hof_accuracy, non_hof_accuracy


# Accuracy percentages collected per threshold, appended in the order the
# thresholds are run below.
nhf_accuracy = []  # non hall of famers
hf_accuracy = []  # hall of famers


#Threshold of 1
# train_model returns (hall-of-famer accuracy, non-hall-of-famer accuracy)
hf, nhf = train_model(1)
nhf_accuracy.append(nhf)
hf_accuracy.append(hf)

Accuracy of classifying non hall of famers: 92.70588235294117 %
Accuracy of classifying hall of famers: 85.71428571428571 %


#Threshold of 2: a player must beat the HOF average in at least two of PTS/TRB/AST
hf, nhf = train_model(2)
nhf_accuracy.append(nhf)
hf_accuracy.append(hf)

Accuracy of classifying non hall of famers: 99.05882352941177 %
Accuracy of classifying hall of famers: 45.0 %


#Threshold of 3: a player must beat the HOF average in all three of PTS/TRB/AST
hf, nhf = train_model(3)
nhf_accuracy.append(nhf)
hf_accuracy.append(hf)

Accuracy of classifying non hall of famers: 100.0 %
Accuracy of classifying hall of famers: 5.714285714285714 %


# Two side-by-side bar charts of accuracy for thresholds 1 to 3
plt.figure(figsize=(14, 8))
accuracy_panels = (
    ("Accuracy of non hall of famer classifications as threshold increases", nhf_accuracy),
    ("Accuracy of hall of famer classifications as threshold increases", hf_accuracy),
)
for slot, (heading, accuracies) in enumerate(accuracy_panels, start=1):
    plt.subplot(1, 2, slot)
    plt.title(heading)
    plt.bar(range(1,4), accuracies)
    plt.xlabel("Threshold required")
    plt.ylabel("Accuracy Percentage")
    # accuracies are percentages, so fix the y-axis to 0-100 on both panels
    plt.ylim(0, 100)

(0.0, 100.0)


# Re-run threshold 1, this time printing the verdict for every actual hall of famer.
train_model(1, print_hof_results = True)

Accuracy of classifying non hall of famers: 92.70588235294117 %

Hall of Fame Classifications for Actual Hall of Famers:
YES George Mikan should be a hall of famer
NO Ed Macauley should not be a hall of famer
YES Andy Phillip should be a hall of famer
YES Bob Davies should be a hall of famer
YES Bob Cousy should be a hall of famer
YES Bob Pettit should be a hall of famer
YES Dolph Schayes should be a hall of famer
YES Bill Russell should be a hall of famer
YES Tom Gola should be a hall of famer
YES Bill Sharman should be a hall of famer
YES Elgin Baylor should be a hall of famer
YES Jim Pollard should be a hall of famer
NO Cliff Hagan should not be a hall of famer
NO Joe Fulks should not be a hall of famer
YES Paul Arizin should be a hall of famer
YES Wilt Chamberlain should be a hall of famer
YES Jerry Lucas should be a hall of famer
YES Oscar Robertson should be a hall of famer
YES Jerry West should be a hall of famer
NO Frank Ramsey should not be a hall of famer
YES Slater Martin should be a hall of famer
YES Hal Greer should be a hall of famer
YES Willis Reed should be a hall of famer
YES Jack Twyman should be a hall of famer
YES Dave DeBusschere should be a hall of famer
NO Bill Bradley should not be a hall of famer
NO Sam Jones should not be a hall of famer
YES John Havlicek should be a hall of famer
NO Al Cervi should not be a hall of famer
YES Nate Thurmond should be a hall of famer
YES Billy Cunningham should be a hall of famer
YES Tom Heinsohn should be a hall of famer
NO Bobby Wanzer should not be a hall of famer
YES Pete Maravich should be a hall of famer
NO Bob Houbregs should not be a hall of famer
YES Walt Frazier should be a hall of famer
YES Rick Barry should be a hall of famer
YES Wes Unseld should be a hall of famer
YES Clyde Lovellette should be a hall of famer
YES K.C. Jones should be a hall of famer
YES Lenny Wilkens should be a hall of famer
YES Earl Monroe should be a hall of famer
YES Neil Johnston should be a hall of famer
YES Elvin Hayes should be a hall of famer
YES Dave Bing should be a hall of famer
YES Harry Gallatin should be a hall of famer
YES Dave Cowens should be a hall of famer
YES Tiny Archibald should be a hall of famer
YES Bob Lanier should be a hall of famer
YES Connie Hawkins should be a hall of famer
YES Bill Walton should be a hall of famer
YES Calvin Murphy should be a hall of famer
YES Dick McGuire should be a hall of famer
YES Dan Issel should be a hall of famer
YES Julius Erving should be a hall of famer
YES Walt Bellamy should be a hall of famer
NO Buddy Jeannette should not be a hall of famer
YES Vern Mikkelsen should be a hall of famer
YES Kareem Abdul-Jabbar should be a hall of famer
YES David Thompson should be a hall of famer
YES Gail Goodrich should be a hall of famer
YES George Gervin should be a hall of famer
YES George Yardley should be a hall of famer
YES Bailey Howell should be a hall of famer
YES Alex English should be a hall of famer
YES Arnie Risen should be a hall of famer
YES Larry Bird should be a hall of famer
YES Kevin McHale should be a hall of famer
YES Isiah Thomas should be a hall of famer
YES Bob McAdoo should be a hall of famer
YES Moses Malone should be a hall of famer
NO Drazen Petrovic should not be a hall of famer
YES Magic Johnson should be a hall of famer
NO James Worthy should not be a hall of famer
YES Robert Parish should be a hall of famer
YES Maurice Stokes should be a hall of famer
YES Clyde Drexler should be a hall of famer
YES Charles Barkley should be a hall of famer
YES Joe Dumars should be a hall of famer
YES Dominique Wilkins should be a hall of famer
YES Hakeem Olajuwon should be a hall of famer
YES Patrick Ewing should be a hall of famer
YES Adrian Dantley should be a hall of famer
YES John Stockton should be a hall of famer
YES David Robinson should be a hall of famer
YES Michael Jordan should be a hall of famer
YES Scottie Pippen should be a hall of famer
YES Karl Malone should be a hall of famer
YES Gus Johnson should be a hall of famer
YES Dennis Johnson should be a hall of famer
YES Artis Gilmore should be a hall of famer
YES Chris Mullin should be a hall of famer
NO Arvydas Sabonis should not be a hall of famer
YES Dennis Rodman should be a hall of famer
NO Jamaal Wilkes should not be a hall of famer
YES Chet Walker should be a hall of famer
YES Ralph Sampson should be a hall of famer
YES Reggie Miller should be a hall of famer
YES Mel Daniels should be a hall of famer
YES Roger Brown should be a hall of famer
YES Richie Guerin should be a hall of famer
YES Gary Payton should be a hall of famer
YES Bernard King should be a hall of famer
YES Mitch Richmond should be a hall of famer
YES Alonzo Mourning should be a hall of famer
NO Sarunas Marciulionis should not be a hall of famer
YES Guy Rodgers should be a hall of famer
YES Jo Jo White should be a hall of famer
YES Dikembe Mutombo should be a hall of famer
YES Spencer Haywood should be a hall of famer
YES Louie Dampier should be a hall of famer
YES Zelmo Beaty should be a hall of famer
YES Allen Iverson should be a hall of famer
YES Yao Ming should be a hall of famer
YES Shaquille O'Neal should be a hall of famer
YES Tracy McGrady should be a hall of famer
YES George McGinnis should be a hall of famer
YES Ray Allen should be a hall of famer
YES Maurice Cheeks should be a hall of famer
YES Grant Hill should be a hall of famer
YES Steve Nash should be a hall of famer
YES Dino Radja should be a hall of famer
YES Charlie Scott should be a hall of famer
YES Jason Kidd should be a hall of famer
YES Paul Westphal should be a hall of famer
YES Jack Sikma should be a hall of famer
NO Sidney Moncrief should not be a hall of famer
NO Bobby Jones should not be a hall of famer
YES Vlade Divac should be a hall of famer
NO Charles “Chuck” Cooper should not be a hall of famer
NO Carl Braun should not be a hall of famer
YES Kobe Bryant should be a hall of famer
YES Kevin Garnett should be a hall of famer
YES Tim Duncan should be a hall of famer
YES Chris Webber should be a hall of famer
YES Ben Wallace should be a hall of famer
YES Paul Pierce should be a hall of famer
NO Toni Kukoc should not be a hall of famer
YES Bob Dandridge should be a hall of famer
YES Chris Bosh should be a hall of famer
Accuracy of classifying hall of famers: 85.71428571428571 %

(85.71428571428571, 92.70588235294117)


# Apply the threshold-1 rule to every player in the 2021 table, using the rows
# of recent_players_df for the stats. Names that are not found in
# recent_players_df are silently skipped, because hofOrNah returns False for them.
current_player_names = list(nba2021_df["Player"])
predicted_hall_of_famers = []
for name in current_player_names:
    if hofOrNah(name, 1, recent_players_df):
        print("YES based on his career averages so far,", name, "will be a hall of famer")
        predicted_hall_of_famers.append(name)

YES based on his career averages so far, Steven Adams will be a hall of famer
YES based on his career averages so far, Bam Adebayo will be a hall of famer
YES based on his career averages so far, LaMarcus Aldridge will be a hall of famer
YES based on his career averages so far, Jarrett Allen will be a hall of famer
YES based on his career averages so far, Giannis Antetokounmpo will be a hall of famer
YES based on his career averages so far, Carmelo Anthony will be a hall of famer
YES based on his career averages so far, D.J. Augustin will be a hall of famer
YES based on his career averages so far, Deandre Ayton will be a hall of famer
YES based on his career averages so far, Lonzo Ball will be a hall of famer
YES based on his career averages so far, Bradley Beal will be a hall of famer
YES based on his career averages so far, Eric Bledsoe will be a hall of famer
YES based on his career averages so far, Devin Booker will be a hall of famer
YES based on his career averages so far, Malcolm Brogdon will be a hall of famer
YES based on his career averages so far, Clint Capela will be a hall of famer
YES based on his career averages so far, Wendell Carter Jr. will be a hall of famer
YES based on his career averages so far, Michael Carter-Williams will be a hall of famer
YES based on his career averages so far, John Collins will be a hall of famer
YES based on his career averages so far, Mike Conley will be a hall of famer
YES based on his career averages so far, DeMarcus Cousins will be a hall of famer
YES based on his career averages so far, Stephen Curry will be a hall of famer
YES based on his career averages so far, Anthony Davis will be a hall of famer
YES based on his career averages so far, DeMar DeRozan will be a hall of famer
YES based on his career averages so far, Spencer Dinwiddie will be a hall of famer
YES based on his career averages so far, Andre Drummond will be a hall of famer
YES based on his career averages so far, Kevin Durant will be a hall of famer
YES based on his career averages so far, Joel Embiid will be a hall of famer
YES based on his career averages so far, De'Aaron Fox will be a hall of famer
YES based on his career averages so far, Markelle Fultz will be a hall of famer
YES based on his career averages so far, Darius Garland will be a hall of famer
YES based on his career averages so far, Paul George will be a hall of famer
YES based on his career averages so far, Rudy Gobert will be a hall of famer
YES based on his career averages so far, Devonte' Graham will be a hall of famer
YES based on his career averages so far, Draymond Green will be a hall of famer
YES based on his career averages so far, Blake Griffin will be a hall of famer
YES based on his career averages so far, James Harden will be a hall of famer
YES based on his career averages so far, Jrue Holiday will be a hall of famer
YES based on his career averages so far, Al Horford will be a hall of famer
YES based on his career averages so far, Dwight Howard will be a hall of famer
YES based on his career averages so far, Andre Iguodala will be a hall of famer
YES based on his career averages so far, Kyrie Irving will be a hall of famer
YES based on his career averages so far, Reggie Jackson will be a hall of famer
YES based on his career averages so far, LeBron James will be a hall of famer
YES based on his career averages so far, DeAndre Jordan will be a hall of famer
YES based on his career averages so far, Zach LaVine will be a hall of famer
YES based on his career averages so far, Kawhi Leonard will be a hall of famer
YES based on his career averages so far, Damian Lillard will be a hall of famer
YES based on his career averages so far, Kevin Love will be a hall of famer
YES based on his career averages so far, Kyle Lowry will be a hall of famer
YES based on his career averages so far, Donovan Mitchell will be a hall of famer
YES based on his career averages so far, Ja Morant will be a hall of famer
YES based on his career averages so far, Jamal Murray will be a hall of famer
YES based on his career averages so far, Victor Oladipo will be a hall of famer
YES based on his career averages so far, Chris Paul will be a hall of famer
YES based on his career averages so far, Elfrid Payton will be a hall of famer
YES based on his career averages so far, Rajon Rondo will be a hall of famer
YES based on his career averages so far, Derrick Rose will be a hall of famer
YES based on his career averages so far, Ricky Rubio will be a hall of famer
YES based on his career averages so far, D'Angelo Russell will be a hall of famer
YES based on his career averages so far, Domantas Sabonis will be a hall of famer
YES based on his career averages so far, Collin Sexton will be a hall of famer
YES based on his career averages so far, Ben Simmons will be a hall of famer
YES based on his career averages so far, Marcus Smart will be a hall of famer
YES based on his career averages so far, Dennis Smith Jr. will be a hall of famer
YES based on his career averages so far, Jeff Teague will be a hall of famer
YES based on his career averages so far, Tristan Thompson will be a hall of famer
YES based on his career averages so far, Karl-Anthony Towns will be a hall of famer
YES based on his career averages so far, Kemba Walker will be a hall of famer
YES based on his career averages so far, John Wall will be a hall of famer
YES based on his career averages so far, T.J. Warren will be a hall of famer
YES based on his career averages so far, Russell Westbrook will be a hall of famer
YES based on his career averages so far, Hassan Whiteside will be a hall of famer
YES based on his career averages so far, Andrew Wiggins will be a hall of famer
YES based on his career averages so far, Zion Williamson will be a hall of famer
YES based on his career averages so far, Trae Young will be a hall of famer


# Same prediction as before with the stricter threshold of 2: a player must
# beat the HOF average in at least two of PTS/TRB/AST. Names that are not found
# in recent_players_df are silently skipped, because hofOrNah returns False for them.
current_player_names = list(nba2021_df["Player"])
predicted_hall_of_famers = []
for name in current_player_names:
    if hofOrNah(name, 2, recent_players_df):
        print("YES based on his career averages so far,", name, "will be a hall of famer")
        predicted_hall_of_famers.append(name)

YES based on his career averages so far, LaMarcus Aldridge will be a hall of famer
YES based on his career averages so far, Giannis Antetokounmpo will be a hall of famer
YES based on his career averages so far, Bradley Beal will be a hall of famer
YES based on his career averages so far, Devin Booker will be a hall of famer
YES based on his career averages so far, DeMarcus Cousins will be a hall of famer
YES based on his career averages so far, Stephen Curry will be a hall of famer
YES based on his career averages so far, Anthony Davis will be a hall of famer
YES based on his career averages so far, Kevin Durant will be a hall of famer
YES based on his career averages so far, Joel Embiid will be a hall of famer
YES based on his career averages so far, Blake Griffin will be a hall of famer
YES based on his career averages so far, James Harden will be a hall of famer
YES based on his career averages so far, Kyrie Irving will be a hall of famer
YES based on his career averages so far, LeBron James will be a hall of famer
YES based on his career averages so far, Damian Lillard will be a hall of famer
YES based on his career averages so far, Kevin Love will be a hall of famer
YES based on his career averages so far, Donovan Mitchell will be a hall of famer
YES based on his career averages so far, Chris Paul will be a hall of famer
YES based on his career averages so far, Derrick Rose will be a hall of famer
YES based on his career averages so far, Ben Simmons will be a hall of famer
YES based on his career averages so far, Karl-Anthony Towns will be a hall of famer
YES based on his career averages so far, Kemba Walker will be a hall of famer
YES based on his career averages so far, John Wall will be a hall of famer
YES based on his career averages so far, Russell Westbrook will be a hall of famer
YES based on his career averages so far, Trae Young will be a hall of famer


# Single-player check: run the classifier on Jose Calderon and report the
# verdict either way.
name = "Jose Calderon"
is_hall_of_famer = hofOrNah(name, 1, recent_players_df) == True
if not is_hall_of_famer:
    print("NO", name, "will not be a hall of famer")
else:
    print("YES based on his career averages so far,", name, "will be a hall of famer")

NO Jose Calderon will not be a hall of famer


# Summarise Jose Calderon's numbers with the notebook's stats helper and show
# them. The printed result is a three-element list of floats; the comparison
# plots further down read it by position (indices 0, 1 and 2).
# NOTE(review): `jose` and `get_player_stats_v2` are defined elsewhere in the
# notebook -- presumably `jose` holds Calderon's rows; confirm there.
jose_stats = get_player_stats_v2(jose, "Jose Calderon")
print(jose_stats)

[8.457142857142857, 2.3785714285714286, 5.528571428571427]


# Three side-by-side bar charts comparing the average Hall of Famer with
# Jose Calderon on points, assists and rebounds.
#
# NOTE(review): get_player_stats_v2 is defined elsewhere in the notebook. Its
# printed result for Calderon, [8.46, 2.38, 5.53], together with the
# PTS / TRB / AST column order of the player tables, indicates the list is
# ordered [points, rebounds, assists]. The assists panel therefore reads
# index 2 and the rebounds panel index 1 (the previous version had these two
# swapped). Confirm against the helper's definition.
bar_labels = ["Average Hall of Famer", "Jose Calderon"]

plt.figure(figsize=(12, 5))

plt.subplot(1, 3, 1)
plt.bar(bar_labels, [hof_points, jose_stats[0]], color=colors)
plt.title("Points")

plt.subplot(1, 3, 2)
plt.bar(bar_labels, [hof_assists, jose_stats[2]], color=colors)
plt.title("Assists")

plt.subplot(1, 3, 3)
plt.bar(bar_labels, [hof_rebounds, jose_stats[1]], color=colors)
plt.title("Rebounds")

Text(0.5, 1.0, 'Rebounds')


# Bare expression: in a notebook cell this renders the Hall of Fame table.
hof_df


# Bar chart of points per game for a 20-row window of the Hall of Fame table
# (row positions 113 through 132), saved to disk as "ppg_plot".
hof_window = hof_df.iloc[113:133]  # positional row slice, same rows as [113:133] on each column

plt.figure(figsize=(33, 10))
plt.bar(hof_window["Name"], hof_window["PTS"], color=colors)
plt.title("Hall of Famer's Points Per Game Averages")
plt.savefig("ppg_plot")

	Player	Pos	Age	Tm	G	GS	MP	FG	FGA	FG%	...	FT%	ORB	DRB	TRB	AST	STL	BLK	TOV	PF	PTS
0	Precious Achiuwa	PF	21	MIA	28	2	14.6	2.6	4.4	0.590	...	0.561	1.3	2.7	4.0	0.6	0.4	0.5	1.0	1.9	6.5
1	Jaylen Adams	PG	24	MIL	6	0	2.8	0.2	1.3	0.125	...	0.000	0.0	0.5	0.5	0.3	0.0	0.0	0.0	0.2	0.3
2	Steven Adams	C	27	NOP	27	27	28.1	3.5	5.8	0.603	...	0.468	4.3	4.6	8.9	2.1	1.0	0.6	1.7	1.9	8.0
3	Bam Adebayo	C	23	MIA	26	26	33.6	7.4	12.9	0.573	...	0.841	1.9	7.3	9.2	5.3	1.0	1.0	3.0	2.6	19.9
4	LaMarcus Aldridge	C	35	SAS	18	18	26.7	5.9	12.5	0.476	...	0.762	0.8	3.5	4.3	1.9	0.4	0.9	0.9	1.5	14.1

	Year	Name	Category	PTS	TRB	AST	STL	BLK	FG%	3P%	FT%	WS	WS/48
547	1959	George Mikan	Player	23.1	13.4	2.8	NaN	NaN	.404	NaN	.782	108.7	.249
530	1960	Ed Macauley	Player	17.5	7.5	3.2	NaN	NaN	.436	NaN	.761	100.4	.196
513	1961	Andy Phillip	Player	9.1	4.4	5.4	NaN	NaN	.368	NaN	.695	60.8	.101
458	1970	Bob Davies	Player	14.3	2.9	4.9	NaN	NaN	.378	NaN	.759	49.7	.148
452	1971	Bob Cousy	Player	18.4	5.2	7.5	NaN	NaN	.375	NaN	.803	91.1	.139

	Unnamed: 0	player_name	team_abbreviation	age	player_height	player_weight	college	country	draft_year	draft_round	...	pts	reb	ast	net_rating	oreb_pct	dreb_pct	usg_pct	ts_pct	ast_pct	season
0	0	Dennis Rodman	CHI	36.0	198.12	99.790240	Southeastern Oklahoma State	USA	1986	2	...	5.7	16.1	3.1	16.1	0.186	0.323	0.100	0.479	0.113	1996-97
1	1	Dwayne Schintzius	LAC	28.0	215.90	117.933920	Florida	USA	1990	1	...	2.3	1.5	0.3	12.3	0.078	0.151	0.175	0.430	0.048	1996-97
2	2	Earl Cureton	TOR	39.0	205.74	95.254320	Detroit Mercy	USA	1979	3	...	0.8	1.0	0.4	-2.1	0.105	0.102	0.103	0.376	0.148	1996-97
3	3	Ed O'Bannon	DAL	24.0	203.20	100.697424	UCLA	USA	1995	1	...	3.7	2.3	0.6	-8.7	0.060	0.149	0.167	0.399	0.077	1996-97
4	4	Ed Pinckney	MIA	34.0	205.74	108.862080	Villanova	USA	1985	1	...	2.4	2.4	0.2	-11.2	0.109	0.179	0.127	0.611	0.040	1996-97

	Unnamed: 0	player_name	team_abbreviation	age	player_height	player_weight	college	country	draft_year	draft_round	...	gp	pts	reb	ast	net_rating	oreb_pct	dreb_pct	usg_pct	ts_pct	ast_pct
0	0	Dennis Rodman	CHI	36.0	198.12	99.790240	Southeastern Oklahoma State	USA	1986	2	...	42	3.825	14.15	2.125	3.575	0.15125	0.33525	0.07925	0.44575	0.0835
1	1	Dwayne Schintzius	LAC	28.0	215.90	117.933920	Florida	USA	1990	1	...	15	1.500	1.35	0.400	-8.450	0.08950	0.16350	0.17500	0.37000	0.1240
2	2	Earl Cureton	TOR	39.0	205.74	95.254320	Detroit Mercy	USA	1979	3	...	9	0.800	1.00	0.400	-2.100	0.10500	0.10200	0.10300	0.37600	0.1480
3	3	Ed O'Bannon	DAL	24.0	203.20	100.697424	UCLA	USA	1995	1	...	64	3.700	2.30	0.600	-8.700	0.06000	0.14900	0.16700	0.39900	0.0770
4	4	Ed Pinckney	MIA	34.0	205.74	108.862080	Villanova	USA	1985	1	...	27	2.400	2.40	0.200	-11.200	0.10900	0.17900	0.12700	0.61100	0.0400

	Unnamed: 0	Name	team_abbreviation	age	player_height	player_weight	college	country	draft_year	draft_round	...	PTS	TRB	AST	net_rating	oreb_pct	dreb_pct	usg_pct	ts_pct	ast_pct	hof
1	1	Dwayne Schintzius	LAC	28.0	215.90	117.933920	Florida	USA	1990	1	...	1.500000	1.35	0.400000	-8.450000	0.0895	0.163500	0.175	0.370	0.124000	False
2	2	Earl Cureton	TOR	39.0	205.74	95.254320	Detroit Mercy	USA	1979	3	...	0.800000	1.00	0.400000	-2.100000	0.1050	0.102000	0.103	0.376	0.148000	False
3	3	Ed O'Bannon	DAL	24.0	203.20	100.697424	UCLA	USA	1995	1	...	3.700000	2.30	0.600000	-8.700000	0.0600	0.149000	0.167	0.399	0.077000	False
4	4	Ed Pinckney	MIA	34.0	205.74	108.862080	Villanova	USA	1985	1	...	2.400000	2.40	0.200000	-11.200000	0.1090	0.179000	0.127	0.611	0.040000	False
5	5	Eddie Johnson	HOU	38.0	200.66	97.522280	Illinois	USA	1981	2	...	6.866667	1.80	0.833333	-6.866667	0.0250	0.121333	0.238	0.509	0.089333	False

What's in a Name: NBA Hall Of Fame¶

A data science project by Surmud Jamil, Sinaan Younus, & Ritesh Verma¶

First, let's collect our data.¶

Data Parsing¶

Data Collection¶

How have the stats of Hall of Fame inductees changed over the years?¶

Let's focus on points for a second.¶

How do Hall of Famers compare with NBA players today?¶

Your Average Hall of Famer vs. Your Average player today¶

Twice the Power¶

Creating functions¶

How has the 3 point shot changed throughout the history of the NBA? Are today's players better shooters?¶

Today's players have more accuracy¶

Are hall of famers more physical and defensive than today's players?¶

Hall of Famers are more physical¶

How selfish does a HOF player have to be?¶

Let's now zone in on a comparison entirely within the group of Hall of Fame players.¶

How does Kobe Compare?¶

Machine Learning¶

How can we determine whether a player should be inducted?¶

Trying out the model¶

Plotting the accuracy as the threshold parameter changes¶

Training Results¶

Let's see which of the actual hall of famers are classified into the hall of fame, according to our model.¶

How did it do?¶

Which of Today's NBA players will make it to the Hall of Fame?¶

Which list do you agree with?¶

Exploratory Analysis: Is Jose Calderon a Hall of Famer?¶

Jose Calderon vs The Average Hall of Famer¶

Jose Calderon Findings¶

Conclusion¶

What's in a Name: NBA Hall Of Fame¶

A data science project by Surmud Jamil, Sinaan Younus, & Ritesh Verma¶

First, let's collect our data.¶

Data Parsing¶

Data Collection¶

How have the stats of Hall of Fame inductees changed over the years?¶

Let's focus on points for a second.¶

How do Hall of Famers compare with NBA players today?¶

Your Average Hall of Famer vs. Your Average player today¶

Twice the Power¶

Creating functions¶

How has the 3 point shot changed throughout the history of the NBA? Are today's players better shooters?¶

Today's players have more accuracy¶

Are hall of famers more physical and defensive than today's players?¶

Hall of Famers are more physical¶

How selfish does a HOF player have to be?¶

Sharing is caring (nowadays at least)¶

Let's now zone in on a comparison entirely within the group of Hall of Fame players.¶

How does Kobe Compare?¶

Machine Learning¶

How can we determine whether a player should be inducted?¶

Trying out the model¶

Plotting the accuracy as the threshold parameter changes¶

Training Results¶

Let's see which of the actual hall of famers are classified into the hall of fame, according to our model.¶

How did it do?¶

Which of Today's NBA players will make it to the Hall of Fame?¶

Which list do you agree with?¶

Exploratory Analysis: Is Jose Calderon a Hall of Famer?¶

Jose Calderon vs The Average Hall of Famer¶

Jose Calderon Findings¶

Conclusion¶