!pip install billboard.py pandas spotipy


# data collection
import billboard
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import re
import urllib.parse
import pandas as pd


# visualization
import matplotlib.pyplot as plt
import seaborn as sns

# hypothesis testing
from scipy.stats import pearsonr

# machine learning
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error


def get_year_end_songs(year):
    """Fetch the 'hot-100-songs' Billboard chart for a given year.

    Args:
        year: Chart year, forwarded to ``billboard.ChartData`` as ``year``.

    Returns:
        A list with one dict per chart entry, holding the entry's "title"
        and "artist" together with the requested "year".
    """
    year_end_chart = billboard.ChartData('hot-100-songs', year=year)

    entries = []
    for entry in year_end_chart:
        entries.append({"title": entry.title, "artist": entry.artist, "year": year})
    return entries


import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

import os

# Spotify API credentials. The SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET
# environment variables take precedence so that real secrets never have to be
# stored in the notebook; the literals are only kept as backward-compatible
# fallbacks ('Secret_Key' is presumably a placeholder -- confirm before running).
client_id = os.environ.get('SPOTIPY_CLIENT_ID', '92b8fcb838ea4e97b6a8ebb14b8b7ef4')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET', 'Secret_Key')

# Shared Spotify client used by the genre and mood lookups below.
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)


def clean_artist_name(artist):
    """Return the first-credited artist from a chart artist credit.

    The credit is cut at the first collaboration separator (" Featuring ",
    " x ", " X ", " & ", " + ", " / ", ", ", " With " or " Duet With ") and
    the remaining text is stripped of surrounding whitespace.
    """
    separators = re.compile(r' Featuring | x | X | & | \+ | \/ |, | With | Duet With ')
    # Only the text before the first separator is needed.
    head = separators.split(artist, maxsplit=1)[0]
    return head.strip()

def add_genres_to_songs(songs):
    """Attach a Spotify-derived "genre" string to every song dict, in place.

    For each song, Spotify is searched for a track matching the title and
    the primary artist (see ``clean_artist_name``). The genres of the first
    artist credited on the top hit are joined with ", " and stored under
    ``song["genre"]``. "Unknown" is stored when no track is found or when
    the artist has no genres listed.

    Args:
        songs: List of dicts with at least "title" and "artist" keys.

    Returns:
        The same list object, with every dict updated.
    """
    # Many chart entries share an artist; remember the genres per artist id
    # so each artist is requested only once per call.
    genres_by_artist_id = {}

    for song in songs:
        cleaned_artist = clean_artist_name(song['artist'])

        # Pass the raw title: the HTTP layer under spotipy URL-encodes query
        # parameters itself, so quoting here would double-encode the title
        # (a space would reach Spotify as the literal text "%20").
        query = f"track:{song['title']} artist:{cleaned_artist}"
        results = sp.search(q=query, type="track", limit=1)

        items = results["tracks"]["items"]
        if not items:
            song["genre"] = "Unknown"
            continue

        artist_id = items[0]["artists"][0]["id"]
        if artist_id not in genres_by_artist_id:
            genres_by_artist_id[artist_id] = sp.artist(artist_id)["genres"]

        # An empty genre list would otherwise produce "", which is
        # inconsistent with the "Unknown" used when nothing is found.
        song["genre"] = ", ".join(genres_by_artist_id[artist_id]) or "Unknown"
    return songs


# Chart years to collect: 2006 through 2022 (the range end is exclusive).
years = range(2006, 2023)
all_songs = []

# Fetch each year's chart and tag its songs with genres before pooling them.
for year in years:
    songs_for_year = get_year_end_songs(year)
    all_songs += add_genres_to_songs(songs_for_year)

df_all_songs_initial = pd.DataFrame(data=all_songs)
df_all_songs_initial.head()


# Export the freshly collected table to an Excel workbook.
filename = "billboard_songs.xlsx"
df_all_songs_initial.to_excel(
    filename,
    index=False,
    engine='openpyxl',
)


# The rest of the analysis works from the CSV hosted at this URL.
url = "https://raw.githubusercontent.com/MinJi-Kim710/data/main/billboard_songs.xlsx%20-%20Sheet1.csv"
df_all_songs = pd.read_csv(url)

df_all_songs.head()


# Collaboration separators recognised in artist credits. Longer separators are
# listed first so that " Duet With " is never read as a plain " With ".
_ARTIST_DELIMITERS = (" Duet With ", " Featuring ", " With ", " & ", " x ", " X ", " + ", "/", ",")
_ARTIST_DELIMITER_RE = re.compile("|".join(re.escape(d) for d in _ARTIST_DELIMITERS))


def split_artists(artist):
    """Split an artist credit into the individual artist names.

    Unlike a split on the first matching delimiter only, every delimiter
    occurring in the credit is honoured, so "A Featuring B & C" yields
    ["A", "B", "C"] and "A Duet With B" yields ["A", "B"].

    Args:
        artist: The artist credit string. Non-string values (for example a
            missing value) are returned unchanged inside a one-element list.

    Returns:
        A list of whitespace-stripped artist names, or ``[artist]`` when the
        credit contains no delimiter.
    """
    if not isinstance(artist, str):
        return [artist]
    if _ARTIST_DELIMITER_RE.search(artist) is None:
        return [artist]  # if no delimiter is found, return the artist string as a list

    names = [part.strip() for part in _ARTIST_DELIMITER_RE.split(artist)]
    # Drop empty fragments left by adjacent or trailing delimiters.
    names = [name for name in names if name]
    return names or [artist]


# Build, for every chart entry, the list of individual artist names.
df_all_songs['split_artist'] = df_all_songs['artist'].apply(split_artists)

# Explode this new column to have separate rows for each artist.
# NOTE: this frame is created before 'genre' is turned into a list below, so
# df_split_artist keeps 'genre' as the original comma-separated string.
df_split_artist = df_all_songs.explode('split_artist')
df_split_artist.head()


# Convert the "genre" column into a list of genres.
# This overwrites the column on df_all_songs itself.
df_all_songs['genre'] = df_all_songs['genre'].str.split(', ')

# Explode the "genre" column to create separate rows for each genre
# (the 'split_artist' list column is carried along unchanged).
df_split_genre = df_all_songs.explode('genre')
df_split_genre.head()


def get_mood(artist, track):
    """Classify a track's mood from its Spotify "valence" audio feature.

    Spotify is searched for ``track`` by ``artist`` and the audio features
    of the top hit are fetched.

    Returns:
        "Happy" for valence >= 0.7, "Neutral" for 0.4 <= valence < 0.7 and
        "Sad" otherwise; None when the search has no hit, the audio features
        are missing, or no valence value is present.
    """
    results = sp.search(q=f'track:{track} artist:{artist}', type="track", limit=1)
    hits = results['tracks']['items']
    if not hits:
        return None

    # audio_features returns one entry per requested track; take the only one.
    features = sp.audio_features(hits[0]['id'])[0]
    valence = features.get('valence') if features else None
    if valence is None:
        return None

    # Bucket the valence score, checking the highest band first.
    if valence >= 0.7:
        return "Happy"
    if valence >= 0.4:
        return "Neutral"
    return "Sad"


# Add a mood column (one Spotify lookup per exploded artist row)
df_split_artist['mood'] = df_split_artist.apply(lambda row: get_mood(row['split_artist'], row['title']), axis=1)

# Collapse the per-artist rows back to one row per chart entry. The key is
# (title, artist, year) rather than the title alone: de-duplicating on the
# title only would also discard different songs that merely share a title and
# the later appearances of a song that charted in more than one year.
df_cleaned = df_split_artist.drop_duplicates(subset=['title', 'artist', 'year'], keep='first')
df_cleaned.head()


# Export the song table with moods to an Excel workbook.
filename = "billboard_songs_with_mood.xlsx"
df_cleaned.to_excel(filename, index=False, engine='openpyxl')


# Song/mood table used by the analysis below, read from a hosted CSV
# (presumably a CSV export of the workbook written above -- confirm).
url = "https://raw.githubusercontent.com/MinJi-Kim710/data/main/billboard_songs_with_mood.csv"
df_music = pd.read_csv(url)

df_music.head()


# Yearly U.S. economic indicators; later cells read the 'year',
# 'GDP growth (%)' and 'unemployment Rate (%)' columns.
url = "https://raw.githubusercontent.com/MinJi-Kim710/data/main/USA_GDP_Unemployment.csv"
df_economy = pd.read_csv(url)

df_economy.head()


# List all genres
# Missing genres (NaN) are dropped first: NaN is a float and cannot be ordered
# against the genre strings, so sorting would raise a TypeError otherwise.
exploded_genres = df_split_genre['genre'].str.split(', ').explode().dropna()
unique_genres = exploded_genres.unique()
genre_list = sorted(unique_genres)
print(genre_list)

['acoustic pop', 'adult standards', 'afro r&b', 'afrobeats', 'afrofuturism', 'afropop', 'alabama rap', 'alt z', 'alternative dance', 'alternative hip hop', 'alternative metal', 'alternative r&b', 'alternative rock', 'american folk revival', 'arkansas hip hop', 'art pop', 'art rock', 'asian american hip hop', 'atl hip hop', 'atl trap', 'australian country', 'australian dance', 'australian electropop', 'australian hip hop', 'australian indie', 'australian pop', 'azonto', 'azontobeats', 'barbadian pop', 'baroque pop', 'baton rouge rap', 'bedroom pop', 'bedroom soul', 'big room', 'black americana', 'blues rock', 'boy band', 'british soul', 'bronx hip hop', 'brooklyn drill', 'brostep', 'cali rap', 'canadian contemporary r&b', 'canadian hip hop', 'canadian latin', 'canadian old school hip hop', 'canadian pop', 'canadian rock', 'canadian trap', 'candy pop', 'cartoon', 'ccm', 'celtic rock', 'chicago bop', 'chicago drill', 'chicago rap', 'chicano rap', "children's music", 'christian alternative rock', 'christian music', 'classic oklahoma country', 'classic rock', 'classic texas country', 'colombian pop', 'comedy rap', 'comic', 'complextro', 'conscious hip hop', 'contemporary country', 'contemporary r&b', 'country', 'country dawn', 'country pop', 'country rap', 'country road', 'country rock', 'crunk', 'dance pop', 'dance rock', 'dancehall', 'danish pop', 'deep dance pop', 'deep underground hip hop', 'detroit hip hop', 'dfw rap', 'dirty south rap', 'dong-yo', 'downtempo', 'drill', 'dutch edm', 'dutch pop', 'east coast hip hop', 'easy listening', 'edm', 'electro', 'electro house', 'electronic', 'electronic trap', 'electropop', 'electropowerpop', 'emo', 'emo rap', 'escape room', 'etherpop', 'eurodance', 'europop', 'fake', 'filter house', 'florida drill', 'florida rap', 'folk-pop', 'funk metal', 'funk rock', 'futuristic swag', 'g funk', 'gambian hip hop', 'gangster rap', 'gauze pop', 'gen z singer-songwriter', 'german techno', 'ghanaian hip hop', 'girl group', 'glam metal', 
'glam rock', 'grime', 'hard rock', 'hardcore hip hop', 'harlem hip hop', 'heartland rock', 'hip hop', 'hip pop', 'hollywood', 'house', 'houston rap', 'hyphy', 'idol', 'indie pop rap', 'indie poptimism', 'indie r&b', 'indie rock', 'indie rock italiano', 'indie soul', 'indietronica', 'irish rock', 'irish singer-songwriter', 'italian pop', 'k-pop', 'k-pop boy group', 'k-rap', 'kentucky hip hop', 'korean old school hip hop', 'la indie', 'latin', 'latin hip hop', 'latin pop', 'latin viral pop', 'lds youth', 'lgbtq+ hip hop', 'lilith', 'lounge', 'lullaby', 'melbourne bounce international', 'mellow gold', 'melodic rap', 'memphis hip hop', 'metropopolis', 'mexican classic rock', 'mexican pop', 'miami hip hop', 'minnesota hip hop', 'mississippi hip hop', 'modern alternative pop', 'modern alternative rock', 'modern blues rock', 'modern country pop', 'modern country rock', 'modern folk rock', 'modern rock', 'modern uplift', 'moombahton', 'movie tunes', 'neo mellow', 'neo soul', 'neo-singer-songwriter', 'neo-synthpop', 'neon pop punk', 'new jersey rap', 'new jersey underground rap', 'new orleans rap', 'new rave', 'new wave pop', 'nigerian hip hop', 'nigerian pop', 'north carolina hip hop', 'nu metal', 'nyc rap', 'nz pop', 'oakland hip hop', 'ohio hip hop', 'old school atlanta hip hop', 'orchestral soundtrack', 'outlaw country', 'permanent wave', 'philly rap', 'piano rock', 'pinoy hip hop', 'pittsburgh rap', 'pixie', 'plugg', 'pluggnb', 'political hip hop', 'pop', 'pop dance', 'pop edm', 'pop emo', 'pop punk', 'pop r&b', 'pop rap', 'pop rock', 'pop soul', 'portland hip hop', 'post-grunge', 'post-teen pop', 'pov: indie', 'progressive electro house', 'progressive house', 'puerto rican pop', 'punk', 'queens hip hop', 'quiet storm', 'r&b', 'rage rap', 'rap', 'rap conscient', 'rap kreyol', 'rap latina', 'rap metal', 'reggae fusion', 'reggaeton', 'reggaeton colombiano', 'rhode island rap', 'rock', 'rockabilly', 'romanian house', 'romanian pop', 'sad lo-fi', 'sad rap', 'san marcos tx 
indie', 'scam rap', 'scandipop', 'scottish singer-songwriter', 'screamo', 'seattle hip hop', 'shiver pop', 'show tunes', 'singer-songwriter', 'singer-songwriter pop', 'slap house', 'sleaze rock', 'soul', 'soundtrack', 'south african rock', 'south carolina hip hop', 'southern hip hop', 'st louis rap', 'stomp and holler', 'stomp pop', 'swedish electropop', 'swedish pop', 'swedish synthpop', 'talent show', 'teen pop', 'tennessee hip hop', 'texas latin rap', 'trap', 'trap latino', 'trap queen', 'tropical house', 'uk americana', 'uk contemporary r&b', 'uk dance', 'uk funky', 'uk pop', 'underground hip hop', 'urban contemporary', 'urbano latino', 'viral pop', 'viral rap', 'viral trap', 'virgin islands reggae', 'west coast rap', 'wrestling']


# Lookup table from a fine-grained genre string (key) to the broad genre
# bucket (value) used for the per-year genre analysis. Genres that are not
# listed here are passed through unchanged by map_genre().
# NOTE(review): a few closely related keys land in different buckets, e.g.
# 'dance pop' -> 'Dance' vs 'pop dance' -> 'Pop', 'country rock' -> 'Rock' vs
# 'modern country rock' -> 'Country', and 'easy listening' -> 'Pop' vs
# 'lounge' -> 'Easy Listening'. Confirm these are intentional.
genre_mapping = {
    'acoustic pop': 'Pop',
    'adult standards': 'Pop',
    'afro r&b': 'R&B',
    'afrobeats': 'World Music',
    'afrofuturism': 'World Music',
    'afropop': 'World Music',
    'alabama rap': 'Hip Hop',
    'alt z': 'Alternative',
    'alternative dance': 'Alternative',
    'alternative hip hop': 'Hip Hop',
    'alternative metal': 'Metal',
    'alternative r&b': 'R&B',
    'alternative rock': 'Rock',
    'american folk revival': 'Folk',
    'arkansas hip hop': 'Hip Hop',
    'art pop': 'Pop',
    'art rock': 'Rock',
    'asian american hip hop': 'Hip Hop',
    'atl hip hop': 'Hip Hop',
    'atl trap': 'Hip Hop',
    'australian country': 'Country',
    'australian dance': 'Dance',
    'australian electropop': 'Electronica',
    'australian hip hop': 'Hip Hop',
    'australian indie': 'Indie',
    'australian pop': 'Pop',
    'azonto': 'World Music',
    'azontobeats': 'World Music',
    'barbadian pop': 'Pop',
    'baroque pop': 'Pop',
    'baton rouge rap': 'Hip Hop',
    'bedroom pop': 'Pop',
    'bedroom soul': 'R&B',
    'big room': 'Electronica',
    'black americana': 'Americana',
    'blues rock': 'Rock',
    'boy band': 'Pop',
    'british soul': 'Soul',
    'bronx hip hop': 'Hip Hop',
    'brooklyn drill': 'Hip Hop',
    'brostep': 'Electronica',
    'cali rap': 'Hip Hop',
    'canadian contemporary r&b': 'R&B',
    'canadian hip hop': 'Hip Hop',
    'canadian latin': 'World Music',
    'canadian old school hip hop': 'Hip Hop',
    'canadian pop': 'Pop',
    'canadian rock': 'Rock',
    'canadian trap': 'Hip Hop',
    'candy pop': 'Pop',
    'cartoon': 'Miscellaneous',
    'ccm': 'Christian',
    'celtic rock': 'Rock',
    'chicago bop': 'Hip Hop',
    'chicago drill': 'Hip Hop',
    'chicago rap': 'Hip Hop',
    'chicano rap': 'Hip Hop',
    "children's music": 'Miscellaneous',
    'christian alternative rock': 'Christian',
    'christian music': 'Christian',
    'classic oklahoma country': 'Country',
    'classic rock': 'Rock',
    'classic texas country': 'Country',
    'colombian pop': 'Pop',
    'comedy rap': 'Hip Hop',
    'comic': 'Miscellaneous',
    'complextro': 'Electronica',
    'conscious hip hop': 'Hip Hop',
    'contemporary country': 'Country',
    'contemporary r&b': 'R&B',
    'country': 'Country',
    'country dawn': 'Country',
    'country pop': 'Country',
    'country rap': 'Hip Hop',
    'country road': 'Country',
    'country rock': 'Rock',
    'crunk': 'Hip Hop',
    'dance pop': 'Dance',
    'dance rock': 'Dance',
    'dancehall': 'World Music',
    'danish pop': 'Pop',
    'deep dance pop': 'Dance',
    'deep underground hip hop': 'Hip Hop',
    'detroit hip hop': 'Hip Hop',
    'dfw rap': 'Hip Hop',
    'dirty south rap': 'Hip Hop',
    'dong-yo': 'World Music',
    'downtempo': 'Electronica',
    'drill': 'Hip Hop',
    'dutch edm': 'Electronica',
    'dutch pop': 'Pop',
    'east coast hip hop': 'Hip Hop',
    'easy listening': 'Pop',
    'edm': 'Electronica',
    'electro': 'Electronica',
    'electro house': 'Electronica',
    'electronic': 'Electronica',
    'electronic trap': 'Electronica',
    'electropop': 'Electronica',
    'electropowerpop': 'Electronica',
    'emo': 'Rock',
    'emo rap': 'Hip Hop',
    'escape room': 'Alternative',
    'etherpop': 'Electronica',
    'eurodance': 'Dance',
    'europop': 'Pop',
    'fake': 'Miscellaneous',
    'filter house': 'Electronica',
    'florida drill': 'Hip Hop',
    'florida rap': 'Hip Hop',
    'folk-pop': 'Folk',
    'funk metal': 'Metal',
    'funk rock': 'Rock',
    'futuristic swag': 'Hip Hop',
    'g funk': 'Hip Hop',
    'gambian hip hop': 'Hip Hop',
    'gangster rap': 'Hip Hop',
    'gauze pop': 'Pop',
    'gen z singer-songwriter': 'Singer-Songwriter',
    'german techno': 'Electronica',
    'ghanaian hip hop': 'Hip Hop',
    'girl group': 'Pop',
    'glam metal': 'Metal',
    'glam rock': 'Rock',
    'grime': 'Hip Hop',
    'hard rock': 'Rock',
    'hardcore hip hop': 'Hip Hop',
    'harlem hip hop': 'Hip Hop',
    'heartland rock': 'Rock',
    'hip hop': 'Hip Hop',
    'hip pop': 'Hip Hop',
    'hollywood': 'Miscellaneous',
    'house': 'Electronica',
    'houston rap': 'Hip Hop',
    'hyphy': 'Hip Hop',
    'idol': 'Pop',
    'indie pop rap': 'Indie',
    'indie poptimism': 'Indie',
    'indie r&b': 'Indie',
    'indie rock': 'Rock',
    'indie rock italiano': 'Rock',
    'indie soul': 'Soul',
    'indietronica': 'Electronica',
    'irish rock': 'Rock',
    'irish singer-songwriter': 'Singer-Songwriter',
    'italian pop': 'Pop',
    'k-pop': 'World Music',
    'k-pop boy group': 'World Music',
    'k-rap': 'Hip Hop',
    'kentucky hip hop': 'Hip Hop',
    'korean old school hip hop': 'Hip Hop',
    'la indie': 'Indie',
    'latin': 'World Music',
    'latin hip hop': 'Hip Hop',
    'latin pop': 'Pop',
    'latin viral pop': 'Pop',
    'lds youth': 'Christian',
    'lgbtq+ hip hop': 'Hip Hop',
    'lilith': 'Pop',
    'lounge': 'Easy Listening',
    'lullaby': 'Miscellaneous',
    'melbourne bounce international': 'Electronica',
    'mellow gold': 'Pop',
    'melodic rap': 'Hip Hop',
    'memphis hip hop': 'Hip Hop',
    'metropopolis': 'Pop',
    'mexican classic rock': 'Rock',
    'mexican pop': 'Pop',
    'miami hip hop': 'Hip Hop',
    'minnesota hip hop': 'Hip Hop',
    'mississippi hip hop': 'Hip Hop',
    'modern alternative pop': 'Pop',
    'modern alternative rock': 'Rock',
    'modern blues rock': 'Rock',
    'modern country pop': 'Country',
    'modern country rock': 'Country',
    'modern folk rock': 'Folk',
    'modern rock': 'Rock',
    'modern uplift': 'Pop',
    'moombahton': 'Electronica',
    'movie tunes': 'Soundtrack',
    'neo mellow': 'Pop',
    'neo soul': 'Soul',
    'neo-singer-songwriter': 'Singer-Songwriter',
    'neo-synthpop': 'Electronica',
    'neon pop punk': 'Pop',
    'new jersey rap': 'Hip Hop',
    'new jersey underground rap': 'Hip Hop',
    'new orleans rap': 'Hip Hop',
    'new rave': 'Electronica',
    'new wave pop': 'Pop',
    'nigerian hip hop': 'Hip Hop',
    'nigerian pop': 'Pop',
    'north carolina hip hop': 'Hip Hop',
    'nu metal': 'Metal',
    'nyc rap': 'Hip Hop',
    'nz pop': 'Pop',
    'oakland hip hop': 'Hip Hop',
    'ohio hip hop': 'Hip Hop',
    'old school atlanta hip hop': 'Hip Hop',
    'orchestral soundtrack': 'Soundtrack',
    'outlaw country': 'Country',
    'permanent wave': 'Rock',
    'philly rap': 'Hip Hop',
    'piano rock': 'Rock',
    'pinoy hip hop': 'Hip Hop',
    'pittsburgh rap': 'Hip Hop',
    'pixie': 'Pop',
    'plugg': 'Hip Hop',
    'pluggnb': 'R&B',
    'political hip hop': 'Hip Hop',
    'pop': 'Pop',
    'pop dance': 'Pop',
    'pop edm': 'Electronica',
    'pop emo': 'Pop',
    'pop punk': 'Rock',
    'pop r&b': 'R&B',
    'pop rap': 'Hip Hop',
    'pop rock': 'Rock',
    'pop soul': 'Soul',
    'portland hip hop': 'Hip Hop',
    'post-grunge': 'Rock',
    'post-teen pop': 'Pop',
    'pov: indie': 'Indie',
    'progressive electro house': 'Electronica',
    'progressive house': 'Electronica',
    'puerto rican pop': 'Pop',
    'punk': 'Rock',
    'queens hip hop': 'Hip Hop',
    'quiet storm': 'R&B',
    'r&b': 'R&B',
    'rage rap': 'Hip Hop',
    'rap': 'Hip Hop',
    'rap conscient': 'Hip Hop',
    'rap kreyol': 'Hip Hop',
    'rap latina': 'Hip Hop',
    'rap metal': 'Metal',
    'reggae fusion': 'World Music',
    'reggaeton': 'World Music',
    'reggaeton colombiano': 'World Music',
    'rhode island rap': 'Hip Hop',
    'rock': 'Rock',
    'rockabilly': 'Rock',
    'romanian house': 'Electronica',
    'romanian pop': 'Pop',
    'sad lo-fi': 'Lo-fi',
    'sad rap': 'Hip Hop',
    'san marcos tx indie': 'Indie',
    'scam rap': 'Hip Hop',
    'scandipop': 'Pop',
    'scottish singer-songwriter': 'Singer-Songwriter',
    'screamo': 'Rock',
    'seattle hip hop': 'Hip Hop',
    'shiver pop': 'Pop',
    'show tunes': 'Soundtrack',
    'singer-songwriter': 'Singer-Songwriter',
    'singer-songwriter pop': 'Pop',
    'slap house': 'Electronica',
    'sleaze rock': 'Rock',
    'soul': 'Soul',
    'soundtrack': 'Soundtrack',
    'south african rock': 'Rock',
    'south carolina hip hop': 'Hip Hop',
    'southern hip hop': 'Hip Hop',
    'st louis rap': 'Hip Hop',
    'stomp and holler': 'Rock',
    'stomp pop': 'Pop',
    'swedish electropop': 'Electronica',
    'swedish pop': 'Pop',
    'swedish synthpop': 'Electronica',
    'talent show': 'Miscellaneous',
    'teen pop': 'Pop',
    'tennessee hip hop': 'Hip Hop',
    'texas latin rap': 'Hip Hop',
    'trap': 'Hip Hop',
    'trap latino': 'Hip Hop',
    'trap queen': 'Hip Hop',
    'tropical house': 'Electronica',
    'uk americana': 'Country',
    'uk contemporary r&b': 'R&B',
    'uk dance': 'Electronica',
    'uk funky': 'Electronica',
    'uk pop': 'Pop',
    'underground hip hop': 'Hip Hop',
    'urban contemporary': 'R&B',
    'urbano latino': 'World Music',
    'viral pop': 'Pop',
    'viral rap': 'Hip Hop',
    'viral trap': 'Hip Hop',
    'virgin islands reggae': 'World Music',
    'west coast rap': 'Hip Hop',
    'wrestling': 'Miscellaneous'
}


def map_genre(genre):
    """Translate a genre into its broad genre bucket.

    Genres that have no entry in ``genre_mapping`` are returned unchanged.
    """
    try:
        return genre_mapping[genre]
    except KeyError:
        # Not in the mapping: fall back to the genre itself.
        return genre

# Label every (song, genre) row with its broad genre.
df_split_genre['broad_genre'] = df_split_genre['genre'].apply(map_genre)


# Number of rows per (year, broad genre) pair, as a flat table with a
# 'count' column.
genre_counts_per_year = (
    df_split_genre
    .groupby(['year', 'broad_genre'])
    .size()
    .reset_index(name='count')
)

# One line per broad genre showing its yearly count.
fig, ax = plt.subplots(figsize=(15, 10))
sns.lineplot(data=genre_counts_per_year, x='year', y='count', hue='broad_genre', ax=ax)
ax.set_title('Popularity of Broad Genres Over Time')
ax.legend(loc='upper left')
plt.show()


df_split_artist


# Number of rows (songs) credited to each artist in each year.
songs_per_artist = df_split_artist.groupby(['year', 'split_artist']).size()
artist_counts_per_year = songs_per_artist.reset_index(name='count')

# Within each year, find the row label of the highest count and select
# those rows.
best_row_labels = artist_counts_per_year.groupby('year')['count'].idxmax()
top_artists_per_year = artist_counts_per_year.loc[best_row_labels]
top_artists_per_year


# Bar chart: one bar per year, as tall as the top artist's song count.
plt.figure(figsize=(12, 8))

bar_years = top_artists_per_year['year']
bar_heights = top_artists_per_year['count']
bar_names = top_artists_per_year['split_artist']

plt.bar(bar_years, bar_heights, color='skyblue')
plt.xticks(bar_years)
plt.title('Top Artist Per Year Based on Song Count')
plt.xlabel('Year')
plt.ylabel('Number of Songs')

# Write each artist's name slightly above the bar for that year.
for bar_year, bar_height, bar_name in zip(bar_years, bar_heights, bar_names):
    plt.text(bar_year, bar_height + 0.2, bar_name, ha='center')

plt.tight_layout()
plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()


# Plotting: GDP growth and unemployment on a shared x-axis (year) with
# separate y-axes. Explicit Axes objects are used because twinx() makes the
# twin the current axes and hides the twin's x-axis, so a later plt.xlabel()
# would be set on an axis that is never drawn.
fig, ax1 = plt.subplots(figsize=(12, 6))

# First Y-axis for GDP
gdp_line, = ax1.plot(df_economy['year'], df_economy['GDP growth (%)'], label='GDP Growth Rate', color='blue')
ax1.set_xlabel('Year')
ax1.set_ylabel('GDP Growth Rate (%)', color='blue')

# Create a second Y-axis for Unemployment Rate
ax2 = ax1.twinx()
unemployment_line, = ax2.plot(df_economy['year'], df_economy['unemployment Rate (%)'], label='Unemployment Rate', color='red')
ax2.set_ylabel('Unemployment Rate (%)', color='red')

ax1.set_title('U.S. GDP Growth Rate and Unemployment Rate (2006-2022)')
ax1.grid(True)

# Single legend for both lines, drawn on the top-most axes so that neither
# line covers it.
ax2.legend(handles=[gdp_line, unemployment_line], loc='best')

plt.show()


# Convert mood to numeric: Happy = 1, Neutral = 0, Sad = -1. Moods outside
# this mapping (including missing ones) become NaN.
mood_mapping = {
    'Happy': 1,
    'Neutral': 0,
    'Sad': -1
}
df_music['mood_numeric'] = df_music['mood'].map(mood_mapping)

# Calculate the average mood per year (a Series indexed by year)
avg_mood_per_year_mean = df_music.groupby('year')['mood_numeric'].mean()


plt.figure(figsize=(12, 6))
# Use the years from the mood series itself as x values, so each point is
# plotted against its own year instead of being paired by position with the
# rows of the economy table.
plt.plot(avg_mood_per_year_mean.index, avg_mood_per_year_mean.values, marker='o')
plt.title('Average Mood Per Year')
plt.xlabel('Year')
plt.ylabel('Average Mood')
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.axhline(0, color='black',linewidth=0.5)  # reference line at neutral mood
plt.tight_layout()
plt.show()


# Convert the 'year' column to int in both DataFrames so the merge keys match
genre_counts_per_year['year'] = genre_counts_per_year['year'].astype(int)
df_economy['year'] = df_economy['year'].astype(int)

# Add the economic indicators to the per-year genre counts
genre_counts_economy_per_year = genre_counts_per_year.merge(df_economy, on='year')

# NOTE(review): a year in which a genre has no rows is absent from
# genre_counts_per_year rather than present with count 0, so each correlation
# only covers the years in which the genre appears -- confirm this is intended.

# Pearson correlation between GDP growth and the yearly count of each genre.
genres = genre_counts_per_year['broad_genre'].unique()
for genre in genres:
    genre_df = genre_counts_economy_per_year[genre_counts_economy_per_year['broad_genre'] == genre]

    # Two points always correlate at exactly +/-1 and a constant series has no
    # defined correlation, so require at least 3 points and some variation in
    # both series before calling pearsonr.
    has_enough_points = len(genre_df) >= 3
    is_varying = genre_df['count'].nunique() > 1 and genre_df['GDP growth (%)'].nunique() > 1

    if has_enough_points and is_varying:
        correlation, p_value = pearsonr(genre_df['GDP growth (%)'], genre_df['count'])
        print(f"Correlation between GDP Growth Rate and {genre}: {correlation:.2f} / P-value is {p_value:.2f}")
    else:
        print(f"Insufficient data to compute correlation for {genre}")

Correlation between GDP Growth Rate and Country: 0.05 / P-value is 0.84
Correlation between GDP Growth Rate and Dance: -0.22 / P-value is 0.39
Correlation between GDP Growth Rate and Electronica: 0.20 / P-value is 0.45
Correlation between GDP Growth Rate and Hip Hop: -0.07 / P-value is 0.79
Correlation between GDP Growth Rate and Indie: 0.19 / P-value is 0.48
Correlation between GDP Growth Rate and Metal: -0.23 / P-value is 0.66
Correlation between GDP Growth Rate and Miscellaneous: -0.28 / P-value is 0.38
Correlation between GDP Growth Rate and Pop: 0.14 / P-value is 0.60
Correlation between GDP Growth Rate and R&B: -0.20 / P-value is 0.45
Correlation between GDP Growth Rate and Rock: -0.32 / P-value is 0.24
Correlation between GDP Growth Rate and Singer-Songwriter: 0.52 / P-value is 0.37
Correlation between GDP Growth Rate and Soul: -0.21 / P-value is 0.48
Correlation between GDP Growth Rate and World Music: 0.45 / P-value is 0.17
Correlation between GDP Growth Rate and Alternative: -0.31 / P-value is 0.50
Correlation between GDP Growth Rate and Christian: 1.00 / P-value is 1.00
Correlation between GDP Growth Rate and Folk: -0.55 / P-value is 0.34
Correlation between GDP Growth Rate and Americana: 0.27 / P-value is 0.60
Correlation between GDP Growth Rate and Soundtrack: nan / P-value is nan
Insufficient data to compute correlation for Lo-fi
Correlation between GDP Growth Rate and Easy Listening: -1.00 / P-value is 1.00

/usr/local/lib/python3.10/dist-packages/scipy/stats/_stats_py.py:4424: ConstantInputWarning: An input array is constant; the correlation coefficient is not defined.
  warnings.warn(stats.ConstantInputWarning(msg))


# Convert mood to numeric: Happy = 1, Neutral = 0, Sad = -1
mood_mapping = {
    'Happy': 1,
    'Neutral': 0,
    'Sad': -1
}
df_music['mood_numeric'] = df_music['mood'].map(mood_mapping)

# Calculate the average mood per year (a Series indexed by year)
avg_mood_per_year_mean = df_music.groupby('year')['mood_numeric'].mean()

# Pair GDP growth and average mood by year instead of by row position, so a
# missing or differently ordered year cannot misalign the two series.
gdp_and_mood = df_economy.merge(avg_mood_per_year_mean.reset_index(), on='year')
gdp_and_mood = gdp_and_mood.dropna(subset=['GDP growth (%)', 'mood_numeric'])

correlation_gdp_mood, p_value_gdp_mood = pearsonr(gdp_and_mood['GDP growth (%)'], gdp_and_mood['mood_numeric'])
print(f"Correlation between GDP Growth Rate and mood: {correlation_gdp_mood:.2f} / P-value is {p_value_gdp_mood:.2f}")

Correlation between GDP Growth Rate and mood: 0.14 / P-value is 0.59


# Pearson correlation between the unemployment rate and the yearly count of
# each broad genre.
genres = genre_counts_per_year['broad_genre'].unique()
for genre in genres:
    genre_df = genre_counts_economy_per_year[genre_counts_economy_per_year['broad_genre'] == genre]

    # Two points always correlate at exactly +/-1 and a constant series has no
    # defined correlation, so require at least 3 points and some variation in
    # both series before calling pearsonr.
    has_enough_points = len(genre_df) >= 3
    is_varying = genre_df['count'].nunique() > 1 and genre_df['unemployment Rate (%)'].nunique() > 1

    if has_enough_points and is_varying:
        correlation, p_value = pearsonr(genre_df['unemployment Rate (%)'], genre_df['count'])
        print(f"Correlation between Unemployment Rate and {genre}: {correlation:.2f} / P-value is {p_value:.2f}")
    else:
        print(f"Insufficient data to compute correlation for {genre}")

Correlation between Unemployment Rate and Country: 0.29 / P-value is 0.26
Correlation between Unemployment Rate and Dance: 0.67 / P-value is 0.00
Correlation between Unemployment Rate and Electronica: -0.06 / P-value is 0.83
Correlation between Unemployment Rate and Hip Hop: -0.36 / P-value is 0.15
Correlation between Unemployment Rate and Indie: -0.47 / P-value is 0.06
Correlation between Unemployment Rate and Metal: -0.78 / P-value is 0.07
Correlation between Unemployment Rate and Miscellaneous: 0.26 / P-value is 0.41
Correlation between Unemployment Rate and Pop: 0.15 / P-value is 0.55
Correlation between Unemployment Rate and R&B: 0.23 / P-value is 0.38
Correlation between Unemployment Rate and Rock: -0.01 / P-value is 0.97
Correlation between Unemployment Rate and Singer-Songwriter: -0.73 / P-value is 0.16
Correlation between Unemployment Rate and Soul: -0.11 / P-value is 0.70
Correlation between Unemployment Rate and World Music: -0.49 / P-value is 0.13
Correlation between Unemployment Rate and Alternative: -0.22 / P-value is 0.63
Correlation between Unemployment Rate and Christian: -1.00 / P-value is 1.00
Correlation between Unemployment Rate and Folk: 0.51 / P-value is 0.38
Correlation between Unemployment Rate and Americana: -0.59 / P-value is 0.22
Correlation between Unemployment Rate and Soundtrack: nan / P-value is nan
Insufficient data to compute correlation for Lo-fi
Correlation between Unemployment Rate and Easy Listening: -1.00 / P-value is 1.00

/usr/local/lib/python3.10/dist-packages/scipy/stats/_stats_py.py:4424: ConstantInputWarning: An input array is constant; the correlation coefficient is not defined.
  warnings.warn(stats.ConstantInputWarning(msg))


# Pair unemployment rate and average mood by year instead of by row position.
unemployment_and_mood = df_economy.merge(avg_mood_per_year_mean.reset_index(), on='year')
unemployment_and_mood = unemployment_and_mood.dropna(subset=['unemployment Rate (%)', 'mood_numeric'])

# Stored under their own names so the GDP/mood results computed earlier are
# not overwritten by the unemployment figures.
correlation_unemployment_mood, p_value_unemployment_mood = pearsonr(
    unemployment_and_mood['unemployment Rate (%)'],
    unemployment_and_mood['mood_numeric'],
)
print(f"Correlation between Unemployment Rate and mood: {correlation_unemployment_mood:.2f} / P-value is {p_value_unemployment_mood:.2f}")

Correlation between Unemployment Rate and mood: 0.39 / P-value is 0.12


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Merge dataframes on year
merged_data = df_music.merge(df_economy, on='year')

# One row per year: the year's economic indicators and its average mood
# score. The target is the mean of mood_numeric (-1 = Sad, 0 = Neutral,
# 1 = Happy), not a proportion of happy songs.
grouped_data = merged_data.groupby('year').agg({
    'GDP growth (%)': 'first',
    'unemployment Rate (%)': 'first',
    'mood_numeric': 'mean'
}).reset_index()

# LinearRegression cannot be fitted on missing values; drop incomplete years.
grouped_data = grouped_data.dropna(subset=['GDP growth (%)', 'unemployment Rate (%)', 'mood_numeric'])

# Features and target
X = grouped_data[['GDP growth (%)', 'unemployment Rate (%)']]
y = grouped_data['mood_numeric']

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on test set
y_pred = model.predict(X_test)

# Calculate and print the error
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")

# Predict the average mood score for a new GDP Rate and Unemployment Rate
gdp_rate = 2.5  # example value
unemployment_rate = 5.0  # example value

# Wrap the new point in a DataFrame with the training column names; passing
# a bare list makes scikit-learn warn that X has no valid feature names.
new_point = pd.DataFrame([[gdp_rate, unemployment_rate]], columns=X.columns)
predicted_avg_mood = model.predict(new_point)
predicted_happy_proportion = predicted_avg_mood  # legacy name, kept for compatibility
print(f"Predicted average mood score (-1 = sad, 1 = happy) for GDP Rate {gdp_rate} and Unemployment Rate {unemployment_rate}: {predicted_avg_mood[0]}")

Mean Squared Error: 0.018437208194942795
Predicted proportion of happy songs for GDP Rate 2.5 and Unemployment Rate 5.0: -0.07709018491389735

/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names
  warnings.warn(

	title	artist	year	genre
0	Bad Day	Daniel Powter	2006	canadian pop, neo mellow, pop rock
1	Temperature	Sean Paul	2006	dance pop, dancehall, pop, pop rap
2	Promiscuous	Nelly Furtado Featuring Timbaland	2006	canadian latin, canadian pop, dance pop, pop
3	You're Beautiful	James Blunt	2006	neo mellow, pop rock
4	Hips Don't Lie	Shakira Featuring Wyclef Jean	2006	colombian pop, dance pop, latin pop, pop

	title	artist	year	genre	split_artist
0	Bad Day	Daniel Powter	2006	canadian pop, neo mellow, pop rock	Daniel Powter
1	Temperature	Sean Paul	2006	dance pop, dancehall, pop, pop rap	Sean Paul
2	Promiscuous	Nelly Furtado Featuring Timbaland	2006	canadian latin, canadian pop, dance pop, pop	Nelly Furtado
2	Promiscuous	Nelly Furtado Featuring Timbaland	2006	canadian latin, canadian pop, dance pop, pop	Timbaland
3	You're Beautiful	James Blunt	2006	neo mellow, pop rock	James Blunt

	year	GDP growth (%)	unemployment Rate (%)
0	2006	2.782811	4.62
1	2007	2.010508	4.62
2	2008	0.122188	5.78
3	2009	-2.599888	9.25
4	2010	2.708857	9.63

	year	split_artist	count
15	2006	Chris Brown	4
100	2007	Akon	6
265	2008	T-Pain	7
289	2009	Beyonce	5
404	2010	Ke$ha	5
515	2011	Lil Wayne	5
619	2012	Rihanna	6
648	2013	Bruno Mars	3
775	2014	Iggy Azalea	4
904	2015	Meghan Trainor	5
967	2016	Drake	7
1105	2017	Migos	5
1168	2018	Drake	8
1240	2019	Ariana Grande	5
1409	2020	Roddy Ricch	6
1459	2021	Doja Cat	5
1537	2022	Bad Bunny	7

U.S. Economy's Impact on Music Trends: 2006-2022 Analysis

Introduction

Data Collection & Processing: Music Data

Setup

Use billboard.py to Fetch Songs

Setup Spotify API & Add the Genre Column

Add the Mood Column

Data Collection & Processing: Economic Data

Data Visualization

Genre Popularity

Artist Dominance

The Music Mood Trend

Interpretation Based On the Graphs

Hypothesis Testing

Correlation For GDP Growth Rate

Interpretation

Interpretation

Correlation For Unemployment Rate

Interpretation

Interpretation

Machine Learning

Interpretation

Conclusion