import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

data = pd.read_csv("Credit Card Year End041124.csv")

data.head()

to_drop = ['Master Category', 'Payment Method', 'Unnamed: 8']
data.drop(columns=to_drop, inplace=True)

# function that outputs the month given a date formatted 'mm/dd/yyyy'

def month(date):
    month_names = {
        '01': 'January',
        '02': 'February',
        '03': 'March',
        '04': 'April',
        '05': 'May',
        '06': 'June',
        '07': 'July',
        '08': 'August',
        '09': 'September',
        '10': 'October',
        '11': 'November',
        '12': 'December'
    }
    month_num = date[:2]
    return month_names.get(month_num, None)

# also need a function to convert the amount, a string, to an int so we can do math with it

def convert_to_int(amount):
    amount_float = float(amount.replace('$', ''))
    return int(amount_float)

months = []
for date in np.array(data['Date']):
    months.append(month(date))
month_data = data[['Date', 'Amount']].copy()
month_data['Month'] = months

amounts_ints = []
for amount in np.array(month_data['Amount']):
    amounts_ints.append(convert_to_int(amount))

month_data['Amount'] = amounts_ints
# Gonna do this for the master data as well 
data['Amount'] = amounts_ints

month_data = month_data['Amount'].groupby(month_data['Month']).sum()
month_data = month_data.reset_index()

month_order = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 
               'September', 'October', 'November', 'December']

month_data['Month'] = pd.Categorical(month_data['Month'], categories=month_order, ordered=True)
month_data = month_data.sort_values(by='Month')
month_data

plt.bar(month_data['Month'], month_data['Amount'], color = '#00a79d')
plt.xticks(fontsize=5)
plt.xlabel('Month')
plt.ylabel('Amount (dollars)')
plt.title('Spending by month in 2023')

Text(0.5, 1.0, 'Spending by month in 2023')

total = categories['Amount'].sum()
print("I spent a total of {} dollars in 2023.".format(total))

I spent a total of 8892 dollars in 2023.

categories = data['Amount'].groupby(data['Subcategory']).sum().reset_index().sort_values(by='Amount', ascending = False)
categories.head()

to_plot = categories[["Amount"]]
to_plot.index = categories['Subcategory']
plot = to_plot.plot.pie(y='Amount', figsize=(7, 7), autopct='%1.1f%%', wedgeprops={
    'linewidth': 1, 'edgecolor': 'white'}, fontsize=8, pctdistance=0.85)

plot.get_legend().remove()

data.head()

def subcategory(category, purchase):
    if any(keyword in purchase for keyword in ["WILLOW", "COFFEE"]):
        return "Coffee Shops" #it categorized a tea spot as a bars/alcohol. 
    if any(keyword in purchase for keyword in ["FOOD COLLECTIVE", "FOXTROT ", "FRUIT FARM", "PRETA", "FARMERS", 
                                              "QUAKER VALLEY FOODS", "TOKYO FISH"]):
        return "Groceries"
    if "BART" in purchase:
        return "Public Transportation"
    if "GOLF CLUB" in purchase:
        return "Recreation/Fitness/Clubs"
    if "GODADDY" in purchase:
        return "Domain"
    if "BERKELEY PARK" in purchase:
        return "Fine"
    if "EVERLANE" in purchase:
        return "Clothing/Accessories"
    if "SOUTH BLOCK" in purchase:
        return "Other Food/Drink"
    return category

def fix_categories(subcategory):
    if "Fast Food" in subcategory:
        return "Coffee Shops" # it categorized all my coffee places as fast food
    return subcategory

data['Subcategory'] = data.apply(lambda row: subcategory(row['Subcategory'], row['Payee']), axis=1)
data['Subcategory'] = data['Subcategory'].apply(fix_categories)

#IT WORKED!

data

categories_fixed = data['Amount'].groupby(data['Subcategory']).sum().reset_index().sort_values(by='Amount', ascending = False)
to_plot2 = categories_fixed[["Amount"]]
to_plot2.index = categories_fixed['Subcategory']
plot2 = to_plot2.plot.pie(y='Amount', figsize=(7, 7), autopct='%1.1f%%', wedgeprops={
    'linewidth': 1, 'edgecolor': 'white'}, fontsize=8, pctdistance=0.85)

plot2.get_legend().remove()

num_categories = len(data['Subcategory'].unique())
cmap = plt.get_cmap('tab20')

fig, axs = plt.subplots(1, 2, figsize=(14, 7))

specific_colors = {
    'Gasoline': 'red',
    'Groceries': 'hotpink',
    'Parking/Tolls': 'blue',
    'Public Transportation': 'orange',
    'Taxi': 'purple',
    'Domain': 'brown',
    'Recreation/Fitness/Clubs': 'pink',
    'Arts': 'cyan',
    'Books/Magazines/Newspapers': 'magenta',
    'Movies': 'yellow',
    'Fine': 'lightblue',
    'Coffee Shops': 'lightgreen',
    'Bars/Alcohol': 'lightcoral',
    'Restaurants': 'lightskyblue',
    'Other Food/Drink': 'brown',
    'Eyecare': 'lightpink',
    'Pharmacy': 'lightyellow',
    'Other Health': 'lightgrey',
    'Décor/Furnishings': 'lavender',
    'Education': 'lightseagreen',
    'Other Miscellaneous': 'lightsteelblue',
    'Clothing/Accessories': 'lightgreen',
    'Hair Care/Beauty Supply': 'yellow',
    'Spa/Massage': 'lightgoldenrodyellow',
    'Veterinarian': 'lightgrey',
    'Toys/Hobbies': 'lightpink',
    'Other Shopping': 'lightgrey',
    'Airfare': 'lightblue',
    'Lodging': 'red'
}

to_plot.plot.pie(ax=axs[0], y='Amount', autopct='%1.1f%%', wedgeprops={
    'linewidth': 1, 'edgecolor': 'white'}, fontsize=8, pctdistance=0.85, colors=[specific_colors.get(label, 'grey') for label in to_plot.index]).get_legend().remove()

axs[0].set_title('WF categories')
axs[0].set_ylabel('')  

to_plot2.plot.pie(ax=axs[1], y='Amount', autopct='%1.1f%%', wedgeprops={
    'linewidth': 1, 'edgecolor': 'white'}, fontsize=8, pctdistance=0.85, colors=[specific_colors.get(label, 'grey') for label in to_plot2.index]).get_legend().remove()

axs[1].set_title('My categories')
axs[1].set_ylabel('')

Text(0, 0.5, '')

fig, axs = plt.subplots(1, 2, figsize=(14, 7))

# Plot the first bar chart on the left subplot
to_plot.plot.bar(ax=axs[0])
axs[0].set_title('Wells Fargo Categorization')
axs[0].set_ylabel('Amount')

# Plot the second bar chart on the right subplot
to_plot2.plot.bar(ax=axs[1])
axs[1].set_title('My (accurate) Categorization')
axs[1].set_ylabel('Amount')

# Show the plots
plt.tight_layout()
plt.show()

top_ten = categories_fixed.head(10)
plt.bar(top_ten['Subcategory'], top_ten['Amount'])
plt.xlabel('Subcategory')
plt.ylabel('Amount (dollars)')
plt.xticks(fontsize=8, rotation=45, ha='right')
bars1 = plt.bar(top_ten['Subcategory'], top_ten['Amount'])
for bar in bars1:
    yval = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2, yval, '${:.2f}'.format(yval), va='bottom', ha='center', fontsize=7.5)

location = data['Amount'].groupby(data['Location']).sum().reset_index()
location.head()

def fix_location(location):
    if  any(keyword in location for keyword in ["DC", "GA", "WA"]):
        return "DC"
    if any(keyword in location for keyword in ["New York", "NEW YORK", "NY"]):
        return "New York"
    if "Berkeley" in location:
        return "Berkeley"
    if ",CA" in location:
        return "California"
    if "MD" in location:
        return "Maryland"
    if "VA" in location:
        return "Virginia"
    if "AZ" in location:
        return "Delhi" #(edge case)
    if "CD" in location:
        return "Canada"
    if "PA" in location:
        return "Pennsylvania"  
    if "NJ" in location:
        return "New Jersey"
    return loc

fixed_locations = []
for loc in np.array(location["Location"]):
    fixed_locations.append(fix_location(loc))

location['Location'] = fixed_locations
location = location['Amount'].groupby(location['Location']).sum().reset_index()
location = location.sort_values(by='Amount', ascending=False)
location

pie = location[["Amount"]]
pie.index = location['Location']

plot = pie.plot.pie(y='Amount', figsize=(7, 7), autopct='%1.1f%%', wedgeprops={
    'linewidth': 1, 'edgecolor': 'white'}, fontsize=10, pctdistance=0.85)
plot.get_legend().remove()

plt.figure(figsize=(10, 6))
plt.bar(location['Location'], location['Amount'])
plt.xticks(fontsize=8)
plt.xlabel('Location')
plt.ylabel('Amount (dollars)')
plt.title('Spending by location')
bars = plt.bar(location['Location'], location['Amount'])
for bar in bars:
    yval = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2, yval, '${:.2f}'.format(yval), va='bottom', ha='center')

	Master Category	Subcategory	Date	Location	Payee	Description	Payment Method	Amount	Unnamed: 8
0	Auto/Transportation	Gasoline	12/05/2023	BRENTWOOD ,CA	7-ELEVEN	7-ELEVEN 38723 BRENTWOOD ,CA	WELLS FARGO ACTIVE CASH VISA(R) CARD ...3737	$30.00	NaN
1	Auto/Transportation	Gasoline	08/14/2023	Washington ,DC	DUPONT CIRCLE FARMERS	SQ *DUPONT CIRCLE FARMERS Washington ,DC	WELLS FARGO ACTIVE CASH VISA(R) CARD ...3737	$3.50	NaN
2	Auto/Transportation	Gasoline	08/14/2023	WASHINGTON ,DC	FOXTROT MARKET	FOXTROT MARKET-MT VERNON WASHINGTON ,DC	WELLS FARGO ACTIVE CASH VISA(R) CARD ...3737	$5.39	NaN
3	Auto/Transportation	Gasoline	08/12/2023	WASHINGTON ,DC	FOXTROT MARKET	FOXTROT MARKET - DUPONT WASHINGTON ,DC	WELLS FARGO ACTIVE CASH VISA(R) CARD ...3737	$13.77	NaN
4	Auto/Transportation	Gasoline	08/06/2023	Brooklyn ,NY	WILLIAMS FRUIT FARM	SQ *WILLIAMS FRUIT FARM Brooklyn ,NY	WELLS FARGO ACTIVE CASH VISA(R) CARD ...3737	$7.00	NaN

	Subcategory	Date	Location	Payee	Description	Amount
0	Gasoline	12/05/2023	BRENTWOOD ,CA	7-ELEVEN	7-ELEVEN 38723 BRENTWOOD ,CA	30
1	Gasoline	08/14/2023	Washington ,DC	DUPONT CIRCLE FARMERS	SQ *DUPONT CIRCLE FARMERS Washington ,DC	3
2	Gasoline	08/14/2023	WASHINGTON ,DC	FOXTROT MARKET	FOXTROT MARKET-MT VERNON WASHINGTON ,DC	5
3	Gasoline	08/12/2023	WASHINGTON ,DC	FOXTROT MARKET	FOXTROT MARKET - DUPONT WASHINGTON ,DC	13
4	Gasoline	08/06/2023	Brooklyn ,NY	WILLIAMS FRUIT FARM	SQ *WILLIAMS FRUIT FARM Brooklyn ,NY	7

	Subcategory	Date	Location	Payee	Description	Amount
0	Gasoline	12/05/2023	BRENTWOOD ,CA	7-ELEVEN	7-ELEVEN 38723 BRENTWOOD ,CA	30
1	Groceries	08/14/2023	Washington ,DC	DUPONT CIRCLE FARMERS	SQ *DUPONT CIRCLE FARMERS Washington ,DC	3
2	Groceries	08/14/2023	WASHINGTON ,DC	FOXTROT MARKET	FOXTROT MARKET-MT VERNON WASHINGTON ,DC	5
3	Groceries	08/12/2023	WASHINGTON ,DC	FOXTROT MARKET	FOXTROT MARKET - DUPONT WASHINGTON ,DC	13
4	Groceries	08/06/2023	Brooklyn ,NY	WILLIAMS FRUIT FARM	SQ *WILLIAMS FRUIT FARM Brooklyn ,NY	7
...	...	...	...	...	...	...
545	Other Food/Drink	07/17/2023	Washington ,DC	SOUTH BLOCK	SQ *SOUTH BLOCK - LOGAN C Washington ,DC	21
546	Other Food/Drink	07/16/2023	Washington ,DC	SOUTH BLOCK	SQ *SOUTH BLOCK - LOGAN C Washington ,DC	9
547	Other Food/Drink	07/04/2023	Washington ,DC	SOUTH BLOCK	SQ *SOUTH BLOCK - LOGAN C Washington ,DC	9
548	Other Food/Drink	07/01/2023	Washington ,DC	SOUTH BLOCK	SQ *SOUTH BLOCK - UNION M Washington ,DC	9
549	Other Food/Drink	06/25/2023	Washington ,DC	SOUTH BLOCK	SQ *SOUTH BLOCK - LOGAN C Washington ,DC	9

	Location	Amount
0	120-36845156 ,CA	60
1	124-08994737 ,DC	62
2	151-05901648 ,CA	11
3	151-85480316 ,NY	223
4	161-79554467 ,NY	17

2023 Credit Card Expense data¶

Jesus the categories are messed up.¶

Cleaning¶

Monthly spending trends¶

Spending categories broken down¶

(assuming WF categories are right... they're not)¶

Ok...¶

Categorizing correctly¶

Let's compare WF's categories with mine¶

Top ten things I spent money on¶

What city/state/country did I spend the most money?¶

Summary¶

	Month	Amount
3	February	185
6	March	259
0	April	270
7	May	392
5	June	763
4	July	2381
1	August	1933
10	September	532
9	October	1009
8	November	718
2	December	450

	Subcategory	Amount
28	Restaurants	1787
11	Groceries	1530
4	Clothing/Accessories	1119
12	Hair Care/Beauty Supply	702
26	Recreation/Fitness/Clubs	400

	Location	Amount
1	California	4677
3	DC	2402
7	New York	935
0	Berkeley	724
8	Pennsylvania	41
6	New Jersey	36
4	Delhi	31
5	Maryland	20
9	Virginia	20
2	Canada	6