notebook life analysis
import pandas as pd
import sqlite3
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import matplotlib.dates as mdates
import os
from matplotlib.ticker import FuncFormatter
from scipy.interpolate import make_interp_spline
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.patches import Rectangle
import matplotlib.gridspec as gridspec
import calendar

%matplotlib inline
plt.style.use('default')
def load_data_from_db(db_path, year='2024'):
    try:
        conn = sqlite3.connect(db_path)
        query = f"SELECT * FROM combined_tracks WHERE begin_date LIKE '{year}%'"
        df = pd.read_sql_query(query, conn)
        conn.close()
        return df
    except sqlite3.Error as e:
        print(f"Database error: {e}")
        return pd.DataFrame()
    except Exception as e:
        print(f"Error: {e}")
        return pd.DataFrame()
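# Optional variant (not used below): if the year ever came from outside the notebook,
# a parameterized query avoids interpolating it into the SQL string. This is only a
# sketch, assuming the same combined_tracks table and text begin_date column as above.
def load_data_from_db_param(db_path, year='2024'):
    try:
        conn = sqlite3.connect(db_path)
        query = "SELECT * FROM combined_tracks WHERE begin_date LIKE ?"
        df = pd.read_sql_query(query, conn, params=(f"{year}%",))
        conn.close()
        return df
    except sqlite3.Error as e:
        print(f"Database error: {e}")
        return pd.DataFrame()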
# Load data from SQLite database
df = load_data_from_db('/users/shasharma/personal/combined_tracks_2024-new.db')
print(f"Loaded {len(df)} records from database")

# Convert date strings to datetime objects with ISO format
df['begin_date'] = pd.to_datetime(df['begin_date'], format='ISO8601')
df['end_date'] = pd.to_datetime(df['end_date'], format='ISO8601')
df['duration'] = (df['end_date'] - df['begin_date']).dt.total_seconds()

# Filter out very short sessions (shorter than 1 second)
df = df[df['duration'] >= 1]
df = df.sort_values('begin_date')

print("\nBasic statistics:")
print(df.describe())
Loaded 441067 records from database

Basic statistics:
            track_id       duration
count  388928.000000  388928.000000
mean    84038.278381      41.719078
std     45999.042766     200.988350
min         1.000000       1.000000
25%     47506.000000       3.000000
50%     80410.500000       6.018000
75%    115156.000000      23.000000
max    193407.000000   32725.245000
def merge_fragmented_sessions(df, time_threshold=5, consider_title=False):
    """
    Merge sessions that are fragmented due to brief interruptions (e.g., alt-tabbing).
    
    Parameters:
    - df: DataFrame containing the sessions
    - time_threshold: Maximum time gap (in seconds) to consider as an interruption
    - consider_title: Whether to consider window title in addition to app name
    
    Returns:
    - DataFrame with merged sessions
    """
    if consider_title:
        df = df.copy()  # avoid mutating the caller's DataFrame
        df['app_title'] = df['app'] + " - " + df['title']
        key_field = 'app_title'
    else:
        key_field = 'app'
    
    df_sorted = df.sort_values(['begin_date'])
    sessions = []
    
    for key, group in df_sorted.groupby(key_field):
        group = group.sort_values('begin_date')
        
        current_session = {
            'app': group.iloc[0]['app'],
            'title': group.iloc[0]['title'],
            'begin_date': group.iloc[0]['begin_date'],
            'end_date': group.iloc[0]['end_date'],
            'device': group.iloc[0]['device'],
            'task_name': group.iloc[0]['task_name']
        }
        
        for i in range(1, len(group)):
            row = group.iloc[i]
            time_diff = (row['begin_date'] - current_session['end_date']).total_seconds()
            
            if time_diff <= time_threshold:
                current_session['end_date'] = row['end_date']
            else:
                current_session['duration'] = (current_session['end_date'] - current_session['begin_date']).total_seconds()
                sessions.append(current_session)
                
                current_session = {
                    'app': row['app'],
                    'title': row['title'],
                    'begin_date': row['begin_date'],
                    'end_date': row['end_date'],
                    'device': row['device'],
                    'task_name': row['task_name']
                }
        
        current_session['duration'] = (current_session['end_date'] - current_session['begin_date']).total_seconds()
        sessions.append(current_session)
    
    return pd.DataFrame(sessions)
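# Quick sanity check on synthetic rows (hypothetical app/title/device values): the two
# Safari rows are separated by a 3-second gap, so with the default 5-second threshold
# they should merge into a single session, leaving two rows in total.
_merge_demo = pd.DataFrame({
    'app': ['Safari', 'Safari', 'Terminal'],
    'title': ['docs', 'docs', 'zsh'],
    'device': ['laptop', 'laptop', 'laptop'],
    'task_name': ['', '', ''],
    'begin_date': pd.to_datetime(['2024-01-01 09:00:00', '2024-01-01 09:00:13', '2024-01-01 09:01:00']),
    'end_date': pd.to_datetime(['2024-01-01 09:00:10', '2024-01-01 09:00:30', '2024-01-01 09:02:00']),
})
print(merge_fragmented_sessions(_merge_demo))  # expect 2 merged sessions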

def handle_overlapping_sessions(df):
    """
    Process the dataframe to handle overlapping sessions across devices and calculate
    accurate total usage time without double counting overlaps.
    """
    # Sort by begin_date to process chronologically
    sorted_df = df.sort_values('begin_date').copy()
    
    # Convert to list of sessions with begin and end times
    sessions = sorted_df[['begin_date', 'end_date', 'app', 'device', 'title']].values.tolist()
    
    # Merge overlapping time intervals
    merged_intervals = []
    if sessions:
        current = sessions[0].copy()
        for session in sessions[1:]:
            # If current session overlaps with next one
            if current[1] >= session[0]:
                # Extend the current session if needed
                current[1] = max(current[1], session[1])
            else:
                # No overlap, add current to results and start new current
                merged_intervals.append(current)
                current = session.copy()
        # Add the last session
        merged_intervals.append(current)
    
    # Calculate total non-overlapping time
    total_time = sum((interval[1] - interval[0]).total_seconds() for interval in merged_intervals)
    
    return pd.DataFrame(merged_intervals, columns=['begin_date', 'end_date', 'app', 'device', 'title']), total_time
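# Quick sanity check (hypothetical device names): two sessions that overlap across
# devices from 10:00 to 10:20 should count as 20 minutes of wall-clock time, not 30.
_overlap_demo = pd.DataFrame({
    'begin_date': pd.to_datetime(['2024-01-01 10:00:00', '2024-01-01 10:05:00']),
    'end_date': pd.to_datetime(['2024-01-01 10:15:00', '2024-01-01 10:20:00']),
    'app': ['Xcode', 'Safari'],
    'device': ['laptop', 'desktop'],
    'title': ['project', 'docs'],
})
_, _overlap_seconds = handle_overlapping_sessions(_overlap_demo)
print(_overlap_seconds / 60)  # expect 20.0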
# Merge fragmented sessions
merged_df_app_only = merge_fragmented_sessions(df, consider_title=False)
merged_df_with_title = merge_fragmented_sessions(df, consider_title=True)

print(f"Original number of records: {len(df)}")
print(f"Records after merging (app only): {len(merged_df_app_only)}")
print(f"Records after merging (app + title): {len(merged_df_with_title)}")

# Use app-only merged data for analysis
merged_df = merged_df_app_only
non_overlapping_df, total_non_overlapping_time = handle_overlapping_sessions(merged_df)
total_hours_non_overlapping = total_non_overlapping_time / 3600

print(f"Total hours tracked (with potential overlaps): {merged_df['duration'].sum() / 3600:.2f}")
print(f"Total hours tracked (removing overlaps): {total_hours_non_overlapping:.2f}")
print(f"Difference due to overlaps: {(merged_df['duration'].sum() / 3600) - total_hours_non_overlapping:.2f} hours")
Original number of records: 388928
Records after merging (app only): 156529
Records after merging (app + title): 336784
Total hours tracked (with potential overlaps): 4518.28
Total hours tracked (removing overlaps): 4400.94
Difference due to overlaps: 117.34 hours
# Drop idle time, then extract date parts for time-based analysis.
# Work on an explicit copy so the column assignments below don't trigger SettingWithCopyWarning.
merged_df = merged_df[merged_df['app'] != 'IDLE'].copy()

merged_df['date'] = merged_df['begin_date'].dt.date
merged_df['hour'] = merged_df['begin_date'].dt.hour
merged_df['day_of_week'] = merged_df['begin_date'].dt.dayofweek
merged_df['week'] = merged_df['begin_date'].dt.isocalendar().week
merged_df['month'] = merged_df['begin_date'].dt.month

def calculate_daily_usage(df):
    daily_usage = df.groupby('date')['duration'].sum().reset_index()
    daily_usage['hours'] = daily_usage['duration'] / 3600
    return daily_usage

def calculate_weekly_usage(df):
    weekly_usage = df.groupby(['week'])['duration'].sum().reset_index()
    weekly_usage['hours'] = weekly_usage['duration'] / 3600
    return weekly_usage

def calculate_hourly_distribution(df):
    hourly_dist = df.groupby('hour')['duration'].sum().reset_index()
    hourly_dist['percentage'] = hourly_dist['duration'] / hourly_dist['duration'].sum() * 100
    return hourly_dist

def calculate_day_of_week_distribution(df):
    day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    dow_dist = df.groupby('day_of_week')['duration'].sum().reset_index()
    dow_dist['day_name'] = dow_dist['day_of_week'].apply(lambda x: day_names[x])
    dow_dist['percentage'] = dow_dist['duration'] / dow_dist['duration'].sum() * 100
    dow_dist['hours'] = dow_dist['duration'] / 3600
    return dow_dist

daily_usage = calculate_daily_usage(merged_df)
weekly_usage = calculate_weekly_usage(merged_df)
hourly_dist = calculate_hourly_distribution(merged_df)
dow_dist = calculate_day_of_week_distribution(merged_df)
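# Consistency check: the groupbys above partition the same durations, so the hourly
# percentages should sum to ~100% and the day-of-week hours should match the overall total.
print(f"Hourly percentages sum to {hourly_dist['percentage'].sum():.1f}%")
print(f"Day-of-week hours: {dow_dist['hours'].sum():.1f} vs total {merged_df['duration'].sum() / 3600:.1f}")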
def set_figure_properties(fig, ax, bg_color, text_color, grid_color):
    ax.set_facecolor(bg_color)
    fig.set_facecolor(bg_color)
    for spine in ax.spines.values():
        spine.set_color(grid_color)
    ax.tick_params(colors=text_color)
    ax.xaxis.label.set_color(text_color)
    ax.yaxis.label.set_color(text_color)
    ax.title.set_color(text_color)
    return fig, ax

def get_theme_colors(theme='light'):
    if theme == 'light':
        plt.style.use('default')
        return {
            'bg_color': 'white',
            'text_color': '#333333',
            'grid_color': '#E0E0E0',
            'color_palette': 'viridis',
            'accent_color': '#1E88E5',
            'alpha': 0.8
        }
    else:
        plt.style.use('dark_background')
        return {
            'bg_color': '#121212',
            'text_color': '#E0E0E0',
            'grid_color': '#333333',
            'color_palette': 'plasma',
            'accent_color': '#00E5FF',
            'alpha': 0.9
        }

os.makedirs('visualizations_light', exist_ok=True)
os.makedirs('visualizations_dark', exist_ok=True)
def plot_hourly_distribution(hourly_dist, theme='light', show_plot=True):
    colors = get_theme_colors(theme)
    
    fig, ax = plt.subplots(figsize=(14, 8))
    
    custom_cmap = sns.color_palette(colors['color_palette'], as_cmap=True)
    norm = plt.Normalize(0, 23)
    color_values = [custom_cmap(norm(h)) for h in hourly_dist['hour']]
    
    # Use the 'hour' column rather than the positional index so each bar lands on the
    # correct x position even if some hours have no recorded usage.
    for i, (_, row) in enumerate(hourly_dist.iterrows()):
        rect = Rectangle((row['hour']-0.4, 0), 0.8, row['percentage'], 
                        facecolor=color_values[i], alpha=colors['alpha'], edgecolor='none')
        ax.add_patch(rect)
    
    for i, (_, row) in enumerate(hourly_dist.iterrows()):
        reflection = Rectangle((row['hour']-0.4, 0), 0.8, row['percentage']*0.05, 
                              facecolor='white', alpha=0.1, edgecolor='none')
        ax.add_patch(reflection)
    
    x = hourly_dist['hour']
    y = hourly_dist['percentage']
    x_smooth = np.linspace(x.min(), x.max(), 300)
    spline = make_interp_spline(x, y, k=3)  # A cubic spline
    y_smooth = spline(x_smooth)
    
    ax.plot(x_smooth, y_smooth, color=colors['accent_color'], linewidth=2.5, alpha=0.7)
    
    ax.set_xlabel('Hour of Day', fontsize=12, fontweight='bold')
    ax.set_ylabel('Percentage of Total Usage', fontsize=12, fontweight='bold')
    ax.set_title('Computer Usage Distribution by Hour of Day (2024)', fontsize=16, fontweight='bold')
    ax.grid(axis='y', linestyle='--', alpha=0.4, color=colors['grid_color'])
    
    ax.set_xticks(range(0, 24))
    ax.set_xticklabels([f'{h:02d}:00' for h in range(0, 24)], rotation=45)
    
    ax.set_ylim(0, hourly_dist['percentage'].max() * 1.15)
    
    for i, row in hourly_dist.iterrows():
        if row['percentage'] > hourly_dist['percentage'].max() * 0.1:  # Only label significant bars
            ax.text(row['hour'], row['percentage'] + 0.5, f'{row["percentage"]:.1f}%', 
                   ha='center', va='bottom', color=colors['text_color'], fontsize=9, fontweight='bold')
    
    fig, ax = set_figure_properties(fig, ax, colors['bg_color'], colors['text_color'], colors['grid_color'])
    plt.tight_layout()
    
    output_dir = f'visualizations_{theme}'
    plt.savefig(f'{output_dir}/hourly_distribution.png', dpi=300, bbox_inches='tight')
    
    if show_plot:
        plt.show()
    else:
        plt.close()
    
    return fig
def plot_day_hour_heatmap(merged_df, theme='light', show_plot=True):
    colors = get_theme_colors(theme)
    fig, ax = plt.subplots(figsize=(20, 8))
    
    day_hour_data = []
    for day_of_week in range(7):
        day_df = merged_df[merged_df['day_of_week'] == day_of_week].copy()
        for hour in range(24):
            hour_df = day_df[day_df['hour'] == hour].copy()
            if not hour_df.empty:
                _, non_overlapping_time = handle_overlapping_sessions(hour_df)
                hours_value = non_overlapping_time / 3600
                day_hour_data.append({
                    'day_of_week': day_of_week,
                    'hour': hour,
                    'hours': hours_value
                })
    
    day_hour_df = pd.DataFrame(day_hour_data)
    
    heat_data = day_hour_df.pivot(index='day_of_week', columns='hour', values='hours').fillna(0)
    
    for hour in range(24):
        if hour not in heat_data.columns:
            heat_data[hour] = 0
    
    heat_data = heat_data.sort_index(axis=1)
    
    day_names = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    heat_data = heat_data.reindex(range(7))
    
    if theme == 'light':
        cmap = 'YlOrRd'
    else:
        cmap = sns.color_palette(colors['color_palette'], as_cmap=True)
    
    heatmap = sns.heatmap(
        heat_data, 
        cmap=cmap, 
        annot=True,
        fmt='.1f',
        linewidths=0.5,
        linecolor='white' if theme == 'light' else 'gray',
        ax=ax,
        cbar=False,
    )
    
    ax.set_title('Computer Usage by Day of Week and Hour of Day (2024)', fontsize=16, fontweight='bold')
    ax.set_xlabel('Hour of Day', fontsize=12, fontweight='bold')
    ax.set_ylabel('Day of Week', fontsize=12, fontweight='bold')
    
    time_labels = [f'{int(h):02d}:00' for h in range(24)]
    ax.set_xticklabels(time_labels, rotation=45, ha='right')
    
    ax.set_yticklabels(day_names)
    
    fig.set_facecolor(colors['bg_color'])
    ax.set_facecolor(colors['bg_color'])
    
    plt.tight_layout()
    
    output_dir = f'visualizations_{theme}'
    plt.savefig(f'{output_dir}/day_hourly_heatmap.png', dpi=300, bbox_inches='tight')
    
    if show_plot:
        plt.show()
    else:
        plt.close()
    
    print("\nHeatmap Statistics:")
    
    total_hours = heat_data.sum().sum()
    print(f"Total hours logged: {total_hours:.2f}")
    
    print("\nTop 5 most active time slots:")
    
    # Stack the dataframe to get (day, hour) pairs
    stacked_data = heat_data.stack().reset_index()
    stacked_data.columns = ['day_of_week', 'hour', 'hours']
    stacked_data['day_name'] = stacked_data['day_of_week'].apply(lambda x: day_names[x])
    
    # Get top 5
    top_5 = stacked_data.nlargest(5, 'hours')
    for _, row in top_5.iterrows():
        print(f"{row['day_name']} at {int(row['hour']):02d}:00 - {row['hours']:.2f} hours")
    
    return fig
def plot_monthly_usage(merged_df, theme='light', show_plot=True):
    colors = get_theme_colors(theme)
    
    fig, ax = plt.subplots(figsize=(14, 8))
    
    # Work on a copy so the caller's DataFrame isn't mutated (avoids SettingWithCopyWarning)
    merged_df = merged_df.copy()
    merged_df['month_name'] = merged_df['begin_date'].dt.strftime('%b')
    merged_df['month_num'] = merged_df['begin_date'].dt.month
    
    monthly_data = merged_df.groupby('month_num').agg({
        'duration': 'sum',
        'month_name': 'first'
    }).reset_index()
    
    monthly_data['hours'] = monthly_data['duration'] / 3600
    monthly_data = monthly_data.sort_values('month_num')
    
    bars = ax.bar(monthly_data['month_name'], monthly_data['hours'], 
                 color=sns.color_palette(colors['color_palette'], len(monthly_data)),
                 alpha=colors['alpha'], width=0.7)
    
    ax.plot(monthly_data['month_name'], monthly_data['hours'], 'o-', 
            color=colors['accent_color'], linewidth=2.5, markersize=8)
    
    avg_monthly = monthly_data['hours'].mean()
    avg_line_color = '#E53935' if theme == 'light' else '#FF5252'
    ax.axhline(y=avg_monthly, color=avg_line_color, 
               linestyle='--', linewidth=2, alpha=0.7,
               label=f'Monthly Average: {avg_monthly:.1f}h')
    
    ax.set_xlabel('Month', fontsize=12, fontweight='bold')
    ax.set_ylabel('Hours', fontsize=12, fontweight='bold')
    ax.set_title('Monthly Computer Usage (2024)', fontsize=16, fontweight='bold')
    ax.grid(axis='y', linestyle='--', alpha=0.4, color=colors['grid_color'])
    
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height + 0.5,
               f'{height:.1f}h', ha='center', va='bottom', color=colors['text_color'], 
               fontsize=10, fontweight='bold')
    
    ax.legend(loc='best', facecolor=colors['bg_color'], edgecolor=colors['grid_color'])
    
    fig, ax = set_figure_properties(fig, ax, colors['bg_color'], colors['text_color'], colors['grid_color'])
    plt.tight_layout()
    
    output_dir = f'visualizations_{theme}'
    plt.savefig(f'{output_dir}/monthly_usage.png', dpi=300, bbox_inches='tight')
    
    if show_plot:
        plt.show()
    else:
        plt.close()
    
    return fig
def plot_calendar_heatmap(merged_df, theme='light', show_plot=True):
    colors = get_theme_colors(theme)
    
    # Calculate daily total durations in seconds, then convert to hours
    daily_data = []
    for date in sorted(merged_df['date'].unique()):
        day_df = merged_df[merged_df['date'] == date]
        # Get the duration in seconds and convert to hours
        day_sessions, duration_seconds = handle_overlapping_sessions(day_df)
        hours = duration_seconds / 3600  # Convert seconds to hours
        daily_data.append({
            'date': date,
            'hours': hours
        })
    
    daily_df = pd.DataFrame(daily_data)
    print(daily_df)
    
    start_date = datetime(2024, 1, 1).date()
    end_date = datetime(2024, 12, 31).date()
    all_dates = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]
    full_daily_df = pd.DataFrame({'date': all_dates})
    full_daily_df = full_daily_df.merge(daily_df, on='date', how='left').fillna(0)
    full_daily_df['month'] = [d.month for d in full_daily_df['date']]
    full_daily_df['day'] = [d.day for d in full_daily_df['date']]
    full_daily_df['weekday'] = [d.weekday() for d in full_daily_df['date']]
    
    if theme == 'light':
        cmap = LinearSegmentedColormap.from_list('github', [
            '#ebedf0',
            '#9be9a8',
            '#40c463',
            '#30a14e',
            '#216e39'
        ])
    else:
        cmap = LinearSegmentedColormap.from_list('github_dark', [
            '#161b22',
            '#0e4429',
            '#006d32',
            '#26a641',
            '#39d353'
        ])
    
    max_hours = max(8, full_daily_df['hours'].max())
    activity_levels = [0, max_hours/4, max_hours/2, max_hours*3/4, max_hours]
    
    fig = plt.figure(figsize=(16, 12))
    fig.patch.set_facecolor(colors['bg_color'])
    
    gs = gridspec.GridSpec(4, 3, figure=fig, hspace=0.3, wspace=0.2)
    
    weekdays = ['M', 'T', 'W', 'T', 'F', 'S', 'S']
    
    for month_idx in range(1, 13):
        month_data = full_daily_df[full_daily_df['month'] == month_idx]
        month_name = calendar.month_abbr[month_idx]
        row = (month_idx - 1) // 3
        col = (month_idx - 1) % 3
        ax = fig.add_subplot(gs[row, col])
        ax.set_facecolor(colors['bg_color'])

        first_day = datetime(2024, month_idx, 1).date()
        first_weekday = first_day.weekday()  # Monday is 0
        days_in_month = calendar.monthrange(2024, month_idx)[1]
        weeks_needed = (days_in_month + first_weekday + 6) // 7
        square_size = 0.95
        padding = 0.05

        ax.set_xlim(-0.5, 7 * (square_size + padding))
        ax.set_ylim(weeks_needed * (square_size + padding) - padding + 0.5, -0.5)
        
        for day in range(1, days_in_month + 1):
            date_obj = datetime(2024, month_idx, day).date()
            weekday = date_obj.weekday()
            week = (day + first_weekday - 1) // 7
            
            day_data = month_data[month_data['day'] == day]
            hours = 0 if day_data.empty else day_data.iloc[0]['hours']
            
            color_idx = np.sum(hours > np.array(activity_levels)) - 1
            color_idx = max(0, color_idx)  # Ensure non-negative
            color = cmap(color_idx / (len(activity_levels) - 1))
            
            x = weekday * (square_size + padding)
            y = week * (square_size + padding)
            
            # Draw square
            rect = Rectangle((x, y), square_size, square_size, 
                            facecolor=color, edgecolor='none', alpha=0.9)
            ax.add_patch(rect)
            
            ax.text(x + square_size/2, y + square_size*0.35, str(day), 
                   ha='center', va='center', color=colors['text_color'],
                   fontsize=6, fontweight='bold')
            
            if hours > 0:
                ax.text(x + square_size/2, y + square_size*0.7, f"{hours:.1f}", 
                       ha='center', va='center', color=colors['text_color'],
                       fontsize=5, alpha=0.7)
        
        for i, day in enumerate(weekdays):
            ax.text(i * (square_size + padding) + square_size/2, -0.3, day,
                   ha='center', va='center', color=colors['text_color'],
                   fontsize=7, alpha=0.7)
            
        ax.set_title(month_name, fontsize=10, fontweight='bold', 
                    color=colors['text_color'], pad=2)
    
        ax.set_xticks([])
        ax.set_yticks([])
        ax.axis('off')
    
    fig.suptitle('Daily Computer Usage (2024)', fontsize=14, 
                fontweight='bold', color=colors['text_color'], y=0.98)
    
    total_days = len(full_daily_df)
    active_days = len(full_daily_df[full_daily_df['hours'] > 0])
    total_hours = full_daily_df['hours'].sum()
    avg_hours = total_hours / active_days if active_days > 0 else 0
    
    stats_text = (
        f"Days tracked: {active_days}/{total_days} ({active_days/total_days*100:.1f}%) | "
        f"Total: {total_hours:.1f}h | "
        f"Avg: {avg_hours:.1f}h/day"
    )
    
    fig.text(0.5, 0.01, stats_text, ha='center', 
            color=colors['text_color'], fontsize=9, alpha=0.8)
    
    plt.tight_layout(rect=[0, 0.02, 0.90, 0.96])
    
    output_dir = f'visualizations_{theme}'
    plt.savefig(f'{output_dir}/calendar_heatmap.png', dpi=300, bbox_inches='tight')
    
    if show_plot:
        plt.show()
    else:
        plt.close()
    
    return fig, full_daily_df
def create_all_visualizations(daily_usage, weekly_usage, hourly_dist, dow_dist, merged_df, theme='light', show_plots=True):
    figures = []
    
    print(f"Creating {theme} theme visualizations...")
    figures.append(plot_hourly_distribution(hourly_dist, theme, show_plots))
    figures.append(plot_monthly_usage(merged_df, theme, show_plots))
    figures.append(plot_day_hour_heatmap(merged_df, theme, show_plots))

    fig, _ = plot_calendar_heatmap(merged_df, theme, show_plots)
    figures.append(fig)
    
    return figures
light_figures = create_all_visualizations(daily_usage, weekly_usage, hourly_dist, dow_dist, merged_df, 'light', True)
dark_figures = create_all_visualizations(daily_usage, weekly_usage, hourly_dist, dow_dist, merged_df, 'dark', True)

print(f"Total hours tracked (removing overlaps): {total_hours_non_overlapping:.2f}")

print("\nAverage daily computer usage: {:.2f} hours".format(daily_usage['hours'].mean()))
print("Most active day of week: {}".format(dow_dist.sort_values('hours', ascending=False).iloc[0]['day_name']))
print("Most active hour of day: {:02.0f}:00".format(hourly_dist.sort_values('duration', ascending=False).iloc[0]['hour']))

print("\nVisualizations have been displayed in the notebook and saved in 'visualizations_light' and 'visualizations_dark' folders.")
Creating light theme visualizations...
Heatmap Statistics:
Total hours logged: 3941.72

Top 5 most active time slots:
Tuesday at 22:00 - 58.84 hours
Monday at 22:00 - 53.28 hours
Wednesday at 22:00 - 53.06 hours
Thursday at 22:00 - 52.50 hours
Friday at 22:00 - 51.56 hours
           date      hours
0    2024-01-01   6.378916
1    2024-01-02  11.594200
2    2024-01-03   9.123391
3    2024-01-04   9.567604
4    2024-01-05   9.505766
..          ...        ...
358  2024-12-27   5.198889
359  2024-12-28   2.259722
360  2024-12-29   2.357222
361  2024-12-30   3.870000
362  2024-12-31   3.170556

[363 rows x 2 columns]
/var/folders/57/v2_7d8m567n654mwmkhgh06h0000gp/T/ipykernel_27008/2321284170.py:131: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
  plt.tight_layout(rect=[0, 0.02, 0.90, 0.96])
Creating dark theme visualizations...
Heatmap Statistics:
Total hours logged: 3941.72

Top 5 most active time slots:
Tuesday at 22:00 - 58.84 hours
Monday at 22:00 - 53.28 hours
Wednesday at 22:00 - 53.06 hours
Thursday at 22:00 - 52.50 hours
Friday at 22:00 - 51.56 hours
           date      hours
0    2024-01-01   6.378916
1    2024-01-02  11.594200
2    2024-01-03   9.123391
3    2024-01-04   9.567604
4    2024-01-05   9.505766
..          ...        ...
358  2024-12-27   5.198889
359  2024-12-28   2.259722
360  2024-12-29   2.357222
361  2024-12-30   3.870000
362  2024-12-31   3.170556

[363 rows x 2 columns]
/var/folders/57/v2_7d8m567n654mwmkhgh06h0000gp/T/ipykernel_27008/2321284170.py:131: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
  plt.tight_layout(rect=[0, 0.02, 0.90, 0.96])
Total hours tracked (removing overlaps): 4400.94

Average daily computer usage: 11.06 hours
Most active day of week: Saturday
Most active hour of day: 22:00

Visualizations have been displayed in the notebook and saved in 'visualizations_light' and 'visualizations_dark' folders.