"""
教育用再現コード: 2024年 統計データ分析コンペティション 総務大臣賞（高校生）
=================================================================
論文タイトル：食の外部化における地域特性
著者：佐々木万悠子（雙葉高等学校）

【分析概要】
  データ：政府統計の総合窓口 SSDSE-B-2026（社会・人口統計体系 都道府県データ）
          家計調査（二人以上の世帯）、人口、旅館営業施設数、延べ宿泊者数 等

  Step1. 相関行列ヒートマップ（食料費・教養娯楽費・被服費割合とホテル密度・高齢化率・合計特殊出生率）
  Step2. 散布図：高齢化率 vs 食料費割合・教養娯楽費割合（47都道府県・回帰直線付き）
  Step3. 箱ひげ図：都市規模別（地方・中規模・都市）の食料費・教養娯楽費割合比較
  Step4. 時系列：食料費割合・教養娯楽費割合の推移（2012〜2023年、全国平均）

  Key findings:
    - 高齢化率が高い都道府県ほど教養娯楽費割合が低い（r=−0.503）
    - コロナ禍（2020〜21年）で食料費割合が上昇し、教養娯楽費割合が急減
    - 都市部の教養娯楽費割合は地方より高い（外食・娯楽志向）
    - 食料費割合は都市規模による差が小さいが、教養娯楽費に顕著な地域差

【データサイエンス学習ポイント】
  1. 家計消費支出を分母とした構成比データの活用
  2. 相関係数の解釈：因果ではなく関連の強さ（多重共線性・構成比制約に注意）
  3. 高齢化率・ホテル密度など社会変化指標の代理変数としての意義
  4. コロナ禍（2020〜21年）が消費行動に与えた構造変化の可視化
  5. 都市規模別グループ比較（箱ひげ図）による地域差の定量的評価

【データ】SSDSE-B-2026.csv（実公的データ）
  出典：政府統計の総合窓口（e-Stat）/ 統計数理研究所 SSDSE
=================================================================
"""

# ============================================================
# 【データの準備】実行前に以下のデータファイルを用意してください
#
#   必要ファイル:
#     ・SSDSE-B-2026.csv
#       → data/raw/SSDSE-B-2026.csv に配置
#
#   ダウンロード先:
#     https://www.nstac.go.jp/use/literacy/ssdse/
#     （SSDSE-B（社会・人口統計体系 都道府県データ） の CSV をダウンロード）
#
#   フォルダ配置（プロジェクトルートからの相対パス）:
#     code/                ← このスクリプトの場所
#     data/raw/            ← CSV ファイルをここに配置
#     html/figures/        ← 図の出力先（自動生成）
#
#   実行方法（ファイルを一切編集せず実行可能）:
#     python3 code/2024_H1_daijin.py
# ============================================================


import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

# ──────────────────────────────────────────────────────────────
# Shared settings
# ──────────────────────────────────────────────────────────────
import os

# Matplotlib defaults applied to every figure created below.
plt.rcParams.update({
    'font.family': 'Hiragino Sans',
    'axes.unicode_minus': False,
    'figure.dpi': 150,
})

# Input CSV and figure output directory; both are relative paths, so they
# resolve against the current working directory.
FIG_DIR = 'html/figures'
DATA_PATH = 'data/raw/SSDSE-B-2026.csv'
os.makedirs(FIG_DIR, exist_ok=True)

# ================================================================
# ■ Step 0. Load the data (SSDSE-B-2026)
# ================================================================

print("=" * 65)
print("■ データ読み込み（SSDSE-B-2026 実公的データ）")
print("=" * 65)

# header=1: column names are taken from the second row of the file.
df_raw = pd.read_csv(DATA_PATH, header=1, encoding='cp932')

# Keep only rows whose region code is "R" followed by exactly five digits;
# rows with a missing code are treated as non-matching.
is_pref_row = df_raw['地域コード'].str.match(r'^R\d{5}$', na=False)
df_b = df_raw.loc[is_pref_row].copy().reset_index(drop=True)

# Columns used by the analysis; cells that cannot be parsed become NaN.
numeric_cols = [
    '年度',
    '総人口',
    '65歳以上人口',
    '合計特殊出生率',
    '消費支出（二人以上の世帯）',
    '食料費（二人以上の世帯）',
    '教養娯楽費（二人以上の世帯）',
    '被服及び履物費（二人以上の世帯）',
    'その他の消費支出（二人以上の世帯）',
    '旅館営業施設数（ホテルを含む）',
    '延べ宿泊者数',
]
df_b[numeric_cols] = df_b[numeric_cols].apply(pd.to_numeric, errors='coerce')

n_years = df_b['年度'].nunique()
n_regions = df_b['地域コード'].nunique()
print(f"読み込み完了: {len(df_b)} 行（{n_years}年 × {n_regions}都道府県）")
print(f"対象年度: {sorted(df_b['年度'].unique())}")

# ================================================================
# ■ Step 1. Derived variables
# ================================================================

# Expenditure shares: each category divided by total consumption expenditure.
total_spend = df_b['消費支出（二人以上の世帯）']
for ratio_name, source_col in [
    ('food_ratio', '食料費（二人以上の世帯）'),
    ('leisure_ratio', '教養娯楽費（二人以上の世帯）'),
    ('clothing_ratio', '被服及び履物費（二人以上の世帯）'),
    ('other_ratio', 'その他の消費支出（二人以上の世帯）'),
]:
    df_b[ratio_name] = df_b[source_col] / total_spend

# Lodging facilities per 10,000 residents.
df_b['hotel_per_pop'] = (
    df_b['旅館営業施設数（ホテルを含む）'].div(df_b['総人口']).mul(10000)
)

# Share of the population aged 65 and over.
df_b['aging_rate'] = df_b['65歳以上人口'].div(df_b['総人口'])

# Cross-section for 2023, used for all prefecture-level comparisons below.
is_2023 = df_b['年度'] == 2023
df_2023 = df_b.loc[is_2023].copy().reset_index(drop=True)

print(f"\n2023年断面データ: {len(df_2023)} 都道府県")
print("\n記述統計（主要変数・2023年）:")
desc_cols = ['food_ratio', 'leisure_ratio', 'clothing_ratio', 'hotel_per_pop', 'aging_rate']
summary_2023 = df_2023[desc_cols].describe()
print(summary_2023.round(4))

# ================================================================
# ■ Correlation analysis
# ================================================================
print("\n" + "=" * 65)
print("■ 相関分析（2023年 47都道府県）")
print("=" * 65)

# Variables entering the correlation matrix and their tick labels
# (same order; the labels are reused by the Figure 1 heatmap).
corr_cols = ['food_ratio', 'leisure_ratio', 'clothing_ratio',
             'hotel_per_pop', 'aging_rate', '合計特殊出生率']
corr_labels = ['食料費\n割合', '教養娯楽\n費割合', '被服費\n割合',
               'ホテル\n密度', '高齢化率', '合計特殊\n出生率']
corr_mat = df_2023.loc[:, corr_cols].corr()

print("\n相関行列（2023年）:")
print(corr_mat.round(3))

# Pearson tests of the aging rate against the two main expenditure shares.
aging_2023 = df_2023['aging_rate']
r_aging_food, p_aging_food = stats.pearsonr(aging_2023, df_2023['food_ratio'])
r_aging_leis, p_aging_leis = stats.pearsonr(aging_2023, df_2023['leisure_ratio'])
print(f"\n高齢化率 × 食料費割合:    r={r_aging_food:.3f}, p={p_aging_food:.4f}")
print(f"高齢化率 × 教養娯楽費割合: r={r_aging_leis:.3f}, p={p_aging_leis:.4f}")

# City-size groups: split prefectures at the population terciles.
pop_2023 = df_2023['総人口']
pop_q33 = pop_2023.quantile(1/3)
pop_q67 = pop_2023.quantile(2/3)
df_2023['urban_group'] = pd.cut(
    pop_2023,
    bins=[0, pop_q33, pop_q67, float('inf')],
    labels=['地方（人口少）', '中規模', '都市（人口多）'],
    include_lowest=True,
)

print("\n都市規模グループ別平均（2023年）:")
group_mean_cols = ['food_ratio', 'leisure_ratio', 'aging_rate']
grp_stats = df_2023.groupby('urban_group', observed=True)[group_mean_cols].mean()
print(grp_stats.round(4))

# ================================================================
# ■ Figure generation (4 figures)
# ================================================================

print("\n" + "=" * 65)
print("■ 図の生成（4枚）")
print("=" * 65)

# ─── Figure 1: correlation-matrix heatmap ─────────────────────────
print("図1: 相関行列ヒートマップを作成中...")

fig1, axes1 = plt.subplots(1, 2, figsize=(14, 6))
fig1.suptitle('家計消費費目割合と地域変数の相関分析（2023年 47都道府県）',
              fontsize=13, fontweight='bold')

# Left panel: heatmap of corr_mat with each coefficient printed in its cell.
ax1a = axes1[0]
mat = corr_mat.values
im = ax1a.imshow(mat, cmap='RdBu_r', vmin=-1, vmax=1, aspect='auto')
plt.colorbar(im, ax=ax1a, shrink=0.8, label='Pearson r')
ax1a.set_xticks(range(len(corr_cols)))
ax1a.set_yticks(range(len(corr_cols)))
ax1a.set_xticklabels(corr_labels, fontsize=9)
ax1a.set_yticklabels(corr_labels, fontsize=9)
for i, row_vals in enumerate(mat):
    for j, v in enumerate(row_vals):
        # White text on strongly coloured cells (|r| > 0.6), black otherwise.
        ax1a.text(j, i, f'{v:.2f}', ha='center', va='center',
                  fontsize=9, color='white' if abs(v) > 0.6 else 'black',
                  fontweight='bold')
ax1a.set_title('相関行列（消費費目割合 × 地域変数）', fontsize=10, fontweight='bold')

# Right panel: correlation of each variable with the leisure share,
# drawn as horizontal bars sorted from most negative to most positive.
ax1b = axes1[1]
target_vars   = ['food_ratio', 'clothing_ratio', 'hotel_per_pop', 'aging_rate', '合計特殊出生率']
target_labels2 = ['食料費割合', '被服費割合', 'ホテル密度\n（施設/万人）', '高齢化率', '合計特殊出生率']

# One Pearson test per variable, so the bar length and the significance
# stars always come from the same computation. Incomplete pairs are dropped
# first: pandas' corr() excludes missing values, pearsonr() does not.
leisure_corrs = []
for var, label in zip(target_vars, target_labels2):
    pair = df_2023[['leisure_ratio', var]].dropna()
    r_val, p_val = stats.pearsonr(pair['leisure_ratio'], pair[var])
    leisure_corrs.append((r_val, p_val, label))
leisure_corrs.sort(key=lambda item: item[0])

bar_pos = range(len(leisure_corrs))
ax1b.barh(
    bar_pos,
    [r_val for r_val, _, _ in leisure_corrs],
    color=['#E53935' if r_val < 0 else '#1565C0' for r_val, _, _ in leisure_corrs],
    alpha=0.85, edgecolor='white'
)
ax1b.set_yticks(bar_pos)
ax1b.set_yticklabels([label for _, _, label in leisure_corrs], fontsize=10)
ax1b.axvline(0, color='black', linewidth=1.0)
ax1b.set_xlabel('Pearson 相関係数（教養娯楽費割合との相関）', fontsize=10)
ax1b.set_title('教養娯楽費割合との相関係数\n（高齢化率との強い負の相関）',
               fontsize=10, fontweight='bold')
ax1b.grid(axis='x', alpha=0.3)

# Significance stars just beyond the tip of each bar (none when p >= 0.05).
for pos, (r_val, p_val, _) in enumerate(leisure_corrs):
    sig = '***' if p_val < 0.001 else '**' if p_val < 0.01 else '*' if p_val < 0.05 else ''
    if not sig:
        continue
    offset = 0.02 if r_val > 0 else -0.02
    ha = 'left' if r_val > 0 else 'right'
    ax1b.text(r_val + offset, pos, sig, va='center', ha=ha, fontsize=10, fontweight='bold')

plt.tight_layout()
fig1.savefig(os.path.join(FIG_DIR, '2024_H1_fig1_corr.png'), bbox_inches='tight', dpi=150)
plt.close(fig1)
print("  → 2024_H1_fig1_corr.png 保存完了")

# ─── Figure 2: aging rate vs expenditure shares (scatter plots) ────
print("図2: 散布図（高齢化率 vs 食料費割合）を作成中...")

fig2, axes2 = plt.subplots(1, 2, figsize=(14, 6))
fig2.suptitle('高齢化率と家計消費費目割合の関係（2023年 47都道府県）',
              fontsize=13, fontweight='bold')

# Point colour per prefecture by city-size group; grey for anything unmapped.
group_colors_map = {'地方（人口少）': '#1565C0', '中規模': '#43A047', '都市（人口多）': '#E65100'}
colors2 = [group_colors_map.get(str(g), '#888888') for g in df_2023['urban_group']]
# Legend swatches shared by both panels.
patches2a = [mpatches.Patch(color=c, alpha=0.85, label=g)
             for g, c in group_colors_map.items()]


def _plot_aging_scatter(ax, y_col, ylabel, title_head, highlight_prefs):
    """Draw one "aging rate vs expenditure share" panel on ``ax``.

    Reads the module-level ``df_2023``, ``colors2`` and ``patches2a``.
    Both axes are shown in percent; the regression itself is fitted on the
    raw ratios and only scaled by 100 for drawing.

    Args:
        ax: Matplotlib axes to draw on.
        y_col: Name of the ratio column in ``df_2023`` plotted on the y axis.
        ylabel: Y-axis label.
        title_head: First title line; r and p are appended on a second line.
        highlight_prefs: Prefecture names to annotate next to their points.
    """
    ax.scatter(df_2023['aging_rate'] * 100, df_2023[y_col] * 100,
               c=colors2, s=80, alpha=0.85, edgecolors='white', linewidth=0.5, zorder=3)

    # Least-squares line drawn between the smallest and largest aging rate.
    xs = df_2023['aging_rate'].values
    ys = df_2023[y_col].values
    slope, intercept, r_val, p_val, _ = stats.linregress(xs, ys)
    x_ends = [xs.min(), xs.max()]
    y_ends = [intercept + slope * x for x in x_ends]
    ax.plot([v * 100 for v in x_ends], [v * 100 for v in y_ends],
            '--', color='#333333', linewidth=2)

    # Name labels for the prefectures of interest.
    for _, row in df_2023[df_2023['都道府県'].isin(highlight_prefs)].iterrows():
        ax.annotate(row['都道府県'],
                    (row['aging_rate'] * 100, row[y_col] * 100),
                    fontsize=8.5, fontweight='bold', color='#333333',
                    xytext=(5, 4), textcoords='offset points')

    ax.set_xlabel('高齢化率（65歳以上人口 / 総人口 × 100, %）', fontsize=10)
    ax.set_ylabel(ylabel, fontsize=10)
    ax.set_title('{}\n（r={:.3f}, p={:.4f}）'.format(title_head, r_val, p_val),
                 fontsize=10, fontweight='bold')
    # The legend is built from explicit handles: group swatches plus a proxy
    # line for the regression, so the plotted line itself needs no label.
    ax.legend(handles=patches2a + [plt.Line2D([0], [0], color='#333333', linestyle='--',
                                              label=f'回帰直線 r={r_val:.3f}')],
              fontsize=8.5)
    ax.grid(True, alpha=0.3)


# Left panel: aging rate vs food share.
_plot_aging_scatter(axes2[0], 'food_ratio',
                    '食料費割合（食料費 / 消費支出 × 100, %）',
                    '高齢化率 vs 食料費割合',
                    ['秋田県', '東京都', '神奈川県', '愛媛県', '沖縄県'])
# Right panel: aging rate vs leisure share.
_plot_aging_scatter(axes2[1], 'leisure_ratio',
                    '教養娯楽費割合（教養娯楽費 / 消費支出 × 100, %）',
                    '高齢化率 vs 教養娯楽費割合',
                    ['秋田県', '神奈川県', '東京都', '長崎県', '沖縄県'])

plt.tight_layout()
fig2.savefig(os.path.join(FIG_DIR, '2024_H1_fig2_scatter.png'), bbox_inches='tight', dpi=150)
plt.close(fig2)
print("  → 2024_H1_fig2_scatter.png 保存完了")

# ─── Figure 3: box plots by city-size group ───────────────────────
print("図3: 都市規模別の箱ひげ図を作成中...")

# Fill colour per group; the insertion order is also the plotting order.
GROUP_COLORS = {
    '地方（人口少）': '#1565C0',
    '中規模': '#43A047',
    '都市（人口多）': '#E65100',
}
GROUP_ORDER = list(GROUP_COLORS)

fig3, axes3 = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))
fig3.suptitle(
    '都市規模グループ別 消費費目割合の分布比較（2023年 47都道府県）',
    fontsize=13,
    fontweight='bold',
)

def boxplot_group(ax, target_col, ylabel, title):
    """Draw one box per city-size group for a ratio column of ``df_2023``.

    Reads the module-level ``df_2023``, ``GROUP_ORDER`` and ``GROUP_COLORS``.
    Values are multiplied by 100 before plotting. On top of the boxes the
    individual observations are drawn as an evenly spaced row of points
    centred on each box, and the group mean as a gold diamond.

    Args:
        ax: Matplotlib axes to draw on.
        target_col: Name of the column in ``df_2023`` to plot.
        ylabel: Y-axis label.
        title: Axes title.
    """
    data_by_group = [
        df_2023.loc[df_2023['urban_group'] == g, target_col].dropna().values * 100
        for g in GROUP_ORDER
    ]
    bp = ax.boxplot(data_by_group, patch_artist=True, notch=False,
                    medianprops=dict(color='white', linewidth=2.5),
                    whiskerprops=dict(linewidth=1.5),
                    capprops=dict(linewidth=1.5))
    for patch, g in zip(bp['boxes'], GROUP_ORDER):
        patch.set_facecolor(GROUP_COLORS[g])
        patch.set_alpha(0.8)
    ax.set_xticklabels(GROUP_ORDER, fontsize=10)
    ax.set_ylabel(ylabel, fontsize=10)
    ax.set_title(title, fontsize=10, fontweight='bold')
    ax.grid(axis='y', alpha=0.3)

    # Individual observations, spread at fixed 0.03 steps instead of random
    # jitter. Offsets are measured from the middle index (n - 1) / 2 so the
    # row of points is symmetric about the box position.
    for pos, (g, data) in enumerate(zip(GROUP_ORDER, data_by_group), start=1):
        n = len(data)
        xs = [pos + (i - (n - 1) / 2) * 0.03 for i in range(n)]
        ax.scatter(xs, data, color=GROUP_COLORS[g], s=30, alpha=0.6,
                   edgecolors='white', linewidth=0.4, zorder=4)

    # Group means; empty groups are skipped.
    for pos, data in enumerate(data_by_group, start=1):
        if len(data) > 0:
            ax.scatter(pos, data.mean(), marker='D', color='gold',
                       s=60, zorder=5, edgecolors='#333', linewidth=0.8)

# One panel per expenditure share: (axes, column, y label, title).
for panel_ax, panel_col, panel_ylabel, panel_title in (
    (axes3[0], 'food_ratio',
     '食料費割合（食料費 / 消費支出 × 100, %）',
     '都市規模グループ別 食料費割合の分布\n（◆=平均値、箱内白線=中央値）'),
    (axes3[1], 'leisure_ratio',
     '教養娯楽費割合（教養娯楽費 / 消費支出 × 100, %）',
     '都市規模グループ別 教養娯楽費割合の分布\n（都市部で高い外食・娯楽志向）'),
):
    boxplot_group(panel_ax, panel_col, panel_ylabel, panel_title)

# Print mean / median / count per group, in percent.
print("\n都市規模グループ別 記述統計（%換算）:")
for stat_col, stat_name in (('food_ratio', '食料費割合'), ('leisure_ratio', '教養娯楽費割合')):
    print(f"\n  {stat_name}:")
    pct_values = df_2023[stat_col] * 100
    for grp_name in GROUP_ORDER:
        vals = pct_values[df_2023['urban_group'] == grp_name].dropna()
        print(f"    {grp_name}: mean={vals.mean():.2f}%, median={vals.median():.2f}%, n={len(vals)}")

plt.tight_layout()
fig3_path = os.path.join(FIG_DIR, '2024_H1_fig3_type.png')
fig3.savefig(fig3_path, bbox_inches='tight', dpi=150)
plt.close(fig3)
print("  → 2024_H1_fig3_type.png 保存完了")

# ─── Figure 4: time-series trends (2012-2023) ─────────────────────
print("図4: 時系列トレンドを作成中...")

# Per-year unweighted mean of the prefecture-level shares.
yearly_avg = df_b.groupby('年度')[['food_ratio', 'leisure_ratio', 'clothing_ratio']].mean()
years_ts = yearly_avg.index.tolist()

# Series by city size: the 2023 group assignment is applied to every year.
urban_group_map = dict(zip(df_2023['都道府県'], df_2023['urban_group']))
df_b['urban_group_fixed'] = df_b['都道府県'].map(urban_group_map)
yearly_urban = df_b.groupby(['年度', 'urban_group_fixed'], observed=True)[
    ['food_ratio', 'leisure_ratio']
].mean().reset_index()

fig4, axes4 = plt.subplots(1, 2, figsize=(14, 6))
fig4.suptitle('食料費・教養娯楽費割合の推移（2012〜2023年）', fontsize=13, fontweight='bold')

# Left panel: yearly means of the food, leisure and clothing shares.
ax4a = axes4[0]
ax4a.plot(years_ts, yearly_avg['food_ratio'] * 100,
          'o-', color='#E53935', linewidth=2.5, markersize=7, label='食料費割合（内食志向）')
ax4a.plot(years_ts, yearly_avg['leisure_ratio'] * 100,
          's-', color='#1565C0', linewidth=2.5, markersize=7, label='教養娯楽費割合（外食・娯楽）')
ax4a.plot(years_ts, yearly_avg['clothing_ratio'] * 100,
          '^-', color='#43A047', linewidth=2.0, markersize=6, label='被服費割合')
# Shaded band and dashed line marking the COVID-19 period.
ax4a.axvspan(2020, 2021.5, alpha=0.12, color='gray')
ax4a.axvline(2020, color='gray', linestyle='--', linewidth=1.5, label='COVID-19 拡大（2020年）')
ax4a.set_xlabel('年度', fontsize=11)
ax4a.set_ylabel('割合（費目 / 消費支出 × 100, %）', fontsize=10)
ax4a.set_title('全国平均：消費費目割合の推移\n（コロナ禍で食料費↑、教養娯楽費↓）',
               fontsize=10, fontweight='bold')
ax4a.legend(fontsize=9)
ax4a.grid(True, alpha=0.3)
ax4a.set_xticks(years_ts)
ax4a.set_xticklabels([str(y) for y in years_ts], rotation=45, fontsize=8)

# Annotate the 2019 -> 2020 change in percentage points. Both labels use a
# signed format so a decrease reads "-0.3" rather than "+-0.3". The
# annotations are skipped when either year is absent from the data.
if 2019 in yearly_avg.index and 2020 in yearly_avg.index:
    food_2019 = yearly_avg.loc[2019, 'food_ratio'] * 100
    food_2020 = yearly_avg.loc[2020, 'food_ratio'] * 100
    leis_2019 = yearly_avg.loc[2019, 'leisure_ratio'] * 100
    leis_2020 = yearly_avg.loc[2020, 'leisure_ratio'] * 100
    ax4a.annotate(f'食料費\n{food_2020 - food_2019:+.1f}%pt',
                  xy=(2020, food_2020), xytext=(2020.3, food_2020 + 0.5),
                  fontsize=8, color='#E53935',
                  arrowprops=dict(arrowstyle='->', color='#E53935'))
    ax4a.annotate(f'教養娯楽費\n{leis_2020 - leis_2019:+.1f}%pt',
                  xy=(2020, leis_2020), xytext=(2018.5, leis_2020 - 0.5),
                  fontsize=8, color='#1565C0',
                  arrowprops=dict(arrowstyle='->', color='#1565C0'))

# Right panel: leisure share over time for each city-size group.
ax4b = axes4[1]
# group name -> (line style, colour, marker); the name doubles as the label.
style_map = {
    '地方（人口少）': ('-',  '#1565C0', 'o'),
    '中規模':         ('--', '#43A047', 's'),
    '都市（人口多）': ('-',  '#E65100', '^'),
}
for grp, (ls, color, mk) in style_map.items():
    grp_data = yearly_urban[yearly_urban['urban_group_fixed'] == grp].sort_values('年度')
    ax4b.plot(grp_data['年度'], grp_data['leisure_ratio'] * 100,
              linestyle=ls, color=color, marker=mk, markersize=6,
              linewidth=2.0, label=grp, alpha=0.9)
ax4b.axvspan(2020, 2021.5, alpha=0.12, color='gray')
ax4b.axvline(2020, color='gray', linestyle='--', linewidth=1.5, label='COVID-19 拡大')
ax4b.set_xlabel('年度', fontsize=11)
ax4b.set_ylabel('教養娯楽費割合（%）', fontsize=10)
ax4b.set_title('都市規模別 教養娯楽費割合の推移\n（都市部でコロナ前後の変化が大きい）',
               fontsize=10, fontweight='bold')
ax4b.legend(fontsize=9)
ax4b.grid(True, alpha=0.3)
ax4b.set_xticks(years_ts)
ax4b.set_xticklabels([str(y) for y in years_ts], rotation=45, fontsize=8)

plt.tight_layout()
fig4.savefig(os.path.join(FIG_DIR, '2024_H1_fig4_trend.png'), bbox_inches='tight', dpi=150)
plt.close(fig4)
print("  → 2024_H1_fig4_trend.png 保存完了")

# Closing summary: banner, output directory, one line per saved figure,
# and the data source.
print("\n" + "=" * 65)
print("全図の生成完了（4枚）")
print("=" * 65)
print(f"\n保存先: {FIG_DIR}")
for summary_line in (
    "  2024_H1_fig1_corr.png    - 相関行列と教養娯楽費割合との相関係数",
    "  2024_H1_fig2_scatter.png - 高齢化率 vs 食料費・教養娯楽費割合 散布図",
    "  2024_H1_fig3_type.png    - 都市規模グループ別 費目割合の箱ひげ図",
    "  2024_H1_fig4_trend.png   - 食料費・教養娯楽費割合の時系列推移",
):
    print(summary_line)
print("\nデータ出典: SSDSE-B-2026（政府統計の総合窓口 e-Stat / 統計数理研究所）")