# -*- coding: utf-8 -*-
"""낙찰가 예측 XGBoost 학습 스크립트
- 낙찰 결과(final_bid_price)가 100건 이상 쌓이면 실행
- 매월 1회 cron 등록 권장: 0 5 1 * *  python models/train_bid_model.py
"""
import sys, os, pickle
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from db.models import get_conn

# Absolute path of the pickled model file; it is stored in the same directory
# as this script.
MODEL_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    "bid_model.pkl",
)

# Minimum number of fetched training rows required before a model is trained.
MIN_SAMPLES = 100


def _encode_property_type(value):
    """Map a property-type label to a stable integer bucket in [0, 100).

    The built-in ``hash()`` is salted per interpreter process for ``str``
    values (unless PYTHONHASHSEED is pinned), so it yields a different
    encoding on every run and cannot be reproduced when the saved model is
    later used for prediction. CRC32 over the UTF-8 bytes is identical in
    every process.

    NOTE(review): any prediction code that builds the ``ptype`` feature must
    use this same function — confirm against the caller that loads the model.
    """
    import zlib

    return zlib.crc32(str(value).encode("utf-8")) % 100


def run():
    """Train the bid-price regressor and save it to ``MODEL_PATH``.

    Loads auction items that have both ``final_bid_price`` and
    ``appraisal_price`` set, left-joined with ``item_scores``. If fewer than
    ``MIN_SAMPLES`` rows are found, a message is printed and nothing is
    trained or written. Otherwise an ``XGBRegressor`` is fitted on an 80/20
    train/test split, the hold-out MAPE is printed, and the fitted model is
    pickled to ``MODEL_PATH``.

    Returns:
        None.
    """
    # Heavy third-party dependencies are imported lazily, only when training runs.
    import pandas as pd
    from xgboost import XGBRegressor
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import mean_absolute_percentage_error

    query = """SELECT i.appraisal_price, i.min_bid_price, i.fail_count,
                      i.area_building, i.property_type, i.final_bid_price,
                      s.market_price_est
               FROM auction_items i
               LEFT JOIN item_scores s ON s.item_id=i.id
               WHERE i.final_bid_price IS NOT NULL AND i.appraisal_price IS NOT NULL"""

    conn = get_conn()
    try:
        with conn.cursor() as cur:
            cur.execute(query)
            rows = cur.fetchall()
    finally:
        # Release the connection even if the query fails.
        conn.close()

    if len(rows) < MIN_SAMPLES:
        print(f"학습 데이터 부족: {len(rows)}건 (최소 {MIN_SAMPLES}건). 규칙기반 유지.")
        return

    # NOTE(review): column access by name below assumes the cursor returns
    # dict-like rows keyed by column name — confirm against get_conn().
    df = pd.DataFrame(rows)
    df["ptype"] = df["property_type"].apply(_encode_property_type)
    # The LEFT JOIN leaves market_price_est NULL for items without a score row.
    df["market_price"] = df["market_price_est"].fillna(0)
    X = df[["appraisal_price", "min_bid_price", "fail_count",
            "area_building", "market_price", "ptype"]].fillna(0)
    y = df["final_bid_price"]

    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42)
    model = XGBRegressor(n_estimators=300, max_depth=6, learning_rate=0.05,
                         subsample=0.9, random_state=42)
    model.fit(X_tr, y_tr)
    mape = mean_absolute_percentage_error(y_te, model.predict(X_te))
    print(f"학습 완료: {len(df)}건, 검증 MAPE={mape:.3f}")

    # Write to a temporary file and swap it into place so an interrupted run
    # never leaves a truncated pickle at MODEL_PATH and the previously saved
    # model stays intact until the new one is complete.
    tmp_path = f"{MODEL_PATH}.tmp"
    try:
        with open(tmp_path, "wb") as f:
            pickle.dump(model, f)
        os.replace(tmp_path, MODEL_PATH)
    finally:
        # After a successful replace the temp file is gone; this only cleans
        # up after a failed write.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    print(f"모델 저장: {MODEL_PATH}")


# Script entry point: run the training job when this file is executed directly
# (e.g. from the cron schedule suggested in the module docstring).
if __name__ == "__main__":
    run()
