# -*- coding: utf-8 -*-
"""국토교통부 실거래가 공개시스템 OpenAPI 수집 (공공데이터포털 공식 API)
- 아파트/오피스텔/연립다세대 매매 실거래가
- 최근 6개월분을 지역코드별로 수집하여 market_prices 적재
"""
import sys, os, time
import requests
import xml.etree.ElementTree as ET
from datetime import datetime, date, timedelta

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from db.models import get_config, get_conn, log_stage

ENDPOINTS = {
    "아파트": "https://apis.data.go.kr/1613000/RTMSDataSvcAptTradeDev/getRTMSDataSvcAptTradeDev",
    "오피스텔": "https://apis.data.go.kr/1613000/RTMSDataSvcOffiTrade/getRTMSDataSvcOffiTrade",
    "연립다세대": "https://apis.data.go.kr/1613000/RTMSDataSvcRHTrade/getRTMSDataSvcRHTrade",
}
TYPE_MAP = {"아파트": "아파트", "오피스텔": "오피스텔",
            "연립다세대": "다세대주택"}   # market_prices.property_type 저장값


def fetch_month(endpoint, api_key, region_code, ym):
    params = {
        "serviceKey": api_key, "LAWD_CD": region_code, "DEAL_YMD": ym,
        "pageNo": 1, "numOfRows": 1000,
    }
    r = requests.get(endpoint, params=params, timeout=30)
    r.raise_for_status()
    root = ET.fromstring(r.text)
    rows = []
    for it in root.iter("item"):
        def g(*tags):
            for t in tags:
                el = it.find(t)
                if el is not None and el.text:
                    return el.text.strip()
            return None
        price = g("dealAmount", "거래금액")
        if not price:
            continue
        rows.append({
            "complex_name": g("aptNm", "offiNm", "mhouseNm", "아파트", "연립다세대") or "",
            "area": float(g("excluUseAr", "전용면적") or 0),
            "trade_price": int(price.replace(",", "")) * 10000,
            "trade_date": f"{g('dealYear','년')}-{int(g('dealMonth','월')):02d}-{int(g('dealDay','일') or 1):02d}",
        })
    return rows


def run():
    cfg = get_config()
    started = datetime.now()
    api_key = cfg["molit"]["api_key"]
    months = []
    d = date.today().replace(day=1)
    for _ in range(6):
        months.append(d.strftime("%Y%m"))
        d = (d - timedelta(days=1)).replace(day=1)

    total = 0
    conn = get_conn()
    try:
        with conn.cursor() as cur:
            for region_code in cfg["molit"]["region_codes"]:
                for api_type, endpoint in ENDPOINTS.items():
                    ptype = TYPE_MAP[api_type]
                    for ym in months:
                        # 이미 해당 월 데이터가 있으면 최신 월만 갱신
                        cur.execute(
                            "SELECT COUNT(*) c FROM market_prices "
                            "WHERE region_code=%s AND property_type=%s "
                            "AND DATE_FORMAT(trade_date,'%%Y%%m')=%s",
                            (region_code, ptype, ym))
                        if cur.fetchone()["c"] > 0 and ym != months[0]:
                            continue
                        try:
                            rows = fetch_month(endpoint, api_key, region_code, ym)
                        except Exception as e:
                            print(f"[WARN] molit {region_code}/{api_type}/{ym}: {e}")
                            continue
                        for row in rows:
                            cur.execute(
                                """INSERT INTO market_prices
                                   (region_code, property_type, complex_name, area,
                                    trade_price, trade_date)
                                   VALUES (%s,%s,%s,%s,%s,%s)""",
                                (region_code, ptype, row["complex_name"][:95],
                                 row["area"], row["trade_price"], row["trade_date"]))
                            total += 1
                        time.sleep(0.3)
        log_stage("molit", "success", total, started_at=started)
        print(f"[molit] 실거래가 {total}건 적재")
    except Exception as e:
        log_stage("molit", "failed", total, str(e), started)
        raise
    finally:
        conn.close()


if __name__ == "__main__":
    run()
