카테고리 없음
LightGBM
백준파이썬개발자:프로젝트골드
2024. 3. 14. 17:14
반응형
https://www.kaggle.com/code/dangnguyen97/0-38006-lightgbm
라이브러리 임포트
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from darts import TimeSeries
from darts.dataprocessing import Pipeline
from darts.dataprocessing.transformers import Scaler, InvertibleMapper, StaticCovariatesTransformer
from darts.dataprocessing.transformers.missing_values_filler import MissingValuesFiller
from darts.metrics import rmsle
from darts.models import LinearRegressionModel, LightGBMModel, XGBModel, CatBoostModel
from darts.models.filtering.moving_average_filter import MovingAverageFilter
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from tqdm.notebook import tqdm_notebook
# Global plot styling: ggplot theme with a larger base font size.
plt.style.use("ggplot")
plt.rcParams.update({"font.size": 15})

# Snapshot the palette seaborn reports once the style above is in effect,
# stored as a plain list so individual colours can be picked by index.
COLORS = [*sns.color_palette()]
내용 출력 함수 정의
# helper function to print messages
def cprint(title, *args):
    """Print ``title`` framed by ``=`` rules, followed by the extra lines.

    The output is, one item per line: a rule of ``=`` characters as long as
    ``title``, the title itself, the same rule again, and then every value
    in ``args`` in order.

    Args:
        title: Heading text; its length determines the width of the rules.
        *args: Additional values to print, each on its own line.
    """
    rule = "=" * len(title)
    # A single print call with sep="\n" puts each item on its own line.
    print(rule, title, rule, *args, sep="\n")
데이터 받기
# Directory that holds the competition CSV files.
PATH = "/kaggle/input/store-sales-time-series-forecasting"


def _load_csv(filename, **read_kwargs):
    """Read ``filename`` from ``PATH`` with ``pd.read_csv``."""
    return pd.read_csv(os.path.join(PATH, filename), **read_kwargs)


# Every table except stores.csv is read with its "date" column parsed.
train = _load_csv("train.csv", parse_dates=["date"])
test = _load_csv("test.csv", parse_dates=["date"])
oil = _load_csv("oil.csv", parse_dates=["date"])
# Give the oil price column a shorter name.
oil = oil.rename(columns={"dcoilwtico": "oil"})
store = _load_csv("stores.csv")
transaction = _load_csv("transactions.csv", parse_dates=["date"])
holiday = _load_csv("holidays_events.csv", parse_dates=["date"])

# Notebook preview of the first five training rows.
train.head(5)
데이터 분석 출력
# Distinct families and stores, and the number of (store, family) groups
# actually present in the training data.
num_family = train["family"].nunique()
num_store = train["store_nbr"].nunique()
num_ts = train.groupby(["store_nbr", "family"]).ngroups

# Training calendar: first/last day, distinct dates, and the inclusive
# length of the span in days (hence the +1).
train_start, train_end = train["date"].min().date(), train["date"].max().date()
num_train_date = train["date"].nunique()
train_len = (train_end - train_start).days + 1

# Same calendar summary for the test period.
test_start, test_end = test["date"].min().date(), test["date"].max().date()
num_test_date = test["date"].nunique()
test_len = (test_end - test_start).days + 1

cprint(
    "Basic information of data",
    f"Number of family types : {num_family}",
    f"Number of stores : {num_store}",
    f"Number of store-family pairs: {num_family * num_store}",
    f"Number of target series : {num_ts}",
    "",
    f"Number of unique train dates: {num_train_date}",
    f"Train date range : {train_len} days from {train_start} to {train_end}",
    f"Number of unique test dates : {num_test_date}",
    f"Test date range : {test_len} days from {test_start} to {test_end}",
)
=========================
Basic information of data
=========================
Number of family types : 33
Number of stores : 54
Number of store-family pairs: 1782
Number of target series : 1782
Number of unique train dates: 1684
Train date range : 1688 days from 2013-01-01 to 2017-08-15
Number of unique test dates : 16
Test date range : 16 days from 2017-08-16 to 2017-08-31
반응형