diff --git a/generator/generate.cs b/generator/generate.cs
index 5e3b196..ec79533 100644
--- a/generator/generate.cs
+++ b/generator/generate.cs
@@ -1,6 +1,7 @@
 #:package MySqlConnector@2.3.7
 
 using System.Text;
+using System.Globalization;
 using MySqlConnector;
 
 // ── Config ────────────────────────────────────────────────────────────────────
@@ -45,7 +46,9 @@ async Task BulkInsert(string table, string columns, List valueTuples)
 }
 
+// SQL literal helpers. N, D and DT format with the invariant culture so the generated
+// INSERT text never depends on the regional settings of the machine running the generator.
 string S(string? s) => s == null ? "NULL" : $"'{s.Replace("'", "''")}'";
-string N(object? n) => n == null ? "NULL" : n.ToString()!;
-string D(DateTime d) => $"'{d:yyyy-MM-dd}'";
-string DT(DateTime d) => $"'{d:yyyy-MM-dd HH:mm:ss}'";
+string N(object? n) => n == null ? "NULL" : (n is IFormattable f) ? f.ToString(null, CultureInfo.InvariantCulture) : n.ToString()!;
+string D(DateTime d) => $"'{d.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture)}'";
+string DT(DateTime d) => $"'{d.ToString("yyyy-MM-dd HH:mm:ss", CultureInfo.InvariantCulture)}'";
 
@@ -160,7 +163,7 @@ var roomTypes = new (string Code, string Desc, decimal BaseRate, bool Smoking)[]
 };
 
 await BulkInsert("room_type", "code, description, standard_rate, smoking_yn",
-    roomTypes.Select(rt => $"({S(rt.Code)},{S(rt.Desc)},{rt.BaseRate},0)").ToList());
+    roomTypes.Select(rt => $"({S(rt.Code)},{S(rt.Desc)},{N(rt.BaseRate)},0)").ToList());
 
 var roomTypeIds = new Dictionary();
 {
@@ -194,7 +197,7 @@ foreach (var rt in roomTypes)
     foreach (var rp in ratePeriods)
     {
         var rate = Math.Round(rt.BaseRate * rp.Multiplier, 2);
-        prrRows.Add($"({roomTypeIds[rt.Code]},{ratePeriodIds[rp.Code]},{rate})");
+        prrRows.Add($"({roomTypeIds[rt.Code]},{ratePeriodIds[rp.Code]},{N(rate)})");
     }
 
 await BulkInsert("period_room_rate", "room_type_id, rate_period_id, rate", prrRows);
@@ -514,7 +517,7 @@ while (bookingsDone < BOOKING_COUNT)
             int ratePeriodId = monthToRatePeriodId[dfrom.Month];
             decimal nightly = rateMap[(roomTypeId, ratePeriodId)];
             decimal total = Math.Round(nightly * nights, 2);
-            roomBookingRows.Add($"({bookingId},{roomId},{D(dfrom)},{D(dto)},{nightly},{total})");
+            roomBookingRows.Add($"({bookingId},{roomId},{D(dfrom)},{D(dto)},{N(nightly)},{N(total)})");
         }
     }
 }
diff --git
a/sql/datamart_schema.sql b/sql/datamart_schema.sql
index f4912e0..a3aa752 100644
--- a/sql/datamart_schema.sql
+++ b/sql/datamart_schema.sql
@@ -1,179 +1,200 @@
--- =============================================================================
--- HOTEL RESERVATIONS — DATA MART (STAR SCHEMA)
--- Target: Oracle (university lab schema)
--- =============================================================================
+-- ETL control table: per-entity incremental-load watermark (last loaded key, last run timestamp).
+-- NOTE(review): the script this replaces also seeded a 'FACT_ROOM_BOOKING' row here; confirm the ETL creates it.
+create table ETL_WATERMARK
+(
+    ENTITY_NAME VARCHAR2(50) not null
+        constraint PK_ETL_WATERMARK
+            primary key,
+    LAST_KEY NUMBER(20) default 0 not null,
+    LAST_RUN_TS TIMESTAMP(6) default SYSTIMESTAMP
+)
+/
 
--- -----------------------------------------------------------------------------
--- ETL CONTROL TABLE
--- Tracks incremental load watermarks per entity.
--- -----------------------------------------------------------------------------
+-- Staging table for hotel rows, carrying country, chain and star-rating attributes with each hotel.
+create table STG_HOTEL
+(
+    HOTEL_ID NUMBER(10) not null,
+    HOTEL_CODE VARCHAR2(20) not null,
+    HOTEL_NAME VARCHAR2(150) not null,
+    CITY VARCHAR2(100) not null,
+    COUNTRY_CODE CHAR(2) not null,
+    COUNTRY_NAME VARCHAR2(100) not null,
+    CURRENCY VARCHAR2(10) not null,
+    CHAIN_CODE VARCHAR2(10),
+    CHAIN_NAME VARCHAR2(100),
+    STAR_RATING NUMBER(1) not null,
+    STAR_DESCRIPTION VARCHAR2(20)
+)
+/
 
-CREATE TABLE ETL_WATERMARK (
-    entity_name VARCHAR2(50) NOT NULL,
-    last_key NUMBER(20,0) DEFAULT 0 NOT NULL,
-    last_run_ts TIMESTAMP DEFAULT SYSTIMESTAMP,
-    CONSTRAINT pk_etl_wm PRIMARY KEY (entity_name)
-);
+-- Date dimension keyed by DATE_KEY (the schema this replaces described the key as a YYYYMMDD integer).
+create table DIM_DATE
+(
+    DATE_KEY NUMBER(8) not null
+        constraint PK_DIM_DATE
+            primary key,
+    FULL_DATE DATE not null,
+    YEAR NUMBER(4) not null,
+    QUARTER NUMBER(1) not null,
+    MONTH NUMBER(2) not null,
+    MONTH_NAME VARCHAR2(10) not null,
+    WEEK_NUMBER NUMBER(2) not null,
+    DAY_OF_MONTH NUMBER(2) not null,
+    DAY_NAME VARCHAR2(10) not null,
+    IS_WEEKEND NUMBER(1) not null
+        constraint CK_DIM_DATE_WEEKEND
+            check (is_weekend IN (0, 1)),
+    IS_BUSINESS_DAY NUMBER(1) not null
+        constraint CK_DIM_DATE_BUSINESS
+            check (is_business_day IN (0, 1)),
+    SEASON VARCHAR2(10) not null
+)
+/
 
-INSERT INTO ETL_WATERMARK (entity_name, last_key) VALUES ('FACT_ROOM_BOOKING', 0);
-COMMIT;
+-- Hotel dimension with row-versioning columns (EFFECTIVE_DATE, EXPIRY_DATE, IS_CURRENT).
+-- HOTEL_KEY is a plain identity column; it must not name a system-generated ISEQ$$ sequence of one user's schema.
+create table DIM_HOTEL
+(
+    HOTEL_KEY NUMBER(10) generated as identity
+        constraint PK_DIM_HOTEL
+            primary key,
+    SOURCE_HOTEL_ID NUMBER(10) not null,
+    HOTEL_CODE VARCHAR2(20) not null,
+    HOTEL_NAME VARCHAR2(150) not null,
+    CITY VARCHAR2(100) not null,
+    COUNTRY_CODE CHAR(2) not null,
+    COUNTRY_NAME VARCHAR2(100) not null,
+    CURRENCY VARCHAR2(10) not null,
+    CHAIN_CODE VARCHAR2(10),
+    CHAIN_NAME VARCHAR2(100),
+    STAR_RATING NUMBER(1) not null,
+    STAR_DESCRIPTION VARCHAR2(20),
+    EFFECTIVE_DATE DATE not null,
+    EXPIRY_DATE DATE,
+    IS_CURRENT NUMBER(1) default 1 not null
+        constraint CK_DIM_HOTEL_CURRENT
+            check (is_current IN (0, 1))
+)
+/
 
--- -----------------------------------------------------------------------------
--- STAGING TABLES
--- NiFi loads raw MySQL data here first; SCD logic runs in pure SQL after.
--- Truncated at the start of each ETL run.
--- -----------------------------------------------------------------------------
+-- Room dimension: one row per SOURCE_ROOM_ID (unique), linked to DIM_HOTEL through HOTEL_KEY.
+create table DIM_ROOM
+(
+    ROOM_KEY NUMBER(10) generated as identity
+        constraint PK_DIM_ROOM
+            primary key,
+    SOURCE_ROOM_ID NUMBER(10) not null
+        constraint UQ_DIM_ROOM
+            unique,
+    HOTEL_KEY NUMBER(10) not null
+        constraint FK_DIM_ROOM_HOTEL
+            references DIM_HOTEL,
+    ROOM_NUMBER VARCHAR2(10) not null,
+    FLOOR NUMBER(3) not null,
+    ROOM_TYPE_CODE VARCHAR2(20) not null,
+    ROOM_TYPE_DESCRIPTION VARCHAR2(100) not null,
+    SMOKING_YN NUMBER(1) not null
+        constraint CK_DIM_ROOM_SMOKING
+            check (smoking_yn IN (0, 1)),
+    STANDARD_RATE NUMBER(10, 2) not null
+)
+/
 
-CREATE TABLE STG_HOTEL (
-    hotel_id NUMBER(10,0) NOT NULL,
-    chain_code VARCHAR2(10),
-    country_code CHAR(2) NOT NULL,
-    star_code NUMBER(1,0) NOT NULL,
-    code VARCHAR2(20) NOT NULL,
-    name VARCHAR2(150) NOT NULL,
-    city VARCHAR2(100) NOT NULL
-);
+-- Guest dimension: one row per SOURCE_GUEST_ID (unique).
+create table DIM_GUEST
+(
+    GUEST_KEY NUMBER(10) generated as identity
+        constraint PK_DIM_GUEST
+            primary key,
+    SOURCE_GUEST_ID NUMBER(10) not null
+        constraint UQ_DIM_GUEST
+            unique,
+    GUEST_NAME VARCHAR2(150) not null,
+    CITY VARCHAR2(100),
+    COUNTRY_CODE CHAR(2),
+    COUNTRY_NAME VARCHAR2(100)
+)
+/
 
--- -----------------------------------------------------------------------------
--- DIMENSION TABLES
--- -----------------------------------------------------------------------------
+-- Fact table: one row per SOURCE_RB_ID (unique) with hotel/room/guest keys, three date keys and booking measures.
+-- FACT_ID is a plain identity column; it must not name a system-generated ISEQ$$ sequence of one user's schema.
+create table FACT_ROOM_BOOKING
+(
+    FACT_ID NUMBER(10) generated as identity
+        constraint PK_FACT_ROOM_BOOKING
+            primary key,
+    SOURCE_RB_ID NUMBER(10) not null
+        constraint UQ_FACT_ROOM_BOOKING_SRC
+            unique,
+    HOTEL_KEY NUMBER(10) not null
+        constraint FK_FACT_HOTEL
+            references DIM_HOTEL,
+    ROOM_KEY NUMBER(10) not null
+        constraint FK_FACT_ROOM
+            references DIM_ROOM,
+    GUEST_KEY NUMBER(10) not null
+        constraint FK_FACT_GUEST
+            references DIM_GUEST,
+    BOOKING_CREATED_DATE_KEY NUMBER(8) not null
+        constraint FK_FACT_BOOKING_DATE
+            references DIM_DATE,
+    CHECKIN_DATE_KEY NUMBER(8) not null
+        constraint FK_FACT_CHECKIN_DATE
+            references DIM_DATE,
+    CHECKOUT_DATE_KEY NUMBER(8) not null
+        constraint FK_FACT_CHECKOUT_DATE
+            references DIM_DATE,
+    BOOKING_STATUS VARCHAR2(20) not null,
+    BOOKING_COUNT NUMBER(1) default 1 not null
+        constraint CK_FACT_BOOKING_COUNT
+            check (booking_count = 1),
+    NIGHTS_STAYED NUMBER(4) not null,
+    NIGHTLY_RATE NUMBER(10, 2) not null,
+    TOTAL_AMOUNT NUMBER(12, 2) not null
+)
+/
 
--- YYYYMMDD integer key — cheap date range predicates, no JOIN to calendar needed
-CREATE TABLE DIM_DATE (
-    date_key NUMBER(8,0) NOT NULL,
-    full_date DATE NOT NULL,
-    year NUMBER(4,0) NOT NULL,
-    quarter NUMBER(1,0) NOT NULL,
-    month NUMBER(2,0) NOT NULL,
-    month_name VARCHAR2(10) NOT NULL,
-    week_number NUMBER(2,0) NOT NULL,
-    day_of_month NUMBER(2,0) NOT NULL,
-    day_name VARCHAR2(10) NOT NULL,
-    is_weekend NUMBER(1,0) NOT NULL,
-    is_business_day NUMBER(1,0) NOT NULL,
-    season VARCHAR2(10) NOT NULL,
-    CONSTRAINT pk_dim_date PRIMARY KEY (date_key),
-    CONSTRAINT ck_dim_date_wknd CHECK (is_weekend IN (0,1)),
-    CONSTRAINT ck_dim_date_bday CHECK (is_business_day IN (0,1))
-);
+-- Staging table for guest rows.
+create table STG_GUEST
+(
+    SOURCE_GUEST_ID NUMBER(10) not null,
+    GUEST_NAME VARCHAR2(150) not null,
+    CITY VARCHAR2(100),
+    COUNTRY_CODE CHAR(2),
+    COUNTRY_NAME VARCHAR2(100)
+)
+/
 
--- SCD Type 1 — country attributes are stable; just overwrite if anything changes
-CREATE TABLE DIM_COUNTRY (
-    country_key NUMBER(10,0) GENERATED ALWAYS AS IDENTITY,
-    country_id NUMBER(10,0) NOT NULL,
-    code CHAR(2) NOT NULL,
-    name VARCHAR2(100) NOT NULL,
-    currency VARCHAR2(10) NOT NULL,
-    CONSTRAINT pk_dim_country PRIMARY KEY (country_key),
-    CONSTRAINT uq_dim_cntry_id UNIQUE (country_id)
-);
+-- Staging table for room rows, including room-type attributes.
+create table STG_ROOM
+(
+    SOURCE_ROOM_ID NUMBER(10) not null,
+    HOTEL_CODE VARCHAR2(20) not null,
+    ROOM_NUMBER VARCHAR2(10) not null,
+    FLOOR NUMBER(3) not null,
+    ROOM_TYPE_CODE VARCHAR2(20) not null,
+    ROOM_TYPE_DESCRIPTION VARCHAR2(100) not null,
+    SMOKING_YN NUMBER(1) not null,
+    STANDARD_RATE NUMBER(10, 2) not null,
+    HOTEL_ID NUMBER(10)
+)
+/
 
--- SCD Type 1 — star rating lookup, never changes
-CREATE TABLE DIM_STAR_RATING (
-    star_rating_key NUMBER(10,0) GENERATED ALWAYS AS IDENTITY,
-    star_rating_id NUMBER(10,0) NOT NULL,
-    code NUMBER(1,0) NOT NULL,
-    description VARCHAR2(20) NOT NULL,
-    CONSTRAINT pk_dim_star PRIMARY KEY (star_rating_key),
-    CONSTRAINT uq_dim_star_id UNIQUE (star_rating_id)
-);
+-- Staging table for room-booking rows.
+create table STG_ROOM_BOOKING
+(
+    SOURCE_RB_ID NUMBER(10) not null,
+    GUEST_ID NUMBER(10) not null,
+    BOOKING_CREATED_DATE DATE not null,
+    CHECKIN_DATE DATE not null,
+    CHECKOUT_DATE DATE not null,
+    BOOKING_STATUS VARCHAR2(20) not null,
+    BOOKING_COUNT NUMBER(1) default 1 not null,
+    NIGHTS_STAYED NUMBER(4) not null,
+    NIGHTLY_RATE NUMBER(10, 2) not null,
+    TOTAL_AMOUNT NUMBER(12, 2) not null,
+    HOTEL_ID NUMBER(10) not null,
+    ROOM_ID NUMBER(10) not null
+)
+/
 
--- SCD Type 1 — chain name/code rarely changes; overwrite
-CREATE TABLE DIM_HOTEL_CHAIN (
-    hotel_chain_key NUMBER(10,0) GENERATED ALWAYS AS IDENTITY,
-    hotel_chain_id NUMBER(10,0) NOT NULL,
-    code VARCHAR2(10) NOT NULL,
-    name VARCHAR2(100) NOT NULL,
-    CONSTRAINT pk_dim_chain PRIMARY KEY (hotel_chain_key),
-    CONSTRAINT uq_dim_chain_id UNIQUE (hotel_chain_id)
-);
-
--- SCD Type 2 — hotels can change star rating or chain affiliation over time.
--- source_hotel_id is the natural key from MySQL; hotel_key is the surrogate.
--- One hotel can have multiple rows; IS_CURRENT=1 row is the active version.
--- FACT_ROOM_BOOKING links to the hotel version current at check-in date.
-CREATE TABLE DIM_HOTEL (
-    hotel_key NUMBER(10,0) GENERATED ALWAYS AS IDENTITY,
-    source_hotel_id NUMBER(10,0) NOT NULL,
-    hotel_chain_key NUMBER(10,0),
-    country_key NUMBER(10,0) NOT NULL,
-    star_rating_key NUMBER(10,0) NOT NULL,
-    code VARCHAR2(20) NOT NULL,
-    name VARCHAR2(150) NOT NULL,
-    city VARCHAR2(100) NOT NULL,
-    -- SCD2 versioning
-    effective_date DATE NOT NULL,
-    expiry_date DATE,
-    is_current NUMBER(1,0) DEFAULT 1 NOT NULL,
-    CONSTRAINT pk_dim_hotel PRIMARY KEY (hotel_key),
-    CONSTRAINT ck_dh_current CHECK (is_current IN (0,1)),
-    CONSTRAINT fk_dh_chain FOREIGN KEY (hotel_chain_key) REFERENCES DIM_HOTEL_CHAIN (hotel_chain_key),
-    CONSTRAINT fk_dh_country FOREIGN KEY (country_key) REFERENCES DIM_COUNTRY (country_key),
-    CONSTRAINT fk_dh_star FOREIGN KEY (star_rating_key) REFERENCES DIM_STAR_RATING (star_rating_key)
-);
-
--- SCD Type 1 — room type/floor rarely changes; upsert is sufficient
-CREATE TABLE DIM_ROOM (
-    room_key NUMBER(10,0) GENERATED ALWAYS AS IDENTITY,
-    room_id NUMBER(10,0) NOT NULL,
-    hotel_key NUMBER(10,0) NOT NULL,
-    room_number VARCHAR2(10) NOT NULL,
-    floor NUMBER(3,0) NOT NULL,
-    room_type_code VARCHAR2(20) NOT NULL,
-    room_type_desc VARCHAR2(100) NOT NULL,
-    smoking_yn NUMBER(1,0) NOT NULL,
-    standard_rate NUMBER(10,2) NOT NULL,
-    CONSTRAINT pk_dim_room PRIMARY KEY (room_key),
-    CONSTRAINT uq_dim_room_id UNIQUE (room_id),
-    CONSTRAINT fk_dr_hotel FOREIGN KEY (hotel_key) REFERENCES DIM_HOTEL (hotel_key),
-    CONSTRAINT ck_dim_room_smk CHECK (smoking_yn IN (0,1))
-);
-
--- SCD Type 1 — guest contact details are overwritten if they change
-CREATE TABLE DIM_GUEST (
-    guest_key NUMBER(10,0) GENERATED ALWAYS AS IDENTITY,
-    guest_id NUMBER(10,0) NOT NULL,
-    country_key NUMBER(10,0),
-    name VARCHAR2(150) NOT NULL,
-    city VARCHAR2(100),
-    CONSTRAINT pk_dim_guest PRIMARY KEY (guest_key),
-    CONSTRAINT uq_dim_guest_id UNIQUE (guest_id),
-    CONSTRAINT fk_dg_country FOREIGN KEY (country_key) REFERENCES DIM_COUNTRY (country_key)
-);
-
--- -----------------------------------------------------------------------------
--- FACT TABLE
--- -----------------------------------------------------------------------------
-
--- Grain: one row per room_booking.
--- source_rb_id: natural key from MySQL — used for idempotent incremental loads.
--- hotel_key: points to the DIM_HOTEL version active at check-in (SCD2 lookup).
-CREATE TABLE FACT_ROOM_BOOKING (
-    fact_id NUMBER(10,0) GENERATED ALWAYS AS IDENTITY,
-    source_rb_id NUMBER(10,0) NOT NULL,
-    -- dimension FKs
-    hotel_key NUMBER(10,0) NOT NULL,
-    hotel_chain_key NUMBER(10,0),
-    room_key NUMBER(10,0) NOT NULL,
-    guest_key NUMBER(10,0) NOT NULL,
-    country_key NUMBER(10,0),
-    star_rating_key NUMBER(10,0) NOT NULL,
-    checkin_date_key NUMBER(8,0) NOT NULL,
-    checkout_date_key NUMBER(8,0) NOT NULL,
-    -- degenerate dimension
-    booking_status VARCHAR2(20) NOT NULL,
-    -- measures
-    nights_stayed NUMBER(4,0) NOT NULL,
-    nightly_rate NUMBER(10,2) NOT NULL,
-    total_amount NUMBER(12,2) NOT NULL,
-    CONSTRAINT pk_fact_rb PRIMARY KEY (fact_id),
-    CONSTRAINT uq_fact_rb_src UNIQUE (source_rb_id),
-    CONSTRAINT fk_frb_hotel FOREIGN KEY (hotel_key) REFERENCES DIM_HOTEL (hotel_key),
-    CONSTRAINT fk_frb_chain FOREIGN KEY (hotel_chain_key) REFERENCES DIM_HOTEL_CHAIN (hotel_chain_key),
-    CONSTRAINT fk_frb_room FOREIGN KEY (room_key) REFERENCES DIM_ROOM (room_key),
-    CONSTRAINT fk_frb_guest FOREIGN KEY (guest_key) REFERENCES DIM_GUEST (guest_key),
-    CONSTRAINT fk_frb_country FOREIGN KEY (country_key) REFERENCES DIM_COUNTRY (country_key),
-    CONSTRAINT fk_frb_star FOREIGN KEY (star_rating_key) REFERENCES DIM_STAR_RATING (star_rating_key),
-    CONSTRAINT fk_frb_checkin FOREIGN KEY (checkin_date_key) REFERENCES DIM_DATE (date_key),
-    CONSTRAINT fk_frb_checkout FOREIGN KEY (checkout_date_key) REFERENCES DIM_DATE (date_key)
-);