From 348a074a3a3ba11c9dffaff0064fa045ce7aea82 Mon Sep 17 00:00:00 2001
From: StewKI
Date: Sun, 17 May 2026 21:23:55 +0200
Subject: [PATCH] datamart updated

---
Reviewer notes (text in this section is not part of the commit message):
  * DIM_HOTEL: dropping uq_dim_hotel_id left source_hotel_id with no
    uniqueness rule and no index. Added uq_dh_version
    (source_hotel_id, effective_date) and ck_dh_dates
    (expiry_date must not precede effective_date).
  * Hunk headers and the diffstat below account for the 4 lines added in
    review. The commit id and blob ids are still those of the original
    commit; regenerate the patch after amending.
  * Column alignment whitespace was reconstructed. If context lines do not
    match byte-for-byte, apply with `git apply --ignore-whitespace`.

 sql/datamart_schema.sql | 78 +++++++++++++++++++++++++++++++++--------
 1 file changed, 64 insertions(+), 14 deletions(-)

diff --git a/sql/datamart_schema.sql b/sql/datamart_schema.sql
index a3f0b66..f4912e0 100644
--- a/sql/datamart_schema.sql
+++ b/sql/datamart_schema.sql
@@ -1,9 +1,39 @@
 -- =============================================================================
 -- HOTEL RESERVATIONS — DATA MART (STAR SCHEMA)
 -- Target: Oracle (university lab schema)
--- Based on A.24 Revenue Data Mart — Dimensional Modelling by Example
 -- =============================================================================
 
+-- -----------------------------------------------------------------------------
+-- ETL CONTROL TABLE
+-- Tracks incremental load watermarks per entity.
+-- -----------------------------------------------------------------------------
+
+CREATE TABLE ETL_WATERMARK (
+    entity_name  VARCHAR2(50)                       NOT NULL,
+    last_key     NUMBER(20,0)  DEFAULT 0            NOT NULL,
+    last_run_ts  TIMESTAMP     DEFAULT SYSTIMESTAMP,
+    CONSTRAINT pk_etl_wm PRIMARY KEY (entity_name)
+);
+
+INSERT INTO ETL_WATERMARK (entity_name, last_key) VALUES ('FACT_ROOM_BOOKING', 0);
+COMMIT;
+
+-- -----------------------------------------------------------------------------
+-- STAGING TABLES
+-- NiFi loads raw MySQL data here first; SCD logic runs in pure SQL after.
+-- Truncated at the start of each ETL run.
+-- -----------------------------------------------------------------------------
+
+CREATE TABLE STG_HOTEL (
+    hotel_id      NUMBER(10,0)   NOT NULL,
+    chain_code    VARCHAR2(10),
+    country_code  CHAR(2)        NOT NULL,
+    star_code     NUMBER(1,0)    NOT NULL,
+    code          VARCHAR2(20)   NOT NULL,
+    name          VARCHAR2(150)  NOT NULL,
+    city          VARCHAR2(100)  NOT NULL
+);
+
 -- -----------------------------------------------------------------------------
 -- DIMENSION TABLES
 -- -----------------------------------------------------------------------------
@@ -21,12 +51,13 @@ CREATE TABLE DIM_DATE (
     day_name         VARCHAR2(10)   NOT NULL,
     is_weekend       NUMBER(1,0)    NOT NULL,
     is_business_day  NUMBER(1,0)    NOT NULL,
-    season           VARCHAR2(10)   NOT NULL,  -- Peak / High / Low / Off
+    season           VARCHAR2(10)   NOT NULL,
     CONSTRAINT pk_dim_date      PRIMARY KEY (date_key),
     CONSTRAINT ck_dim_date_wknd CHECK (is_weekend IN (0,1)),
     CONSTRAINT ck_dim_date_bday CHECK (is_business_day IN (0,1))
 );
 
+-- SCD Type 1 — country attributes are stable; just overwrite if anything changes
 CREATE TABLE DIM_COUNTRY (
     country_key      NUMBER(10,0)   GENERATED ALWAYS AS IDENTITY,
     country_id       NUMBER(10,0)   NOT NULL,
@@ -37,6 +68,7 @@ CREATE TABLE DIM_COUNTRY (
     CONSTRAINT uq_dim_cntry_id UNIQUE (country_id)
 );
 
+-- SCD Type 1 — star rating lookup, never changes
 CREATE TABLE DIM_STAR_RATING (
     star_rating_key  NUMBER(10,0)   GENERATED ALWAYS AS IDENTITY,
     star_rating_id   NUMBER(10,0)   NOT NULL,
@@ -46,6 +78,7 @@ CREATE TABLE DIM_STAR_RATING (
     CONSTRAINT uq_dim_star_id UNIQUE (star_rating_id)
 );
 
+-- SCD Type 1 — chain name/code rarely changes; overwrite
 CREATE TABLE DIM_HOTEL_CHAIN (
     hotel_chain_key  NUMBER(10,0)   GENERATED ALWAYS AS IDENTITY,
     hotel_chain_id   NUMBER(10,0)   NOT NULL,
@@ -55,22 +88,35 @@ CREATE TABLE DIM_HOTEL_CHAIN (
     CONSTRAINT uq_dim_chain_id UNIQUE (hotel_chain_id)
 );
 
+-- SCD Type 2 — hotels can change star rating or chain affiliation over time.
+-- source_hotel_id is the natural key from MySQL; hotel_key is the surrogate.
+-- One hotel can have multiple rows; IS_CURRENT=1 row is the active version.
+-- FACT_ROOM_BOOKING links to the hotel version current at check-in date.
 CREATE TABLE DIM_HOTEL (
     hotel_key        NUMBER(10,0)   GENERATED ALWAYS AS IDENTITY,
-    hotel_id         NUMBER(10,0)   NOT NULL,
+    source_hotel_id  NUMBER(10,0)   NOT NULL,
     hotel_chain_key  NUMBER(10,0),
     country_key      NUMBER(10,0)   NOT NULL,
     star_rating_key  NUMBER(10,0)   NOT NULL,
     code             VARCHAR2(20)   NOT NULL,
     name             VARCHAR2(150)  NOT NULL,
     city             VARCHAR2(100)  NOT NULL,
-    CONSTRAINT pk_dim_hotel    PRIMARY KEY (hotel_key),
-    CONSTRAINT uq_dim_hotel_id UNIQUE (hotel_id),
-    CONSTRAINT fk_dh_chain     FOREIGN KEY (hotel_chain_key) REFERENCES DIM_HOTEL_CHAIN (hotel_chain_key),
-    CONSTRAINT fk_dh_country   FOREIGN KEY (country_key)     REFERENCES DIM_COUNTRY (country_key),
-    CONSTRAINT fk_dh_star      FOREIGN KEY (star_rating_key) REFERENCES DIM_STAR_RATING (star_rating_key)
+    -- SCD2 versioning
+    effective_date   DATE           NOT NULL,
+    expiry_date      DATE,
+    is_current       NUMBER(1,0)    DEFAULT 1 NOT NULL,
+    CONSTRAINT pk_dim_hotel  PRIMARY KEY (hotel_key),
+    CONSTRAINT ck_dh_current CHECK (is_current IN (0,1)),
+    -- Replaces the dropped UNIQUE (hotel_id): one version per hotel per effective_date.
+    CONSTRAINT uq_dh_version UNIQUE (source_hotel_id, effective_date),
+    -- A version cannot expire before it becomes effective.
+    CONSTRAINT ck_dh_dates   CHECK (expiry_date IS NULL OR expiry_date >= effective_date),
+    CONSTRAINT fk_dh_chain   FOREIGN KEY (hotel_chain_key) REFERENCES DIM_HOTEL_CHAIN (hotel_chain_key),
+    CONSTRAINT fk_dh_country FOREIGN KEY (country_key)     REFERENCES DIM_COUNTRY (country_key),
+    CONSTRAINT fk_dh_star    FOREIGN KEY (star_rating_key) REFERENCES DIM_STAR_RATING (star_rating_key)
 );
 
+-- SCD Type 1 — room type/floor rarely changes; upsert is sufficient
 CREATE TABLE DIM_ROOM (
     room_key         NUMBER(10,0)   GENERATED ALWAYS AS IDENTITY,
     room_id          NUMBER(10,0)   NOT NULL,
@@ -87,6 +133,7 @@ CREATE TABLE DIM_ROOM (
     CONSTRAINT ck_dim_room_smk CHECK (smoking_yn IN (0,1))
 );
 
+-- SCD Type 1 — guest contact details are overwritten if they change
 CREATE TABLE DIM_GUEST (
     guest_key        NUMBER(10,0)   GENERATED ALWAYS AS IDENTITY,
     guest_id         NUMBER(10,0)   NOT NULL,
@@ -102,11 +149,13 @@ CREATE TABLE DIM_GUEST (
 -- FACT TABLE
 -- -----------------------------------------------------------------------------
 
--- Grain: one row per room_booking
--- Revenue measures: nightly_rate, total_amount, nights_stayed
+-- Grain: one row per room_booking.
+-- source_rb_id: natural key from MySQL — used for idempotent incremental loads.
+-- hotel_key: points to the DIM_HOTEL version active at check-in (SCD2 lookup).
 CREATE TABLE FACT_ROOM_BOOKING (
     fact_id            NUMBER(10,0)   GENERATED ALWAYS AS IDENTITY,
-    -- foreign keys
+    source_rb_id       NUMBER(10,0)   NOT NULL,
+    -- dimension FKs
     hotel_key          NUMBER(10,0)   NOT NULL,
     hotel_chain_key    NUMBER(10,0),
     room_key           NUMBER(10,0)   NOT NULL,
@@ -115,19 +164,20 @@ CREATE TABLE FACT_ROOM_BOOKING (
     star_rating_key    NUMBER(10,0)   NOT NULL,
     checkin_date_key   NUMBER(8,0)    NOT NULL,
     checkout_date_key  NUMBER(8,0)    NOT NULL,
-    -- degenerate dimensions
+    -- degenerate dimension
     booking_status     VARCHAR2(20)   NOT NULL,
     -- measures
     nights_stayed      NUMBER(4,0)    NOT NULL,
     nightly_rate       NUMBER(10,2)   NOT NULL,
     total_amount       NUMBER(12,2)   NOT NULL,
     CONSTRAINT pk_fact_rb      PRIMARY KEY (fact_id),
+    CONSTRAINT uq_fact_rb_src  UNIQUE (source_rb_id),
     CONSTRAINT fk_frb_hotel    FOREIGN KEY (hotel_key)         REFERENCES DIM_HOTEL (hotel_key),
     CONSTRAINT fk_frb_chain    FOREIGN KEY (hotel_chain_key)   REFERENCES DIM_HOTEL_CHAIN (hotel_chain_key),
     CONSTRAINT fk_frb_room     FOREIGN KEY (room_key)          REFERENCES DIM_ROOM (room_key),
     CONSTRAINT fk_frb_guest    FOREIGN KEY (guest_key)         REFERENCES DIM_GUEST (guest_key),
     CONSTRAINT fk_frb_country  FOREIGN KEY (country_key)       REFERENCES DIM_COUNTRY (country_key),
     CONSTRAINT fk_frb_star     FOREIGN KEY (star_rating_key)   REFERENCES DIM_STAR_RATING (star_rating_key),
-    CONSTRAINT fk_frb_checkin  FOREIGN KEY (checkin_date_key)   REFERENCES DIM_DATE (date_key),
-    CONSTRAINT fk_frb_checkout FOREIGN KEY (checkout_date_key)  REFERENCES DIM_DATE (date_key)
+    CONSTRAINT fk_frb_checkin  FOREIGN KEY (checkin_date_key)  REFERENCES DIM_DATE (date_key),
+    CONSTRAINT fk_frb_checkout FOREIGN KEY (checkout_date_key) REFERENCES DIM_DATE (date_key)
 );