I'm trying to get some insight in this room for optimization for a SQL query (BigQuery). I have this segment of a WHERE clause that needs to include all instances of where h.isEntrance is TRUE or where h.hitNumber = 1. I've tested it back and forth with CASE statements, and with OR statements for them, and the results aren't wholly conclusive.
It seems like the CASE is faster for shorter data pulls, and the OR is faster for longer data pulls, but that doesn't make sense to me.
Is there a difference between these or is it likely something else driving this difference?
Is one faster/is there another better option for incorporating this logical requirement into my query?
Below the statement is my full query for context in case that's helpful.
Also open to any other optimizations I may have overlooked within this query as lowering the runtime for this query is paramount to its usefulness.
Thanks!
WHERE h.type = 'PAGE' AND
(CASE
WHEN h.isEntrance = TRUE THEN 'True'
WHEN h.hitNumber = 1 THEN 'True'
ELSE 'False'
END) = 'True'
SELECT
date,
Medium,
CASE
WHEN REGEXP_Contains(landingpage,r'(?i)sweater|(?i)hoodie')
THEN "Apparel | Sweaters"
WHEN REGEXP_Contains(landingpage,r'(?i)sneaker')
AND NOT REGEXP_CONTAINS(landingpage,r'(?i)active')
THEN "Footwear | Sneakers"
WHEN REGEXP_Contains(landingpage,r'(?i)coats|(?i)jackets')
THEN "Apparel | Outerwear"
WHEN REGEXP_Contains(landingpage,r'(?i)sandal')
THEN "Footwear | Sandals"
WHEN REGEXP_Contains(landingpage,r'(?i)handbags|(?i)bag|(?i)tote')
THEN "Handbags"
WHEN REGEXP_CONTAINS(landingpage,r'(?i)boots')
THEN "Footwear | Boots"
WHEN REGEXP_Contains(landingpage,r'(?i)beauty|(?i)lipstick|(?i)makeup|(?i)powder|(?i)foundation|(?i)cologne|(?i)dyson-airwrap')
THEN "Beauty"
WHEN REGEXP_Contains(landingpage,r'(?i)dresses|(?i)dress|(?i)cocktail')
AND NOT REGEXP_CONTAINS(landingpage,r'(?i)shoe|(?i)shaker|(?i)glasses|(?i)coaster')
THEN "Apparel | Dresses"
WHEN REGEXP_Contains(landingpage,r'(?i)/home|(?i)home decor|(?i)bath|(?i)bedding|(?i)kitchen|(?i)table|(?i)pantry|(?i)electronics|(?i)asweet|(?i)vacuum')
THEN "Home"
WHEN REGEXP_Contains(landingpage,r'(?i)sandal')
THEN "Footwear | Sandals"
WHEN REGEXP_Contains(landingpage,r'(?i)handbags|(?i)bag|(?i)tote')
THEN "Handbags"
WHEN REGEXP_Contains(landingpage,r'(?i)denim|(?i)jeans')
THEN "Apparel | Denim"
WHEN REGEXP_Contains(landingpage,r'(?i)activewear|(?i)active|(?i)legging|(?i)sports-bra|(?i)tennis-skirt|(?i)skort')
THEN "Apparel | Activewear"
WHEN REGEXP_Contains(landingpage,r'(?i)swim')
THEN "Apparel | Swimwear"
WHEN REGEXP_Contains(landingpage,r'(?i)accessories')
AND NOT REGEXP_Contains(landingpage,r'(?i)jewelry')
THEN "Accessories"
WHEN REGEXP_Contains(landingpage,r'(?i)jewelry|(?i)watch')
THEN "Jewelry"
WHEN REGEXP_Contains(landingpage,r'(?i)shoe|(?i)boot|(?i)flat|(?i)heels|(?i)slie|(?i)slide|(?i)oxford|(?i)loafer|(?i)slipper|(?i)sneaker')
THEN "Footwear"
WHEN REGEXP_Contains(landingpage,r'(?i)clothing|(?i)lingerie|(?i)t-shirt|(?i)blouse|(?i)top|(?i)bra')
AND NOT REGEXP_CONTAINS(landingpage,r'(?i)active')
THEN "Apparel"
ELSE "Other"
END AS Segment,
CASE
WHEN REGEXP_CONTAINS(Campaign,r'(?i)_display_|(?i)retention|(?i)acquisition')
THEN 'Display'
WHEN REGEXP_CONTAINS(Campaign,r'(?i)_m_')
AND REGEXP_CONTAINS(Campaign,r'(?i)low_nd|(?i)mid_nd|(?i)_mid_ex_')
AND Medium != "organic"
THEN 'N'
WHEN REGEXP_CONTAINS(Campaign,r'(?i)low_nd|(?i)pla_shopping|(?i)mid_nd|(?i)_mid_ex_')
AND NOT REGEXP_CONTAINS(Campaign,r'(?i)_n_')
AND Medium !="organic"
THEN ''
WHEN REGEXP_CONTAINS(Campaign,r'(?i)nr_|(?i)pla_shopping|(?i)hl_|(?i) nr ')
AND NOT REGEXP_CONTAINS(Campaign,r'(?i)_n_')
AND Medium !="organic"
THEN ''
WHEN REGEXP_CONTAINS(Campaign,r'(?i)_terms')
AND Medium !="organic"
THEN ''
WHEN Medium = "organic" THEN ""
ELSE ""
END AS Site,
SUM(newUsers) AS newUsers,
SUM(Sessions) AS Sessions,
TRUNC(SUM(Revenue)/1000000,2) AS Revenue
FROM (
SELECT
date,
trafficSource.campaign AS Campaign,
trafficSource.medium AS Medium,
SUM(totals.transactionRevenue) AS Revenue,
SUM(totals.newVisits) AS newUsers,
SUM(totals.visits) AS Sessions,
h.page.pagePath AS landingPage
FROM
`database.ga_sessions_20*`,
UNNEST(hits) AS h
WHERE
h.type = 'PAGE' AND
CASE
WHEN h.isEntrance = TRUE THEN 'True'
WHEN h.hitNumber = 1 THEN 'True'
ELSE 'False'
END = 'True'
--h.isEntrance = TRUE OR h.hitNumber = 1
AND REGEXP_CONTAINS(trafficSource.medium, r'cpc|organic')
AND PARSE_DATE('%y%m%d', _TABLE_SUFFIX) BETWEEN
DATE_ADD(LAST_DAY(DATE_SUB(CURRENT_DATE(), INTERVAL 2 MONTH),MONTH), INTERVAL 1 DAY) AND
LAST_DAY(DATE_SUB(current_date(), INTERVAL 1 MONTH),MONTH)
GROUP BY
date,
Medium,
landingpage,
Campaign
)
GROUP BY
date,
Segment,
Medium,
Site
ORDER BY
Segment