Are CASE statements or OR statements faster in a WHERE clause? (SQL/BigQuery)
11:05 08 Jun 2021

I'm trying to get some insight in this room for optimization for a SQL query (BigQuery). I have this segment of a WHERE clause that needs to include all instances of where h.isEntrance is TRUE or where h.hitNumber = 1. I've tested it back and forth with CASE statements, and with OR statements for them, and the results aren't wholly conclusive.

It seems like the CASE is faster for shorter data pulls, and the OR is faster for longer data pulls, but that doesn't make sense to me.

Is there a difference between these or is it likely something else driving this difference?

Is one faster/is there another better option for incorporating this logical requirement into my query?

Below the statement is my full query for context in case that's helpful.

Also open to any other optimizations I may have overlooked within this query as lowering the runtime for this query is paramount to its usefulness.

Thanks!

WHERE h.type = 'PAGE' AND
    (CASE 
        WHEN h.isEntrance = TRUE THEN 'True'
            WHEN h.hitNumber = 1 THEN 'True'
        ELSE 'False'
    END) = 'True'
SELECT 
    date,
    Medium,
    CASE
        WHEN REGEXP_Contains(landingpage,r'(?i)sweater|(?i)hoodie')
            THEN "Apparel | Sweaters"
        WHEN REGEXP_Contains(landingpage,r'(?i)sneaker')
            AND NOT REGEXP_CONTAINS(landingpage,r'(?i)active')
            THEN "Footwear | Sneakers"     
        WHEN REGEXP_Contains(landingpage,r'(?i)coats|(?i)jackets')
            THEN "Apparel | Outerwear"
        WHEN REGEXP_Contains(landingpage,r'(?i)sandal')
            THEN "Footwear | Sandals"
        WHEN REGEXP_Contains(landingpage,r'(?i)handbags|(?i)bag|(?i)tote')
            THEN "Handbags"
        WHEN REGEXP_CONTAINS(landingpage,r'(?i)boots')
            THEN "Footwear | Boots"
        WHEN REGEXP_Contains(landingpage,r'(?i)beauty|(?i)lipstick|(?i)makeup|(?i)powder|(?i)foundation|(?i)cologne|(?i)dyson-airwrap')
            THEN "Beauty"
        WHEN REGEXP_Contains(landingpage,r'(?i)dresses|(?i)dress|(?i)cocktail')
            AND NOT REGEXP_CONTAINS(landingpage,r'(?i)shoe|(?i)shaker|(?i)glasses|(?i)coaster')
            THEN "Apparel | Dresses"
        WHEN REGEXP_Contains(landingpage,r'(?i)/home|(?i)home decor|(?i)bath|(?i)bedding|(?i)kitchen|(?i)table|(?i)pantry|(?i)electronics|(?i)asweet|(?i)vacuum')
            THEN "Home"
        WHEN REGEXP_Contains(landingpage,r'(?i)sandal')
            THEN "Footwear | Sandals"
        WHEN REGEXP_Contains(landingpage,r'(?i)handbags|(?i)bag|(?i)tote')
            THEN "Handbags"
        WHEN REGEXP_Contains(landingpage,r'(?i)denim|(?i)jeans')
            THEN "Apparel | Denim"
        WHEN REGEXP_Contains(landingpage,r'(?i)activewear|(?i)active|(?i)legging|(?i)sports-bra|(?i)tennis-skirt|(?i)skort')
            THEN "Apparel | Activewear"
        WHEN REGEXP_Contains(landingpage,r'(?i)swim')
            THEN "Apparel | Swimwear"
        WHEN REGEXP_Contains(landingpage,r'(?i)accessories')
            AND NOT REGEXP_Contains(landingpage,r'(?i)jewelry')
            THEN "Accessories"
        WHEN REGEXP_Contains(landingpage,r'(?i)jewelry|(?i)watch')
            THEN "Jewelry"
        WHEN REGEXP_Contains(landingpage,r'(?i)shoe|(?i)boot|(?i)flat|(?i)heels|(?i)slie|(?i)slide|(?i)oxford|(?i)loafer|(?i)slipper|(?i)sneaker')
            THEN "Footwear"
        WHEN REGEXP_Contains(landingpage,r'(?i)clothing|(?i)lingerie|(?i)t-shirt|(?i)blouse|(?i)top|(?i)bra')
            AND NOT REGEXP_CONTAINS(landingpage,r'(?i)active')
            THEN "Apparel"
        ELSE "Other"
    END AS Segment,
    CASE
        WHEN REGEXP_CONTAINS(Campaign,r'(?i)_display_|(?i)retention|(?i)acquisition')
            THEN 'Display'  
        WHEN REGEXP_CONTAINS(Campaign,r'(?i)_m_')
            AND REGEXP_CONTAINS(Campaign,r'(?i)low_nd|(?i)mid_nd|(?i)_mid_ex_')
            AND Medium != "organic"
            THEN 'N'
        WHEN REGEXP_CONTAINS(Campaign,r'(?i)low_nd|(?i)pla_shopping|(?i)mid_nd|(?i)_mid_ex_')
            AND NOT REGEXP_CONTAINS(Campaign,r'(?i)_n_')
            AND Medium !="organic"
            THEN ''
        WHEN REGEXP_CONTAINS(Campaign,r'(?i)nr_|(?i)pla_shopping|(?i)hl_|(?i) nr ')
            AND NOT REGEXP_CONTAINS(Campaign,r'(?i)_n_')
            AND Medium !="organic"
            THEN ''
        WHEN REGEXP_CONTAINS(Campaign,r'(?i)_terms')
            AND Medium !="organic"
            THEN ''
        WHEN Medium = "organic" THEN ""
        ELSE ""
    END AS Site,
    SUM(newUsers) AS newUsers,
    SUM(Sessions) AS Sessions,
    TRUNC(SUM(Revenue)/1000000,2) AS Revenue
FROM (    
    SELECT
        date,
        trafficSource.campaign AS Campaign,
        trafficSource.medium AS Medium, 
        SUM(totals.transactionRevenue) AS Revenue,
        SUM(totals.newVisits) AS newUsers,
        SUM(totals.visits) AS Sessions,
        h.page.pagePath AS landingPage
    FROM
        `database.ga_sessions_20*`,
        UNNEST(hits) AS h
    WHERE
        h.type = 'PAGE' AND
        CASE 
            WHEN h.isEntrance = TRUE THEN 'True'
            WHEN h.hitNumber = 1 THEN 'True'
            ELSE 'False'
        END = 'True'
        --h.isEntrance = TRUE OR h.hitNumber = 1
        AND REGEXP_CONTAINS(trafficSource.medium, r'cpc|organic')
        AND PARSE_DATE('%y%m%d', _TABLE_SUFFIX) BETWEEN
            DATE_ADD(LAST_DAY(DATE_SUB(CURRENT_DATE(), INTERVAL 2 MONTH),MONTH), INTERVAL 1 DAY) AND
            LAST_DAY(DATE_SUB(current_date(), INTERVAL 1 MONTH),MONTH)
    GROUP BY
        date,
        Medium,
        landingpage,
        Campaign
)
GROUP BY
    date,
    Segment,
    Medium,
    Site
ORDER BY
    Segment
sql google-bigquery