A simplified (and kind of silly) example will make what I'm asking a bit clearer. Bob/Tim are expected to be unique identifiers herein.
DROP TABLE IF EXISTS #People
CREATE TABLE #People
(
Person varchar(10),
City varchar(20),
Department varchar(1),
PersonAge int
);
INSERT INTO #People VALUES ('Bob', 'New York', 'A', 40),
('Tim', 'New York', 'A', 30),
('Tim', 'New York', 'B', 30)
;WITH InitialGrouping AS
(
SELECT
Person,
City,
Department,
MAX(PersonAge) PersonAge,
COUNT(*) NumRows
FROM
#People
GROUP BY
Person, City, Department
) --SELECT * FROM InitialGrouping
SELECT
City,
Department,
AVG(PersonAge) AveragePersonAge,
COUNT(DISTINCT Person) PersonCount
FROM
InitialGrouping
GROUP BY
GROUPING SETS (City, Department, (City, Department))
ORDER BY
City, Department
;WITH InitialGrouping AS
(
SELECT
GROUPING_ID(Person,City,Department) GROUPID,
Person,
City,
Department,
MAX(PersonAge) PersonAge,
COUNT(*) NumRows
FROM
#People
GROUP BY
Person, GROUPING SETS (City, Department, (City, Department))
) --SELECT * FROM InitialGrouping
SELECT
City,
Department,
AVG(PersonAge) AveragePersonAge,
COUNT(DISTINCT Person) PersonCount
FROM
InitialGrouping
GROUP BY
GROUPID, City, Department
ORDER BY
City, Department
These two examples are not functionally equivalent. If you look at the rollup row for New York, you will see that the average age is wrong for the first one (33) and right for the second (35). The second is functionally correct, however, for more complex examples with lots of grouping sets, it can absolutely explode cardinality out in the concatenation step of the query plan, which causes performance to be unacceptable. In this simple example, you can see that the first query keeps things at 5 rows while the second expands to 8 rows. In more complex cases with lots of grouping sets, I've seen cardinality expand many many times across millions of rows.
Is there a way to solve this that can keep things from expanding past the target granularity of the final output?
There's a solution proposed in the comments below which does work. It may be the best option. It looks like this (I added an additional group):
DROP TABLE IF EXISTS #People
CREATE TABLE #People
(
Person varchar(10),
City varchar(20),
Department varchar(1),
Badge int,
PersonAge int
);
INSERT INTO #People VALUES ('Bob', 'New York', 'A', 1, 40),
('Tim', 'New York', 'A', 1, 30),
('Tim', 'New York', 'B', 2, 30),
('Tom', 'New York', 'B', 1, 30),
('Jim', 'Paris', 'A', 2, 50)
;WITH InitialGrouping AS
(
SELECT
Person,
City,
Department,
Badge,
ROW_NUMBER() OVER (PARTITION BY Person ORDER BY Person) OverallRowNum,
ROW_NUMBER() OVER (PARTITION BY Person, City, Department, Badge ORDER BY Person) CityDepartmentBadgeRowNum,
ROW_NUMBER() OVER (PARTITION BY Person, City, Department ORDER BY Person) CityDepartmentRowNum,
ROW_NUMBER() OVER (PARTITION BY Person, City, Badge ORDER BY Person) CityBadgeRowNum,
ROW_NUMBER() OVER (PARTITION BY Person, Badge, Department ORDER BY Person) DepartmentBadgeRowNum,
ROW_NUMBER() OVER (PARTITION BY Person, City ORDER BY Person) CityRowNum,
ROW_NUMBER() OVER (PARTITION BY Person, Department ORDER BY Person) DepartmentRowNum,
ROW_NUMBER() OVER (PARTITION BY Person, Badge ORDER BY Person) BadgeRowNum,
MAX(PersonAge) PersonAge
FROM
#People
GROUP BY
Person, City, Department, Badge
) --SELECT * FROM InitialGrouping
SELECT
GROUPING_ID(City,Department,Badge),
City,
Department,
Badge,
CASE WHEN GROUPING_ID(City,Department,Badge) = 7 THEN AVG(CASE WHEN OverallRowNum = 1 THEN PersonAge ELSE NULL END)
WHEN GROUPING_ID(City,Department,Badge) = 6 THEN AVG(CASE WHEN BadgeRowNum = 1 THEN PersonAge ELSE NULL END)
WHEN GROUPING_ID(City,Department,Badge) = 5 THEN AVG(CASE WHEN DepartmentRowNum = 1 THEN PersonAge ELSE NULL END)
WHEN GROUPING_ID(City,Department,Badge) = 4 THEN AVG(CASE WHEN DepartmentBadgeRowNum = 1 THEN PersonAge ELSE NULL END)
WHEN GROUPING_ID(City,Department,Badge) = 3 THEN AVG(CASE WHEN CityRowNum = 1 THEN PersonAge ELSE NULL END)
WHEN GROUPING_ID(City,Department,Badge) = 2 THEN AVG(CASE WHEN CityBadgeRowNum = 1 THEN PersonAge ELSE NULL END)
WHEN GROUPING_ID(City,Department,Badge) = 1 THEN AVG(CASE WHEN CityDepartmentRowNum = 1 THEN PersonAge ELSE NULL END)
WHEN GROUPING_ID(City,Department,Badge) = 0 THEN AVG(CASE WHEN CityDepartmentBadgeRowNum = 1 THEN PersonAge ELSE NULL END)
END AS AveragePersonAge,
COUNT(DISTINCT Person) PersonCount
FROM
InitialGrouping
GROUP BY
GROUPING SETS (City, Department, Badge, (City, Department), (City,Badge), (Department,Badge), (City,Department,Badge),())
ORDER BY
City, Department
I use first way but change Method type order aggregate and use window function