SELECT COUNT(*) samples, category, ROUND(100*COUNT(*)/MAX(c),2) percentage
FROM (
SELECT id, category, c
FROM table a
JOIN table_stats b
USING(category)
WHERE RAND()< 1/100
)
GROUP BY 2
ORDER BY 1 DESC
LIMIT 100
SELECT COUNT(*) samples, psample.category, ROUND(100*COUNT(*)/ANY_VALUE(c),2) percentage
FROM (
SELECT ARRAY_AGG(a ORDER BY RAND()) cat_samples, category, ANY_VALUE(c) c
FROM table a
JOIN table_stats b
USING(category)
GROUP BY category
), UNNEST(cat_samples) psample WITH OFFSET off
WHERE off<0.02*c
GROUP BY 2
If this last approach is what you want, you might notice it failing when you actually want to get data out. An early LIMIT similar to the largest group size will make sure we don't sort more data than needed:
SELECT sample.*
FROM (
SELECT ARRAY_AGG(a ORDER BY RAND() LIMIT 105000) cat_samples, category, ANY_VALUE(c) c
FROM table a
JOIN table_stats b
USING(category)
GROUP BY category
), UNNEST(cat_samples) sample WITH OFFSET off
WHERE off<0.02*c




