Isofold rewrites your SQL queries to make them cheaper—without changing what they do. We apply a suite of optimizer rules using Apache DataFusion that are tested against real-world analytical workloads (like the Google Ethereum dataset).
Each rewrite rule:
- Preserves semantics
- Targets measurable inefficiencies
- Composes cleanly with others
How It Works
-
The incoming SQL is parsed into a logical plan.
-
Isofold applies rewrite rules in a deterministic order.
-
Both original and rewritten queries are dry-run for:
- Logical plan structure
- Slot usage / bytes scanned
- Compatibility with warehouse constraints
-
If the rewritten query is valid and cheaper, it is executed.
-
Otherwise, Isofold automatically falls back to the original.
Examples
PushDownFilter
-- Original
SELECT COUNT(*)
FROM (
SELECT *
FROM `bigquery-public-data.crypto_ethereum.transactions`
WHERE to_address = '0x742d35Cc6634C0532925a3b844Bc454e4438f44e'
)
-- Rewritten
SELECT COUNT(*)
FROM `bigquery-public-data.crypto_ethereum.transactions`
WHERE to_address = '0x742d35Cc6634C0532925a3b844Bc454e4438f44e'
PushDownLimit
-- Original
WITH top_blocks AS (
SELECT *
FROM `bigquery-public-data.crypto_ethereum.blocks`
ORDER BY difficulty DESC
)
SELECT miner
FROM top_blocks
LIMIT 5
-- Rewritten
SELECT miner
FROM (
SELECT *
FROM `bigquery-public-data.crypto_ethereum.blocks`
ORDER BY difficulty DESC
LIMIT 5
)
ProjectionPrune
+ OptimizeProjections
-- Original
SELECT COUNT(*)
FROM (
SELECT *
FROM `bigquery-public-data.crypto_ethereum.transactions`
)
-- Rewritten
SELECT COUNT(*)
FROM (
SELECT 1
FROM `bigquery-public-data.crypto_ethereum.transactions`
)
SimplifyExpressions
-- Original
SELECT *,
CASE
WHEN status = 'confirmed' THEN TRUE
WHEN status != 'confirmed' THEN FALSE
ELSE TRUE
END AS is_finalized
FROM `bigquery-public-data.crypto_ethereum.transactions`
WHERE DATE(block_timestamp) >= '2023-01-01'
AND TRUE OR TRUE
-- Rewritten
SELECT *,
status = 'confirmed' AS is_finalized
FROM `bigquery-public-data.crypto_ethereum.transactions`
WHERE DATE(block_timestamp) >= '2023-01-01'
EliminateDuplicatedExpr
-- Original
SELECT value / 1e18 AS eth_value,
value / 1e18 AS normalized
FROM `bigquery-public-data.crypto_ethereum.traces`
-- Rewritten
SELECT value / 1e18 AS eth_value,
eth_value AS normalized
FROM `bigquery-public-data.crypto_ethereum.traces`
EliminateFilter
-- Original
SELECT *
FROM `bigquery-public-data.crypto_ethereum.tokens`
WHERE TRUE
-- Rewritten
SELECT *
FROM `bigquery-public-data.crypto_ethereum.tokens`
EliminateGroupByConstant
-- Original
SELECT token_address, COUNT(*)
FROM `bigquery-public-data.crypto_ethereum.token_transfers`
GROUP BY token_address, 'erc20'
-- Rewritten
SELECT token_address, COUNT(*)
FROM `bigquery-public-data.crypto_ethereum.token_transfers`
GROUP BY token_address
EliminateJoin
-- Original
SELECT b.block_number
FROM `bigquery-public-data.crypto_ethereum.blocks` b
LEFT JOIN `bigquery-public-data.crypto_ethereum.transactions` t
ON b.number = t.block_number
-- Rewritten
SELECT block_number
FROM `bigquery-public-data.crypto_ethereum.blocks`
EliminateLimit
-- Original
SELECT COUNT(*)
FROM (
SELECT *
FROM `bigquery-public-data.crypto_ethereum.transactions`
LIMIT 10000
)
-- Rewritten
SELECT COUNT(*)
FROM `bigquery-public-data.crypto_ethereum.transactions`
EliminateNestedUnion
/ EliminateOneUnion
-- Original
SELECT *
FROM (
SELECT * FROM tx1
UNION ALL
SELECT * FROM tx2
)
WHERE block_number > 1000000
-- Rewritten
SELECT *
FROM tx1
WHERE block_number > 1000000
UNION ALL
SELECT *
FROM tx2
WHERE block_number > 1000000
EliminateOuterJoin
-- Original
SELECT *
FROM a
LEFT JOIN b
ON a.id = b.id
WHERE b.id IS NOT NULL
-- Rewritten
SELECT *
FROM a
JOIN b
ON a.id = b.id
-- Original
SELECT *
FROM a, b
WHERE a.id = b.id
AND b.confirmed = TRUE
-- Rewritten
SELECT *
FROM a
JOIN b
ON a.id = b.id
WHERE b.confirmed = TRUE
FilterNullJoinKeys
-- Rewritten
SELECT *
FROM a
JOIN b
ON a.key = b.key
WHERE a.key IS NOT NULL
AND b.key IS NOT NULL
ReplaceDistinctWithAggregate
-- Original
SELECT DISTINCT from_address
FROM `bigquery-public-data.crypto_ethereum.transactions`
-- Rewritten
SELECT from_address
FROM `bigquery-public-data.crypto_ethereum.transactions`
GROUP BY from_address
ScalarSubqueryToJoin
-- Original
SELECT t.hash,
(
SELECT MAX(value)
FROM `bigquery-public-data.crypto_ethereum.transactions` t2
WHERE t2.to_address = t.to_address
) AS max_val
FROM `bigquery-public-data.crypto_ethereum.transactions` t
-- Rewritten
SELECT t.hash,
MAX(t2.value) AS max_val
FROM `bigquery-public-data.crypto_ethereum.transactions` t
JOIN `bigquery-public-data.crypto_ethereum.transactions` t2
ON t.to_address = t2.to_address
GROUP BY t.hash
SingleDistinctToGroupBy
-- Original
SELECT DISTINCT token_address
FROM `bigquery-public-data.crypto_ethereum.token_transfers`
-- Rewritten
SELECT token_address
FROM `bigquery-public-data.crypto_ethereum.token_transfers`
GROUP BY token_address
Composed Rewrite:
-- Original
SELECT DISTINCT t.from_address,
COUNT(*) as tx_count
FROM (
SELECT *
FROM `bigquery-public-data.crypto_ethereum.transactions`
WHERE status = 'confirmed'
) t
LEFT JOIN (
SELECT *
FROM `bigquery-public-data.crypto_ethereum.blocks`
) b
ON t.block_number = b.number
WHERE b.number IS NOT NULL
GROUP BY t.from_address, 'eth'
LIMIT 1000000
-- Rewritten
SELECT from_address,
COUNT(*) AS tx_count
FROM `bigquery-public-data.crypto_ethereum.transactions` t
JOIN `bigquery-public-data.crypto_ethereum.blocks` b
ON t.block_number = b.number
WHERE t.status = 'confirmed'
GROUP BY from_address
Applied Rules:
PushDownFilter
SimplifyExpressions
EliminateOuterJoin
ProjectionPrune
EliminateGroupByConstant
ReplaceDistinctWithAggregate
EliminateLimit
Guarantees
- Dry-run validated for correctness
- Cost-estimated before execution
- Fallback-safe: if a rewrite introduces cost or fails validation, Isofold reverts to the original
Next: Topology Overview