Skip to main content
Version: Canary - 2.3 🚧

QA Tests


SQL-based quality assurance tests for data validation and production checks.

Basic Validations​

Non-Null Checks​

-- QA table has rows
-- 🪟💲🌟
-- run_id: {{ run_id }}
-- task_name: {{ task_name }}
-- Table: {{ table }}
-- Columns: {{ columns }}
SELECT count(*) as n
FROM {{ table }}

Uniqueness Checks​

-- QA for uniqueness of columns in a table
-- 🪟💲🌟
-- run_id: {{ run_id }}
-- task_name: {{ task_name }}
-- Table: {{ table }}
-- Columns: {{ columns }}

WITH
df AS (
SELECT
{% for column in columns %}
{{ column }},
{% endfor %}
count(*) as n
FROM {{ table }}
GROUP BY
{% for column in columns %}
{{ column }}
{% if not loop.last %}, {% endif %}
{% endfor %}
HAVING count(*) > 1
)
SELECT count(*) as n
FROM df

Production Checks​

Analyze Report Reproduction​

SELECT 
p.provider_id,
p.provider_name,
p.bill_type,
AVG(canonical_rate_percent_of_state_avg_medicare) AS avg_canonical_rate_percent_of_state_avg_medicare
FROM {{ table_name }} p
JOIN tq_intermediate.cld_utils.utilization_equal_code_utilization u
ON p.billing_code_type = u.code_type
AND p.billing_code = u.code
WHERE provider_id IN (
{% for provider_id in config.provider_id %}
'{{ provider_id }}'{% if not loop.last %},{% endif %}
{% endfor %}
)
AND network_id IN (
{% for network_id in config.network_id %}
{{ network_id }}{% if not loop.last %},{% endif %}
{% endfor %}
)
AND payer_id IN (
{% for payer_id in config.payer_id %}
'{{ payer_id }}'{% if not loop.last %},{% endif %}
{% endfor %}
)
AND canonical_rate_score > 1
AND is_drug_code = false
GROUP BY
1,2,3
ORDER BY
1,2,3

Best Payer Hospital Score Completeness​

SELECT
bill_type,
COUNT(DISTINCT best_payer_rate_score) AS n_unique_best_payer_rate_score,
COUNT(DISTINCT best_hospital_rate_score) AS n_unique_best_hospital_rate_score
FROM {{ table_name }}
WHERE canonical_rate_score > 1
AND canonical_rate_source != 'imputation'
GROUP BY 1

Canonical Method Parameters Formula​

WITH
counts AS (
SELECT
canonical_rate_type,
COUNT(DISTINCT roid) AS n_roids
FROM {{ table_name }}
WHERE canonical_rate_type IS NOT NULL
GROUP BY 1
),
df AS (
SELECT
roid,
payer_id,
billing_code_type,
canonical_rate_type,
canonical_rate,
canonical_method_params,
canonical_method_formula,
ROW_NUMBER() OVER (PARTITION BY billing_code_type, canonical_rate_type ORDER BY RANDOM()) AS rn
FROM {{ table_name }}
WHERE canonical_rate_type IS NOT NULL
AND canonical_rate_type NOT LIKE 'raw:%'
)
SELECT df.*, c.n_roids
FROM df
JOIN counts c
ON df.canonical_rate_type = c.canonical_rate_type
WHERE rn < 5

IP/OP Have Best Payer Hospital Rate​

SELECT *
FROM {{ table_name }}
WHERE canonical_rate_score > 1
AND best_payer_rate IS NULL
AND best_hospital_rate IS NULL
AND bill_type IN ('Inpatient', 'Outpatient')
AND canonical_rate_source != 'imputation'
LIMIT 1

No Negative Rates​

SELECT *
FROM {{ table_name }}
WHERE canonical_rate_score > 1
AND canonical_rate < 0
LIMIT 1

No Rates Greater Than 20M​

SELECT *
FROM {{ table_name }}
WHERE canonical_rate_score > 1
AND canonical_rate > 20000000
AND is_drug_code = False
LIMIT 1

Non-Outlier Coverage > 30%​

-- Check non-outlier coverage greater than 30 percent

SELECT
1.000 * sum(case when canonical_rate_score > 1 then 1 else 0 end) / count(*) as coverage
FROM {{ table_name }}
WHERE provider_type = 'Hospital'

Non-Outlier Median Canonical Percentage of State Avg Medicare​

-- Median Canonical Percentage of State Avg Medicare for Non-Outlier Records
SELECT
approx_percentile(canonical_rate_percent_of_state_avg_medicare, 0.5) as median_pct
FROM {{ table_name }}
WHERE canonical_rate_score > 1

Non-Outlier Median Canonical Rate​

-- Median Canonical Rate for Non-Outlier roids
SELECT
approx_percentile(canonical_rate, 0.5) as median_rate
FROM {{ table_name }}
WHERE canonical_rate_score > 1
AND provider_type = 'Hospital'

Professional Rates Have Best Payer Hospital Rate​

SELECT *
FROM {{ table_name }}
WHERE canonical_rate_score > 1
AND best_payer_rate IS NULL
AND best_hospital_rate IS NULL
AND bill_type = 'Professional'
LIMIT 1

Provider Type Coverage​

-- Check coverage by provider type
-- Coverage is defined as percentage of rows with canonical_rate_score > 1 (non-outliers)

SELECT
provider_type,
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) as n_non_outlier,
COUNT(*) as n_total,
CAST(
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) AS DOUBLE
) / CAST(COUNT(*) AS DOUBLE) * 100.0 as coverage_pct
FROM {{ table_name }}
WHERE network_class = 'Commercial'
GROUP BY provider_type
ORDER BY provider_type

Version Comparison​

WITH
old AS (
SELECT
provider_type,
provider_id,
payer_id,
network_type,
network_id,
bill_type,
is_drug_code,
avg(canonical_rate) as avg_canonical_rate,
approx_percentile(canonical_rate, 0.5) as median_canonical_rate,
avg(canonical_rate_percent_of_state_avg_medicare) as avg_canonical_rate_percent_of_state_avg_medicare,
approx_percentile(canonical_rate_percent_of_state_avg_medicare, 0.5) as median_canonical_rate_percent_of_state_avg_medicare,
avg(canonical_gross_charge) as avg_gross_charge,
approx_percentile(canonical_gross_charge, 0.5) as median_gross_charge,
avg(medicare_rate) as avg_medicare_rate,
approx_percentile(medicare_rate, 0.5) as median_medicare_rate,
avg(state_avg_medicare_rate) as avg_state_avg_medicare_rate,
approx_percentile(state_avg_medicare_rate, 0.5) as median_state_avg_medicare_rate,
avg(asp_payment_limit) as avg_asp_payment_limit,
approx_percentile(asp_payment_limit, 0.5) as median_asp_payment_limit
FROM {{ old_table }}
WHERE canonical_rate_score > 1
GROUP BY 1, 2, 3, 4, 5, 6, 7
),
new AS (
SELECT
provider_type,
provider_id,
payer_id,
network_type,
network_id,
bill_type,
is_drug_code,
avg(canonical_rate) as avg_canonical_rate,
approx_percentile(canonical_rate, 0.5) as median_canonical_rate,
avg(canonical_rate_percent_of_state_avg_medicare) as avg_canonical_rate_percent_of_state_avg_medicare,
approx_percentile(canonical_rate_percent_of_state_avg_medicare, 0.5) as median_canonical_rate_percent_of_state_avg_medicare,
avg(canonical_gross_charge) as avg_gross_charge,
approx_percentile(canonical_gross_charge, 0.5) as median_gross_charge,
avg(medicare_rate) as avg_medicare_rate,
approx_percentile(medicare_rate, 0.5) as median_medicare_rate,
avg(state_avg_medicare_rate) as avg_state_avg_medicare_rate,
approx_percentile(state_avg_medicare_rate, 0.5) as median_state_avg_medicare_rate,
avg(asp_payment_limit) as avg_asp_payment_limit,
approx_percentile(asp_payment_limit, 0.5) as median_asp_payment_limit
FROM {{ new_table }}
WHERE canonical_rate_score > 1
GROUP BY 1, 2, 3, 4, 5, 6, 7
),
comparisons AS (
SELECT
provider_type,
provider_id,
payer_id,
network_type,
network_id,
bill_type,
is_drug_code,

(v.avg_canonical_rate - q.avg_canonical_rate) / NULLIF(((v.avg_canonical_rate + q.avg_canonical_rate) / 2), 0) as diff_avg_canonical_rate_pct,
(v.median_canonical_rate - q.median_canonical_rate) / NULLIF(((v.median_canonical_rate + q.median_canonical_rate) / 2), 0) as diff_median_canonical_rate_pct,
(v.avg_gross_charge - q.avg_gross_charge) / NULLIF(((v.avg_gross_charge + q.avg_gross_charge) / 2), 0) as diff_avg_gross_charge_pct,
(v.median_gross_charge - q.median_gross_charge) / NULLIF(((v.median_gross_charge + q.median_gross_charge) / 2), 0) as diff_median_gross_charge_pct,
(v.median_medicare_rate - q.median_medicare_rate) / NULLIF(((v.median_medicare_rate + q.median_medicare_rate) / 2), 0) as diff_median_medicare_rate_pct,
(v.avg_canonical_rate_percent_of_state_avg_medicare - q.avg_canonical_rate_percent_of_state_avg_medicare) / NULLIF(((v.avg_canonical_rate_percent_of_state_avg_medicare + q.avg_canonical_rate_percent_of_state_avg_medicare) / 2), 0) as diff_avg_canonical_rate_percent_of_state_avg_medicare_pct,
(v.median_canonical_rate_percent_of_state_avg_medicare - q.median_canonical_rate_percent_of_state_avg_medicare) / NULLIF(((v.median_canonical_rate_percent_of_state_avg_medicare + q.median_canonical_rate_percent_of_state_avg_medicare) / 2), 0) as diff_median_canonical_rate_percent_of_state_avg_medicare_pct,
(v.avg_medicare_rate - q.avg_medicare_rate) / NULLIF(((v.avg_medicare_rate + q.avg_medicare_rate) / 2), 0) as diff_avg_medicare_rate_pct,
(v.avg_state_avg_medicare_rate - q.avg_state_avg_medicare_rate) / NULLIF(((v.avg_state_avg_medicare_rate + q.avg_state_avg_medicare_rate) / 2), 0) as diff_avg_state_avg_medicare_rate_pct,
(v.median_state_avg_medicare_rate - q.median_state_avg_medicare_rate) / NULLIF(((v.median_state_avg_medicare_rate + q.median_state_avg_medicare_rate) / 2), 0) as diff_median_state_avg_medicare_rate_pct,
(v.avg_asp_payment_limit - q.avg_asp_payment_limit) / NULLIF(((v.avg_asp_payment_limit + q.avg_asp_payment_limit) / 2), 0) as diff_avg_asp_payment_limit_pct,
(v.median_asp_payment_limit - q.median_asp_payment_limit) / NULLIF(((v.median_asp_payment_limit + q.median_asp_payment_limit) / 2), 0) as diff_median_asp_payment_limit_pct

FROM old q
JOIN new v USING (provider_type, provider_id, payer_id, network_type, network_id, bill_type, is_drug_code)
)
SELECT
APPROX_PERCENTILE(diff_avg_canonical_rate_pct, 0.5) as median_diff_avg_canonical_rate_pct,
APPROX_PERCENTILE(diff_avg_canonical_rate_pct, 0.1) as p10_diff_avg_canonical_rate_pct,
APPROX_PERCENTILE(diff_avg_canonical_rate_pct, 0.9) as p90_diff_avg_canonical_rate_pct,
APPROX_PERCENTILE(diff_median_canonical_rate_pct, 0.5) as median_diff_median_canonical_rate_pct,
APPROX_PERCENTILE(diff_median_canonical_rate_pct, 0.1) as p10_diff_median_canonical_rate_pct,
APPROX_PERCENTILE(diff_median_canonical_rate_pct, 0.9) as p90_diff_median_canonical_rate_pct,
APPROX_PERCENTILE(diff_median_gross_charge_pct, 0.5) as median_diff_median_gross_charge_pct,
APPROX_PERCENTILE(diff_median_gross_charge_pct, 0.1) as p10_diff_median_gross_charge_pct,
APPROX_PERCENTILE(diff_median_gross_charge_pct, 0.9) as p90_diff_median_gross_charge_pct
FROM comparisons
WHERE diff_avg_canonical_rate_pct IS NOT NULL

Rate Availability Tests​

All Networks Have Rates​

SELECT 
network_id,
sum(
{% if 'tmp_raw_mrf_payer_rates' in table %}
CASE
WHEN rate IS NOT NULL THEN 1
ELSE 0
END
{% else %}
CASE
WHEN canonical_rate IS NOT NULL THEN 1
ELSE 0
END
{% endif %}
) as n
FROM {{ table }}
GROUP BY 1

All Payers Have Rates​

SELECT 
payer_id,
sum(
{% if 'tmp_raw_mrf_payer_rates' in table %}
CASE
WHEN rate IS NOT NULL THEN 1
ELSE 0
END
{% else %}
CASE
WHEN canonical_rate IS NOT NULL THEN 1
ELSE 0
END
{% endif %}
) as n
FROM {{ table }}
GROUP BY 1

All Provider Types Have Rates​

-- Check that all provider types have rates
SELECT DISTINCT
provider_type
FROM {{ table }}
WHERE
canonical_rate IS NOT NULL
ORDER BY provider_type

Bill Types Have Rates​

SELECT distinct bill_type
FROM {{ table }}
WHERE canonical_rate IS NOT NULL
ORDER BY bill_type ASC

Billing Code Types Have Rates​

SELECT distinct billing_code_type
FROM {{ table }}
WHERE canonical_rate IS NOT NULL
ORDER BY billing_code_type ASC

Most Hospitals Have Rates​

SELECT 
provider_id,
SUM(
{% if 'tmp_raw_mrf_hospital_rates' in table %}
CASE
WHEN negotiated_dollar > 0
OR negotiated_percentage > 0
OR (estimated_allowed_amount > 0 AND estimated_allowed_amount < 100000000)
THEN 1
ELSE 0
END
{% else %}
CASE
WHEN canonical_rate IS NOT NULL THEN 1
ELSE 0
END
{% endif %}
) as n
FROM {{ table }}
{% if 'combined' in table %}
{% if commercial %}
WHERE network_class = 'Commercial'
AND provider_type = 'Hospital'
{% else %}
WHERE network_class != 'Commercial'
AND provider_type = 'Hospital'
{% endif %}
{% endif %}
GROUP BY provider_id

Network Type Coverage​

-- Check coverage by network_type (non-null values only)
-- Coverage is defined as percentage of rows with canonical_rate_score > 1 (non-outliers)

SELECT
network_type,
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) as n_non_outlier,
COUNT(*) as n_total,
CAST(
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) AS DOUBLE
) / CAST(COUNT(*) AS DOUBLE) * 100.0 as coverage_pct
FROM {{ table }}
WHERE network_type IS NOT NULL
GROUP BY network_type
ORDER BY network_type

Provider Type Coverage​

-- Check coverage by provider type
-- Coverage is defined as percentage of rows with canonical_rate_score > 1 (non-outliers)

SELECT
provider_type,
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) as n_non_outlier,
COUNT(*) as n_total,
CAST(
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) AS DOUBLE
) / CAST(COUNT(*) AS DOUBLE) * 100.0 as coverage_pct
FROM {{ table_name }}
WHERE network_class = 'Commercial'
GROUP BY provider_type
ORDER BY provider_type

Rate Object Space Tests​

ROID Count​

SELECT 
t1.n,
t2.unq_n
FROM (
SELECT count(roid) as n
FROM {{ table }}
) t1
CROSS JOIN (
SELECT count(distinct roid) as unq_n
FROM {{ table_roid }}
) t2

ROS Bill Types​

SELECT distinct bill_type
FROM {{ table }}
ORDER BY bill_type ASC

ROS Billing Code Types​

SELECT distinct billing_code_type
FROM {{ table }}
ORDER BY billing_code_type ASC

ROS Networks​

SELECT count(distinct network_id) as n
FROM {{ table }}

ROS Payers​

SELECT count(distinct payer_id) as n
FROM {{ table }}

ROS Provider Types​

SELECT distinct provider_type
FROM {{ table }}
ORDER BY provider_type ASC

ROS Providers​

SELECT count(distinct provider_id) as n
FROM {{ table }}

Rate Quality Tests​

No Negative Rates​

SELECT count(*) as n
FROM {{ table }}
WHERE canonical_rate < 0

Rate Type Tests​

Impute Rate Type​

-- QA: {{ rate_type }}

SELECT
roid,
canonical_rate,
{{ expected }} as expected
FROM {{ schema_name }}.{{ table_name }}
WHERE canonical_rate_type = '{{ rate_type }}'
AND canonical_rate_score > 0
LIMIT 1

Raw Hospital Rate Type​

-- QA: {{ rate_type }}

WITH
sample AS (
SELECT
roid,
payer_id,
provider_id,
raw_id AS trace_raw_id,
canonical_rate
FROM {{ schema_name }}.{{ table_name }}, unnest(split(trace_raw_id, ',')) as t(raw_id)
WHERE canonical_rate_type = '{{ rate_type }}'
AND canonical_rate_subversion = '{{ subversion }}'
AND billing_code_type IN ('MS-DRG', 'HCPCS')
AND canonical_rate IS NOT NULL
AND canonical_rate_original_billing_code_type IS NULL
LIMIT 1
)
SELECT
s.*,
hr.negotiated_dollar
FROM {{ hospital_schema }}.hospital_rates hr
JOIN sample s
ON s.payer_id = CAST(hr.payer_id AS VARCHAR)
AND s.provider_id = hr.provider_id
AND s.trace_raw_id = hr.id

Raw Hospital Allowed Amount​

-- QA: {{ rate_type }}

WITH
sample AS (
SELECT
roid,
payer_id,
provider_id,
trace_raw_id,
canonical_rate
FROM {{ schema_name }}.{{ table_name }}
WHERE canonical_rate_type = '{{ rate_type }}'
AND canonical_rate_subversion = '{{ subversion }}'
AND canonical_rate IS NOT NULL
AND canonical_rate_original_billing_code_type IS NULL
LIMIT 1
)
SELECT
s.roid,
s.payer_id,
s.provider_id,
s.trace_raw_id,
s.canonical_rate,
hr.estimated_allowed_amount
FROM sample s
LEFT JOIN {{ hospital_schema }}.hospital_rates hr
ON s.payer_id = CAST(hr.payer_id AS VARCHAR)
AND s.provider_id = hr.provider_id
AND s.trace_raw_id = hr.id

Raw Payer Rate Type​

-- QA: {{ rate_type }}

WITH
sample AS (
SELECT
roid,
payer_id,
provider_id,
raw_id as trace_raw_id,
canonical_rate
FROM {{ schema_name }}.{{ table_name }}, unnest(split(trace_raw_id, ',')) as t(raw_id)
WHERE canonical_rate_type = '{{ rate_type }}'
AND canonical_rate_subversion = '{{ subversion }}'
AND bill_type IN ('Inpatient', 'Outpatient')
AND provider_type = 'Hospital'
AND billing_code_type IN ('MS-DRG', 'HCPCS')
AND canonical_rate_original_billing_code_type IS NULL
LIMIT 1
)
SELECT
s.*,
cr.negotiated_rate
FROM {{ payer_schema }}.core_rates cr
JOIN sample s
ON s.payer_id = cr.payer_id
AND s.provider_id = cr.provider_id
AND s.trace_raw_id = cr.id

Transform Hospital Rate Type​

-- QA: {{ rate_type }}

WITH
sample AS (
SELECT
roid,
payer_id,
provider_id,
raw_id AS trace_raw_id,
medicare_glos,
komodo_gross_charge_provider,
mrf_gross_charge_provider,
mrf_gross_charge_state_median,
komodo_gross_charge_cbsa_median,
komodo_gross_charge_state_median,
gross_charge_cbsa_provider_adjustment,
gross_charge_state_provider_adjustment,
mrf_gross_charge_cbsa_median,
canonical_rate
FROM {{ schema_name }}.{{ table_name }}, unnest(split(trace_raw_id, ',')) as t(raw_id)
WHERE canonical_rate_type = '{{ rate_type }}'
AND canonical_rate_subversion = '{{ subversion }}'
AND billing_code_type IN ('MS-DRG', 'HCPCS')
LIMIT 1
)
SELECT
s.*,
{{ rate_col }} as rate
FROM {{ hospital_schema }}.hospital_rates hr
JOIN sample s
ON s.payer_id = CAST(hr.payer_id AS VARCHAR)
AND s.provider_id = hr.provider_id
AND s.trace_raw_id = hr.id

Transform Payer Rate Type​

-- QA: {{ rate_type }}

WITH
sample AS (
SELECT
roid,
payer_id,
provider_id,
raw_id AS trace_raw_id,
CASE
WHEN billing_code_type = 'HCPCS' THEN 1
ELSE medicare_glos
END as medicare_glos,
komodo_gross_charge_provider,
mrf_gross_charge_provider,
mrf_gross_charge_state_median,
komodo_gross_charge_cbsa_median,
komodo_gross_charge_state_median,
gross_charge_cbsa_provider_adjustment,
gross_charge_state_provider_adjustment,
mrf_gross_charge_cbsa_median,
canonical_rate
FROM {{ schema_name }}.{{ table_name }}, unnest(split(trace_raw_id, ',')) as t(raw_id)
WHERE canonical_rate_type = '{{ rate_type }}'
AND canonical_rate_subversion = '{{ subversion }}'
AND billing_code_type IN ('MS-DRG', 'HCPCS')
LIMIT 1
)
SELECT
s.*,
cr.negotiated_rate as rate
FROM {{ payer_schema }}.core_rates cr
JOIN sample s
ON s.payer_id = cr.payer_id
AND s.provider_id = cr.provider_id
AND s.trace_raw_id = cr.id

Spine Validations​

Provider Spine NPI Array No Nulls​

SELECT
COUNT(*) as null_count
FROM {{ table_name }} p
CROSS JOIN UNNEST(npi) n(npi)
WHERE n.npi IS NULL
AND p.npi IS NOT NULL

Spine Column Non-Null​

SELECT 
COUNT(*) as null_count
FROM {{ table_name }}
WHERE {{ column_name }} IS NULL

Tests on Combined Data​

Canonical Method Parameters Values​

SELECT 
canonical_rate_type,
ANY_VALUE(canonical_method_params) as canonical_method_params
FROM {{ table_name }}
WHERE canonical_rate_type IS NOT NULL
AND canonical_rate_type NOT LIKE 'raw:%'
AND (
canonical_rate_type NOT LIKE '%per_diem%'
OR
(
canonical_rate_type LIKE '%per_diem%'
AND
bill_type = 'Inpatient'
)
)
AND canonical_method_params IS NOT NULL
AND canonical_rate_score > 0
GROUP BY 1

Payer Coverage Validation​

SELECT 
payer_id,
payer_name,
array_agg(DISTINCT state) as states,
count(distinct provider_id) n_providers,
count(*) as n_rates,
COUNT(
DISTINCT
CASE
WHEN canonical_rate_score > 1 THEN provider_id
ELSE NULL
END
) as n_non_outlier_providers,
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) as n_non_outlier_rates,
CAST(COUNT(
DISTINCT
CASE
WHEN canonical_rate_score > 1 THEN provider_id
ELSE NULL
END
) AS DOUBLE) / CAST(count(distinct provider_id) AS DOUBLE) as pct_non_outlier_providers,
CAST(SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) AS DOUBLE) / CAST(count(*) AS DOUBLE) as pct_non_outlier_rates
FROM {{ table_name }}
WHERE canonical_rate_score > 0
AND payer_id IN ('643', '7', '76', '42', '522', '169', '403', '388')
GROUP BY 1, 2
ORDER BY payer_id

Required Columns Non-Null​

SELECT 
COUNT(*) as null_count
FROM {{ table_name }}
WHERE canonical_rate_score > 0
AND {{ column_name }} IS NULL