QA Tests
SQL-based quality assurance tests for data validation and production checks.
Basic Validations​
Non-Null Checks​
-- QA table has rows
-- 🪟💲🌟
-- run_id: {{ run_id }}
-- task_name: {{ task_name }}
-- Table: {{ table }}
-- Columns: {{ columns }}
SELECT count(*) as n
FROM {{ table }}
Uniqueness Checks​
-- QA for uniqueness of columns in a table
-- 🪟💲🌟
-- run_id: {{ run_id }}
-- task_name: {{ task_name }}
-- Table: {{ table }}
-- Columns: {{ columns }}
WITH
df AS (
SELECT
{% for column in columns %}
{{ column }},
{% endfor %}
count(*) as n
FROM {{ table }}
GROUP BY
{% for column in columns %}
{{ column }}
{% if not loop.last %}, {% endif %}
{% endfor %}
HAVING count(*) > 1
)
SELECT count(*) as n
FROM df
Production Checks​
Analyze Report Reproduction​
SELECT
p.provider_id,
p.provider_name,
p.bill_type,
AVG(canonical_rate_percent_of_state_avg_medicare) AS avg_canonical_rate_percent_of_state_avg_medicare
FROM {{ table_name }} p
JOIN tq_intermediate.cld_utils.utilization_equal_code_utilization u
ON p.billing_code_type = u.code_type
AND p.billing_code = u.code
WHERE provider_id IN (
{% for provider_id in config.provider_id %}
'{{ provider_id }}'{% if not loop.last %},{% endif %}
{% endfor %}
)
AND network_id IN (
{% for network_id in config.network_id %}
{{ network_id }}{% if not loop.last %},{% endif %}
{% endfor %}
)
AND payer_id IN (
{% for payer_id in config.payer_id %}
'{{ payer_id }}'{% if not loop.last %},{% endif %}
{% endfor %}
)
AND canonical_rate_score > 1
AND is_drug_code = false
GROUP BY
1,2,3
ORDER BY
1,2,3
Best Payer Hospital Score Completeness​
SELECT
bill_type,
COUNT(DISTINCT best_payer_rate_score) AS n_unique_best_payer_rate_score,
COUNT(DISTINCT best_hospital_rate_score) AS n_unique_best_hospital_rate_score
FROM {{ table_name }}
WHERE canonical_rate_score > 1
AND canonical_rate_source != 'imputation'
GROUP BY 1
Canonical Method Parameters Formula​
WITH
counts AS (
SELECT
canonical_rate_type,
COUNT(DISTINCT roid) AS n_roids
FROM {{ table_name }}
WHERE canonical_rate_type IS NOT NULL
GROUP BY 1
),
df AS (
SELECT
roid,
payer_id,
billing_code_type,
canonical_rate_type,
canonical_rate,
canonical_method_params,
canonical_method_formula,
ROW_NUMBER() OVER (PARTITION BY billing_code_type, canonical_rate_type ORDER BY RANDOM()) AS rn
FROM {{ table_name }}
WHERE canonical_rate_type IS NOT NULL
AND canonical_rate_type NOT LIKE 'raw:%'
)
SELECT df.*, c.n_roids
FROM df
JOIN counts c
ON df.canonical_rate_type = c.canonical_rate_type
WHERE rn < 5
IP/OP Have Best Payer Hospital Rate​
SELECT *
FROM {{ table_name }}
WHERE canonical_rate_score > 1
AND best_payer_rate IS NULL
AND best_hospital_rate IS NULL
AND bill_type IN ('Inpatient', 'Outpatient')
AND canonical_rate_source != 'imputation'
LIMIT 1
No Negative Rates​
SELECT *
FROM {{ table_name }}
WHERE canonical_rate_score > 1
AND canonical_rate < 0
LIMIT 1
No Rates Greater Than 20M​
SELECT *
FROM {{ table_name }}
WHERE canonical_rate_score > 1
AND canonical_rate > 20000000
AND is_drug_code = False
LIMIT 1
Non-Outlier Coverage > 30%​
-- Check non-outlier coverage greater than 30 percent
SELECT
1.000 * sum(case when canonical_rate_score > 1 then 1 else 0 end) / count(*) as coverage
FROM {{ table_name }}
WHERE provider_type = 'Hospital'
Non-Outlier Median Canonical Percentage of State Avg Medicare​
-- Median Canonical Percentage of State Avg Medicare for Non-Outlier Records
SELECT
approx_percentile(canonical_rate_percent_of_state_avg_medicare, 0.5) as median_pct
FROM {{ table_name }}
WHERE canonical_rate_score > 1
Non-Outlier Median Canonical Rate​
-- Median Canonical Rate for Non-Outlier roids
SELECT
approx_percentile(canonical_rate, 0.5) as median_rate
FROM {{ table_name }}
WHERE canonical_rate_score > 1
AND provider_type = 'Hospital'
Professional Rates Have Best Payer Hospital Rate​
SELECT *
FROM {{ table_name }}
WHERE canonical_rate_score > 1
AND best_payer_rate IS NULL
AND best_hospital_rate IS NULL
AND bill_type = 'Professional'
LIMIT 1
Provider Type Coverage​
-- Check coverage by provider type
-- Coverage is defined as percentage of rows with canonical_rate_score > 1 (non-outliers)
SELECT
provider_type,
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) as n_non_outlier,
COUNT(*) as n_total,
CAST(
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) AS DOUBLE
) / CAST(COUNT(*) AS DOUBLE) * 100.0 as coverage_pct
FROM {{ table_name }}
WHERE network_class = 'Commercial'
GROUP BY provider_type
ORDER BY provider_type
Version Comparison​
WITH
old AS (
SELECT
provider_type,
provider_id,
payer_id,
network_type,
network_id,
bill_type,
is_drug_code,
avg(canonical_rate) as avg_canonical_rate,
approx_percentile(canonical_rate, 0.5) as median_canonical_rate,
avg(canonical_rate_percent_of_state_avg_medicare) as avg_canonical_rate_percent_of_state_avg_medicare,
approx_percentile(canonical_rate_percent_of_state_avg_medicare, 0.5) as median_canonical_rate_percent_of_state_avg_medicare,
avg(canonical_gross_charge) as avg_gross_charge,
approx_percentile(canonical_gross_charge, 0.5) as median_gross_charge,
avg(medicare_rate) as avg_medicare_rate,
approx_percentile(medicare_rate, 0.5) as median_medicare_rate,
avg(state_avg_medicare_rate) as avg_state_avg_medicare_rate,
approx_percentile(state_avg_medicare_rate, 0.5) as median_state_avg_medicare_rate,
avg(asp_payment_limit) as avg_asp_payment_limit,
approx_percentile(asp_payment_limit, 0.5) as median_asp_payment_limit
FROM {{ old_table }}
WHERE canonical_rate_score > 1
GROUP BY 1, 2, 3, 4, 5, 6, 7
),
new AS (
SELECT
provider_type,
provider_id,
payer_id,
network_type,
network_id,
bill_type,
is_drug_code,
avg(canonical_rate) as avg_canonical_rate,
approx_percentile(canonical_rate, 0.5) as median_canonical_rate,
avg(canonical_rate_percent_of_state_avg_medicare) as avg_canonical_rate_percent_of_state_avg_medicare,
approx_percentile(canonical_rate_percent_of_state_avg_medicare, 0.5) as median_canonical_rate_percent_of_state_avg_medicare,
avg(canonical_gross_charge) as avg_gross_charge,
approx_percentile(canonical_gross_charge, 0.5) as median_gross_charge,
avg(medicare_rate) as avg_medicare_rate,
approx_percentile(medicare_rate, 0.5) as median_medicare_rate,
avg(state_avg_medicare_rate) as avg_state_avg_medicare_rate,
approx_percentile(state_avg_medicare_rate, 0.5) as median_state_avg_medicare_rate,
avg(asp_payment_limit) as avg_asp_payment_limit,
approx_percentile(asp_payment_limit, 0.5) as median_asp_payment_limit
FROM {{ new_table }}
WHERE canonical_rate_score > 1
GROUP BY 1, 2, 3, 4, 5, 6, 7
),
comparisons AS (
SELECT
provider_type,
provider_id,
payer_id,
network_type,
network_id,
bill_type,
is_drug_code,
(v.avg_canonical_rate - q.avg_canonical_rate) / NULLIF(((v.avg_canonical_rate + q.avg_canonical_rate) / 2), 0) as diff_avg_canonical_rate_pct,
(v.median_canonical_rate - q.median_canonical_rate) / NULLIF(((v.median_canonical_rate + q.median_canonical_rate) / 2), 0) as diff_median_canonical_rate_pct,
(v.avg_gross_charge - q.avg_gross_charge) / NULLIF(((v.avg_gross_charge + q.avg_gross_charge) / 2), 0) as diff_avg_gross_charge_pct,
(v.median_gross_charge - q.median_gross_charge) / NULLIF(((v.median_gross_charge + q.median_gross_charge) / 2), 0) as diff_median_gross_charge_pct,
(v.median_medicare_rate - q.median_medicare_rate) / NULLIF(((v.median_medicare_rate + q.median_medicare_rate) / 2), 0) as diff_median_medicare_rate_pct,
(v.avg_canonical_rate_percent_of_state_avg_medicare - q.avg_canonical_rate_percent_of_state_avg_medicare) / NULLIF(((v.avg_canonical_rate_percent_of_state_avg_medicare + q.avg_canonical_rate_percent_of_state_avg_medicare) / 2), 0) as diff_avg_canonical_rate_percent_of_state_avg_medicare_pct,
(v.median_canonical_rate_percent_of_state_avg_medicare - q.median_canonical_rate_percent_of_state_avg_medicare) / NULLIF(((v.median_canonical_rate_percent_of_state_avg_medicare + q.median_canonical_rate_percent_of_state_avg_medicare) / 2), 0) as diff_median_canonical_rate_percent_of_state_avg_medicare_pct,
(v.avg_medicare_rate - q.avg_medicare_rate) / NULLIF(((v.avg_medicare_rate + q.avg_medicare_rate) / 2), 0) as diff_avg_medicare_rate_pct,
(v.avg_state_avg_medicare_rate - q.avg_state_avg_medicare_rate) / NULLIF(((v.avg_state_avg_medicare_rate + q.avg_state_avg_medicare_rate) / 2), 0) as diff_avg_state_avg_medicare_rate_pct,
(v.median_state_avg_medicare_rate - q.median_state_avg_medicare_rate) / NULLIF(((v.median_state_avg_medicare_rate + q.median_state_avg_medicare_rate) / 2), 0) as diff_median_state_avg_medicare_rate_pct,
(v.avg_asp_payment_limit - q.avg_asp_payment_limit) / NULLIF(((v.avg_asp_payment_limit + q.avg_asp_payment_limit) / 2), 0) as diff_avg_asp_payment_limit_pct,
(v.median_asp_payment_limit - q.median_asp_payment_limit) / NULLIF(((v.median_asp_payment_limit + q.median_asp_payment_limit) / 2), 0) as diff_median_asp_payment_limit_pct
FROM old q
JOIN new v USING (provider_type, provider_id, payer_id, network_type, network_id, bill_type, is_drug_code)
)
SELECT
APPROX_PERCENTILE(diff_avg_canonical_rate_pct, 0.5) as median_diff_avg_canonical_rate_pct,
APPROX_PERCENTILE(diff_avg_canonical_rate_pct, 0.1) as p10_diff_avg_canonical_rate_pct,
APPROX_PERCENTILE(diff_avg_canonical_rate_pct, 0.9) as p90_diff_avg_canonical_rate_pct,
APPROX_PERCENTILE(diff_median_canonical_rate_pct, 0.5) as median_diff_median_canonical_rate_pct,
APPROX_PERCENTILE(diff_median_canonical_rate_pct, 0.1) as p10_diff_median_canonical_rate_pct,
APPROX_PERCENTILE(diff_median_canonical_rate_pct, 0.9) as p90_diff_median_canonical_rate_pct,
APPROX_PERCENTILE(diff_median_gross_charge_pct, 0.5) as median_diff_median_gross_charge_pct,
APPROX_PERCENTILE(diff_median_gross_charge_pct, 0.1) as p10_diff_median_gross_charge_pct,
APPROX_PERCENTILE(diff_median_gross_charge_pct, 0.9) as p90_diff_median_gross_charge_pct
FROM comparisons
WHERE diff_avg_canonical_rate_pct IS NOT NULL
Rate Availability Tests​
All Networks Have Rates​
SELECT
network_id,
sum(
{% if 'tmp_raw_mrf_payer_rates' in table %}
CASE
WHEN rate IS NOT NULL THEN 1
ELSE 0
END
{% else %}
CASE
WHEN canonical_rate IS NOT NULL THEN 1
ELSE 0
END
{% endif %}
) as n
FROM {{ table }}
GROUP BY 1
All Payers Have Rates​
SELECT
payer_id,
sum(
{% if 'tmp_raw_mrf_payer_rates' in table %}
CASE
WHEN rate IS NOT NULL THEN 1
ELSE 0
END
{% else %}
CASE
WHEN canonical_rate IS NOT NULL THEN 1
ELSE 0
END
{% endif %}
) as n
FROM {{ table }}
GROUP BY 1
All Provider Types Have Rates​
-- Check that all provider types have rates
SELECT DISTINCT
provider_type
FROM {{ table }}
WHERE
canonical_rate IS NOT NULL
ORDER BY provider_type
Bill Types Have Rates​
SELECT distinct bill_type
FROM {{ table }}
WHERE canonical_rate IS NOT NULL
ORDER BY bill_type ASC
Billing Code Types Have Rates​
SELECT distinct billing_code_type
FROM {{ table }}
WHERE canonical_rate IS NOT NULL
ORDER BY billing_code_type ASC
Most Hospitals Have Rates​
SELECT
provider_id,
SUM(
{% if 'tmp_raw_mrf_hospital_rates' in table %}
CASE
WHEN negotiated_dollar > 0
OR negotiated_percentage > 0
OR (estimated_allowed_amount > 0 AND estimated_allowed_amount < 100000000)
THEN 1
ELSE 0
END
{% else %}
CASE
WHEN canonical_rate IS NOT NULL THEN 1
ELSE 0
END
{% endif %}
) as n
FROM {{ table }}
{% if 'combined' in table %}
{% if commercial %}
WHERE network_class = 'Commercial'
AND provider_type = 'Hospital'
{% else %}
WHERE network_class != 'Commercial'
AND provider_type = 'Hospital'
{% endif %}
{% endif %}
GROUP BY provider_id
Network Type Coverage​
-- Check coverage by network_type (non-null values only)
-- Coverage is defined as percentage of rows with canonical_rate_score > 1 (non-outliers)
SELECT
network_type,
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) as n_non_outlier,
COUNT(*) as n_total,
CAST(
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) AS DOUBLE
) / CAST(COUNT(*) AS DOUBLE) * 100.0 as coverage_pct
FROM {{ table }}
WHERE network_type IS NOT NULL
GROUP BY network_type
ORDER BY network_type
Provider Type Coverage​
-- Check coverage by provider type
-- Coverage is defined as percentage of rows with canonical_rate_score > 1 (non-outliers)
SELECT
provider_type,
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) as n_non_outlier,
COUNT(*) as n_total,
CAST(
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) AS DOUBLE
) / CAST(COUNT(*) AS DOUBLE) * 100.0 as coverage_pct
FROM {{ table_name }}
WHERE network_class = 'Commercial'
GROUP BY provider_type
ORDER BY provider_type
Rate Object Space Tests​
ROID Count​
SELECT
t1.n,
t2.unq_n
FROM (
SELECT count(roid) as n
FROM {{ table }}
) t1
CROSS JOIN (
SELECT count(distinct roid) as unq_n
FROM {{ table_roid }}
) t2
ROS Bill Types​
SELECT distinct bill_type
FROM {{ table }}
ORDER BY bill_type ASC
ROS Billing Code Types​
SELECT distinct billing_code_type
FROM {{ table }}
ORDER BY billing_code_type ASC
ROS Networks​
SELECT count(distinct network_id) as n
FROM {{ table }}
ROS Payers​
SELECT count(distinct payer_id) as n
FROM {{ table }}
ROS Provider Types​
SELECT distinct provider_type
FROM {{ table }}
ORDER BY provider_type ASC
ROS Providers​
SELECT count(distinct provider_id) as n
FROM {{ table }}
Rate Quality Tests​
No Negative Rates​
SELECT count(*) as n
FROM {{ table }}
WHERE canonical_rate < 0
Rate Type Tests​
Impute Rate Type​
-- QA: {{ rate_type }}
SELECT
roid,
canonical_rate,
{{ expected }} as expected
FROM {{ schema_name }}.{{ table_name }}
WHERE canonical_rate_type = '{{ rate_type }}'
AND canonical_rate_score > 0
LIMIT 1
Raw Hospital Rate Type​
-- QA: {{ rate_type }}
WITH
sample AS (
SELECT
roid,
payer_id,
provider_id,
raw_id AS trace_raw_id,
canonical_rate
FROM {{ schema_name }}.{{ table_name }}, unnest(split(trace_raw_id, ',')) as t(raw_id)
WHERE canonical_rate_type = '{{ rate_type }}'
AND canonical_rate_subversion = '{{ subversion }}'
AND billing_code_type IN ('MS-DRG', 'HCPCS')
AND canonical_rate IS NOT NULL
AND canonical_rate_original_billing_code_type IS NULL
LIMIT 1
)
SELECT
s.*,
hr.negotiated_dollar
FROM {{ hospital_schema }}.hospital_rates hr
JOIN sample s
ON s.payer_id = CAST(hr.payer_id AS VARCHAR)
AND s.provider_id = hr.provider_id
AND s.trace_raw_id = hr.id
Raw Hospital Allowed Amount​
-- QA: {{ rate_type }}
WITH
sample AS (
SELECT
roid,
payer_id,
provider_id,
trace_raw_id,
canonical_rate
FROM {{ schema_name }}.{{ table_name }}
WHERE canonical_rate_type = '{{ rate_type }}'
AND canonical_rate_subversion = '{{ subversion }}'
AND canonical_rate IS NOT NULL
AND canonical_rate_original_billing_code_type IS NULL
LIMIT 1
)
SELECT
s.roid,
s.payer_id,
s.provider_id,
s.trace_raw_id,
s.canonical_rate,
hr.estimated_allowed_amount
FROM sample s
LEFT JOIN {{ hospital_schema }}.hospital_rates hr
ON s.payer_id = CAST(hr.payer_id AS VARCHAR)
AND s.provider_id = hr.provider_id
AND s.trace_raw_id = hr.id
Raw Payer Rate Type​
-- QA: {{ rate_type }}
WITH
sample AS (
SELECT
roid,
payer_id,
provider_id,
raw_id as trace_raw_id,
canonical_rate
FROM {{ schema_name }}.{{ table_name }}, unnest(split(trace_raw_id, ',')) as t(raw_id)
WHERE canonical_rate_type = '{{ rate_type }}'
AND canonical_rate_subversion = '{{ subversion }}'
AND bill_type IN ('Inpatient', 'Outpatient')
AND provider_type = 'Hospital'
AND billing_code_type IN ('MS-DRG', 'HCPCS')
AND canonical_rate_original_billing_code_type IS NULL
LIMIT 1
)
SELECT
s.*,
cr.negotiated_rate
FROM {{ payer_schema }}.core_rates cr
JOIN sample s
ON s.payer_id = cr.payer_id
AND s.provider_id = cr.provider_id
AND s.trace_raw_id = cr.id
Transform Hospital Rate Type​
-- QA: {{ rate_type }}
WITH
sample AS (
SELECT
roid,
payer_id,
provider_id,
raw_id AS trace_raw_id,
medicare_glos,
komodo_gross_charge_provider,
mrf_gross_charge_provider,
mrf_gross_charge_state_median,
komodo_gross_charge_cbsa_median,
komodo_gross_charge_state_median,
gross_charge_cbsa_provider_adjustment,
gross_charge_state_provider_adjustment,
mrf_gross_charge_cbsa_median,
canonical_rate
FROM {{ schema_name }}.{{ table_name }}, unnest(split(trace_raw_id, ',')) as t(raw_id)
WHERE canonical_rate_type = '{{ rate_type }}'
AND canonical_rate_subversion = '{{ subversion }}'
AND billing_code_type IN ('MS-DRG', 'HCPCS')
LIMIT 1
)
SELECT
s.*,
{{ rate_col }} as rate
FROM {{ hospital_schema }}.hospital_rates hr
JOIN sample s
ON s.payer_id = CAST(hr.payer_id AS VARCHAR)
AND s.provider_id = hr.provider_id
AND s.trace_raw_id = hr.id
Transform Payer Rate Type​
-- QA: {{ rate_type }}
WITH
sample AS (
SELECT
roid,
payer_id,
provider_id,
raw_id AS trace_raw_id,
CASE
WHEN billing_code_type = 'HCPCS' THEN 1
ELSE medicare_glos
END as medicare_glos,
komodo_gross_charge_provider,
mrf_gross_charge_provider,
mrf_gross_charge_state_median,
komodo_gross_charge_cbsa_median,
komodo_gross_charge_state_median,
gross_charge_cbsa_provider_adjustment,
gross_charge_state_provider_adjustment,
mrf_gross_charge_cbsa_median,
canonical_rate
FROM {{ schema_name }}.{{ table_name }}, unnest(split(trace_raw_id, ',')) as t(raw_id)
WHERE canonical_rate_type = '{{ rate_type }}'
AND canonical_rate_subversion = '{{ subversion }}'
AND billing_code_type IN ('MS-DRG', 'HCPCS')
LIMIT 1
)
SELECT
s.*,
cr.negotiated_rate as rate
FROM {{ payer_schema }}.core_rates cr
JOIN sample s
ON s.payer_id = cr.payer_id
AND s.provider_id = cr.provider_id
AND s.trace_raw_id = cr.id
Spine Validations​
Provider Spine NPI Array No Nulls​
SELECT
COUNT(*) as null_count
FROM {{ table_name }} p
CROSS JOIN UNNEST(npi) n(npi)
WHERE n.npi IS NULL
AND p.npi IS NOT NULL
Spine Column Non-Null​
SELECT
COUNT(*) as null_count
FROM {{ table_name }}
WHERE {{ column_name }} IS NULL
Tests on Combined Data​
Canonical Method Parameters Values​
SELECT
canonical_rate_type,
ANY_VALUE(canonical_method_params) as canonical_method_params
FROM {{ table_name }}
WHERE canonical_rate_type IS NOT NULL
AND canonical_rate_type NOT LIKE 'raw:%'
AND (
canonical_rate_type NOT LIKE '%per_diem%'
OR
(
canonical_rate_type LIKE '%per_diem%'
AND
bill_type = 'Inpatient'
)
)
AND canonical_method_params IS NOT NULL
AND canonical_rate_score > 0
GROUP BY 1
Payer Coverage Validation​
SELECT
payer_id,
payer_name,
array_agg(DISTINCT state) as states,
count(distinct provider_id) n_providers,
count(*) as n_rates,
COUNT(
DISTINCT
CASE
WHEN canonical_rate_score > 1 THEN provider_id
ELSE NULL
END
) as n_non_outlier_providers,
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) as n_non_outlier_rates,
CAST(COUNT(
DISTINCT
CASE
WHEN canonical_rate_score > 1 THEN provider_id
ELSE NULL
END
) AS DOUBLE) / CAST(count(distinct provider_id) AS DOUBLE) as pct_non_outlier_providers,
CAST(SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) AS DOUBLE) / CAST(count(*) AS DOUBLE) as pct_non_outlier_rates
FROM {{ table_name }}
WHERE canonical_rate_score > 0
AND payer_id IN ('643', '7', '76', '42', '522', '169', '403', '388')
GROUP BY 1, 2
ORDER BY payer_id
Required Columns Non-Null​
SELECT
COUNT(*) as null_count
FROM {{ table_name }}
WHERE canonical_rate_score > 0
AND {{ column_name }} IS NULL