from IPython import get_ipython
from jinja2 import Template
if get_ipython() is not None:
get_ipython().run_line_magic("load_ext", "autoreload")
get_ipython().run_line_magic("autoreload", "2")
from dotenv import load_dotenv
import pandas as pd
from tqds.sql import db, sql
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid', rc={'figure.figsize': (14, 12)})
pd.options.display.max_columns = None
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = None
pd.set_option('display.float_format', lambda x: '%.3f' % x)
trino_conn = db.TrinoClient().conn
redshift_conn = db.RedshiftClient().conn
query = sql.Query()
"""
Table of Contents:
- [CLD-v2.1.3] provider npi array - distinct and sorted #4010
- [CLD-v2.1.3] plan bridge updates to accommodate narrow networks + multiple networks within payer-network_type
- [CLD-v2.1.3] Medicare OP threshold set to 30x; Hospital Gross Charge Transformations threshold set to 30x
- [CLD-v2.1.3] validation: tiered approach
- [CLD-v2.1.3] static gross charge tables
- [CLD-v2.1.3] gross charges: service year filter
"""
"""
- [CLD-v2.1.3] provider npi array - distinct and sorted #4010
"""
df = pd.read_sql(f"""
WITH
new AS (
SELECT provider_id, npi, CARDINALITY(npi) AS npi_count, 'new' AS source
FROM tq_dev.internal_dev_csong_cld_v2_1_3.prod_combined_abridged
WHERE provider_id = '6612'
AND npi IS NOT NULL
LIMIT 1
),
old AS (
SELECT provider_id, npi, CARDINALITY(npi) AS npi_count, 'old' AS source
FROM tq_dev.internal_dev_csong_cld_v2_1_2.prod_combined_abridged
WHERE provider_id = '6612'
AND npi IS NOT NULL
LIMIT 1
)
SELECT *
FROM new
UNION ALL
SELECT *
FROM old
""", con=trino_conn)
df
"""
- [CLD-v2.1.3] plan bridge updates to accommodate narrow networks + multiple networks within payer-network_type
"""
pd.read_sql(f"""
WITH
old AS (
SELECT COUNT(*) as n, 'old' AS source
FROM tq_dev.internal_dev_csong_cld_v2_1_2.tmp_ref_plan_bridge_analysis_2025_07
),
new AS (
SELECT COUNT(*) as n, 'new' AS source
FROM tq_dev.internal_dev_csong_cld_v2_1_3.tmp_ref_plan_bridge_analysis_2025_07
)
SELECT *
FROM old
UNION ALL
SELECT *
FROM new
""", con=trino_conn)
df = pd.read_sql(f"""
SELECT
count(*) as n,
1.0000 * SUM(
CASE
WHEN canonical_rate_score > 0 THEN 1
ELSE 0
END
) / count(*) AS perc_with_canonical_rate,
1.0000 * SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) / count(*) AS perc_with_canonical_rate_above_1
FROM tq_dev.internal_dev_csong_cld_v2_1_3.prod_combined_abridged
LEFT JOIN tq_dev.internal_dev_csong_cld_v2_1_3.tmp_ref_network_spine_2025_07 ns USING (payer_id, network_id)
WHERE ns.network_type = 'NARROW'
""", con=trino_conn)
df
"""
- [CLD-v2.1.3] Medicare OP threshold set to 30x; Hospital Gross Charge Transformations threshold set to 30x
"""
df = pd.read_sql(f"""
WITH old AS (
SELECT canonical_rate_score, COUNT(*) AS old
FROM tq_dev.internal_dev_csong_cld_v2_1_2.prod_combined_abridged
LEFT JOIN tq_dev.internal_dev_csong_cld_v2_1_3.tmp_ref_network_spine_2025_07 ns USING (payer_id, network_id)
WHERE ns.network_type != 'NARROW'
GROUP BY 1
), new AS (
SELECT canonical_rate_score, COUNT(*) AS new
FROM tq_dev.internal_dev_csong_cld_v2_1_3.prod_combined_abridged
LEFT JOIN tq_dev.internal_dev_csong_cld_v2_1_3.tmp_ref_network_spine_2025_07 ns USING (payer_id, network_id)
WHERE ns.network_type != 'NARROW'
GROUP BY 1
)
SELECT *
FROM old
FULL OUTER JOIN new USING (canonical_rate_score)
ORDER BY canonical_rate_score
""", con=trino_conn)
df['old_percent'] = df['old'] / df['old'].sum()
df['new_percent'] = df['new'] / df['new'].sum()
df
df = pd.read_sql(f"""
SELECT
provider_type,
min(canonical_rate_percent_of_cbsa_avg_medicare) AS cbsa_min,
APPROX_PERCENTILE(canonical_rate_percent_of_cbsa_avg_medicare, 0.01) AS cbsa_p1,
APPROX_PERCENTILE(canonical_rate_percent_of_cbsa_avg_medicare, 0.99) AS cbsa_p99,
max(canonical_rate_percent_of_cbsa_avg_medicare) AS cbsa_max,
min(canonical_rate_percent_of_state_avg_medicare) AS state_min,
APPROX_PERCENTILE(canonical_rate_percent_of_state_avg_medicare, 0.01) AS state_p1,
APPROX_PERCENTILE(canonical_rate_percent_of_state_avg_medicare, 0.99) AS state_p99,
max(canonical_rate_percent_of_state_avg_medicare) AS state_max,
min(canonical_rate_percent_of_medicare) AS facility_min,
APPROX_PERCENTILE(canonical_rate_percent_of_medicare, 0.01) AS facility_p1,
APPROX_PERCENTILE(canonical_rate_percent_of_medicare, 0.99) AS facility_p99,
max(canonical_rate_percent_of_medicare) AS facility_max
FROM tq_dev.internal_dev_csong_cld_v2_1_3.prod_combined_abridged
WHERE canonical_rate_score > 1
GROUP BY 1
""", con=trino_conn)
df
df = pd.read_sql(f"""
SELECT
provider_type,
min(canonical_rate_percent_of_cbsa_avg_medicare) AS cbsa_min,
APPROX_PERCENTILE(canonical_rate_percent_of_cbsa_avg_medicare, 0.01) AS cbsa_p1,
APPROX_PERCENTILE(canonical_rate_percent_of_cbsa_avg_medicare, 0.99) AS cbsa_p99,
max(canonical_rate_percent_of_cbsa_avg_medicare) AS cbsa_max,
min(canonical_rate_percent_of_state_avg_medicare) AS state_min,
APPROX_PERCENTILE(canonical_rate_percent_of_state_avg_medicare, 0.01) AS state_p1,
APPROX_PERCENTILE(canonical_rate_percent_of_state_avg_medicare, 0.99) AS state_p99,
max(canonical_rate_percent_of_state_avg_medicare) AS state_max,
min(canonical_rate_percent_of_medicare) AS facility_min,
APPROX_PERCENTILE(canonical_rate_percent_of_medicare, 0.01) AS facility_p1,
APPROX_PERCENTILE(canonical_rate_percent_of_medicare, 0.99) AS facility_p99,
max(canonical_rate_percent_of_medicare) AS facility_max
FROM tq_dev.internal_dev_csong_cld_dev.tmp_int_combined_2025_07
WHERE canonical_rate_score > 1
AND canonical_rate_score < 5
AND is_drug_code = False
AND canonical_rate_type LIKE '%gc_hosp_perc_to_dol%'
GROUP BY 1
""", con=trino_conn)
df
df = pd.read_sql(f"""
SELECT
provider_type,
min(canonical_rate_percent_of_cbsa_avg_medicare) AS cbsa_min,
APPROX_PERCENTILE(canonical_rate_percent_of_cbsa_avg_medicare, 0.01) AS cbsa_p1,
APPROX_PERCENTILE(canonical_rate_percent_of_cbsa_avg_medicare, 0.99) AS cbsa_p99,
max(canonical_rate_percent_of_cbsa_avg_medicare) AS cbsa_max,
min(canonical_rate_percent_of_state_avg_medicare) AS state_min,
APPROX_PERCENTILE(canonical_rate_percent_of_state_avg_medicare, 0.01) AS state_p1,
APPROX_PERCENTILE(canonical_rate_percent_of_state_avg_medicare, 0.99) AS state_p99,
max(canonical_rate_percent_of_state_avg_medicare) AS state_max,
min(canonical_rate_percent_of_medicare) AS facility_min,
APPROX_PERCENTILE(canonical_rate_percent_of_medicare, 0.01) AS facility_p1,
APPROX_PERCENTILE(canonical_rate_percent_of_medicare, 0.99) AS facility_p99,
max(canonical_rate_percent_of_medicare) AS facility_max
FROM tq_dev.internal_dev_csong_cld_dev.tmp_int_combined_2025_07
WHERE canonical_rate_score > 1
AND canonical_rate_score < 5
AND is_drug_code = True
GROUP BY 1
""", con=trino_conn)
df
df = pd.read_sql(f"""
SELECT canonical_rate, canonical_rate_score, asp_payment_limit, medicare_rate, cbsa_avg_medicare_rate
FROM tq_dev.internal_dev_csong_cld_dev.tmp_int_combined_2025_07
WHERE is_drug_code = True
AND canonical_rate_score = 5
AND canonical_rate_percent_of_medicare > 10
AND provider_type LIKE '%Hospital%'
LIMIT 10
""", con=trino_conn)
df
"""
- [CLD-v2.1.3] validation: tiered approach
"""
df = pd.read_sql(f"""
SELECT
CASE
WHEN canonical_rate < 15000 THEN '<15k'
ELSE '15k+'
END AS rate_bucket,
APPROX_PERCENTILE((best_payer_rate - best_hospital_rate) / best_hospital_rate, 0.01) as diff1_p1,
APPROX_PERCENTILE((best_payer_rate - best_hospital_rate) / best_hospital_rate, 0.5) as diff1_median,
APPROX_PERCENTILE((best_payer_rate - best_hospital_rate) / best_hospital_rate, 0.99) as diff1_p99,
APPROX_PERCENTILE((best_hospital_rate - best_payer_rate) / best_payer_rate, 0.01) as diff2_p1,
APPROX_PERCENTILE((best_hospital_rate - best_payer_rate) / best_payer_rate, 0.5) as diff2_median,
APPROX_PERCENTILE((best_hospital_rate - best_payer_rate) / best_payer_rate, 0.99) as diff2_p99
FROM tq_dev.internal_dev_csong_cld_dev.tmp_int_combined_2025_07
WHERE canonical_rate_score = 5
AND is_drug_code = False
GROUP BY 1
""", con=trino_conn)
df
"""
Rate Score Distribution
"""
df = pd.read_sql(f"""
WITH
old AS (
SELECT canonical_rate_score, COUNT(*) AS n_old
FROM tq_dev.internal_dev_csong_cld_v2_1_2.tmp_int_combined_2025_07
GROUP BY canonical_rate_score
),
new AS (
SELECT canonical_rate_score, COUNT(*) AS n_new
FROM tq_dev.internal_dev_csong_cld_v2_1_3.tmp_int_combined_2025_07
GROUP BY canonical_rate_score
)
SELECT
*,
n_old * 1.000 / SUM(n_old) OVER () AS perc_old,
n_new * 1.000 / SUM(n_new) OVER () AS perc_new
FROM old
FULL OUTER JOIN new USING (canonical_rate_score)
ORDER BY canonical_rate_score
""", con=trino_conn)
df
df = pd.read_sql(f"""
SELECT
bill_type,
min(canonical_rate_percent_of_cbsa_avg_medicare) AS cbsa_min,
approx_percentile(canonical_rate_percent_of_cbsa_avg_medicare, 0.01) AS cbsa_p1,
approx_percentile(canonical_rate_percent_of_cbsa_avg_medicare, 0.05) AS cbsa_p05,
approx_percentile(canonical_rate_percent_of_cbsa_avg_medicare, 0.25) AS cbsa_p25,
approx_percentile(canonical_rate_percent_of_cbsa_avg_medicare, 0.5) AS cbsa_p50,
approx_percentile(canonical_rate_percent_of_cbsa_avg_medicare, 0.75) AS cbsa_p75,
approx_percentile(canonical_rate_percent_of_cbsa_avg_medicare, 0.95) AS cbsa_p95,
approx_percentile(canonical_rate_percent_of_cbsa_avg_medicare, 0.99) AS cbsa_p99,
max(canonical_rate_percent_of_cbsa_avg_medicare) AS cbsa_max
FROM tq_dev.internal_dev_csong_cld_v2_1_3.tmp_int_combined_2025_07
WHERE canonical_rate_score > 1
AND is_drug_code = False
GROUP BY 1
""", con=trino_conn)
df
df = pd.read_sql(f"""
SELECT *
FROM tq_dev.internal_dev_csong_cld_v2_1_3.tmp_int_combined_2025_07
WHERE canonical_rate_score > 1
AND canonical_rate_percent_of_cbsa_avg_medicare > 30
LIMIT 5
""", con=trino_conn)
df