from IPython import get_ipython
from jinja2 import Template
if get_ipython() is not None:
get_ipython().run_line_magic("load_ext", "autoreload")
get_ipython().run_line_magic("autoreload", "2")
from dotenv import load_dotenv
import pandas as pd
from tqds.sql import db, sql
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid', rc={'figure.figsize': (14, 12)})
pd.options.display.max_columns = None
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = None
pd.set_option('display.float_format', lambda x: '%.3f' % x)
trino_conn = db.TrinoClient().conn
redshift_conn = db.RedshiftClient().conn
query = sql.Query()
"""
TOC
- [CLD-v2.1.2] tmp medicare ref - fixed tables
- [CLD-v2.1.2] provider_adjusted only for medicare benchmark geo CTEs
- [CLD-v2.1.2] ASC/Imaging Use provider_healthsystem_ id instead of parent
- [CLD-v2.1.2] drug codes medicare hierarchy cleanup
- [CLD-v2.1.2] Professional Fees from Hospital MRF Data mapped to Singly Affiliated Physician Groups
- [CLD-v2.1.2] fix anthem network names ppo
- [CLD-v2.1.2] Gross charges: enforce >=5 min distinct values within CBSA/State to use median
- [CLD-v2.1.2] Drugs Prof vs Inst hierarchy
- [CLD-v2.1.2] prod_combined_abridged: add EIN
- [CLD-v2.1.2] Historic Medicare Rates for Longitudinal Runs
"""
"""
# [CLD-v2.1.2] tmp medicare ref - fixed tables
# [CLD-v2.1.2] provider_adjusted only for medicare benchmark geo CTEs
- QA IPPS
- QA OPPS
- QA ASC
"""
df = pd.read_sql(f"""
WITH
samples AS (
SELECT
provider_id,
billing_code,
s.state,
b.cbsa,
cbsa_avg_medicare_rate,
state_avg_medicare_rate,
national_avg_medicare_rate
FROM tq_dev.internal_dev_csong_cld_dev.tmp_int_benchmarks_2025_07 b
JOIN tq_dev.internal_dev_csong_cld_dev.tmp_ref_provider_spine_2025_07 s
USING (provider_id)
WHERE b.provider_type LIKE '%Hospital%'
AND bill_type = 'Inpatient'
AND billing_code_type = 'MS-DRG'
ORDER BY RANDOM()
LIMIT 1000
),
checks AS (
SELECT DISTINCT
p.msdrg,
s.provider_state,
s.provider_cbsa_code,
AVG(p.total_payment) OVER (
PARTITION BY p.msdrg, s.provider_cbsa_code
) AS cbsa_avg_medicare_rate,
AVG(p.total_payment) OVER (
PARTITION BY p.msdrg, s.provider_state
) AS state_avg_medicare_rate,
AVG(p.total_payment) OVER (
PARTITION BY p.msdrg
) AS national_avg_medicare_rate
FROM tq_dev.internal_dev_csong_sandbox.ipps_reference_pricing p
JOIN tq_production.spines.spines_provider s
ON p.provider_id = s.provider_id
WHERE p.pricing_type = 'provider_adjusted'
AND p.is_latest_start_effective_date = true
AND p.pricing_priority = 1
AND p.provider_id IN (
SELECT provider_id
FROM tq_dev.internal_dev_csong_cld_dev.tmp_ref_provider_spine_2025_07
)
)
SELECT
samples.*,
checks.cbsa_avg_medicare_rate,
checks.state_avg_medicare_rate,
checks.national_avg_medicare_rate
FROM samples
JOIN checks
ON samples.billing_code = checks.msdrg
AND samples.state = checks.provider_state
AND samples.cbsa = checks.provider_cbsa_code
""", con=trino_conn)
df
df = pd.read_sql(f"""
WITH
samples AS (
SELECT
provider_id,
billing_code,
s.state,
b.cbsa,
cbsa_avg_medicare_rate,
state_avg_medicare_rate,
national_avg_medicare_rate
FROM tq_dev.internal_dev_csong_cld_dev.tmp_int_benchmarks_2025_07 b
JOIN tq_dev.internal_dev_csong_cld_dev.tmp_ref_provider_spine_2025_07 s
USING (provider_id)
WHERE b.provider_type LIKE '%Hospital%'
AND bill_type = 'Outpatient'
AND billing_code_type = 'HCPCS'
ORDER BY RANDOM()
LIMIT 1000
),
checks AS (
SELECT DISTINCT
p.hcpcs,
s.provider_state,
s.provider_cbsa_code,
AVG(p.rate) OVER (
PARTITION BY p.hcpcs, s.provider_cbsa_code
) AS cbsa_avg_medicare_rate,
AVG(p.rate) OVER (
PARTITION BY p.hcpcs, s.provider_state
) AS state_avg_medicare_rate,
AVG(p.rate) OVER (
PARTITION BY p.hcpcs
) AS national_avg_medicare_rate
FROM tq_dev.internal_dev_csong_sandbox.opps_reference_pricing p
JOIN tq_production.spines.spines_provider s
ON p.provider_id = s.provider_id
WHERE p.pricing_type = 'provider_adjusted'
AND p.is_latest_start_effective_date = true
AND p.pricing_priority = 1
AND p.provider_id IN (
SELECT provider_id
FROM tq_dev.internal_dev_csong_cld_dev.tmp_ref_provider_spine_2025_07
)
)
SELECT
samples.*,
checks.cbsa_avg_medicare_rate,
checks.state_avg_medicare_rate,
checks.national_avg_medicare_rate
FROM samples
JOIN checks
ON samples.billing_code = checks.hcpcs
AND samples.state = checks.provider_state
AND samples.cbsa = checks.provider_cbsa_code
""", con=trino_conn)
df
df = pd.read_sql(f"""
WITH
samples AS (
SELECT
provider_id,
billing_code,
s.state,
b.cbsa,
cbsa_avg_medicare_rate,
state_avg_medicare_rate,
national_avg_medicare_rate
FROM tq_dev.internal_dev_csong_cld_dev.tmp_int_benchmarks_2025_07 b
JOIN tq_dev.internal_dev_csong_cld_dev.tmp_ref_provider_spine_2025_07 s
USING (provider_id)
WHERE b.provider_type LIKE '%Hospital%'
AND bill_type = 'Outpatient'
AND billing_code_type = 'HCPCS'
ORDER BY RANDOM()
LIMIT 1000
),
checks AS (
SELECT DISTINCT
p.hcpcs,
s.provider_state,
s.provider_cbsa_code,
AVG(p.rate) OVER (
PARTITION BY p.hcpcs, s.provider_cbsa_code
) AS cbsa_avg_medicare_rate,
AVG(p.rate) OVER (
PARTITION BY p.hcpcs, s.provider_state
) AS state_avg_medicare_rate,
AVG(p.rate) OVER (
PARTITION BY p.hcpcs
) AS national_avg_medicare_rate
FROM tq_dev.internal_dev_csong_sandbox.opps_reference_pricing p
JOIN tq_production.spines.spines_provider s
ON p.provider_id = s.provider_id
WHERE p.pricing_type = 'provider_adjusted'
AND p.is_latest_start_effective_date = true
AND p.pricing_priority = 1
AND p.provider_id IN (
SELECT provider_id
FROM tq_dev.internal_dev_csong_cld_dev.tmp_ref_provider_spine_2025_07
)
)
SELECT
samples.*,
checks.cbsa_avg_medicare_rate,
checks.state_avg_medicare_rate,
checks.national_avg_medicare_rate
FROM samples
JOIN checks
ON samples.billing_code = checks.hcpcs
AND samples.state = checks.provider_state
AND samples.cbsa = checks.provider_cbsa_code
""", con=trino_conn)
df
pd.read_sql(f"""
SELECT health_system_name
FROM tq_dev.internal_dev_csong_cld_v2_1_2.tmp_ref_provider_spine_2025_07
WHERE provider_id = '27830'
LIMIT 100
""", con=trino_conn)
df_old = pd.read_sql(f"""
select distinct provider_type,asp_payment_limit,medicare_rate,medicare_reference_source,count(*)
from tq_dev.internal_dev_csong_cld_v2_1_0.prod_combined_abridged
where is_drug_code = true
and bill_type = 'Outpatient'
and billing_code = 'J1569'
group by 1,2,3,4
order by 1
""", con=trino_conn)
df_new = pd.read_sql(f"""
SELECT provider_type,asp_payment_limit,medicare_rate,medicare_reference_source,count(*)
FROM tq_dev.internal_dev_csong_cld_v2_1_2.tmp_int_benchmarks_2025_06
WHERE billing_code = 'J1569'
AND bill_type = 'Outpatient'
GROUP BY 1,2,3,4
""", con=trino_conn)
"""
# [CLD-v2.1.2] Professional Fees from Hospital MRF Data mapped to Singly Affiliated Physician Groups
- We have PG <> HCO mappings for Sanford, but not for the HCOs that have Professional rates
posted in Hospital Rates data
"""
pd.read_sql(f"""
SELECT health_system_name, count(*) as n
FROM tq_dev.internal_dev_csong_cld_v2_1_2.tmp_int_combined_2025_07
WHERE provider_type = 'Physician Group'
AND canonical_rate_score > 1
AND canonical_rate_source = 'hospital'
GROUP BY 1
LIMIT 10
""", con=trino_conn)
pd.read_sql(f"""
SELECT COUNT(*)
FROM tq_dev.internal_dev_csong_cld_dev.tmp_raw_mrf_hospital_rates_2025_07
WHERE payer_id = '42'
AND network_id = -6293294949651930973
LIMIT 10
""", con=trino_conn)
pd.read_sql(f"""
SELECT ein
FROM tq_dev.internal_dev_csong_cld_v2_1_2.prod_combined_abridged
LIMIT 100
""", con=trino_conn)
pd.read_sql(f"""
SELECT *
FROM tq_dev.internal_dev_csong_cld_v2_1_2.prod_combined_abridged
LIMIT 100
""", con=trino_conn)