from IPython import get_ipython
from jinja2 import Template
if get_ipython() is not None:
get_ipython().run_line_magic("load_ext", "autoreload")
get_ipython().run_line_magic("autoreload", "2")
from dotenv import load_dotenv
import pandas as pd
from tqds.sql import db, sql
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid', rc={'figure.figsize': (14, 12)})
pd.options.display.max_columns = None
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = None
pd.set_option('display.float_format', lambda x: '%.3f' % x)
trino_conn = db.TrinoClient().conn
redshift_conn = db.RedshiftClient().conn
query = sql.Query()
pd.read_sql(f"""
SELECT distinct service_line
FROM tq_dev.internal_dev_csong_cld_v2_1_0.prod_combined_abridged
WHERE is_surg_code = True
""", con=trino_conn)
pd.read_sql(f"""
SELECT service_line, count(distinct billing_code) as n_codes
FROM tq_dev.internal_dev_csong_cld_v2_1_0.prod_combined_abridged
WHERE billing_code IN (
SELECT billing_code
FROM tq_intermediate.cld_utils.ref_anesthesia_base_units
)
AND canonical_rate_score > 1
GROUP BY 1
LIMIT 1000
""", con=trino_conn)
pd.read_sql(f"""
SELECT canonical_method_formula, ANY_VALUE(canonical_method_params) as canonical_method_params
FROM tq_dev.internal_dev_csong_cld_v2_1_0.prod_combined_abridged
WHERE service_line = 'Anesthesia'
AND canonical_rate_score > 1
GROUP BY 1
LIMIT 20
""", con=trino_conn)
pd.read_sql(f"""
SELECT
CASE
WHEN
(
canonical_rate_type LIKE '%msdrg_mrf_base_rate_mult_cms_weight%'
OR canonical_rate_type LIKE '%msdrg_base_rate_mult_cms_weight%'
)
THEN 1 ELSE 0
END AS base_rate_imputation,
CASE
WHEN
billing_code IN (
'768', '796', '797', '798', '805', '806', '807', '783', '784', '785', '786', '787', '788',
'795', '789', '792', '794', '791', '793', '790')
THEN 1 ELSE 0
END AS obstetrics_maternity_exclusion,
count(distinct billing_code) as n_codes
FROM tq_dev.internal_dev_csong_cld_v2_1_0.prod_combined_abridged
WHERE canonical_rate_score > 1
AND billing_code_type = 'MS-DRG'
GROUP BY 1, 2
LIMIT 10
""", con=trino_conn)
df = pd.read_sql(f"""
SELECT
hospital_case_rate_dollar,
hospital_percent_of_total_billed_charges_dollar,
hospital_fee_schedule_dollar,
hospital_other_dollar,
hospital_null_methodology_dollar,
hospital_case_rate_untransformed_dollar,
hospital_percent_of_total_billed_charges_untransformed_dollar,
hospital_fee_schedule_untransformed_dollar,
hospital_other_untransformed_dollar,
hospital_null_methodology_untransformed_dollar,
hospital_case_rate_allowed_amount,
hospital_percent_of_total_billed_charges_allowed_amount,
hospital_fee_schedule_allowed_amount,
hospital_per_diem_allowed_amount,
hospital_other_allowed_amount,
hospital_null_methodology_allowed_amount,
hospital_case_rate_untransformed_allowed_amount,
hospital_percent_of_total_billed_charges_untransformed_allowed_amount,
hospital_fee_schedule_untransformed_allowed_amount,
hospital_per_diem_untransformed_allowed_amount,
hospital_other_untransformed_allowed_amount,
hospital_null_methodology_untransformed_allowed_amount,
hosp_per_diem_mult_glos,
hospital_case_rate_gc_hosp_perc_to_dol,
hospital_case_rate_gc_hosp_cbsa_perc_to_dol,
hospital_case_rate_gc_hosp_state_perc_to_dol,
hospital_case_rate_gc_komodo_perc_to_dol,
hospital_case_rate_gc_komodo_cbsa_perc_to_dol,
hospital_case_rate_gc_komodo_state_perc_to_dol,
hospital_perc_of_total_billed_charges_gc_hosp_perc_to_dol,
hospital_perc_of_total_billed_charges_gc_hosp_cbsa_perc_to_dol,
hospital_perc_of_total_billed_charges_gc_hosp_state_perc_to_dol,
hospital_perc_of_total_billed_charges_gc_komodo_perc_to_dol,
hospital_perc_of_total_billed_charges_gc_komodo_cbsa_perc_to_dol,
hospital_perc_of_total_billed_charges_gc_komodo_state_perc_to_dol,
hospital_fee_schedule_gc_hosp_perc_to_dol,
hospital_fee_schedule_gc_hosp_cbsa_perc_to_dol,
hospital_fee_schedule_gc_hosp_state_perc_to_dol,
hospital_fee_schedule_gc_komodo_perc_to_dol,
hospital_fee_schedule_gc_komodo_cbsa_perc_to_dol,
hospital_fee_schedule_gc_komodo_state_perc_to_dol,
hospital_other_gc_hosp_perc_to_dol,
hospital_other_gc_hosp_cbsa_perc_to_dol,
hospital_other_gc_hosp_state_perc_to_dol,
hospital_other_gc_komodo_perc_to_dol,
hospital_other_gc_komodo_cbsa_perc_to_dol,
hospital_other_gc_komodo_state_perc_to_dol,
hospital_null_methodology_gc_hosp_perc_to_dol,
hospital_null_methodology_gc_hosp_cbsa_perc_to_dol,
hospital_null_methodology_gc_hosp_state_perc_to_dol,
hospital_null_methodology_gc_komodo_perc_to_dol,
hospital_null_methodology_gc_komodo_cbsa_perc_to_dol,
hospital_null_methodology_gc_komodo_state_perc_to_dol,
hosp_gc_hosp_per_diem_perc_to_dol_mult_glos,
hosp_gc_hosp_cbsa_per_diem_perc_to_dol_mult_glos,
hosp_gc_hosp_state_per_diem_perc_to_dol_mult_glos,
hosp_gc_komodo_per_diem_perc_to_dol_mult_glos,
hosp_gc_komodo_cbsa_per_diem_perc_to_dol_mult_glos,
hosp_gc_komodo_state_per_diem_perc_to_dol_mult_glos,
drug_dosage_std_case_rate_dollar,
drug_dosage_std_percent_of_total_billed_charges_dollar,
drug_dosage_std_fee_schedule_dollar,
drug_dosage_std_other_dollar,
drug_dosage_std_null_methodology_dollar,
drug_dosage_std_case_rate_perc_to_dol,
drug_dosage_std_percent_of_total_billed_charges_perc_to_dol,
drug_dosage_std_fee_schedule_perc_to_dol,
drug_dosage_std_other_perc_to_dol,
drug_dosage_std_null_methodology_perc_to_dol,
payer_negotiated_rate,
payer_derived_rate,
payer_fee_schedule_rate,
payer_negotiated_untransformed_rate,
payer_derived_untransformed_rate,
payer_fee_schedule_untransformed_rate,
payer_gc_hosp_perc_to_dol,
payer_gc_hosp_cbsa_perc_to_dol,
payer_gc_hosp_state_perc_to_dol,
payer_gc_komodo_perc_to_dol,
payer_gc_komodo_cbsa_perc_to_dol,
payer_gc_komodo_state_perc_to_dol,
payer_negotiated_rate_anesthesia_cf,
payer_derived_rate_anesthesia_cf,
payer_fee_schedule_rate_anesthesia_cf,
payer_per_diem_mult_glos,
msdrg_mrf_base_rate_mult_cms_weight,
msdrg_base_rate_mult_cms_weight,
rc_family_gc_hosp_perc_to_dol,
rc_family_gc_komodo_perc_to_dol,
rc_family_gc_komodo_cbsa_perc_to_dol,
rc_family_gc_hosp_cbsa_perc_to_dol,
rc_family_gc_hosp_state_perc_to_dol,
rc_family_gc_komodo_state_perc_to_dol,
rc_family_per_diem_mult_glos,
msdrg_gc_hosp_base_perc_to_dol,
msdrg_gc_komodo_base_perc_to_dol,
msdrg_gc_hosp_cbsa_base_perc_to_dol,
msdrg_gc_komodo_cbsa_base_perc_to_dol,
msdrg_gc_hosp_state_base_perc_to_dol,
msdrg_gc_komodo_state_base_perc_to_dol,
rc_global_gc_hosp_base_perc_to_dol,
rc_global_gc_hosp_cbsa_base_perc_to_dol,
rc_global_gc_hosp_state_base_perc_to_dol,
rc_global_gc_komodo_base_perc_to_dol,
rc_global_gc_komodo_cbsa_base_perc_to_dol,
rc_global_gc_komodo_state_base_perc_to_dol,
rc_global_per_diem_mult_glos,
opg_base_rate,
op_gc_komodo_base_perc_to_dol,
op_gc_komodo_cbsa_base_perc_to_dol,
op_gc_komodo_state_base_perc_to_dol,
op_gc_hosp_base_perc_to_dol,
op_gc_hosp_cbsa_base_perc_to_dol,
op_gc_hosp_state_base_perc_to_dol,
cstm_negotiated_rate,
cstm_gc_hosp_perc_to_dol,
cstm_gc_hosp_cbsa_perc_to_dol,
cstm_gc_hosp_state_perc_to_dol,
cstm_gc_komodo_perc_to_dol,
cstm_gc_komodo_cbsa_perc_to_dol,
cstm_gc_komodo_state_perc_to_dol,
cstm_per_diem_mult_glos,
msdrg_mrf_base_rate_mult_cms_weight,
lower_bound,
upper_bound,
canonical_rate,
canonical_rate_type,
canonical_rate_score
FROM tq_dev.internal_dev_csong_cld_v2_1_0.prod_combined_all
WHERE is_drug_code = True
AND canonical_rate_score > 1
AND best_payer_rate IS NOT NULL
AND best_hospital_rate IS NOT NULL
AND canonical_rate_source = 'hospital'
AND canonical_rate_type LIKE '%_perc_to_dol%'
ORDER BY RANDOM()
LIMIT 50
""", con=trino_conn).T
for i in range(df.shape[1]):
print(f"--- Column {i+1} ---")
print(df.loc[df[i].notnull(), i].to_markdown())
print("\n\n")
df = pd.read_sql(f"""
SELECT canonical_rate, canonical_rate_percent_of_state_avg_medicare
FROM tq_dev.internal_dev_csong_cld_v2_1_0.prod_combined_abridged
WHERE canonical_rate_score = 5
AND billing_code_type = 'MS-DRG'
ORDER BY RANDOM()
LIMIT 50000
""", con=trino_conn)
df['canonical_rate_percent_of_state_avg_medicare'].describe()
check = pd.read_sql(f"""
WITH
samples AS (
SELECT
canonical_rate,
canonical_rate_id,
billing_code,
service_line,
is_drug_code,
payer_id,
provider_id,
billing_code
FROM tq_dev.internal_dev_csong_cld_v2_1_0.tmp_int_combined_no_whisp_2025_06
WHERE provider_name LIKE '%Stanford%'
AND payer_name LIKE '%California%'
AND canonical_rate_source = 'payer'
AND canonical_rate_score > 1
AND is_drug_code = True
LIMIT 10
)
SELECT *
FROM tq_production.public_2025_06.core_rates
WHERE id IN (SELECT canonical_rate_id FROM samples)
AND payer_id IN (SELECT payer_id FROM samples)
""", con=trino_conn)
print(check['billing_class'].value_counts())
pd.read_sql(f"""
SELECT provider_id, count(distinct billing_code) as n_codes
FROM tq_dev.internal_dev_csong_cld_v2_1_0.tmp_int_combined_no_whisp_2025_06
WHERE provider_name LIKE '%Stanford%'
AND payer_name LIKE '%California%'
AND canonical_rate_source = 'payer'
AND canonical_rate_score > 1
AND is_drug_code = True
GROUP BY 1
""", con=trino_conn)
res = pd.read_sql(f"""
SELECT
payer_id,
payer_name,
count(*) as n_possible,
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) as n_non_outlier,
SUM(
CASE
WHEN canonical_rate_score > 0 THEN 1
ELSE 0
END
) as n_rates
FROM tq_dev.internal_dev_csong_cld_v2_1_0.tmp_int_combined_no_whisp_2025_06
WHERE payer_id IN (
'151',
'772',
'37',
'633',
'774',
'813',
'849',
'272',
'552',
'791',
'768',
'624',
'779',
'305',
'728',
'784',
'699',
'356'
-- '108', First Health
-- '638', SummaCare
-- '97', Alliant Health Plans
-- '971', AmeriHealth
-- '27', Alliant Health Plans
-- '567', Oscar
-- '277', Molina
-- '121', HealthPartners
-- '541', Medica
-- '504', Health Net
)
GROUP BY 1, 2
""", con=trino_conn)
res['coverage'] = res['n_rates'] / res['n_possible']
res
test = pd.read_sql(f"""
SELECT DISTINCT payer_id, provider_id, network_id, provider_network_msdrg_base_rate
FROM tq_dev.internal_dev_csong_cld_v2_1_0.prod_combined_abridged
WHERE provider_network_msdrg_base_rate IS NOT NULL
AND canonical_rate_score > 1
AND canonical_rate_score != 5
AND bill_type = 'Inpatient'
LIMIT 1000
""", con=trino_conn)
sns.histplot(test['provider_network_msdrg_base_rate'], kde=True, bins=100)
test['provider_network_msdrg_base_rate'].describe(percentiles=[0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99])
pd.read_sql(f"""
SELECT DISTINCT billing_code
FROM tq_dev.internal_dev_csong_cld_v2_1_0.tmp_int_combined_2024_11
LIMIT 1000
""", con=trino_conn)