Skip to main content
Version: 2.1

v2.1.4

# %%
from IPython import get_ipython
from jinja2 import Template

if get_ipython() is not None:
get_ipython().run_line_magic("load_ext", "autoreload")
get_ipython().run_line_magic("autoreload", "2")
from dotenv import load_dotenv
import pandas as pd
from tqds.sql import db, sql

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid', rc={'figure.figsize': (14, 12)})

pd.options.display.max_columns = None
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = None
pd.set_option('display.float_format', lambda x: '%.3f' % x)
trino_conn = db.TrinoClient().conn
redshift_conn = db.RedshiftClient().conn
query = sql.Query()

# %% [markdown]
"""
Table of Contents:

- [CLD-v2.1.4] payer names from payer spines
- [CLD-v2.1.4] filter providers to US states + DC
- [CLD-v2.1.4] NYC-DOH
- [CLD-v2.1.4] canonical_gross_charge geo-average CCR adjustment
- [CLD-v2.1.4] add lab codes for hospitals
- [CLD-v2.1.4] add lab codes to PGs
- [CLD-v2.1.4] ASC codeset
- [CLD-v2.1.4] nycdoh essential plans
- [CLD-v2.1.4] add BCBS wyoming
- [CLD-v2.1.4] prevent accuracy scores bumping up integer
"""


# %% [markdown]
"""
- [CLD-v2.1.4] payer names from payer spines
"""

# %%
pd.read_sql(f"""
SELECT payer_id, payer_name
FROM tq_dev.internal_dev_csong_cld_v2_1_4.tmp_ref_payer_spine_2025_07
WHERE payer_name NOT IN (
SELECT payer_name
FROM tq_dev.internal_dev_csong_cld_v2_1_3.tmp_ref_payer_spine_2025_07
)
""", con=trino_conn)

# %%
pd.read_sql(f"""
SELECT DISTINCT payer_id, payer_name
FROM tq_dev.internal_dev_csong_cld_v2_1_4.prod_combined_abridged
WHERE payer_name NOT IN (
SELECT payer_name
FROM tq_dev.internal_dev_csong_cld_v2_1_3.prod_combined_abridged
)
""", con=trino_conn)


# %%
pd.read_sql(f"""
SELECT distinct payer_id, payer_name
FROM tq_dev.internal_dev_csong_cld_v2_1_4.prod_combined_abridged
WHERE payer_id = '720'
LIMIT 5
""", con=trino_conn)

# %% [markdown]
"""
- [CLD-v2.1.4] filter providers to US states + DC
"""

# %%
df = pd.read_sql(f"""
SELECT distinct state
FROM tq_dev.internal_dev_csong_cld_v2_1_4.prod_combined_abridged
ORDER BY 1
""", con=trino_conn)
len(df)


# %% [markdown]
"""
- [CLD-v2.1.4] NYC-DOH

QA'd in separate script in this folder
"""


# %% [markdown]
"""
- [CLD-v2.1.4] canonical_gross_charge geo-average CCR adjustment
"""

# %%
df = pd.read_sql(f"""
WITH
samples AS (
SELECT
canonical_gross_charge_type,
ANY_VALUE(roid) as roid
FROM tq_dev.internal_dev_csong_cld_v2_1_4.prod_combined_all
JOIN (
SELECT payer_id, roid, canonical_transformation_method
FROM tq_dev.internal_dev_csong_cld_v2_1_4.prod_combined_abridged
) t USING (payer_id, roid)
WHERE canonical_gross_charge IS NOT NULL
AND canonical_transformation_method = 'Percentage to Dollar'
GROUP BY 1
)
SELECT
canonical_gross_charge,
canonical_gross_charge_type,
canonical_gross_charge_source,
gross_charge_cbsa_provider_adjustment,
gross_charge_state_provider_adjustment,
canonical_rate,
canonical_transformation_method,
canonical_method_params
FROM tq_dev.internal_dev_csong_cld_v2_1_4.prod_combined_all
JOIN (
SELECT payer_id, roid, canonical_transformation_method
FROM tq_dev.internal_dev_csong_cld_v2_1_4.prod_combined_abridged
) t USING (payer_id, roid)
WHERE canonical_gross_charge IS NOT NULL
AND canonical_transformation_method = 'Percentage to Dollar'
AND roid IN (SELECT roid FROM samples)
LIMIT 100
""", con=trino_conn)
df
print(df.to_markdown(index=False))

# %% [markdown]
"""
- [CLD-v2.1.4] add lab codes for hospitals
- [CLD-v2.1.4] add lab codes to PGs
"""

# %%
df = pd.read_sql(f"""
WITH
labs AS (
SELECT DISTINCT billing_code
FROM tq_intermediate.cld_utils.lab_template_codes_2025_10_02
)
SELECT
provider_type,
count(distinct billing_code) AS n_codes
FROM tq_dev.internal_dev_csong_cld_v2_1_4.prod_combined_abridged
WHERE billing_code IN (
SELECT billing_code FROM labs
)
GROUP BY 1
""", con=trino_conn)
df


# %% [markdown]
"""
- [CLD-v2.1.4] ASC codeset
"""

# %%
df = pd.read_sql(f"""
WITH
asc_ref AS (
SELECT DISTINCT hcpcs
FROM tq_production.reference_external.asc_reference_pricing
WHERE is_latest_start_effective_date = True
),
labs AS (
SELECT DISTINCT billing_code
FROM tq_intermediate.cld_utils.lab_template_codes_2025_10_02
)
SELECT
DISTINCT
billing_code,
billing_code_type,
CASE
WHEN billing_code IN (SELECT hcpcs FROM asc_ref) THEN 'ASC'
ELSE 'Non-ASC'
END AS asc,
CASE
WHEN billing_code IN (SELECT billing_code FROM labs) THEN 'Lab'
ELSE 'Non-Lab'
END AS lab
FROM tq_dev.internal_dev_csong_cld_v2_1_4.prod_combined_abridged
WHERE provider_type = 'ASC'
""", con=trino_conn)
df

# %%
df['asc'].value_counts()

# %%
df['lab'].value_counts()

# %% [markdown]
"""
- [CLD-v2.1.4] nycdoh essential plans
"""

# %%
df = pd.read_sql(f"""
SELECT
payer_name,
canonical_rate_source,
canonical_rate_score,
count(*) as n,
count(distinct provider_id || ssp_id) as n_providers
FROM tq_dev.internal_dev_csong_cld_v2_1_4.prod_combined_abridged
JOIN (
SELECT DISTINCT ssp_id, billing_code
FROM tq_dev.internal_dev_csong_consumer_pricing_service.pricing_service_rates_2025_07_v3
)
USING (billing_code)
WHERE network_id IN (
5732238209160982574, -- Fidelis
5697578418236370479, -- Healthfirst NY
4485705121095584470 -- MetroPlus Health
)
AND payer_id IN (
'723', -- Fidelis
'796', -- Healthfirst NY
'958' -- MetroPlus Health
)
AND cbsa = '35620'
GROUP BY 1,2,3
""", con=trino_conn)


# %%
(
df
.pivot(
index=['canonical_rate_source', 'canonical_rate_score'],
columns='payer_name',
values='n'
)
.fillna(0)
.astype(int)
)



# %% [markdown]
"""
BCBS Wyoming
"""
# %%
pd.read_sql(f"""
SELECT canonical_rate_source, canonical_rate_score, count(*)
FROM tq_dev.internal_dev_csong_cld_v2_1_4.tmp_int_combined_2025_07
WHERE payer_id = '464'
GROUP BY 1, 2
LIMIT 10
""", con=trino_conn)


# %% [markdown]
"""
Prevent accuracy scores bumping up integer
"""

# %%
df = pd.read_sql(f"""
SELECT canonical_rate_source, count(*) as n
FROM tq_dev.internal_dev_csong_cld_v2_1_4.prod_combined_abridged
WHERE canonical_rate_score = 5
AND (best_payer_rate IS NULL or best_hospital_rate IS NULL)
GROUP BY 1
ORDER BY 2 DESC
""", con=trino_conn)

# %%
df
# %%