Skip to main content
Version: 2.2

v2.1.1

# %%
from IPython import get_ipython
from jinja2 import Template

if get_ipython() is not None:
get_ipython().run_line_magic("load_ext", "autoreload")
get_ipython().run_line_magic("autoreload", "2")
from dotenv import load_dotenv
import pandas as pd
from tqds.sql import db, sql

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid', rc={'figure.figsize': (14, 12)})

pd.options.display.max_columns = None
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = None
pd.set_option('display.float_format', lambda x: '%.3f' % x)
trino_conn = db.TrinoClient().conn
redshift_conn = db.RedshiftClient().conn
query = sql.Query()

# %% [markdown]
"""
TOC
- [CLD-v2.1.1] reverts surgery service line exception
- [CLD-v2.1.1] reference data table updates
- [CLD-v2.1.1] add RC and CSTM-ALL to original billing code
- [CLD-v2.1.1] Physician Groups: Filter using NPI in addition to EIN (Anthem Fix)
- [CLD-v2.1.1] OPG fix - payer network level

- [CLD-v2.1.1] update provider name from provider short name
- [CLD-v2.1.1] regence hospital network names fix
"""


# %%
# [CLD-v2.1.1] reverts surgery service line exception
pd.read_sql(f"""
SELECT distinct billing_code, service_line, is_surg_code
FROM tq_dev.internal_dev_csong_cld_v2_1_1.prod_combined_abridged
WHERE is_surg_code = True
LIMIT 1000
""", con=trino_conn)

# %%
# [CLD-v2.1.1] regence hospital network names fix
pd.read_sql(f"""
SELECT provider_id, payer_id, count(*)
FROM tq_dev.internal_dev_csong_cld_v2_1_1.prod_combined_abridged
WHERE provider_id IN ('6018', '6000', '6013', '6015')
AND payer_id = '61'
AND canonical_rate_score > 1
AND canonical_rate_source = 'hospital'
GROUP BY 1,2
LIMIT 10
""", con=trino_conn)

# %%
# [CLD-v2.1.1] reference data table updates
df = pd.read_sql(f"""
WITH
ranked_samples AS (
SELECT
roid,
payer_id,
medicare_reference_source,
state_avg_medicare_rate,
ROW_NUMBER() OVER (PARTITION BY medicare_reference_source ORDER BY RANDOM()) as rn
FROM tq_dev.internal_dev_csong_cld_v2_1_1.tmp_int_benchmarks_2025_06
),
samples AS (
SELECT roid, payer_id, medicare_reference_source, state_avg_medicare_rate
FROM ranked_samples
WHERE rn <= 100
)
SELECT
COALESCE(s.roid, b.roid) AS roid,
COALESCE(s.payer_id, b.payer_id) AS payer_id,
COALESCE(s.medicare_reference_source, b.medicare_reference_source) AS medicare_reference_source,
COALESCE(s.state_avg_medicare_rate, b.state_avg_medicare_rate) AS state_avg_medicare_rate,
b.state_avg_medicare_rate AS old_state_avg_medicare_rate
FROM samples s
LEFT JOIN tq_dev.internal_dev_csong_cld_v2_1_0.tmp_int_benchmarks_2025_06 b
ON s.roid = b.roid
AND s.payer_id = b.payer_id
""", con=trino_conn)


# %%
sns.scatterplot(
data=df,
x='old_state_avg_medicare_rate',
y='state_avg_medicare_rate',
hue='medicare_reference_source',
alpha=0.7
)


# %%
# [CLD-v2.1.1] add RC and CSTM-ALL to original billing code
df = pd.read_sql(f"""
SELECT distinct canonical_rate_original_billing_codes, canonical_rate_original_billing_code_type
FROM tq_dev.internal_dev_csong_cld_v2_1_1.prod_combined_abridged
WHERE canonical_rate_gross_charge_original_billing_codes IS NOT NULL
AND canonical_rate_type LIKE '%cstm%'
LIMIT 100
""", con=trino_conn)
df

# %%
df = pd.read_sql(f"""
SELECT distinct canonical_rate_original_billing_codes, canonical_rate_original_billing_code_type
FROM tq_dev.internal_dev_csong_cld_v2_1_1.prod_combined_abridged
WHERE canonical_rate_gross_charge_original_billing_codes IS NOT NULL
AND canonical_rate_type LIKE '%rc_family%'
LIMIT 100
""", con=trino_conn)
df


# %%
df = pd.read_sql(f"""
SELECT distinct canonical_rate_original_billing_codes, canonical_rate_original_billing_code_type
FROM tq_dev.internal_dev_csong_cld_v2_1_1.prod_combined_abridged
WHERE canonical_rate_gross_charge_original_billing_codes IS NOT NULL
AND canonical_rate_type LIKE '%rc_per_diem%'
LIMIT 100
""", con=trino_conn)
df

# %%
# [CLD-v2.1.1] OPG fix - payer network level
pd.read_sql(f"""
SELECT
DISTINCT
network_name,
canonical_rate,
canonical_rate_type
FROM tq_dev.internal_dev_csong_cld_v2_1_1.prod_combined_abridged
WHERE
provider_id = '5396' and
payer_id = '7' and
canonical_rate_type LIKE '%opg%'
LIMIT 10
""", con=trino_conn)

# %%
# [CLD-v2.1.1] update provider name from provider short name
df = pd.read_sql(f"""
WITH
samples AS (
SELECT *
FROM (
SELECT
DISTINCT
provider_id,
provider_type,
provider_name
FROM tq_dev.internal_dev_csong_cld_v2_1_1.prod_combined_abridged
WHERE (
provider_type IN ('ASC', 'Imaging Center')
OR
(provider_type LIKE '%Hospital%')
)
AND canonical_rate_score > 0
)
ORDER BY RANDOM()
LIMIT 1000
)
SELECT
s.provider_id,
s.provider_type,
s.provider_name AS cld_provider_name,
p.provider_name
FROM samples s
LEFT JOIN tq_production.spines.spines_provider p
ON s.provider_id = p.provider_id
""", con=trino_conn)
df.loc[df['cld_provider_name'] != df['provider_name'], :]


# %%
# [CLD-v2.1.1] Physician Groups: Filter using NPI in addition to EIN (Anthem Fix)
pd.read_sql(f"""
SELECT
COUNT(DISTINCT provider_id) as num_providers
FROM tq_dev.internal_dev_csong_cld_v2_1_1.prod_combined_abridged
WHERE payer_id = '42'
AND provider_type = 'Physician Group'
AND canonical_rate_score > 1
""", con=trino_conn)