Skip to main content
Version: Canary - 2.3 🚧

v2.3.0

Prod Features​

# %%
from IPython import get_ipython
from jinja2 import Template

if get_ipython() is not None:
get_ipython().run_line_magic("load_ext", "autoreload")
get_ipython().run_line_magic("autoreload", "2")
from dotenv import load_dotenv
import pandas as pd
from tqds.sql import db, sql

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid', rc={'figure.figsize': (14, 12)})

pd.options.display.max_columns = None
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = None
pd.set_option('display.float_format', lambda x: '%.3f' % x)
trino_conn = db.TrinoClient().conn
redshift_conn = db.RedshiftClient().conn
query = sql.Query()



# %%
# NEW FEATURES:
# [CLD-v2.3.0] anesthesia accuracy bounds
# [CLD-v2.3.0] drug accuracy scores
# [CLD-v2.3.0] exchange networks
# [CLD-v2.3.0] OPG base rate update params
# [CLD-v2.3.0] Update to Labs Spines
# [CLD-v2.3.0] add 21 place of service code
# [CLD-v2.3.0] more bh plan bridge exceptions
# [CLD-v2.3.0] CAR-T Plausibility
# [CLD-v2.3.0] OP outlier lower bound
# [CLD-v2.3.0] PG accuracy logic
# [CLD-v2.3.0] update analyze simulation configurations
# [CLD-v2.3.0] external prod clear rates
# [CLD-v2.3.0] add infusion centers

# other misc:
# plan bridge was using network_name Preferred, but network spine using network_name PPO


# %%
# [CLD-v2.3.0] anesthesia accuracy bounds

# %%
df = pd.read_sql(f"""
SELECT
provider_type,
canonical_rate_score,
COUNT(*) as n_rates,
MIN(canonical_rate_percent_of_medicare) as min_pct_medicare,
MAX(canonical_rate_percent_of_medicare) as max_pct_medicare,
MAX(canonical_rate) as max_canonical_rate
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE billing_code IN (
SELECT hcpcs
FROM tq_production.reference_internal.anesthesia_reference_pricing
)
GROUP BY 1, 2
ORDER BY 1, 2
""", con=trino_conn)
print(df.to_markdown(index=False))

"""
| provider_type | canonical_rate_score | n_rates | min_pct_medicare | max_pct_medicare | max_canonical_rate |
|:----------------|-----------------------:|----------:|-------------------:|-------------------:|---------------------:|
| Imaging Center | 0 | 235337 | nan | nan | nan |
| Imaging Center | 1 | 120 | 0.5 | 90.09 | 1878.33 |
| Imaging Center | 4 | 4201 | 0.83 | 29.71 | 619.4 |
| Physician Group | 0 | 123219406 | nan | nan | nan |
| Physician Group | 1 | 1132766 | 0 | 746405 | 1.55402e+07 |
| Physician Group | 2 | 6623106 | 3.5 | 10.46 | 200 |
| Physician Group | 3 | 9644892 | 1 | 3.5 | 96.75 |
| Physician Group | 4 | 110304 | 3.51 | 5.37 | 111.85 |
| Physician Group | 5 | 15174 | 1 | 3.48 | 97 |
"""

# %%
# [CLD-v2.3.0] drug accuracy scores

# %%
df = pd.read_sql(f"""
SELECT
canonical_rate_source,
canonical_rate_score,
COUNT(*) as n_rates,
MIN(canonical_rate_percent_of_medicare) as min_canonical_rate,
MAX(canonical_rate_percent_of_medicare) as max_canonical_rate
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE is_drug_code = True
GROUP BY 1,2
ORDER BY 1,2
LIMIT 100
""", con=trino_conn)
print(df.to_markdown(index=False))

"""
| canonical_rate_source | canonical_rate_score | n_rates | min_canonical_rate | max_canonical_rate |
|:------------------------|-----------------------:|----------:|---------------------:|---------------------:|
| hospital | 1 | 4451328 | 0 | inf |
| hospital | 2 | 180508 | 0.8 | 0.9 |
| hospital | 3 | 1790825 | 0.9 | 4 |
| hospital | 4 | 5255449 | 1.25 | 3.5 |
| hospital | 5 | 787512 | 1.2 | 4 |
| imputation | 1 | 4018773 | 0 | 362871 |
| imputation | 2 | 4353234 | 0.8 | 4 |
| payer | 1 | 7512310 | 0 | inf |
| payer | 2 | 2389249 | 0.8 | 10 |
| payer | 3 | 17208127 | 0.9 | 4 |
| payer | 4 | 8379973 | 1.25 | 3.5 |
| payer | 5 | 289458 | 1.2 | 4 |
"""

# %%
# [CLD-v2.3.0] exchange networks

# %%
pd.read_sql(f"""
SELECT
provider_type,
COUNT(DISTINCT provider_id) as n_providers
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE network_class = 'Exchange'
AND canonical_rate_score > 1
GROUP BY 1
LIMIT 10
""", con=trino_conn)

# %%
df = pd.read_sql(f"""
SELECT
payer_id,
payer_name,
COUNT(DISTINCT provider_id) as n_providers,
COUNT(*) as n_rates
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE network_class = 'Exchange'
AND canonical_rate_score > 1
GROUP BY 1, 2
ORDER BY n_providers DESC
""", con=trino_conn)
print(df.to_markdown(index=False))

# %%
df = pd.read_sql(f"""
SELECT
payer_id,
payer_name,
COUNT(DISTINCT provider_id) as n_providers,
COUNT(*) as n_rates
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE network_class = 'Exchange'
AND canonical_rate_score > 1
AND state IN ('LA', 'NC', 'PA')
GROUP BY 1, 2
ORDER BY n_providers DESC
""", con=trino_conn)
print(df.to_markdown(index=False))


# %%
# [CLD-v2.3.0] OPG base rate update params

# %%
pd.read_sql(f"""
SELECT
MIN(1.00 * opg_n_freq / opg_n_total) as min_opg_freq_per_total,
MIN(opg_n_total) as min_opg_n_total
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_all
WHERE opg_base_rate IS NOT NULL
AND (opg_n_freq / opg_n_total_possible) < 0.5
LIMIT 10
""", con=trino_conn)

# %%
pd.read_sql(f"""
SELECT
MIN(1.00 * opg_n_freq / opg_n_total_possible) as min_opg_freq_per_total_possible
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_all
WHERE opg_base_rate IS NOT NULL
AND (
(1.00 * opg_n_freq / opg_n_total < 0.8)
OR opg_n_total < 100
)
LIMIT 10
""", con=trino_conn)

# %%
df = pd.read_sql(f"""
WITH
new AS (
SELECT COUNT(*) as n_rates
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_all
WHERE opg_base_rate IS NOT NULL
),
old AS (
SELECT COUNT(*) as n_rates
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_all
WHERE opg_base_rate IS NOT NULL
)
SELECT
new.n_rates as new_n_rates,
old.n_rates as old_n_rates
FROM new
JOIN old ON TRUE
""", con=trino_conn)
print(df.to_markdown(index=False, floatfmt=".0f"))

"""
| new_n_rates | old_n_rates |
|--------------:|--------------:|
| 66680326 | 90324394 |
"""
66680326 / 90324394

# %%
df = pd.read_sql(f"""
WITH
new AS (
SELECT canonical_rate_type, COUNT(*) as n_rates
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE canonical_rate_type LIKE '%opg_%'
AND canonical_rate_score > 1
GROUP BY 1
),
old AS (
SELECT canonical_rate_type, COUNT(*) as n_rates
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE canonical_rate_type LIKE '%opg_%'
AND canonical_rate_score > 1
GROUP BY 1
)
SELECT
new.canonical_rate_type,
new.n_rates as new_n_rates,
old.n_rates as old_n_rates
FROM new
JOIN old ON new.canonical_rate_type = old.canonical_rate_type
""", con=trino_conn)
print(df.to_markdown(index=False, floatfmt=".0f"))

# | canonical_rate_type | new_n_rates | old_n_rates |
# |:----------------------|--------------:|--------------:|
# | impute: opg_base_rate | 29033871 | 50744655 |

# %%
# [CLD-v2.3.0] Update to Labs Spines
# check for national coverage

# %%
df = pd.read_sql(f"""
SELECT
provider_id,
provider_name,
CASE
WHEN provider_id IN (
SELECT provider_id
FROM tq_dev.internal_dev_csong_cld_v2_3_0.tmp_ref_provider_spine_2025_09
WHERE national_payer_coverage = True
)
THEN 'national_coverage'
ELSE 'non_national_coverage'
END as coverage_type,
COUNT(DISTINCT payer_id) as n_payers
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE canonical_rate_score > 1
GROUP BY 1, 2, 3
""", con=trino_conn)
df

# %%
df = pd.read_sql(f"""
SELECT
provider_id,
provider_name,
COUNT(DISTINCT payer_id) as n_payers,
COUNT(DISTINCT billing_code) as n_billing_codes,
COUNT(
DISTINCT
CASE
WHEN canonical_rate_score > 1 THEN payer_id
END
) as n_payers_w_rates,
COUNT(
DISTINCT
CASE
WHEN canonical_rate_score > 1 THEN billing_code
END
) as n_billing_codes_w_rates
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_all
WHERE national_payer_coverage = True
AND is_lab_code = True
AND canonical_rate_score > 1
GROUP BY 1, 2
""", con=trino_conn)
df

# %%
# [CLD-v2.3.0] add 21 place of service code

# %%
pd.read_sql(f"""
SELECT canonical_rate_score, count(*)
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged p
WHERE payer_id = '42'
AND billing_code like '00%'
and NOT regexp_like(billing_code, '[A-Za-z]')
AND canonical_rate IS NOT NULL
AND provider_name = 'Emory Specialty Associates, Llc'
AND provider_type = 'Physician Group'
GROUP BY 1
""", con=trino_conn)

# %%
# [CLD-v2.3.0] more bh plan bridge exceptions

# %%
pd.read_sql(f"""
select *
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
where billing_code = '927'
and billing_code_type = 'MS-DRG'
and provider_name = 'Jackson South Medical Center'
and payer_id = '76'
""", con=trino_conn)

# %%
test = pd.read_sql(f"""
SELECT *
FROM tq_production.hospital_data.hospital_rates
WHERE provider_id = '7040'
AND billing_code = '927'
AND billing_code_type = 'MS-DRG'
AND negotiated_dollar = 274466.07
""", con=trino_conn)
test.T
# CIGNA HMO/PPO - ALL OTHER PLANS


# %%
# [CLD-v2.3.0] CAR-T Plausibility

# %%
df = pd.read_sql(f"""
SELECT
COUNT(*) as n_rates,
COUNT(DISTINCT provider_id) as n_providers
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE billing_code = '018'
AND billing_code_type = 'MS-DRG'
AND canonical_rate_score > 1
LIMIT 10
""", con=trino_conn)
df

# %%
df = pd.read_sql(f"""
SELECT
billing_code,
min(canonical_rate_percent_of_cbsa_avg_medicare) as min_pct,
max(canonical_rate_percent_of_cbsa_avg_medicare) as max_pct,
avg(canonical_rate_percent_of_cbsa_avg_medicare) as avg_pct,
min(canonical_rate) as min_rate,
max(canonical_rate) as max_rate,
avg(canonical_rate) as avg_rate
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE billing_code = '018'
AND billing_code_type = 'MS-DRG'
AND canonical_rate_score > 1
GROUP BY 1
""", con=trino_conn)
print(df.T.to_markdown(floatfmt=".3f"))


# %%
# [CLD-v2.3.0] OP outlier lower bound

# %%
pd.read_sql(f"""
SELECT
min(canonical_rate_percent_of_medicare),
min(canonical_rate_percent_of_cbsa_avg_medicare),
min(canonical_rate_percent_of_state_avg_medicare)
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE bill_type = 'Outpatient'
AND canonical_rate_score > 1
AND is_drug_code = False
AND is_lab_code = False
AND billing_code NOT IN (
SELECT hcpcs
FROM tq_production.reference_internal.anesthesia_reference_pricing
)
AND provider_type = 'Hospital'
""", con=trino_conn)

# %%
# [CLD-v2.3.0] PG accuracy logic

# %%
pd.read_sql(f"""
SELECT
canonical_rate_score,
MIN(canonical_rate_percent_of_medicare) as min_pct,
MAX(canonical_rate_percent_of_medicare) as max_pct,
MIN(canonical_rate_percent_of_cbsa_avg_medicare) as min_cbsa_pct,
MAX(canonical_rate_percent_of_cbsa_avg_medicare) as max_cbsa_pct
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE provider_type = 'Physician Group'
AND is_lab_code = False
AND is_drug_code = False
AND billing_code NOT IN (
SELECT hcpcs
FROM tq_production.reference_internal.anesthesia_reference_pricing
)
GROUP BY 1
ORDER BY 1 DESC
""", con=trino_conn)

# %%
# [CLD-v2.3.0] add infusion centers

# %%
pd.read_sql(f"""
SELECT
canonical_rate_score,
COUNT(*) as n_possible_rates,
COUNT(DISTINCT provider_id) as n_possible_providers,
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) as n_rates,
COUNT(
DISTINCT
CASE
WHEN canonical_rate_score > 1 THEN provider_id
END
) as n_providers
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE provider_subtype = 'Infusion Center'
GROUP BY 1
ORDER BY 1 DESC
LIMIT 10
""", con=trino_conn)

# %%
pd.read_sql(f"""
SELECT
COUNT(*) as n_possible_rates,
COUNT(DISTINCT provider_id) as n_possible_providers,
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) as n_rates,
COUNT(
DISTINCT
CASE
WHEN canonical_rate_score > 1 THEN provider_id
END
) as n_providers
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE provider_subtype = 'Infusion Center'
""", con=trino_conn)

# %%
# MISC

# %%
# Compare rate counts by network class between v2.3.0 and v2.2.2

# %%
df = pd.read_sql(f"""
WITH
new AS (
SELECT network_class, COUNT(*) as new_count
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE canonical_rate_score > 1
GROUP BY 1
),
old AS (
SELECT network_class, COUNT(*) as old_count
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE canonical_rate_score > 1
GROUP BY 1
)
SELECT
network_class,
new_count,
old_count
FROM new
LEFT JOIN old USING (network_class)
""", con=trino_conn)
print(df.to_markdown(index=False, floatfmt=".0f"))


# %%
# check that we are pulling hospital rates for BCBS TN
# (fixed plan bridge naming)

# %%
pd.read_sql(f"""
select COUNT(*), 'new'
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE provider_id in ('3442', '3447', '3448', '3445')
AND payer_id = '398'
AND canonical_rate_source = 'hospital'
union
select COUNT(*), 'old'
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE provider_id in ('3442', '3447', '3448', '3445')
AND payer_id = '398'
AND canonical_rate_source = 'hospital'
""", con=trino_conn)

# %%
# provider_type_new
pd.read_sql(f"""
SELECT DISTINCT provider_type, provider_type_new
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_clickhouse_abridged
WHERE canonical_rate_score > 1
""", con=trino_conn)

# %%
pd.read_sql(f"""
SELECT network_class, COUNT(*), 'new'
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_clickhouse_abridged
GROUP BY 1
UNION ALL
SELECT network_class, COUNT(*), 'old'
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_clickhouse_abridged
GROUP BY 1
""", con=trino_conn)

Distributions​

# %%
from IPython import get_ipython
from jinja2 import Template

if get_ipython() is not None:
get_ipython().run_line_magic("load_ext", "autoreload")
get_ipython().run_line_magic("autoreload", "2")
from dotenv import load_dotenv
import pandas as pd
from tqds.sql import db, sql

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid', rc={'figure.figsize': (14, 12)})

pd.options.display.max_columns = None
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = None
pd.set_option('display.float_format', lambda x: '%.3f' % x)
trino_conn = db.TrinoClient().conn
redshift_conn = db.RedshiftClient().conn
query = sql.Query()


# %% [markdown]
"""
# Compare rate counts by bill_type, provider_type, canonical_rate_score between v2.3.0 and v2.2.2
"""

# %%
df = pd.read_sql(f"""
WITH
new AS (
SELECT
bill_type,
provider_type,
COUNT(*) as n
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE network_type IN ('PPO', 'HMO')
GROUP BY 1, 2
),
old AS (
SELECT
bill_type,
provider_type,
COUNT(*) as n
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE network_type IN ('PPO', 'HMO')
GROUP BY 1, 2
)
SELECT
new.bill_type,
new.provider_type,
new.n as new_n,
old.n as old_n
FROM new
JOIN old ON
new.provider_type = old.provider_type
AND new.bill_type = old.bill_type
ORDER BY
new.bill_type,
new.provider_type
""", con=trino_conn)
df['diff'] = df['new_n'] - df['old_n']
df['diff_pct'] = df['diff'] / df['old_n'] * 100
print(df.to_markdown(index=False, floatfmt=".0f"))


# %% [markdown]
"""
# Compare rate counts by bill_type, provider_type, canonical_rate_score between v2.3.0 and v2.2.2
"""

# %%
df = pd.read_sql(f"""
WITH
new AS (
SELECT
bill_type,
provider_type,
canonical_rate_score,
COUNT(*) as n
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE network_type IN ('PPO', 'HMO')
GROUP BY 1, 2, 3
),
old AS (
SELECT
bill_type,
provider_type,
canonical_rate_score,
COUNT(*) as n
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE network_type IN ('PPO', 'HMO')
GROUP BY 1, 2, 3
)
SELECT
new.bill_type,
new.provider_type,
new.canonical_rate_score,
new.n as new_n,
old.n as old_n
FROM new
JOIN old ON
new.provider_type = old.provider_type
AND new.canonical_rate_score = old.canonical_rate_score
AND new.bill_type = old.bill_type
ORDER BY
new.bill_type,
new.provider_type,
new.canonical_rate_score
""", con=trino_conn)
df['diff'] = df['new_n'] - df['old_n']
df['diff_pct'] = df['diff'] / df['old_n'] * 100
print(df.to_markdown(index=False, floatfmt=".0f"))

# %%
df

# %% [markdown]
"""
# ASC drop analysis
"""

# %%
df_asc = pd.read_sql(f"""
SELECT
canonical_rate_score,
SUM(
CASE
WHEN canonical_rate_percent_of_medicare < 0.5 THEN 1
ELSE 0
END
) as n_below_0_5,
COUNT(*) as n
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE network_type IN ('PPO', 'HMO')
AND provider_type = 'ASC'
GROUP BY 1
""", con=trino_conn)
df_asc

# %%
dfasc2 = pd.read_sql(f"""
WITH
new AS (
SELECT
canonical_rate_type,
canonical_rate_score,
COUNT(*) as n
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE network_type IN ('PPO', 'HMO')
GROUP BY 1, 2
),
old AS (
SELECT
canonical_rate_type,
canonical_rate_score,
COUNT(*) as n
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE network_type IN ('PPO', 'HMO')
GROUP BY 1, 2
)
SELECT
new.canonical_rate_type,
new.canonical_rate_score,
new.n as new_n,
old.n as old_n
FROM new
JOIN old ON
new.canonical_rate_score = old.canonical_rate_score
AND new.canonical_rate_type = old.canonical_rate_type
ORDER BY
new.canonical_rate_type,
new.canonical_rate_score
""", con=trino_conn)
dfasc2['diff'] = dfasc2['new_n'] - dfasc2['old_n']
dfasc2['diff_pct'] = dfasc2['diff'] / dfasc2['old_n'] * 100
print(dfasc2.to_markdown(index=False, floatfmt=".0f"))

# %%
dfasc2.sort_values(by='diff_pct', ascending=True)


# %% [markdown]
"""
# NULLs
"""

# %%
columns = pd.read_sql(f"""
DESCRIBE tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
""", con=trino_conn)['Column'].tolist()

# %%
q = """
SELECT
{% for col in columns %}
1.000 * (COUNT_IF({{col}} IS NULL) / COUNT(*)) AS {{col}}{% if not loop.last %},{% endif %}
{% endfor %}
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE canonical_rate_score > 1
"""
q_template = Template(q)
q_rendered = q_template.render(columns=columns)
nulls_v2_3_0 = pd.read_sql(q_rendered, con=trino_conn)
print(nulls_v2_3_0.T.sort_values(0).to_markdown(floatfmt=".3f"))

# %%