from IPython import get_ipython
from jinja2 import Template
if get_ipython() is not None:
get_ipython().run_line_magic("load_ext", "autoreload")
get_ipython().run_line_magic("autoreload", "2")
from dotenv import load_dotenv
import pandas as pd
from tqds.sql import db, sql
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid', rc={'figure.figsize': (14, 12)})
pd.options.display.max_columns = None
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = None
pd.set_option('display.float_format', lambda x: '%.3f' % x)
trino_conn = db.TrinoClient().conn
redshift_conn = db.RedshiftClient().conn
query = sql.Query()
df = pd.read_sql(f"""
SELECT
provider_type,
canonical_rate_score,
COUNT(*) as n_rates,
MIN(canonical_rate_percent_of_medicare) as min_pct_medicare,
MAX(canonical_rate_percent_of_medicare) as max_pct_medicare,
MAX(canonical_rate) as max_canonical_rate
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE billing_code IN (
SELECT hcpcs
FROM tq_production.reference_internal.anesthesia_reference_pricing
)
GROUP BY 1, 2
ORDER BY 1, 2
""", con=trino_conn)
print(df.to_markdown(index=False))
"""
| provider_type | canonical_rate_score | n_rates | min_pct_medicare | max_pct_medicare | max_canonical_rate |
|:----------------|-----------------------:|----------:|-------------------:|-------------------:|---------------------:|
| Imaging Center | 0 | 235337 | nan | nan | nan |
| Imaging Center | 1 | 120 | 0.5 | 90.09 | 1878.33 |
| Imaging Center | 4 | 4201 | 0.83 | 29.71 | 619.4 |
| Physician Group | 0 | 123219406 | nan | nan | nan |
| Physician Group | 1 | 1132766 | 0 | 746405 | 1.55402e+07 |
| Physician Group | 2 | 6623106 | 3.5 | 10.46 | 200 |
| Physician Group | 3 | 9644892 | 1 | 3.5 | 96.75 |
| Physician Group | 4 | 110304 | 3.51 | 5.37 | 111.85 |
| Physician Group | 5 | 15174 | 1 | 3.48 | 97 |
"""
df = pd.read_sql(f"""
SELECT
canonical_rate_source,
canonical_rate_score,
COUNT(*) as n_rates,
MIN(canonical_rate_percent_of_medicare) as min_canonical_rate,
MAX(canonical_rate_percent_of_medicare) as max_canonical_rate
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE is_drug_code = True
GROUP BY 1,2
ORDER BY 1,2
LIMIT 100
""", con=trino_conn)
print(df.to_markdown(index=False))
"""
| canonical_rate_source | canonical_rate_score | n_rates | min_canonical_rate | max_canonical_rate |
|:------------------------|-----------------------:|----------:|---------------------:|---------------------:|
| hospital | 1 | 4451328 | 0 | inf |
| hospital | 2 | 180508 | 0.8 | 0.9 |
| hospital | 3 | 1790825 | 0.9 | 4 |
| hospital | 4 | 5255449 | 1.25 | 3.5 |
| hospital | 5 | 787512 | 1.2 | 4 |
| imputation | 1 | 4018773 | 0 | 362871 |
| imputation | 2 | 4353234 | 0.8 | 4 |
| payer | 1 | 7512310 | 0 | inf |
| payer | 2 | 2389249 | 0.8 | 10 |
| payer | 3 | 17208127 | 0.9 | 4 |
| payer | 4 | 8379973 | 1.25 | 3.5 |
| payer | 5 | 289458 | 1.2 | 4 |
"""
pd.read_sql(f"""
SELECT
provider_type,
COUNT(DISTINCT provider_id) as n_providers
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE network_class = 'Exchange'
AND canonical_rate_score > 1
GROUP BY 1
LIMIT 10
""", con=trino_conn)
df = pd.read_sql(f"""
SELECT
payer_id,
payer_name,
COUNT(DISTINCT provider_id) as n_providers,
COUNT(*) as n_rates
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE network_class = 'Exchange'
AND canonical_rate_score > 1
GROUP BY 1, 2
ORDER BY n_providers DESC
""", con=trino_conn)
print(df.to_markdown(index=False))
df = pd.read_sql(f"""
SELECT
payer_id,
payer_name,
COUNT(DISTINCT provider_id) as n_providers,
COUNT(*) as n_rates
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE network_class = 'Exchange'
AND canonical_rate_score > 1
AND state IN ('LA', 'NC', 'PA')
GROUP BY 1, 2
ORDER BY n_providers DESC
""", con=trino_conn)
print(df.to_markdown(index=False))
pd.read_sql(f"""
SELECT
MIN(1.00 * opg_n_freq / opg_n_total) as min_opg_freq_per_total,
MIN(opg_n_total) as min_opg_n_total
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_all
WHERE opg_base_rate IS NOT NULL
AND (opg_n_freq / opg_n_total_possible) < 0.5
LIMIT 10
""", con=trino_conn)
pd.read_sql(f"""
SELECT
MIN(1.00 * opg_n_freq / opg_n_total_possible) as min_opg_freq_per_total_possible
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_all
WHERE opg_base_rate IS NOT NULL
AND (
(1.00 * opg_n_freq / opg_n_total < 0.8)
OR opg_n_total < 100
)
LIMIT 10
""", con=trino_conn)
df = pd.read_sql(f"""
WITH
new AS (
SELECT COUNT(*) as n_rates
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_all
WHERE opg_base_rate IS NOT NULL
),
old AS (
SELECT COUNT(*) as n_rates
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_all
WHERE opg_base_rate IS NOT NULL
)
SELECT
new.n_rates as new_n_rates,
old.n_rates as old_n_rates
FROM new
JOIN old ON TRUE
""", con=trino_conn)
print(df.to_markdown(index=False, floatfmt=".0f"))
"""
| new_n_rates | old_n_rates |
|--------------:|--------------:|
| 66680326 | 90324394 |
"""
66680326 / 90324394
df = pd.read_sql(f"""
WITH
new AS (
SELECT canonical_rate_type, COUNT(*) as n_rates
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE canonical_rate_type LIKE '%opg_%'
AND canonical_rate_score > 1
GROUP BY 1
),
old AS (
SELECT canonical_rate_type, COUNT(*) as n_rates
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE canonical_rate_type LIKE '%opg_%'
AND canonical_rate_score > 1
GROUP BY 1
)
SELECT
new.canonical_rate_type,
new.n_rates as new_n_rates,
old.n_rates as old_n_rates
FROM new
JOIN old ON new.canonical_rate_type = old.canonical_rate_type
""", con=trino_conn)
print(df.to_markdown(index=False, floatfmt=".0f"))
df = pd.read_sql(f"""
SELECT
provider_id,
provider_name,
CASE
WHEN provider_id IN (
SELECT provider_id
FROM tq_dev.internal_dev_csong_cld_v2_3_0.tmp_ref_provider_spine_2025_09
WHERE national_payer_coverage = True
)
THEN 'national_coverage'
ELSE 'non_national_coverage'
END as coverage_type,
COUNT(DISTINCT payer_id) as n_payers
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE canonical_rate_score > 1
GROUP BY 1, 2, 3
""", con=trino_conn)
df
df = pd.read_sql(f"""
SELECT
provider_id,
provider_name,
COUNT(DISTINCT payer_id) as n_payers,
COUNT(DISTINCT billing_code) as n_billing_codes,
COUNT(
DISTINCT
CASE
WHEN canonical_rate_score > 1 THEN payer_id
END
) as n_payers_w_rates,
COUNT(
DISTINCT
CASE
WHEN canonical_rate_score > 1 THEN billing_code
END
) as n_billing_codes_w_rates
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_all
WHERE national_payer_coverage = True
AND is_lab_code = True
AND canonical_rate_score > 1
GROUP BY 1, 2
""", con=trino_conn)
df
pd.read_sql(f"""
SELECT canonical_rate_score, count(*)
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged p
WHERE payer_id = '42'
AND billing_code like '00%'
and NOT regexp_like(billing_code, '[A-Za-z]')
AND canonical_rate IS NOT NULL
AND provider_name = 'Emory Specialty Associates, Llc'
AND provider_type = 'Physician Group'
GROUP BY 1
""", con=trino_conn)
pd.read_sql(f"""
select *
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
where billing_code = '927'
and billing_code_type = 'MS-DRG'
and provider_name = 'Jackson South Medical Center'
and payer_id = '76'
""", con=trino_conn)
test = pd.read_sql(f"""
SELECT *
FROM tq_production.hospital_data.hospital_rates
WHERE provider_id = '7040'
AND billing_code = '927'
AND billing_code_type = 'MS-DRG'
AND negotiated_dollar = 274466.07
""", con=trino_conn)
test.T
df = pd.read_sql(f"""
SELECT
COUNT(*) as n_rates,
COUNT(DISTINCT provider_id) as n_providers
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE billing_code = '018'
AND billing_code_type = 'MS-DRG'
AND canonical_rate_score > 1
LIMIT 10
""", con=trino_conn)
df
df = pd.read_sql(f"""
SELECT
billing_code,
min(canonical_rate_percent_of_cbsa_avg_medicare) as min_pct,
max(canonical_rate_percent_of_cbsa_avg_medicare) as max_pct,
avg(canonical_rate_percent_of_cbsa_avg_medicare) as avg_pct,
min(canonical_rate) as min_rate,
max(canonical_rate) as max_rate,
avg(canonical_rate) as avg_rate
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE billing_code = '018'
AND billing_code_type = 'MS-DRG'
AND canonical_rate_score > 1
GROUP BY 1
""", con=trino_conn)
print(df.T.to_markdown(floatfmt=".3f"))
pd.read_sql(f"""
SELECT
min(canonical_rate_percent_of_medicare),
min(canonical_rate_percent_of_cbsa_avg_medicare),
min(canonical_rate_percent_of_state_avg_medicare)
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE bill_type = 'Outpatient'
AND canonical_rate_score > 1
AND is_drug_code = False
AND is_lab_code = False
AND billing_code NOT IN (
SELECT hcpcs
FROM tq_production.reference_internal.anesthesia_reference_pricing
)
AND provider_type = 'Hospital'
""", con=trino_conn)
pd.read_sql(f"""
SELECT
canonical_rate_score,
MIN(canonical_rate_percent_of_medicare) as min_pct,
MAX(canonical_rate_percent_of_medicare) as max_pct,
MIN(canonical_rate_percent_of_cbsa_avg_medicare) as min_cbsa_pct,
MAX(canonical_rate_percent_of_cbsa_avg_medicare) as max_cbsa_pct
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE provider_type = 'Physician Group'
AND is_lab_code = False
AND is_drug_code = False
AND billing_code NOT IN (
SELECT hcpcs
FROM tq_production.reference_internal.anesthesia_reference_pricing
)
GROUP BY 1
ORDER BY 1 DESC
""", con=trino_conn)
pd.read_sql(f"""
SELECT
canonical_rate_score,
COUNT(*) as n_possible_rates,
COUNT(DISTINCT provider_id) as n_possible_providers,
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) as n_rates,
COUNT(
DISTINCT
CASE
WHEN canonical_rate_score > 1 THEN provider_id
END
) as n_providers
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE provider_subtype = 'Infusion Center'
GROUP BY 1
ORDER BY 1 DESC
LIMIT 10
""", con=trino_conn)
pd.read_sql(f"""
SELECT
COUNT(*) as n_possible_rates,
COUNT(DISTINCT provider_id) as n_possible_providers,
SUM(
CASE
WHEN canonical_rate_score > 1 THEN 1
ELSE 0
END
) as n_rates,
COUNT(
DISTINCT
CASE
WHEN canonical_rate_score > 1 THEN provider_id
END
) as n_providers
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE provider_subtype = 'Infusion Center'
""", con=trino_conn)
df = pd.read_sql(f"""
WITH
new AS (
SELECT network_class, COUNT(*) as new_count
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE canonical_rate_score > 1
GROUP BY 1
),
old AS (
SELECT network_class, COUNT(*) as old_count
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE canonical_rate_score > 1
GROUP BY 1
)
SELECT
network_class,
new_count,
old_count
FROM new
LEFT JOIN old USING (network_class)
""", con=trino_conn)
print(df.to_markdown(index=False, floatfmt=".0f"))
pd.read_sql(f"""
select COUNT(*), 'new'
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_combined_abridged
WHERE provider_id in ('3442', '3447', '3448', '3445')
AND payer_id = '398'
AND canonical_rate_source = 'hospital'
union
select COUNT(*), 'old'
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE provider_id in ('3442', '3447', '3448', '3445')
AND payer_id = '398'
AND canonical_rate_source = 'hospital'
""", con=trino_conn)
pd.read_sql(f"""
SELECT DISTINCT provider_type, provider_type_new
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_clickhouse_abridged
WHERE canonical_rate_score > 1
""", con=trino_conn)
pd.read_sql(f"""
SELECT network_class, COUNT(*), 'new'
FROM tq_dev.internal_dev_csong_cld_v2_3_0.prod_clickhouse_abridged
GROUP BY 1
UNION ALL
SELECT network_class, COUNT(*), 'old'
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_clickhouse_abridged
GROUP BY 1
""", con=trino_conn)