from IPython import get_ipython
from jinja2 import Template
if get_ipython() is not None:
get_ipython().run_line_magic("load_ext", "autoreload")
get_ipython().run_line_magic("autoreload", "2")
from dotenv import load_dotenv
import pandas as pd
from tqds.sql import db, sql
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style='whitegrid', rc={'figure.figsize': (14, 12)})
pd.options.display.max_columns = None
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = None
pd.set_option('display.float_format', lambda x: '%.3f' % x)
trino_conn = db.TrinoClient().conn
redshift_conn = db.RedshiftClient().conn
query = sql.Query()
"""
# [CLD-v2.2.2] lab specific accuracy scoring
"""
df = pd.read_sql(f"""
WITH
new AS (
SELECT
canonical_rate_score,
MIN(canonical_rate_percent_of_cbsa_avg_medicare) as new_min_rate,
APPROX_PERCENTILE(canonical_rate_percent_of_cbsa_avg_medicare, 0.05) as new_p05,
APPROX_PERCENTILE(canonical_rate_percent_of_cbsa_avg_medicare, 0.95) as new_p95,
MAX(canonical_rate_percent_of_cbsa_avg_medicare) as new_max_rate,
count(*) as new_n_rates
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE provider_type = 'Laboratory'
AND is_lab_code = True
GROUP BY 1
),
old AS (
SELECT
canonical_rate_score,
MIN(canonical_rate_percent_of_cbsa_avg_medicare) as old_min_rate,
APPROX_PERCENTILE(canonical_rate_percent_of_cbsa_avg_medicare, 0.05) as old_p05,
APPROX_PERCENTILE(canonical_rate_percent_of_cbsa_avg_medicare, 0.95) as old_p95,
MAX(canonical_rate_percent_of_cbsa_avg_medicare) as old_max_rate,
count(*) as old_n_rates
FROM tq_dev.internal_dev_csong_cld_v2_2_1.prod_combined_abridged
WHERE provider_type = 'Laboratory'
AND is_lab_code = True
GROUP BY 1
)
SELECT
*
FROM new
LEFT JOIN old USING (canonical_rate_score)
""", con=trino_conn)
print(df.set_index('canonical_rate_score').sort_index().T.to_markdown(floatfmt=".2f"))
"""
| | 1 | 2 | 3 | 4 | 5 |
|:-------------|---------:|----------:|----------:|-----------:|-----------:|
| new_min_rate | 0.00 | 0.20 | 0.40 | 0.30 | 0.40 |
| new_p05 | 0.05 | 0.26 | 0.49 | 0.35 | 0.42 |
| new_p95 | 10.53 | 4.18 | 1.16 | 2.75 | 1.12 |
| new_max_rate | 69541.03 | 4.50 | 1.30 | 3.00 | 1.30 |
| new_n_rates | 98747.00 | 372080.00 | 864506.00 | 174467.00 | 1151916.00 |
| old_min_rate | 0.00 | 0.27 | 1.00 | 0.10 | nan |
| old_p05 | 0.00 | 0.27 | 1.00 | 0.40 | nan |
| old_p95 | 0.10 | 13.66 | 2.26 | 3.63 | nan |
| old_max_rate | 69541.03 | 13.66 | 2.97 | 30.00 | nan |
| old_n_rates | 14853.00 | 27.00 | 74.00 | 2843609.00 | nan |
"""
"""
# [CLD-v2.2.2] guardant national coverage
"""
df = pd.read_sql(f"""
SELECT provider_name, payer_id, payer_name, count(*)
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE provider_id = '-2130016735574130363'
AND canonical_rate_score > 1
GROUP BY 1,2,3
""", con=trino_conn)
df
pd.read_sql(f"""
SELECT *
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE provider_id = '-2130016735574130363'
AND canonical_rate_score > 1
LIMIT 10
""", con=trino_conn)
"""
# [CLD-v2.2.2] add summa care network
"""
df = pd.read_sql(f"""
SELECT bill_type, count(*)
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE payer_id = '638'
AND canonical_rate_score > 1
GROUP BY 1
""", con=trino_conn)
df
"""
# [CLD-v2.2.2] update outlier bounds table using provider subtype to condition for hospitals
"""
df = pd.read_sql(f"""
WITH
new AS (
SELECT
provider_subtype,
bill_type,
APPROX_PERCENTILE(lower_bound, 0.5) as new_lower,
APPROX_PERCENTILE(upper_bound, 0.5) as new_upper
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_all
WHERE provider_type = 'Hospital'
GROUP BY 1, 2
),
old AS (
SELECT
provider_subtype,
bill_type,
APPROX_PERCENTILE(lower_bound, 0.5) as old_lower,
APPROX_PERCENTILE(upper_bound, 0.5) as old_upper
FROM tq_dev.internal_dev_csong_cld_v2_2_1.prod_combined_all
WHERE provider_type = 'Hospital'
GROUP BY 1, 2
)
SELECT
*
FROM new
LEFT JOIN old USING (provider_subtype, bill_type)
ORDER BY 1,2
""", con=trino_conn)
df
"""
# [CLD-v2.2.2] remove capitation negotiation arrangement from payer mrf
"""
pd.read_sql(f"""
SELECT distinct negotiation_arrangement
FROM tq_dev.internal_dev_csong_cld_v2_2_2.tmp_raw_mrf_payer_rates_2025_09
LIMIT 10
""", con=trino_conn)
"""
# [CLD-v2.2.2] custom plan bridge - base hospital rates
"""
pd.read_sql(f"""
SELECT canonical_rate_source, bill_type, count(*)
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE payer_id = '97'
AND provider_id = '795'
GROUP BY 1,2
LIMIT 10
""", con=trino_conn)
"""
# [CLD-v2.2.2] Update Rev Code Logic
"""
pd.read_sql(f"""
SELECT
count(*)
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE
provider_id = '1607'
AND payer_id = '7'
AND billing_code_type = 'MS-DRG'
LIMIT 10
""", con=trino_conn)
pd.read_sql(f"""
SELECT
count(*)
FROM tq_dev.internal_dev_csong_cld_v2_2_1.prod_combined_abridged
WHERE
provider_id = '1607'
AND payer_id = '7'
AND billing_code_type = 'MS-DRG'
LIMIT 10
""", con=trino_conn)
"""
# [CLD-v2.2.2] Include Records for Providers Posting HCPCS as IP only
"""
pd.read_sql(f"""
WITH
hospitals_hcpcs_inpatient_only AS (
SELECT DISTINCT hr.provider_id
FROM tq_production.hospital_data.hospital_rates hr
WHERE hr.billing_code_type = 'HCPCS'
AND hr.setting = 'Inpatient'
AND hr.billing_code IS NOT NULL
AND NOT EXISTS (
SELECT 1
FROM tq_production.hospital_data.hospital_rates
WHERE provider_id = hr.provider_id
AND billing_code_type = 'HCPCS'
AND (
setting IN ('Outpatient', 'Inpatient & Outpatient')
OR
setting IS NULL
)
AND billing_code IS NOT NULL
)
AND provider_id IN (SELECT provider_id
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE provider_type = 'Hospital')
)
SELECT *
FROM hospitals_hcpcs_inpatient_only
LIMIT 10
""", con=trino_conn)
pd.read_sql(f"""
SELECT *
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE provider_id = '9810'
AND billing_code_type = 'HCPCS'
AND canonical_rate_score > 1
LIMIT 10
""", con=trino_conn)
"""
# [CLD-v2.2.2] validated rates tiebreaker fix
"""
"""
# [CLD-v2.2.2] edit gross charge cols order
"""
"""
# [CLD-v2.2.2] Always Populate Medicare Rates in Prod Tables
"""
pd.read_sql(f"""
SELECT COUNT(DISTINCT billing_code )
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
WHERE medicare_rate IS NULL
AND billing_code IN (
SELECT distinct hcpcs
FROM tq_production.reference_internal.opps_reference_pricing
WHERE is_latest_start_effective_date = True
)
AND provider_type = 'Hospital'
""", con=trino_conn)
"""
# [CLD-v2.2.2] Add New NDC Pattern
"""
pd.read_sql(f"""
SELECT *
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
LIMIT 10
""", con=trino_conn)
"""
# [CLD-v2.2.2] Update Network Mappings
"""
pd.read_sql(f"""
SELECT *
FROM tq_dev.internal_dev_csong_cld_v2_2_2.prod_combined_abridged
LIMIT 10
""", con=trino_conn)