Version: 2.1

PG Codeset

As of v1_2_5, we're adding 153,000 roids per physician group. This is beacuse the codeset is large (9501 codes). We're including everything that meets this filter:

FROM {{ cld_params.Tables.SPINES_SERVICES.value }}
WHERE
    regexp_like(service_code, '^[1-9][0-9]*[^a-zA-Z]?$') -- starts w/ number 1-9, does not end with letter
    AND service_type = 'HCPCS'

Codeset Reduction

Use Komodo encounters to pull the top procedure codes by revenue that also appear in CMS MPFS pricing. Use count of encounters and CMS MPFS pricing to estimate revenue.

Based on the below calculation, the top 3000 codes cover 99.2% of estimated revenue.

WITH
claims AS (
    SELECT 
        procedure_code, 
        count(distinct encounter_key) as n_encounters
    FROM tq_intermediate.external_komodo.medical_service_lines
    WHERE hcp_s_1_npi IN (
        SELECT DISTINCT t.hcp_neighbors
        FROM tq_dev.internal_dev_csong_sandbox.prod_pac_connections, 
            UNNEST(hcp_neighbors) AS t(hcp_neighbors)
    )
    AND procedure_code IN (
        SELECT hcpcs
        FROM tq_intermediate.cld_utils.ref_cms_mpfs_pricing_2025_04_10
    )
    GROUP BY 1
    ORDER BY 2 DESC
),
mpfs AS (
    SELECT  hcpcs, max(facility_rate) as rate
    FROM tq_intermediate.cld_utils.ref_cms_mpfs_pricing_2025_04_10
    GROUP BY hcpcs
),
revenue_added AS (
    SELECT
        c.procedure_code, 
        c.n_encounters, 
        m.rate,
        c.n_encounters * m.rate as revenue
    FROM claims c
    LEFT JOIN mpfs m ON c.procedure_code = m.hcpcs
)
SELECT
    procedure_code, 
    n_encounters, 
    n_encounters / SUM(n_encounters) OVER () AS percent_encounters,
    rate,
    revenue,
    revenue / SUM(revenue) OVER () AS percent_revenue
FROM revenue_added
ORDER BY revenue DESC

Codeset:

Code to generate table

# %%
from IPython import get_ipython

if get_ipython() is not None:
    get_ipython().run_line_magic("load_ext", "autoreload")
    get_ipython().run_line_magic("autoreload", "2")
import pandas as pd
from tqds.aws import utils as tu
from tqds.sql import db, sql

import pandas as pd
import numpy as np


pd.options.display.max_columns = None
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = None
pd.set_option('display.float_format', lambda x: '%.3f' % x)
trino_conn = db.TrinoClient().conn
redshift_conn = db.RedshiftClient().conn
query = sql.Query()


# %% [markdown]
"""
Top Procedure Codes for PGs
"""

# %%
df = pd.read_sql(f"""
WITH
claims AS (
    SELECT 
        procedure_code, 
        count(distinct encounter_key) as n_encounters
    FROM tq_intermediate.external_komodo.medical_service_lines
    WHERE hcp_s_1_npi IN (
        SELECT DISTINCT t.hcp_neighbors
        FROM tq_dev.internal_dev_csong_sandbox.prod_pac_connections, 
            UNNEST(hcp_neighbors) AS t(hcp_neighbors)
    )
    AND procedure_code IN (
        SELECT hcpcs
        FROM tq_intermediate.cld_utils.ref_cms_mpfs_pricing_2025_04_10
    )
    GROUP BY 1
    ORDER BY 2 DESC
),
mpfs AS (
    SELECT  hcpcs, max(facility_rate) as rate
    FROM tq_intermediate.cld_utils.ref_cms_mpfs_pricing_2025_04_10
    GROUP BY hcpcs
),
revenue_added AS (
    SELECT
        c.procedure_code, 
        c.n_encounters, 
        m.rate,
        c.n_encounters * m.rate as revenue
    FROM claims c
    LEFT JOIN mpfs m ON c.procedure_code = m.hcpcs
)
SELECT
    procedure_code, 
    n_encounters, 
    n_encounters / SUM(n_encounters) OVER () AS percent_encounters,
    rate,
    revenue,
    revenue / SUM(revenue) OVER () AS percent_revenue
FROM revenue_added
ORDER BY revenue DESC
""", con=trino_conn)
df


# %% [markdown]
"""
Evaluate
"""

# %%
df_top5k = df.head(3000)
df_top5k['percent_revenue'].sum()

# %%
# export as json
with open('top_procedure_codes_pg.json', 'w') as f:
    f.write(df_top5k.drop(columns=['percent_encounters','revenue']).to_json(orient='records'))

Add Back High-Revenue Non-MPFS codes

There may be some codes that are not in CMS MPFS pricing that we want to include. Based on employer network check data, we should also include codes that start with (high revenue share, billed for physicians, not in MPFS):

'996XX'
'994XX'
'993XX'
'992XX'
'991XX'
'990XX'
'971XX'
'936XX'
'908XX'
'907XX'
'906XX'
'903XX'
'892XX'
'878XX'
'876XX'
'875XX'
'874XX'
'873XX'
'870XX'
'868XX'
'844XX'
'841XX'
'830XX'
'827XX'
'826XX'
'810XX'
'800XX'
'788XX'
'784XX'
'773XX'
'307XX'

# %%
df_mpfs = pd.read_sql(f"""
SELECT  hcpcs, max(facility_rate) as rate
FROM tq_intermediate.cld_utils.ref_cms_mpfs_pricing_2025_04_10
GROUP BY hcpcs
""", con=trino_conn)


# %%
df = pd.read_sql(f"""
SELECT 
    billing_code_type, 
    billing_code,
    sum(total_cost) as total_cost,
    avg(rate) as avg_rate,
    sum(utilization) as total_utilization,
    count(distinct npi) as distinct_npi_count
FROM hive.employerpcc.plancheck_main_qc
WHERE 
    provider_type IN 
    (
        'Individual Provider', 
        'Single/Multi-Specialty Physician Group', 
        'Physician Group Affiliated Entity'
    )
    AND status = True
    AND year(created_at) >= 2025
    AND billing_code_type IN ('CPT', 'HCPCS')
GROUP BY 1, 2
ORDER BY total_cost DESC
""", con=trino_conn)


# %%
df["percent_cost"] = df["total_cost"] / df["total_cost"].sum()
df_top5000 = df.head(5000)
  
# %%
df_exceptions = df_top5000.loc[~df_top5000['billing_code'].isin(df_mpfs['hcpcs'])]
prefixes = df_exceptions.head(500)['billing_code'].str[:3].value_counts().head(50)
prefixes = [p for p in prefixes.index if p.isnumeric()]

Codeset Reduction​

Codeset:​

Add Back High-Revenue Non-MPFS codes​

On this page:

Codeset Reduction

Codeset:

Add Back High-Revenue Non-MPFS codes