PG Codeset
As of v1_2_5, we're adding 153,000 roids per physician group. This is beacuse
the codeset is large (9501 codes). We're including everything that meets this filter:
FROM {{ cld_params.Tables.SPINES_SERVICES.value }}
WHERE
regexp_like(service_code, '^[1-9][0-9]*[^a-zA-Z]?$') -- starts w/ number 1-9, does not end with letter
AND service_type = 'HCPCS'
Codeset Reduction​
Use Komodo encounters to pull the top procedure codes by revenue that also appear in CMS MPFS pricing. Use count of encounters and CMS MPFS pricing to estimate revenue.
Based on the below calculation, the top 3000 codes cover 99.2% of estimated revenue.
WITH
claims AS (
SELECT
procedure_code,
count(distinct encounter_key) as n_encounters
FROM tq_intermediate.external_komodo.medical_service_lines
WHERE hcp_s_1_npi IN (
SELECT DISTINCT t.hcp_neighbors
FROM tq_dev.internal_dev_csong_sandbox.prod_pac_connections,
UNNEST(hcp_neighbors) AS t(hcp_neighbors)
)
AND procedure_code IN (
SELECT hcpcs
FROM tq_intermediate.cld_utils.ref_cms_mpfs_pricing_2025_04_10
)
GROUP BY 1
ORDER BY 2 DESC
),
mpfs AS (
SELECT hcpcs, max(facility_rate) as rate
FROM tq_intermediate.cld_utils.ref_cms_mpfs_pricing_2025_04_10
GROUP BY hcpcs
),
revenue_added AS (
SELECT
c.procedure_code,
c.n_encounters,
m.rate,
c.n_encounters * m.rate as revenue
FROM claims c
LEFT JOIN mpfs m ON c.procedure_code = m.hcpcs
)
SELECT
procedure_code,
n_encounters,
n_encounters / SUM(n_encounters) OVER () AS percent_encounters,
rate,
revenue,
revenue / SUM(revenue) OVER () AS percent_revenue
FROM revenue_added
ORDER BY revenue DESC
Codeset:​
Code to generate table
# %%
from IPython import get_ipython
if get_ipython() is not None:
get_ipython().run_line_magic("load_ext", "autoreload")
get_ipython().run_line_magic("autoreload", "2")
import pandas as pd
from tqds.aws import utils as tu
from tqds.sql import db, sql
import pandas as pd
import numpy as np
pd.options.display.max_columns = None
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = None
pd.set_option('display.float_format', lambda x: '%.3f' % x)
trino_conn = db.TrinoClient().conn
redshift_conn = db.RedshiftClient().conn
query = sql.Query()
# %% [markdown]
"""
Top Procedure Codes for PGs
"""
# %%
df = pd.read_sql(f"""
WITH
claims AS (
SELECT
procedure_code,
count(distinct encounter_key) as n_encounters
FROM tq_intermediate.external_komodo.medical_service_lines
WHERE hcp_s_1_npi IN (
SELECT DISTINCT t.hcp_neighbors
FROM tq_dev.internal_dev_csong_sandbox.prod_pac_connections,
UNNEST(hcp_neighbors) AS t(hcp_neighbors)
)
AND procedure_code IN (
SELECT hcpcs
FROM tq_intermediate.cld_utils.ref_cms_mpfs_pricing_2025_04_10
)
GROUP BY 1
ORDER BY 2 DESC
),
mpfs AS (
SELECT hcpcs, max(facility_rate) as rate
FROM tq_intermediate.cld_utils.ref_cms_mpfs_pricing_2025_04_10
GROUP BY hcpcs
),
revenue_added AS (
SELECT
c.procedure_code,
c.n_encounters,
m.rate,
c.n_encounters * m.rate as revenue
FROM claims c
LEFT JOIN mpfs m ON c.procedure_code = m.hcpcs
)
SELECT
procedure_code,
n_encounters,
n_encounters / SUM(n_encounters) OVER () AS percent_encounters,
rate,
revenue,
revenue / SUM(revenue) OVER () AS percent_revenue
FROM revenue_added
ORDER BY revenue DESC
""", con=trino_conn)
df
# %% [markdown]
"""
Evaluate
"""
# %%
df_top5k = df.head(3000)
df_top5k['percent_revenue'].sum()
# %%
# export as json
with open('top_procedure_codes_pg.json', 'w') as f:
f.write(df_top5k.drop(columns=['percent_encounters','revenue']).to_json(orient='records'))
Add Back High-Revenue Non-MPFS codes​
There may be some codes that are not in CMS MPFS pricing that we want to include. Based on employer network check data, we should also include codes that start with (high revenue share, billed for physicians, not in MPFS):
- '996XX'
- '994XX'
- '993XX'
- '992XX'
- '991XX'
- '990XX'
- '971XX'
- '936XX'
- '908XX'
- '907XX'
- '906XX'
- '903XX'
- '892XX'
- '878XX'
- '876XX'
- '875XX'
- '874XX'
- '873XX'
- '870XX'
- '868XX'
- '844XX'
- '841XX'
- '830XX'
- '827XX'
- '826XX'
- '810XX'
- '800XX'
- '788XX'
- '784XX'
- '773XX'
- '307XX'
# %%
df_mpfs = pd.read_sql(f"""
SELECT hcpcs, max(facility_rate) as rate
FROM tq_intermediate.cld_utils.ref_cms_mpfs_pricing_2025_04_10
GROUP BY hcpcs
""", con=trino_conn)
# %%
df = pd.read_sql(f"""
SELECT
billing_code_type,
billing_code,
sum(total_cost) as total_cost,
avg(rate) as avg_rate,
sum(utilization) as total_utilization,
count(distinct npi) as distinct_npi_count
FROM hive.employerpcc.plancheck_main_qc
WHERE
provider_type IN
(
'Individual Provider',
'Single/Multi-Specialty Physician Group',
'Physician Group Affiliated Entity'
)
AND status = True
AND year(created_at) >= 2025
AND billing_code_type IN ('CPT', 'HCPCS')
GROUP BY 1, 2
ORDER BY total_cost DESC
""", con=trino_conn)
# %%
df["percent_cost"] = df["total_cost"] / df["total_cost"].sum()
df_top5000 = df.head(5000)
# %%
df_exceptions = df_top5000.loc[~df_top5000['billing_code'].isin(df_mpfs['hcpcs'])]
prefixes = df_exceptions.head(500)['billing_code'].str[:3].value_counts().head(50)
prefixes = [p for p in prefixes.index if p.isnumeric()]