Skip to main content
Version: 2.2

Imputations

ComponentSchema

SQL​

imputations_long_rates.sql​

preprocessed rates for imputations

-- 🪟💲🌟
-- run_id: {{ run_id }}
-- task_name: {{ task_name }}
-- Table: {{ schema_name }}.tmp_int_imputations_long_rates_{{ sub_version }}
-- Subversion: {{ sub_version }}


CREATE TABLE {{ schema_name }}.tmp_int_imputations_long_rates_{{ sub_version }}
WITH (
PARTITIONING = ARRAY['payer_id']
)
AS

-- union of all the rate columns in the raw data, excluding raw per diems and percentages
{% set payer_raw_cols = cld_params.PAYER_RAW_COLS %}
{% set payer_transformation_cols = cld_params.PAYER_TRANSFORMATION_COLS %}

{% set hospital_raw_cols = cld_params.HOSPITAL_RAW_COLS %}
{% set hospital_transformation_cols = cld_params.HOSPITAL_TRANSFORMATION_COLS %}

{% set raw_columns_table = payer_raw_cols + hospital_raw_cols %}
{% set transformed_columns_table = payer_transformation_cols + hospital_transformation_cols %}

{% for col in raw_columns_table %}
SELECT
r.roid,
r.{{ col }} AS rate,
acc.{{ col }}_validation_score as accuracy_score,
'{{ col }}' as colname,
'rate' as rate_type,
r.provider_type,
r.payer_id
FROM {{ schema_name }}.tmp_int_combined_raw_{{ sub_version }} r
JOIN {{ schema_name }}.tmp_int_accuracy_raw_{{ sub_version }} acc
ON r.roid = acc.roid
AND r.payer_id = acc.payer_id
WHERE r.{{ col }} IS NOT NULL
AND substring(acc.billing_code, 1, 1) NOT IN ('J','Q')
{% if not loop.last %}UNION ALL{% endif %}
{% endfor %}

UNION ALL

{% for col in transformed_columns_table %}
SELECT
r.roid,
r.{{ col }} AS rate,
acc.{{ col }}_validation_score as accuracy_score,
'{{ col }}' as colname,
'rate' as rate_type,
r.provider_type,
r.payer_id
FROM {{ schema_name }}.tmp_int_transformations_{{ sub_version }} r
JOIN {{ schema_name }}.tmp_int_accuracy_raw_{{ sub_version }} acc
ON r.roid = acc.roid
AND r.payer_id = acc.payer_id
WHERE r.{{ col }} IS NOT NULL
AND (
substring(acc.billing_code, 1, 1) NOT IN ('J','Q')
OR
({{'drug_dosage_std' in col }})
)
{% if not loop.last %}UNION ALL{% endif %}
{% endfor %}

imputations_rc_carveouts.sql​

-- 🪟💲🌟
-- run_id: {{ run_id }}
-- task_name: {{ task_name }}
-- Table: {{ schema_name }}.tmp_int_imputations_rc_carveouts_{{ sub_version }}
-- Subversion: {{ sub_version }}


CREATE TABLE {{ schema_name }}.tmp_int_imputations_rc_carveouts_{{ sub_version }}
WITH (
PARTITIONING = ARRAY['payer_id']
)
AS

WITH
rc_family AS (
SELECT
rc_family, array_agg(rev_code) as rev_code_array
FROM {{ cld_params.Tables.RC_GROUP_FAMILY.value }}
WHERE rc_family != ''
GROUP BY 1
),
add_jaccard_index AS (
SELECT
i.payer_id,
i.network_id,
i.provider_id,
i.negotiated_type,
CASE
WHEN i.negotiated_type = 'percentage' THEN True
ELSE False
END as is_percentage,
i.rate,
i.billing_code_array,
i.n_codes,
rf.*,
array_intersect(i.billing_code_array, rf.rev_code_array) as intersection,
cardinality(array_intersect(i.billing_code_array, rf.rev_code_array)) as n_intersection,
cardinality(array_union(i.billing_code_array, rf.rev_code_array)) as n_union,
1.00 * cardinality(array_intersect(i.billing_code_array, rf.rev_code_array)) /
cardinality(array_union(i.billing_code_array, rf.rev_code_array)) as jaccard_index
FROM {{ schema_name }}.tmp_int_imputations_rc_global_{{ sub_version }} i
CROSS JOIN rc_family rf
)
SELECT
*,
row_number() over (
partition by payer_id, network_id, provider_id, is_percentage, rc_family
order by jaccard_index desc
) as rank
FROM add_jaccard_index
WHERE n_intersection > 0

imputations_rc_global.sql​

-- 🪟💲🌟
-- run_id: {{ run_id }}
-- task_name: {{ task_name }}
-- Table: {{ schema_name }}.tmp_int_imputations_rc_global_{{ sub_version }}
-- Subversion: {{ sub_version }}


CREATE TABLE {{ schema_name }}.tmp_int_imputations_rc_global_{{ sub_version }}
WITH (
PARTITIONING = ARRAY['payer_id']
)
AS

WITH

-------------------------------------
-- Inputs
-------------------------------------

-- Network Spine
network_spine AS (
SELECT
payer_id,
network_id,
network_name,
t.payer_data_network
FROM
{{ schema_name }}.tmp_ref_network_spine_{{ sub_version }},
UNNEST(payer_data_network) AS t(payer_data_network)
),

-- Provider Spine
provider_spine AS (
SELECT
npi_value AS npi,
provider_id
FROM
{{ schema_name }}.tmp_ref_provider_spine_{{ sub_version }},
UNNEST(npi) AS t(npi_value)
WHERE
provider_id IS NOT NULL
),

-- Core Rates Filtered to CLD Networks and Providers
-- Filtered to Inpatient RC bililng code types only
core_rates_subset AS (
SELECT
ns.payer_id,
ns.network_id,
ns.network_name,
ps.provider_id,
cr.provider_npi,
cr.negotiated_type,
ROUND(cr.negotiated_rate, 0) as rate,
cr.billing_code
FROM {{ payer_schema }}.core_rates cr
JOIN network_spine ns
ON ns.payer_id = cr.payer_id
AND ns.payer_data_network = cr.product_network_label
JOIN provider_spine ps
ON ps.npi = cr.provider_npi
WHERE billing_code_type = 'RC'
AND length(cr.billing_code) = 4
-- FILTER TO INPATIENT RCs
AND (
SUBSTRING(cr.billing_code, 1, 2) IN ('00', '01')
OR
SUBSTRING(cr.billing_code, 1, 3) IN ('020', '021')
)
AND REGEXP_LIKE(cr.billing_code, '^[0-9]+$')
),

hospital_data_subset AS (
SELECT
CAST(hr.payer_id AS VARCHAR),
ns.network_id,
ns.network_name,
hr.provider_id,
hr.provider_npi,
hr.contract_methodology as negotiated_type,
ROUND(hr.negotiated_dollar, 0) as rate,
LPAD(hr.revenue_code, 4, '0') as billing_code
FROM {{ hospital_schema }}.hospital_rates hr
JOIN {{ schema_name }}.tmp_ref_plan_bridge_{{ sub_version }} pb
ON pb.provider_id = hr.provider_id
AND pb.id = hr.id
JOIN network_spine ns
ON ns.payer_id = pb.payer_id
AND ns.network_name = pb.network_name
WHERE
hr.revenue_code IS NOT NULL
AND hr.negotiated_dollar IS NOT NULL
AND hr.contract_methodology = 'per diem'
UNION ALL
SELECT
CAST(hr.payer_id AS VARCHAR),
ns.network_id,
ns.network_name,
hr.provider_id,
hr.provider_npi,
'percentage' as negotiated_type,
ROUND(
CASE
WHEN hr.negotiated_percentage < 1 THEN hr.negotiated_percentage * 100
WHEN hr.negotiated_percentage > 1000 THEN hr.negotiated_percentage / 100
ELSE hr.negotiated_percentage
END, 0
) as rate,
LPAD(hr.revenue_code, 4, '0') as billing_code
FROM {{ hospital_schema }}.hospital_rates hr
JOIN {{ schema_name }}.tmp_ref_plan_bridge_{{ sub_version }} pb
ON pb.provider_id = hr.provider_id
AND pb.id = hr.id
JOIN network_spine ns
ON ns.payer_id = pb.payer_id
AND ns.network_name = pb.network_name
WHERE
hr.revenue_code IS NOT NULL
AND hr.negotiated_percentage IS NOT NULL
),

combined AS (
SELECT
*,
'core_rates' as source
FROM core_rates_subset
UNION ALL
SELECT
*,
'hospital_data' as source
FROM hospital_data_subset
),
combined_with_flags AS (
SELECT
*,
CASE
WHEN billing_code IN (
-- R&B
'0123', '0110', '0120', '0113', '0121', '0140', '0111', '0117',
'0127', '0129', '0119', '0149'
)
THEN True
ELSE False
END AS is_r_and_b,
CASE
WHEN billing_code IN (
-- ICU
'0203', '0204', '0200', '0206', '0207', '0208', '0201', '0202',
'0209'
)
THEN True
ELSE False
END AS is_icu
FROM combined
),

-- Count of distinct RC codes and distinct rates per payer, network, provider
code_totals AS (
SELECT
payer_id,
network_id,
provider_id,
array_agg(DISTINCT source) as source_array,
count(distinct rate) as n_distinct_rates,
count(distinct billing_code) as total_n_rc_codes
FROM combined_with_flags
GROUP BY 1,2,3
),

-- RC Stats
-- Count of distinct RC codes per payer, network, provider, negotiated-type, rate
df_rc_stats AS (
SELECT
cr.payer_id,
cr.network_id,
cr.network_name,
cr.provider_id,
cr.negotiated_type,
ROUND(cr.rate, 0) as rate,
tc.total_n_rc_codes,
tc.n_distinct_rates,
tc.source_array,
CASE
WHEN SUM(CASE WHEN cr.is_r_and_b = True THEN 1 ELSE 0 END) > 0
AND SUM(CASE WHEN cr.is_icu = True THEN 1 ELSE 0 END) >0
THEN True
ELSE False
END as is_global,

-- array of distinct codes sharing the common rate
array_sort(array_agg(DISTINCT cr.billing_code)) as billing_code_array,
-- count of distinct codes sharing the common rate
count(distinct billing_code) as n_codes

FROM combined_with_flags cr
JOIN code_totals tc
ON cr.payer_id = tc.payer_id
AND cr.network_id = tc.network_id
AND cr.provider_id = tc.provider_id
GROUP BY 1,2,3,4,5,6,7,8,9
)


SELECT
*,
-- it's possible there are ties (e.g. 2 rates with 50 codes if each billing code was reported twice with 2 rates)
-- let's take the higher rate (this usually happens when negotiated type is per diem and is probably
-- for cases when first few days are paid at a different rate; generally, first few days are a higher rate)
ROW_NUMBER() OVER (PARTITION BY payer_id, network_name, provider_id ORDER BY n_codes DESC, rate DESC) as rn
FROM df_rc_stats

imputations.sql​

-- 🪟💲🌟
-- run_id: {{ run_id }}
-- task_name: {{ task_name }}
-- Table: {{ schema_name }}.tmp_int_imputations_{{ n_chunk }}_{{ sub_version }}
-- Subversion: {{ sub_version }}
-- Payer IDs: {{ payer_ids }}
-- N Chunk: {{ n_chunk }}


{% macro aggregator(rate_col, partition_cols, base_alias) -%}
AVG({{ rate_col }}) OVER (PARTITION BY {{ partition_cols }}) AS {{ base_alias }},
COUNT({{ rate_col }}) OVER (PARTITION BY {{ partition_cols }}) AS {{ base_alias }}_n,
STDDEV({{ rate_col }}) OVER (PARTITION BY {{ partition_cols }}) AS {{ base_alias }}_std
{%- endmacro %}


{% macro aggregator_with_case(condition_cols, rate_col, partition_cols, base_alias) -%}
CASE
WHEN
{% for condition in condition_cols %}
{{ condition }} IS NOT NULL
{% if not loop.last %} AND {% endif %}
{% endfor %}

THEN
AVG({{ rate_col }}) OVER (PARTITION BY {{ partition_cols }})
ELSE NULL
END AS {{ base_alias }},
COUNT({{ rate_col }}) OVER (PARTITION BY {{ partition_cols }}) AS {{ base_alias }}_n,
STDDEV({{ rate_col }}) OVER (PARTITION BY {{ partition_cols }}) AS {{ base_alias }}_std
{%- endmacro %}


CREATE TABLE {{ schema_name }}.tmp_int_imputations_{{ n_chunk }}_{{ sub_version }}
WITH (
PARTITIONING = ARRAY['payer_id']
)
AS
WITH
provider_features AS (
SELECT
provider_id,
ein,
cbsa,
state,
taxonomy_grouping,
provider_type
FROM {{ schema_name }}.tmp_ref_provider_spine_{{ sub_version }}
),
code_features AS (
SELECT hcpcs, min(apc) as apc
FROM {{ cld_params.Tables.OPPS_ADDENDUM_B.value }}
WHERE apc != '0'
AND is_latest_start_effective_date = true
GROUP BY hcpcs
)
SELECT
distinct
ros.roid,
c.apc,

-- placeholder for aggreagte imputation tiers
ros.provider_type,
ros.payer_id

FROM {{ schema_name }}.tmp_rate_object_space_{{ sub_version }} ros
LEFT JOIN (
SELECT *
FROM {{ schema_name }}.tmp_int_imputations_long_rates_{{ sub_version }}
WHERE accuracy_score > {{ min_score }}
AND rate_type = 'rate'
) r
ON ros.roid = r.roid
AND ros.payer_id = r.payer_id
LEFT JOIN provider_features p
ON ros.provider_id = p.provider_id
LEFT JOIN code_features c
ON ros.billing_code = c.hcpcs
WHERE
ros.payer_id IN ({{ payer_ids }})

imputations_derived.sql​

-- 🪟💲🌟
-- run_id: {{ run_id }}
-- task_name: {{ task_name }}
-- Table: {{ schema_name }}.tmp_int_imputations_derived_{{ n_chunk }}_{{ sub_version }}
-- Subversion: {{ sub_version }}
-- Payer IDs: {{ payer_ids }}
-- N Chunk: {{ n_chunk }}

{%- set original_nontransformed_cols = cld_params.ORIGINAL_NONTRANSFORMED_COLS %}
{%- set gross_charge_cols = cld_params.GROSS_CHARGE_COLS + cld_params.GROSS_CHARGE_STATS %}
{%- set obstetrics_and_nursery = """
'768', '796', '797', '798', '805', '806', '807', '783', '784', '785', '786', '787', '788',
'795', '789', '792', '794', '791', '793', '790'
""" %}

CREATE TABLE {{ schema_name }}.tmp_int_imputations_derived_{{ n_chunk }}_{{ sub_version }}
WITH (
PARTITIONING = ARRAY['payer_id']
)
AS
WITH

------------------------------
-- INPUTS
------------------------------

-- CHARGES
charges AS (
SELECT
roid,
payer_id,
{% for col in gross_charge_cols %}
{{ col }}{% if not loop.last %}, {% endif %}
{% endfor %}
FROM {{ schema_name }}.tmp_int_combined_raw_{{ sub_version }}
WHERE payer_id IN ({{ payer_ids }})
),

-- RATES / PERCENTAGES
rates AS (
SELECT
r.*,
ros.network_id,
ns.network_name,
ros.provider_id,
ros.billing_code,
ros.billing_code_type,
ros.bill_type
FROM {{ schema_name }}.tmp_int_imputations_long_rates_{{ sub_version }} r
JOIN {{ schema_name }}.tmp_rate_object_space_{{ sub_version }} ros
ON r.roid = ros.roid
AND r.rate_type = 'rate'
JOIN {{ schema_name }}.tmp_ref_network_spine_{{ sub_version }} ns
ON ros.network_id = ns.network_id
WHERE r.rate_type = 'rate'
AND (
ros.provider_type LIKE '%Hospital%'
OR
ros.provider_type = 'ASC'
)
AND r.payer_id IN ({{ payer_ids }})
),
percentages AS (
SELECT
r.*,
ros.network_id,
ns.network_name,
ros.provider_id,
ros.billing_code,
ros.billing_code_type,
ros.bill_type
FROM {{ schema_name }}.tmp_int_imputations_long_rates_perc_{{ sub_version }} r
JOIN {{ schema_name }}.tmp_rate_object_space_{{ sub_version }} ros
ON r.roid = ros.roid
AND r.rate_type = 'percentage'
JOIN {{ schema_name }}.tmp_ref_network_spine_{{ sub_version }} ns
ON ros.network_id = ns.network_id
WHERE r.rate_type = 'percentage'
AND (
ros.provider_type LIKE '%Hospital%'
OR
ros.provider_type = 'ASC'
)
AND r.payer_id IN ({{ payer_ids }})
),


-- UTILS / References
cms_msdrg_all_years AS (
SELECT msdrg, weights, fy
FROM {{ cld_params.Tables.MSDRG_WEIGHTS_TABLE.value }}
WHERE fy = '{{ cld_params.MSDRG_WEIGHTS_YEAR_RANGE_END }}'
),
codes AS (
SELECT
payer_id,
network_id,
provider_id,
bill_type,
billing_code_type,
billing_code
FROM {{ schema_name }}.tmp_rate_object_space_{{ sub_version }}
),

------------------------------
-- IMPUTATION: MSDRG Base Rates (from separate table)
------------------------------
msdrg_base_rates AS (
SELECT * FROM {{ schema_name }}.tmp_int_msdrg_base_rates_{{ sub_version }}
),

------------------------------
-- IMPUTATION: MSDRG Base Rates IN MRF
-- 1. Get MS-DRG Counts of Each Payer-Network-Provider
-- 2. Get MS-DRG Counts of Each Payer-Network-Provider-Rate
-- 3. Get the most frequent rate for each Payer-Network-Provider, with frequency and total code counts
------------------------------
msdrg_counts AS (
SELECT
rates.payer_id,
rates.network_id,
rates.provider_id,
count(distinct rates.roid) as total
FROM rates
WHERE rates.billing_code_type = 'MS-DRG'
AND colname NOT LIKE '%allowed_amount%'
GROUP BY 1,2,3
),
msdrg_rates_mrf_ranked AS (
SELECT
rates.payer_id,
rates.network_id,
rates.provider_id,
rates.rate,
count(distinct roid) as freq,
ROW_NUMBER() OVER (
PARTITION BY
rates.payer_id, rates.network_id, rates.provider_id
ORDER BY COUNT(distinct roid) DESC
) AS rnk
FROM rates
WHERE rates.billing_code_type = 'MS-DRG'
AND colname NOT LIKE '%allowed_amount%'
GROUP BY 1,2,3,4
),
msdrg_base_rates_mrf AS (
SELECT
r.payer_id,
r.network_id,
r.provider_id,
c.bill_type,
c.billing_code,
c.billing_code_type,
r.rate,
r.freq,
m.total
FROM msdrg_rates_mrf_ranked r
JOIN msdrg_counts m
ON r.payer_id = m.payer_id
AND r.network_id = m.network_id
AND r.provider_id = m.provider_id
JOIN codes c
ON r.payer_id = c.payer_id
AND r.network_id = c.network_id
AND r.provider_id = c.provider_id
WHERE rnk = 1
AND billing_code_type = 'MS-DRG'
AND bill_type = 'Inpatient'
),

------------------------------
-- IMPUTATION: OPG GROUPER Base Rates
-- loaded from: https://github.com/turquoisehealth/labps/blob/master/labps/data/contracts/groupers.py
-- 1. Get Counts of Each Payer-Provider-Grouper
-- 2. Get Counts of Each Payer-Provider-Grouper-Rate
-- 3. Get the most frequent rate for each Payer-Network-Provider, with frequency and total code counts
------------------------------
opg_schedules AS (
SELECT
'643' AS payer_id,
NULL AS network_name,
billing_code, billing_code_type, grouper
FROM {{ cld_params.Tables.OPG_SCHEDULES_UNITED.value }}
UNION ALL
SELECT
'7' AS payer_id,
NULL AS network_name,
billing_code, billing_code_type, grouper
FROM {{ cld_params.Tables.OPG_SCHEDULES_AETNA.value }}
UNION ALL
SELECT
'76' AS payer_id,
NULL AS network_name,
billing_code, billing_code_type, grouper
FROM {{ cld_params.Tables.OPG_SCHEDULES_CIGNA.value }}
UNION ALL
SELECT
'403' AS payer_id,
NULL AS network_name,
billing_code, billing_code_type, grouper
FROM {{ cld_params.Tables.OPG_SCHEDULES_BCBS_CA.value }}
UNION ALL
SELECT
'44' AS payer_id,
NULL AS network_name,
billing_code, billing_code_type, grouper
FROM {{ cld_params.Tables.OPG_SCHEDULES_CAREFIRST.value }}
UNION ALL
SELECT
'42' AS payer_id,
'CA BLUE CROSS PPO' AS network_name,
billing_code, billing_code_type, grouper
FROM {{ cld_params.Tables.OPG_SCHEDULES_ANTHEM_CA.value }}
UNION ALL
SELECT
'388' as payer_id,
NULL as network_name,
billing_code, billing_code_type, grouper
FROM {{ cld_params.Tables.OPG_SCHEDULES_FLORIDA_BLUE.value }}
),
opg_total_possible AS (
SELECT
payer_id,
network_name,
grouper,
count(distinct billing_code) as total
FROM opg_schedules
GROUP BY 1,2,3
),
opg_grouper_counts AS (
SELECT
rates.payer_id,
rates.network_id,
rates.provider_id,
os.grouper,
count(distinct roid) as total
FROM rates
JOIN opg_schedules os
ON rates.payer_id = os.payer_id
AND rates.billing_code = os.billing_code
AND rates.billing_code_type = os.billing_code_type
AND rates.bill_type = 'Outpatient'
AND (
os.network_name IS NULL
OR os.network_name = rates.network_name
)
WHERE colname NOT LIKE '%allowed_amount%'
AND colname NOT LIKE '%perc_%'
GROUP BY 1,2,3,4
),
opg_grouper_ranked AS (
SELECT
rates.payer_id,
rates.network_id,
rates.provider_id,
os.grouper,
ogc.total,
ot.total as total_possible,
ROUND(rates.rate, -1) as rate,
count(distinct roid) as freq,
ROW_NUMBER() OVER (
PARTITION BY
rates.payer_id, rates.network_id, rates.provider_id, os.grouper
ORDER BY COUNT(*) DESC
) AS rnk
FROM rates
JOIN opg_schedules os
ON rates.payer_id = os.payer_id
AND rates.billing_code = os.billing_code
AND rates.billing_code_type = os.billing_code_type
AND rates.bill_type = 'Outpatient'
AND (
os.network_name IS NULL
OR os.network_name = rates.network_name
)
JOIN opg_grouper_counts ogc
ON rates.payer_id = ogc.payer_id
AND rates.network_id = ogc.network_id
AND rates.provider_id = ogc.provider_id
AND os.grouper = ogc.grouper
JOIN opg_total_possible ot
ON rates.payer_id = ot.payer_id
AND os.grouper = ot.grouper
AND (
ot.network_name IS NULL
OR ot.network_name = rates.network_name
)
WHERE colname NOT LIKE '%allowed_amount%'
AND colname NOT LIKE '%perc_%'
GROUP BY 1,2,3,4,5,6,7
),
opg_base_rates AS (
SELECT
r.payer_id,
r.network_id,
r.provider_id,
r.grouper,
'Outpatient' as bill_type,
s.billing_code_type,
s.billing_code,
r.rate,
r.freq,
r.total,
r.total_possible
FROM opg_grouper_ranked r
JOIN opg_schedules s
ON r.payer_id = s.payer_id
AND r.grouper = s.grouper
WHERE rnk = 1
),

------------------------------
-- IMPUTATION: MSDRG Base Percentage Rates
-- 1. Get Percentage-Type MS-DRG Counts of Each Payer-Network-Provider
-- 2. Get Percentage-Type MS-DRG Counts of Each Payer-Network-Provider-Grouper
-- 3. Get the most frequent rate for each Payer-Network-Provider, with frequency and total code counts
------------------------------
msdrg_percentage_counts AS (
SELECT
payer_id,
network_id,
provider_id,
count(distinct roid) as total
FROM percentages
WHERE billing_code_type = 'MS-DRG'
GROUP BY 1,2,3
),
msdrg_percentage_ranked AS (
SELECT
payer_id,
network_id,
provider_id,
rate,
count(distinct roid) as freq,
ROW_NUMBER() OVER (
PARTITION BY
payer_id, provider_id
ORDER BY COUNT(distinct roid) DESC
) AS rnk
FROM percentages
WHERE billing_code_type = 'MS-DRG'
GROUP BY 1,2,3,4
),
msdrg_percentage_base_rates AS (
SELECT
r.payer_id,
r.network_id,
r.provider_id,
c.bill_type,
c.billing_code,
c.billing_code_type,
r.rate,
r.freq,
m.total
FROM msdrg_percentage_ranked r
JOIN msdrg_percentage_counts m
ON r.payer_id = m.payer_id
AND r.network_id = m.network_id
AND r.provider_id = m.provider_id
JOIN codes c
ON r.payer_id = c.payer_id
AND r.network_id = c.network_id
AND r.provider_id = c.provider_id
WHERE rnk = 1
AND billing_code_type = 'MS-DRG'
AND bill_type = 'Inpatient'
),

------------------------------
-- IMPUTATION: Outpatient Percentage Base Rates
-- 1. Get Percentage-Type HCPCS Counts of Each Payer-Network-Provider
-- 2. Get Percentage-Type HCPCS Counts of Each Payer-Network-Provider-Grouper
-- 3. Get the most frequent rate for each Payer-Network-Provider, with frequency and total code counts
------------------------------
outpatient_percentage_counts AS (
SELECT
payer_id,
network_id,
provider_id,
count(distinct roid) as total
FROM percentages
WHERE billing_code_type = 'HCPCS'
AND bill_type = 'Outpatient'
GROUP BY 1,2,3
),
outpatient_percentage_ranked AS (
SELECT
payer_id,
network_id,
provider_id,
rate,
count(distinct roid) as freq,
ROW_NUMBER() OVER (
PARTITION BY
payer_id, provider_id
ORDER BY COUNT(distinct roid) DESC
) AS rnk
FROM percentages
WHERE billing_code_type = 'HCPCS'
AND bill_type = 'Outpatient'
GROUP BY 1,2,3,4
),
outpatient_percentage_base_rates AS (
SELECT
r.payer_id,
r.network_id,
r.provider_id,
c.bill_type,
c.billing_code,
c.billing_code_type,
r.rate,
r.freq,
m.total
FROM outpatient_percentage_ranked r
JOIN outpatient_percentage_counts m
ON r.payer_id = m.payer_id
AND r.network_id = m.network_id
AND r.provider_id = m.provider_id
JOIN codes c
ON r.payer_id = c.payer_id
AND r.network_id = c.network_id
AND r.provider_id = c.provider_id
WHERE rnk = 1
AND billing_code_type = 'HCPCS'
AND bill_type = 'Outpatient'
),

------------------------------
-- IMPUTATION: RC Global Inpatient Percentages
------------------------------
rc_global_inpatient_percentage AS (
SELECT
payer_id,
network_id,
provider_id,
billing_code_array,
rate
FROM {{ schema_name }}.tmp_int_imputations_rc_global_{{ sub_version }}
WHERE rn = 1
AND (negotiated_type = 'percentage' AND is_global = True)
),

------------------------------
-- IMPUTATION: RC Global Inpatient Per Diems
------------------------------
rc_global_inpatient_per_diem AS (
SELECT
payer_id,
network_id,
provider_id,
billing_code_array,
rate
FROM {{ schema_name }}.tmp_int_imputations_rc_global_{{ sub_version }}
WHERE rn = 1
AND n_codes > 30
AND negotiated_type = 'per diem'
),

------------------------------
-- IMPUTATION: RC Family
------------------------------

msdrg_rc_mappings AS (
SELECT
distinct rc_family, msdrg
FROM {{ cld_params.Tables.RC_MSDRG_AFFILIATIONS.value }}
WHERE priority_rank = '1'
),
rc_family_percentages AS (
SELECT
rc.payer_id,
rc.network_id,
rc.provider_id,
m.msdrg as billing_code,
CASE
WHEN m.rc_family = 'RB' THEN 'Room & Board'
WHEN m.rc_family = 'OB' THEN 'Obstetrics'
ELSE m.rc_family
END as rc_family,
rc.rate
FROM {{ schema_name }}.tmp_int_imputations_rc_carveouts_{{ sub_version }} rc
JOIN msdrg_rc_mappings m
ON rc.rc_family = m.rc_family
WHERE rank = 1
AND jaccard_index > 0.25
AND negotiated_type = 'percentage'
),
rc_family_per_diem AS (
SELECT
rc.payer_id,
rc.network_id,
rc.provider_id,
m.msdrg as billing_code,
CASE
WHEN m.rc_family = 'RB' THEN 'Room & Board'
WHEN m.rc_family = 'OB' THEN 'Obstetrics'
ELSE m.rc_family
END as rc_family,
rc.rate
FROM {{ schema_name }}.tmp_int_imputations_rc_carveouts_{{ sub_version }} rc
JOIN msdrg_rc_mappings m
ON rc.rc_family = m.rc_family
WHERE rank = 1
AND jaccard_index > 0.25
AND negotiated_type != 'percentage'
),

imputation_stats AS (

SELECT
distinct
r.roid,

------------------------------
-- ms-drg base rate
------------------------------
CASE
WHEN
m.freq > 10
AND
m.freq_percent > 0.9
AND
m.billing_code NOT IN ({{ obstetrics_and_nursery }})
THEN m.base_rate * m.weights
ELSE NULL
END AS msdrg_base_rate_mult_cms_weight,
m.weights AS msdrg_cms_weight,
m.fy AS msdrg_cms_fy,
m.base_rate AS msdrg_base_rate,
m.freq AS msdrg_n_freq,
m.total AS msdrg_n_total,

------------------------------
-- ms-drg base rate in MRF
------------------------------
CASE
WHEN
mrf_m.freq > 100
AND (
(
{% for col in cld_params.HOSPITAL_RAW_COLS + cld_params.PAYER_RAW_COLS %}
mrf_m.rate = r.{{ col }}
{% if not loop.last %}OR{% endif %}
{% endfor %}
)
OR
(
{% for col in cld_params.HOSPITAL_RAW_COLS + cld_params.PAYER_RAW_COLS %}
r.{{ col }} IS NULL
{% if not loop.last %}AND{% endif %}
{% endfor %}
AND
{% for col in cld_params.HOSPITAL_TRANSFORMATION_COLS + cld_params.PAYER_TRANSFORMATION_COLS %}
tr.{{ col }} IS NULL
{% if not loop.last %}AND{% endif %}
{% endfor %}
)
)
AND
mrf_m.billing_code NOT IN ({{ obstetrics_and_nursery }})

THEN mrf_m.rate * cms.weights
ELSE NULL
END AS msdrg_mrf_base_rate_mult_cms_weight,
cms.weights AS msdrg_mrf_cms_weight,
cms.fy AS msdrg_mrf_cms_fy,
mrf_m.rate AS msdrg_mrf_base_rate,
mrf_m.freq AS msdrg_mrf_n_freq,
mrf_m.total AS msdrg_mrf_n_total,

------------------------------
-- ms-drg percentage base rate
------------------------------
CASE
WHEN
mp.freq > 50
AND
1.00 * mp.freq / mp.total > 0.9
THEN .01 * mp.rate
ELSE NULL
END AS msdrg_percentage_effective,
mp.rate AS msdrg_percentage_candidate_base_rate,
mp.freq AS msdrg_percentage_n_freq,
mp.total AS msdrg_percentage_n_total,

------------------------------
-- RC Global Inpatient PoC Base Rate
------------------------------

0.01 * rcp.rate AS rc_global_inpatient_percentage_effective,
rcp.rate AS rc_global_inpatient_base_rate,
rcp.billing_code_array AS rc_global_inpatient_billing_code_array,

------------------------------
-- RC Global Inpatient Per Diem Base Rate
------------------------------

rcpd.rate * tr.medicare_glos as rc_global_per_diem_mult_glos,
rcpd.rate AS rc_global_inpatient_per_diem_base_rate,

------------------------------
-- RC Family PoC Rate
------------------------------
0.01 * rc_family_percentages.rate AS rc_family_percentage_effective,
rc_family_percentages.rate AS rc_family_percentage_rate,
ARRAY[rc_family_percentages.rc_family] AS rc_family_perc,

------------------------------
-- RC Family Per Diem Rate
------------------------------
rc_family_per_diem.rate * tr.medicare_glos as rc_family_per_diem_mult_glos,
rc_family_per_diem.rate AS rc_family_per_diem_rate,
ARRAY[rc_family_per_diem.rc_family] AS rc_family_per_diem,

------------------------------
-- OPG base rate
------------------------------
os.grouper AS opg,
CASE
WHEN
1.00 * o.freq / o.total > 0.8
-- AND o.total > 15
-- AND o.total_possible > 40
THEN o.rate
ELSE NULL
END AS opg_base_rate,
o.rate AS opg_candidate_base_rate,
o.freq AS opg_n_freq,
o.total AS opg_n_total,
o.total_possible AS opg_n_total_possible,


------------------------------
-- OP percentage base rate
------------------------------
CASE
WHEN
op.freq > 200
AND
1.00 * op.freq / op.total > 0.9
THEN .01 * op.rate
ELSE NULL
END AS op_percentage_effective,
op.rate AS op_percentage_candidate_base_rate,
op.freq AS op_percentage_n_freq,
op.total AS op_percentage_n_total,


------------------------------
-- ROS fields to help QA and for partitioning
------------------------------
r.provider_id,
r.billing_code_type,
r.billing_code,
r.provider_type,
r.network_id,
r.payer_id

FROM {{ schema_name }}.tmp_int_combined_raw_{{ sub_version }} r
LEFT JOIN {{ schema_name }}.tmp_int_transformations_{{ sub_version }} tr
ON r.roid = tr.roid
AND r.payer_id = tr.payer_id

-- MS-DRG GLOBAL
LEFT JOIN msdrg_base_rates m
ON r.payer_id = m.payer_id
AND r.network_id = m.network_id
AND r.provider_id = m.provider_id
AND r.billing_code = m.billing_code
AND r.billing_code_type = m.billing_code_type
AND r.bill_type = m.bill_type
LEFT JOIN msdrg_base_rates_mrf mrf_m
ON r.payer_id = mrf_m.payer_id
AND r.network_id = mrf_m.network_id
AND r.provider_id = mrf_m.provider_id
AND r.bill_type = mrf_m.bill_type
AND r.billing_code = mrf_m.billing_code
AND r.billing_code_type = mrf_m.billing_code_type
LEFT JOIN msdrg_percentage_base_rates mp
ON r.payer_id = mp.payer_id
AND r.network_id = mp.network_id
AND r.provider_id = mp.provider_id
AND r.bill_type = mp.bill_type
AND r.billing_code = mp.billing_code
AND r.billing_code_type = mp.billing_code_type

-- OPG GROUPER
LEFT JOIN opg_schedules os
ON r.payer_id = os.payer_id
AND r.billing_code = os.billing_code
AND r.billing_code_type = os.billing_code_type
AND r.bill_type = 'Outpatient'
LEFT JOIN opg_base_rates o
ON r.payer_id = o.payer_id
and r.network_id = o.network_id
AND r.provider_id = o.provider_id
AND r.billing_code = o.billing_code
AND r.billing_code_type = o.billing_code_type
AND r.bill_type = o.bill_type

-- OP GLOBAL
LEFT JOIN outpatient_percentage_base_rates op
ON r.payer_id = op.payer_id
AND r.network_id = op.network_id
AND r.provider_id = op.provider_id
AND r.bill_type = op.bill_type
AND r.billing_code = op.billing_code
AND r.billing_code_type = op.billing_code_type

-- RC GLOBAL
LEFT JOIN rc_global_inpatient_percentage rcp
ON r.payer_id = rcp.payer_id
AND r.network_id = rcp.network_id
AND r.provider_id = rcp.provider_id
AND r.bill_type = 'Inpatient'
AND r.billing_code_type = 'MS-DRG'
LEFT JOIN rc_global_inpatient_per_diem rcpd
ON r.payer_id = rcpd.payer_id
AND r.network_id = rcpd.network_id
AND r.provider_id = rcpd.provider_id
AND r.bill_type = 'Inpatient'
AND r.billing_code_type = 'MS-DRG'

-- RC FAMILY
LEFT JOIN rc_family_percentages
ON r.payer_id = rc_family_percentages.payer_id
AND r.network_id = rc_family_percentages.network_id
AND r.provider_id = rc_family_percentages.provider_id
AND r.billing_code = rc_family_percentages.billing_code
AND r.billing_code_type = 'MS-DRG'
LEFT JOIN rc_family_per_diem
ON r.payer_id = rc_family_per_diem.payer_id
AND r.network_id = rc_family_per_diem.network_id
AND r.provider_id = rc_family_per_diem.provider_id
AND r.billing_code = rc_family_per_diem.billing_code
AND r.billing_code_type = 'MS-DRG'

LEFT JOIN cms_msdrg_all_years cms
ON r.billing_code = cms.msdrg
AND r.billing_code_type = 'MS-DRG'

WHERE
r.billing_code_type IN ('MS-DRG', 'HCPCS')
AND r.payer_id IN ({{ payer_ids }})

)


SELECT
r.roid,
{% for col in gross_charge_cols %}
c.{{ col }},
{% endfor %}

{{ columns_and_stats_derived }}

------------------------------
-- ROS fields to help QA and for partitioning
------------------------------
r.provider_id,
r.billing_code_type,
r.billing_code,
r.provider_type,
r.network_id,
r.payer_id
FROM imputation_stats r
LEFT JOIN charges c
ON r.roid = c.roid
AND r.payer_id = c.payer_id