Skip to content

Commit b89de2e

Browse files
committed
Last fixes for top ten rus12 tables
1 parent f726442 commit b89de2e

File tree

9 files changed

+86
-27
lines changed

9 files changed

+86
-27
lines changed

dbt/models/rus12/core_rus12__yearly_loans/schema.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@ version: 2
22
sources:
33
- name: pudl
44
tables:
5-
- name: core_rus12__yearly_renewable_plants
5+
- name: core_rus12__yearly_loans
66
data_tests:
77
- expect_columns_not_all_null
8+
- expect_col1_greater_or_equal_to_col2:
9+
arguments:
10+
col1: loan_original_amount
11+
col2: loan_balance
812
columns:
913
- name: report_date
1014
- name: borrower_id_rus
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
{% test expect_col1_greater_or_equal_to_col2(model, col1, col2) %}
2+
3+
select *
4+
from {{ model }}
5+
where
6+
{{ col1 }} is not null
7+
and {{ col2 }} is not null
8+
and {{ col1 }} < {{ col2 }}
9+
10+
{% endtest %}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{% test expect_positive_values(model, positive_columns) %}
2+
3+
select *
4+
from {{ model }}
5+
where
6+
{% for col in positive_columns %}
7+
(
8+
{{ col }} is not null
9+
and {{ col }} < 0
10+
)
11+
{% if not loop.last %} or {% endif %}
12+
{% endfor %}
13+
14+
{% endtest %}

dbt/tests/data_tests/generic_tests/schema.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,3 +289,22 @@ macros:
289289
- name: max_differing_fuel_rate
290290
type: float
291291
description: Maximum acceptable fraction of units mapping to multiple fuels. (optional; default 0.01)
292+
293+
- name: test_expect_col1_greater_or_equal_to_col2
294+
description: >
295+
Check that values in one column are greater than or equal to values in another column. Rows where either column is null are ignored.
296+
arguments:
297+
- name: col1
298+
type: string
299+
description: The name of the first column, which should have values greater than or equal to the second column.
300+
- name: col2
301+
type: string
302+
description: The name of the second column, which should have values less than or equal to the first column.
303+
304+
- name: test_expect_positive_values
305+
description: >
306+
Check that values in each of the listed columns are non-negative (zero or greater). Null values are ignored.
307+
arguments:
308+
- name: positive_columns
309+
type: list
310+
description: A list of the names of columns whose values should be non-negative (zero or greater).

migrations/versions/122a38524571_add_next_rus12_tables.py renamed to migrations/versions/0c142bdd083d_add_next_four_rus12_tables.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
1-
"""add next rus12 tables
1+
"""Add next four rus12 tables
22
3-
Revision ID: 122a38524571
3+
Revision ID: 0c142bdd083d
44
Revises: 86e3a75cf325
5-
Create Date: 2026-02-09 15:59:36.418333
5+
Create Date: 2026-02-10 18:10:58.410575
66
77
"""
88
from alembic import op
99
import sqlalchemy as sa
1010

1111

1212
# revision identifiers, used by Alembic.
13-
revision = '122a38524571'
13+
revision = '0c142bdd083d'
1414
down_revision = '86e3a75cf325'
1515
branch_labels = None
1616
depends_on = None
@@ -26,16 +26,16 @@ def upgrade() -> None:
2626
sa.Column('loan_maturity_date', sa.Date(), nullable=True, comment='The date on which a loan is scheduled to be fully paid.'),
2727
sa.Column('loan_original_amount', sa.Float(), nullable=True, comment='The original amount of a loan.'),
2828
sa.Column('loan_balance', sa.Float(), nullable=True, comment='The amount of money still owned on a loan at the end of the reporting year.'),
29-
sa.Column('for_rural_development', sa.Boolean(), nullable=True, comment='Whether or not the investment is for rural development.')
29+
sa.Column('for_rural_development', sa.Boolean(), nullable=True, comment='Whether or not the investment is for rural development. This includes investments in any/all types of projects or products that were made to improve the economy and/or quality of life in the specified area.')
3030
)
3131
op.create_table('core_rus12__yearly_plant_labor',
3232
sa.Column('report_date', sa.Date(), nullable=True, comment='Date reported.'),
3333
sa.Column('borrower_id_rus', sa.Text(), nullable=True, comment="Unique identifier of RUS (Rural Utilities Service) borrower. These ID's are structured as: two character state acronyms followed by four digits."),
3434
sa.Column('borrower_name_rus', sa.Text(), nullable=True, comment='The name of the RUS (Rural Utilities Service) borrower.'),
3535
sa.Column('plant_name_rus', sa.Text(), nullable=True, comment='Name of the plant as reported to RUS.'),
3636
sa.Column('plant_type', sa.Enum('Combined Cycle', 'Steam', 'Hydro', 'Internal Combustion', 'Nuclear'), nullable=True, comment='Type of plant.'),
37-
sa.Column('employees_full_time_num', sa.Integer(), nullable=True, comment='Number of full time employees.'),
38-
sa.Column('employees_part_time_num', sa.Integer(), nullable=True, comment='Number of part time employees.'),
37+
sa.Column('employees_full_time_num', sa.Integer(), nullable=True, comment='Number of employees hired full-time for normal operations of the system.'),
38+
sa.Column('employees_part_time_num', sa.Integer(), nullable=True, comment='Number employees regularly employed on a part-time basis. Exclude employees hired for short periods of time to complete special jobs.'),
3939
sa.Column('employee_hours_worked_total', sa.Float(), nullable=True, comment='Total number of hours worked by employees.'),
4040
sa.Column('payroll_maintenance', sa.Float(), nullable=True, comment='The amount of payroll spent on plant maintenance.'),
4141
sa.Column('payroll_operations', sa.Float(), nullable=True, comment='The amount of payroll spent on plant operations.'),
@@ -45,7 +45,7 @@ def upgrade() -> None:
4545
sa.Column('report_date', sa.Date(), nullable=False, comment='Date reported.'),
4646
sa.Column('borrower_id_rus', sa.Text(), nullable=False, comment="Unique identifier of RUS (Rural Utilities Service) borrower. These ID's are structured as: two character state acronyms followed by four digits."),
4747
sa.Column('borrower_name_rus', sa.Text(), nullable=True, comment='The name of the RUS (Rural Utilities Service) borrower.'),
48-
sa.Column('source_of_energy', sa.Text(), nullable=False, comment='The source of energy (not plant type).'),
48+
sa.Column('source_of_energy', sa.Enum('energy_available_for_sale', 'energy_used_by_borrower', 'purchased_power', 'total_energy_accounted', 'total_energy_losses', 'total_plant', 'total_sales', 'delivered_out_of_system_gross', 'net_interchange', 'received_into_system_gross', 'delivered_out_of_system_wheeling', 'net_energy_wheeled', 'received_into_system_wheeling', 'energy_furnished_without_charge'), nullable=False, comment='The source of energy (not plant type).'),
4949
sa.Column('net_energy_received_mwh', sa.Float(), nullable=True, comment='The net amount of energy received into the system.'),
5050
sa.Column('cost', sa.Float(), nullable=True, comment='Cost value.'),
5151
sa.PrimaryKeyConstraint('report_date', 'borrower_id_rus', 'source_of_energy', name=op.f('pk_core_rus12__yearly_sources_and_distribution'))

src/pudl/metadata/enums.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -981,7 +981,7 @@
981981
"Biomass - wood",
982982
"Methane - waste",
983983
"Hybrid",
984-
"Solar - photvoltaic",
984+
"Solar - photovoltaic",
985985
"Solar - thermal generation",
986986
"Other",
987987
]

src/pudl/metadata/fields.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
REVENUE_CLASSES_EIA176,
5151
REVENUE_CLASSES_EIA861,
5252
RTO_CLASSES,
53-
# SOURCE_OF_ENERGY_RUS12,
53+
SOURCE_OF_ENERGY_RUS12,
5454
SUBDIVISION_CODES_ISO3166,
5555
TECH_CLASSES,
5656
TECH_DESCRIPTIONS,
@@ -2663,11 +2663,11 @@
26632663
},
26642664
"employees_full_time_num": {
26652665
"type": "integer",
2666-
"description": "Number of full time employees.",
2666+
"description": "Number of employees hired full-time for normal operations of the system.",
26672667
},
26682668
"employees_part_time_num": {
26692669
"type": "integer",
2670-
"description": "Number of part time employees.",
2670+
"description": "Number employees regularly employed on a part-time basis. Exclude employees hired for short periods of time to complete special jobs.",
26712671
},
26722672
"employee_hours_worked_total": {
26732673
"type": "number",
@@ -3320,7 +3320,12 @@
33203320
},
33213321
"for_rural_development": {
33223322
"type": "boolean",
3323-
"description": "Whether or not the investment is for rural development.",
3323+
"description": (
3324+
"Whether or not the investment is for rural development. "
3325+
"This includes investments in any/all types of projects or "
3326+
"products that were made to improve the economy and/or quality "
3327+
"of life in the specified area."
3328+
),
33243329
},
33253330
"forecast_year": {
33263331
"type": "integer",
@@ -7092,7 +7097,7 @@
70927097
"source_of_energy": {
70937098
"type": "string",
70947099
"description": "The source of energy (not plant type).",
7095-
# "constraints": {"enum": SOURCE_OF_ENERGY_RUS12},
7100+
"constraints": {"enum": SOURCE_OF_ENERGY_RUS12},
70967101
},
70977102
"source_url": {
70987103
"type": "string",

src/pudl/metadata/resources/rus12.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -317,9 +317,7 @@
317317
"usage_warnings": ["experimental_wip"],
318318
"additional_source_text": "(Parts D, E, F, G - Section B)",
319319
"additional_details_text": (
320-
"There are also some plants (at least one, Walter Scott) that are reported "
321-
"by multiple borrowers (IA0084 and IA0083) with the same values. "
322-
"Also note the lack of plant_type pre-2009 leading to a lack of "
320+
"Note the lack of plant_type pre-2009 leading to a lack of "
323321
"reliable primary keys."
324322
),
325323
},

src/pudl/transform/rus12.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,12 @@ def core_rus12__yearly_renewable_plants(raw_rus12__renewable_plants):
9393
df = rus.convert_units(
9494
df, old_unit="thousand_dollars", new_unit=None, converter=1000
9595
)
96+
97+
# Fix typo in primary_renewable_fuel_type values
98+
df.primary_renewable_fuel_type = df.primary_renewable_fuel_type.replace(
99+
{"Solar - photvoltaic": "Solar - photovoltaic"}
100+
)
101+
96102
# TODO: Make primary_renewable_fuel_type look like renewable fuels from other sources.
97103

98104
return df
@@ -141,12 +147,6 @@ def core_rus12__yearly_loans(raw_rus12__loans):
141147
df.loan_maturity_date = pd.to_datetime(df.loan_maturity_date, format="mixed")
142148
df.for_rural_development = df.for_rural_development.astype("boolean")
143149

144-
# Make sure loan balance isn't more than original loan amount
145-
loan_diff = df.loan_original_amount - df.loan_balance
146-
assert len(loan_diff[loan_diff < 0]) == 0, (
147-
"Loan balance exceeds original loan amount for some loans."
148-
)
149-
150150
return df
151151

152152

@@ -155,6 +155,18 @@ def core_rus12__yearly_plant_labor(raw_rus12__plant_labor):
155155
"""Transform the raw_rus12__plant_labor table."""
156156
df = rus.early_transform(raw_df=raw_rus12__plant_labor)
157157

158+
# Remove duplicate Walter Scott plant entries.
159+
exclude_cols = ["borrower_id_rus", "borrower_name_rus"]
160+
dupe_mask = (
161+
df["borrower_id_rus"].isin(["IA0083", "IA0084"])
162+
& (df["plant_name_rus"] == "Walter Scott")
163+
& df.drop(columns=exclude_cols).duplicated(keep=False)
164+
& (
165+
df["borrower_id_rus"] == "IA0083"
166+
) # dropping the IA0083 and keeping the IA0084 so both borrowers show up in the data.
167+
)
168+
df = df.loc[~dupe_mask]
169+
158170
# Test payroll_total column so can remove it in the schema
159171
payroll_cols = [
160172
"payroll_maintenance",
@@ -212,9 +224,6 @@ def core_rus12__yearly_sources_and_distribution_by_plant_type(
212224
converter=0.001,
213225
)
214226

215-
# # Make sure plant num is only int values and then convert to integer
216-
# assert (df.plant_num.dropna() % 1 == 0).all()
217-
# df.plant_num = df.plant_num.astype("Int64")
218227
# TODO: add is_total column.
219228
return df
220229

0 commit comments

Comments
 (0)