This 16-week introductory course offers a foundation in computer science and data science, equipping students with essential programming, algorithmic thinking, and problem-solving skills. Using Python as the primary programming language, the course covers fundamental programming constructs, data structures, file handling, and an introduction to data visualization techniques. By applying these skills to real-world datasets and problems, students gain practical experience and develop interdisciplinary connections. Designed for students without prior programming experience, the course aims to create a supportive learning environment that inspires students to further hone their computational abilities and critical thinking skills.
Note: This module focuses on the technical aspects and hands-on exercises related to spatial interpolation and data-driven decision-making. It builds upon the foundation established in previous lectures, where a guest speaker from the Department of Philosophy provided an introduction to ethical considerations in computer science applications.
Acknowledgment: This module was co-developed by Colton Harper and Zachariah Wrublewski.
In this module, students work with an interactive Python notebook that guides them through the process of designing, coding, and evaluating a method to estimate US poverty data using spatial interpolation techniques. The primary focus is on exploring the ethical considerations involved in designing and implementing data-driven solutions, as well as understanding the implications of these decisions on various stakeholders.
Identifying and addressing the needs of direct and indirect stakeholders
Interpolation content and illustrations adapted from: https://gisgeography.com/inverse-distance-weighting-idw-interpolation/
All the utility functions and classes used in this notebook have been organized and stored in a separate Python file called poverty_interpolation_utils.py. This file contains the following classes: DataDownloader, CountyData, CountyPlotter, CensusData, SamplingMethods, IDWInterpolation, ErrorCalculator, and ErrorVisualizer.
You can find this file in the same directory as this notebook. If you need to modify any of the existing classes or functions, you can do so directly in the poverty_interpolation_utils.py file. Make sure to save your changes in the file before running the notebook again.
Alternatively, if you prefer to create new functions or modify existing ones directly in the notebook, you can do so by adding new code cells and defining your functions there. Remember to import any necessary modules or classes in the notebook as needed.
By keeping the utility functions and classes in a separate file, the notebook stays more organized, clean, and focused.
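For example, if you edit poverty_interpolation_utils.py while the kernel is already running, one way to pick up your changes without restarting it is to reload the module (a minimal sketch; restarting the kernel and re-running the imports works just as well):
# Reload the utilities module so the notebook sees your latest edits
import importlib
import poverty_interpolation_utils
importlib.reload(poverty_interpolation_utils)
from poverty_interpolation_utils import SamplingMethods  # re-import whatever names you use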
import pandas as pd
import plotly.express as px
import numpy as np
import math
import requests
from urllib.request import urlopen
import json
import os
import matplotlib.pyplot as plt
from poverty_interpolation_utils import DataDownloader, CountyData, CountyPlotter, CensusData, SamplingMethods, IDWInterpolation, ErrorCalculator, ErrorVisualizer
censusVar = 'DP03_0120PE'
Background:
Approach Overview:
Use Case/Implications:
Our Goal: Design a method to sample 50% of Nebraska counties to obtain the percentage of households in poverty. Then, use spatial interpolation methods to estimate the poverty rate in the remaining (unsampled) 50% of Nebraska counties.
Call to Action: As developers, we need to understand the elements of our design that can impact the performance of our method. Let's explore some essential design considerations and their tradeoffs.
General Approach:
Though there are numerous poverty indicators, we'll focus on a single measure for the scope of this project.
We will use the variable DP03_0120PE, which represents county-level data for the percentage of families and people whose income in the past 12 months was below the poverty level. Specifically, it corresponds to impoverished households with related children of the householder under 18 years. Let's start by importing the census data.
We will fetch the ACS data for 2015 and 2020.
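For context, here is a hedged sketch of what fetching this variable directly from the public ACS 5-year Data Profile API could look like. The endpoint, parameters, and parsing shown here are assumptions for illustration; the actual download logic lives in the CensusData class in poverty_interpolation_utils.py and may differ.
import requests
import pandas as pd

year = "2020"
url = f"https://api.census.gov/data/{year}/acs/acs5/profile"
params = {
    "get": "NAME,DP03_0120PE",  # county name plus the poverty variable
    "for": "county:*",          # every county...
    "in": "state:31",           # ...in Nebraska (FIPS state code 31)
}
rows = requests.get(url, params=params).json()   # first row of the response is the header
df = pd.DataFrame(rows[1:], columns=rows[0])
df["DP03_0120PE"] = pd.to_numeric(df["DP03_0120PE"], errors="coerce")
print(df.head())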
# Instantiate the Classes
data_downloader = DataDownloader()
county_data = CountyData(data_downloader)
census_data = CensusData(county_data)
county_plotter = CountyPlotter()
# Download the necessary data using the instances:
data_downloader.download_fips_data()
data_downloader.download_county_centers()
county_plotter.download_geojson()
# Get Census Datasets
censusData2015 = census_data.getCensusPovertyDataByYear_ne("2015")
censusData2020 = census_data.getCensusPovertyDataByYear_ne("2020")
censusData2020.head()
| | DP03_0120PE | state | county | fips_id | Latitude | Longitude | countyName | stateName |
|---|---|---|---|---|---|---|---|---|
| 0 | 3.2 | 31 | 179 | 31179 | 42.210746 | -97.126243 | Wayne County | Nebraska |
| 1 | 3.2 | 31 | 089 | 31089 | 42.459287 | -98.784766 | Holt County | Nebraska |
| 2 | 3.4 | 31 | 081 | 31081 | 40.877145 | -98.021943 | Hamilton County | Nebraska |
| 3 | 3.6 | 31 | 039 | 31039 | 41.915865 | -96.788517 | Cuming County | Nebraska |
| 4 | 3.9 | 31 | 165 | 31165 | 42.483806 | -103.742605 | Sioux County | Nebraska |
Let's take a quick look at Nebraska's poverty data for 2015.
We will visualize the data as a heatmap, where counties with high poverty rates (25%+) will appear in bright yellow-green or green. In contrast, counties with low poverty rates (5% and below) will be represented in dark blue.
import plotly.express as px
import plotly.io as pio
from IPython.display import display, Image
print("Percent of Houses Under the Poverty Line Across Counties in Nebraska")
fig = county_plotter.plotCountyData_ne(censusData2015)
img_bytes = pio.to_image(fig, format="png") # Convert the figure to an image in memory (PNG format)
Image(img_bytes)
Percent of Houses Under the Poverty Line Across Counties in Nebraska
Remember, we can only survey (sample from) 50% of Nebraskan counties. Which ones should we sample? Let's consider a few approaches.
Sample the 50% of counties that previously had the highest poverty rates in the last survey
Representative Sample
There are many more ways to sample the data, many of which will likely lead to better results than the above sampling methods. We encourage you to think of some additional sampling methods.
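For instance, here is one hypothetical alternative you could define yourself; it is not part of the provided SamplingMethods class, and it only assumes the Longitude column shown in the data above:
import pandas as pd

def getHalfCounties_geographicStride(censusData: pd.DataFrame) -> pd.DataFrame:
    """Hypothetical sampling method: order counties west-to-east by longitude
    and keep every other one, so the sample is spread across the state
    rather than clustered in one region."""
    ordered = censusData.sort_values("Longitude").reset_index(drop=True)
    return ordered.iloc[::2]

# Usage (same input/output shape as the provided sampling methods):
# sample_4 = getHalfCounties_geographicStride(censusData2020)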
# Instantiate the `SamplingMethods` class:
sampling_methods = SamplingMethods()
# Modify the code to use the class instance and its methods:
sample_1 = sampling_methods.getHalfCounties_random(censusData2020)
sample_2 = sampling_methods.getHalf_highestPovCounties(censusData2015, censusData2020)
sample_3 = sampling_methods.get25PercLowestPov_25PercHighestPov(censusData2015, censusData2020)
Let's take a look at one of the sample sets plotted on a map. Here, we'll consider sample_1, our random sample:
from IPython.display import Image, display
fig = county_plotter.plotCountyData_ne(sample_1)
img_bytes = pio.to_image(fig, format="png") # Convert the figure to an image in memory (PNG format)
Image(img_bytes)
Try changing sample_1 to one of the other two samples.
We have sampled 50% of the counties in Nebraska; the remaining 50% must be estimated. We can do so (with varying degrees of error) using interpolation methods.
Interpolation
from IPython.display import Image, display
print("Linear Interpolation")
display(Image("https://gisgeography.com/wp-content/uploads/2016/05/Linear-Interpolation-2.png"))
Linear Interpolation
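As a quick numeric complement to the figure above, here is a minimal one-dimensional example of linear interpolation (illustrative only; it does not use the notebook's utility classes):
import numpy as np

# Known values at x = 0 and x = 10; estimate the value at x = 4 along the line between them.
known_x = [0, 10]
known_y = [2.0, 12.0]
estimate = np.interp(4, known_x, known_y)   # 2.0 + (4 / 10) * (12.0 - 2.0)
print(estimate)                             # 6.0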
Spatial interpolation is a similar method, but applied to higher dimensional data.
Examples where it makes sense to apply spatial interpolation methods include estimating quantities such as rainfall, elevation, or temperature at locations that were not directly measured.
There are many spatial interpolation algorithms to choose from, e.g., inverse distance weighting (IDW), kriging, and splines. IDW is a simple and commonly used algorithm, and it is the only one we will consider in this lab.
How IDW Works:
IDW estimates the value at an unknown point by computing a weighted average of the known points around it: the farther away a known point is, the less it contributes to the estimate.
There are two main settings in IDW that you can change to improve your estimates: the number of neighbors considered and the power.
The image below illustrates a plane with four known values (in red) and a value we want to interpolate (in purple). In this illustration, the estimate of the unknown point will be some weighted average of 3 of its closest neighbors. We can vary the number of neighbors we consider in pursuit of better results.
print("Inverse Distance Weighting with 3 points")
display(Image("https://gisgeography.com/wp-content/uploads/2016/05/IDW-3Points.png"))
Inverse Distance Weighting with 3 points
Power Setting: 1
Immediately below, you'll find an illustration where an unknown point is estimated using a 3-neighbor IDW with a power of 1. The image that follows illustrates the same data and settings, except the power is set to 2.
While it doesn't make a huge impact in this case, you can see that with power = 1 the estimated value is lower, because the points farther away have more influence. In the latter illustration, with power = 2, the estimated value is greater, because the closest point contributes more to the weighted average.
print("Illustration of spatial interpolation with a power of 1")
display(Image("https://gisgeography.com/wp-content/uploads/2016/05/IDW-Power1.png"))
print("Illustration of spatial interpolation with a power of 2")
display(Image("https://gisgeography.com/wp-content/uploads/2016/05/IDW-Power2.png"))
print("Inverse Distance Weighting formula")
display(Image("https://gisgeography.com/wp-content/uploads/2016/05/idw-formula.png"))
Illustration of spatial interpolation with a power of 1
Illustration of spatial interpolation with a power of 2
Inverse Distance Weighting formula
Formula: You only need to understand this at a conceptual level, so feel free to ignore this formula. If you would like to develop a stronger mathematical intuition, here is the general formula for IDW.
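To make the formula concrete, here is a minimal sketch of the IDW calculation in plain NumPy. It assumes the distances from the unknown point to the known points have already been computed, and it is illustrative only, not the notebook's interpolate() method:
import numpy as np

def idw_estimate(known_values, distances, power=2, num_neighbors=3):
    """Weight each known value by 1 / distance**power, keep only the
    num_neighbors closest points, and return the weighted average."""
    known_values = np.asarray(known_values, dtype=float)
    distances = np.asarray(distances, dtype=float)
    nearest = np.argsort(distances)[:num_neighbors]   # indices of the closest points
    weights = 1.0 / distances[nearest] ** power
    return np.sum(weights * known_values[nearest]) / np.sum(weights)

# Illustrative numbers: the closest point has value 12; the two farther points have value 10.
print(idw_estimate([12.0, 10.0, 10.0], [350, 750, 850], power=1))  # ~11.07
print(idw_estimate([12.0, 10.0, 10.0], [350, 750, 850], power=2))  # ~11.44 -- the closer point dominates more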
Notice that we use a custom function, interpolate(), to which we provide the census data, our sampled data, the IDW power, and the number of neighbors. In this example, our sample is sample_1, a random sample of 50% of the counties in Nebraska. Our power is set to 2, and the number of neighbors, numNeighbors, is set to consider the nearest 5 neighbors.
# Instantiate the `IDWInterpolation` class:
idw_interpolation = IDWInterpolation()
# Interpolate the values
sample_1_interp = idw_interpolation.interpolate(censusData2020, sample_1, power=2, numNeighbors=5)
# Plot the sampled and interpolated data
fig = idw_interpolation.plotSampleWithInterp(sample_1, sample_1_interp, county_plotter)
img_bytes = pio.to_image(fig, format="png") # Convert the figure to an image in memory (PNG format)
Image(img_bytes)
There is no straightforward answer to this. We can consider some common measures, but we may want to develop a measure more customized for our context. In general, our performance measure is going to be some function of the error between our poverty rate estimates and the actual value.
Recall the General Goal: To develop a map with estimated values that may eventually be used to inform how funds are distributed to people in poverty.
We want our performance measure to align with this aim. We can change elements of our method design, and check the performance using our performance measure. We can iteratively adapt the design and optimize to find what we think will be the 'best' design.
Performance Measures We'll Consider:
- Average Error (where error = actualValue - estimatedValue)
- Average Absolute Error
- Mean Squared Error (MSE) and Root Mean Squared Error (RMSE)
- Percent of estimates within a given error bound (e.g., within 3 percentage points)
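Conceptually, each of these measures can be computed from the per-county errors, as in the sketch below (the provided ErrorCalculator class may differ in details such as rounding and output format):
import numpy as np

def summarize_errors(actual, estimated, percent_bound=3):
    """Compute the performance measures listed above from paired actual and
    estimated poverty rates (illustrative sketch only)."""
    actual = np.asarray(actual, dtype=float)
    estimated = np.asarray(estimated, dtype=float)
    errors = actual - estimated                      # actualValue - estimatedValue
    return {
        "Average Error": errors.mean(),
        "Average Absolute Error": np.abs(errors).mean(),
        "Mean Squared Error": (errors ** 2).mean(),
        "Root Mean Squared Error": np.sqrt((errors ** 2).mean()),
        "Percent Under Error Threshold": f"{100 * (np.abs(errors) < percent_bound).mean():.2f}%",
    }

# Example: one estimate within the 3-point bound, one outside it
print(summarize_errors([10.0, 20.0], [8.0, 26.0]))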
# Get the real values that correspond to the interpolated values
sample_1_interp_withActual = idw_interpolation.getRealvaluesGivenInterpolated(censusData2020, sample_1_interp)
# Calculate the error
error_calculator = ErrorCalculator()
percentBound = 3
errors = error_calculator.getErrors(sample_1_interp_withActual, percentBound, printErrors=True)
Average Error: -1.4350797291601014
Average Absolute Error: 4.832253826027726
Mean Squared Error: 39.79716386439208
Root Mean Squared Error: 6.308499335372247
Percent Predicted With Smaller than a 3% error: 36.17%
result = error_calculator.getErrorsByQuartile(sample_1_interp_withActual, percentBound, printErrors=True)
quartile_errors = result['quartile_errors']
poverty_ranges = result['poverty_ranges']
quartile_errors
ERROR FOR QUARTILE #1 -- 3.2% poverty to 6.7% poverty:
ERROR FOR QUARTILE #2 -- 6.8% poverty to 10.8% poverty:
ERROR FOR QUARTILE #3 -- 10.9% poverty to 15.3% poverty:
ERROR FOR QUARTILE #4 -- 15.4% poverty to 36.0% poverty:
{'Quartile 1 Errors': {'Average Error': 5.036136112119926,
                       'Average Absolute Error': 5.036136112119926,
                       'Mean Squared Error': 26.95467124810375,
                       'Root Mean Squared Error': 5.191788829305729,
                       'Percent Under Error Threshold': '0.0%'},
 'Quartile 2 Errors': {'Average Error': 0.40718513287355773,
                       'Average Absolute Error': 2.4841513297567284,
                       'Mean Squared Error': 9.767625895584974,
                       'Root Mean Squared Error': 3.125320126896599,
                       'Percent Under Error Threshold': '80.0%'},
 'Quartile 3 Errors': {'Average Error': -2.2133597558727853,
                       'Average Absolute Error': 2.973863697072576,
                       'Mean Squared Error': 12.979338252317518,
                       'Root Mean Squared Error': 3.6026848671952307,
                       'Percent Under Error Threshold': '61.538%'},
 'Quartile 4 Errors': {'Average Error': -8.598379593196103,
                       'Average Absolute Error': 8.598379593196103,
                       'Mean Squared Error': 106.7169158677671,
                       'Root Mean Squared Error': 10.330387982441275,
                       'Percent Under Error Threshold': '8.333%'}}
error_visualizer = ErrorVisualizer() # Create an instance of ErrorVisualizer class
error_visualizer.plot_error_barchart(quartile_errors, poverty_ranges)
Based on example quartile errors provided below, we can interpret the performance of the interpolation method and sampling techniques on the poverty data. Here's an example interpretation of the errors and insights that can help you with the exercises:
Quartile 1 (3.2% to 7.6% poverty): High errors, indicating lower accuracy for counties with lower poverty rates.
Quartile 2 (8.5% to 10.9% poverty): Lower errors, suggesting better performance for counties with mid-range poverty rates.
Quartile 3 (11.0% to 13.4% poverty): Low errors, indicating good performance for counties with mid-range poverty rates.
Quartile 4 (13.6% to 36.0% poverty): High errors, suggesting lower accuracy for counties with higher poverty rates.
Sampling method (the getHalfCounties_random function): may not be representative of the entire dataset, leading to an uneven distribution and affecting interpolation accuracy.
Interpolation method (the standard_idw function): the choice of power and the number of nearest neighbors can influence accuracy.
The interpolation method performs well for mid-range poverty rates (Quartiles 2 and 3) but has higher errors for lower and higher poverty rates (Quartiles 1 and 4). Possible reasons:
These design decisions have implications:
In this section, you will work on improving the interpolation method settings and the sampling techniques. You will also discuss and analyze the ethical implications, development aspects, and stakeholder considerations of your approach.
Exercise 1: Interpolation Methods and Sampling Techniques
Exercise 2: Identifying Stakeholders and Evaluating Impacts
Exercise 3: Ethical Considerations and Development Aspects
Exercise 4: Balancing Trade-offs and Navigating Dilemmas
Collaborative Activity: Group Discussion and Presentation
# Interpolate the values
power = 2 # consider changing this value and observe and interpret any changes in performance
numNeighbors = 5 # consider changing this value and observe and interpret any changes in performance
sample = sample_2 # consider changing this value and observe and interpret any changes in performance
sample_2_interp = idw_interpolation.interpolate(censusData2020, sample, power, numNeighbors)
# Plot the sampled and interpolated data
fig = idw_interpolation.plotSampleWithInterp(sample_2, sample_2_interp, county_plotter)
img_bytes = pio.to_image(fig, format="png") # Convert the figure to an image in memory (PNG format)
Image(img_bytes)
# Get the real values that correspond to the interpolated values
sample_2_interp_withActual = idw_interpolation.getRealvaluesGivenInterpolated(censusData2020, sample_2_interp)
# Calculate the error
percentBound = 3
overallErrors = pd.DataFrame.from_dict(error_calculator.getErrors(sample_2_interp_withActual, percentBound), orient="index", columns=["Sample Results"])
errors_by_quartile_result = error_calculator.getErrorsByQuartile(sample_2_interp_withActual, percentBound)
# Flatten the nested {quartile: {measure: value}} results into a single indexed series for display
errorsByQuartile = pd.DataFrame.from_dict(
    {(key, sub_key): value
     for key, sub_dict in errors_by_quartile_result['quartile_errors'].items()
     for sub_key, value in sub_dict.items()},
    orient="index"
).unstack().droplevel(0, axis=0)
overallErrors
| | Sample Results |
|---|---|
| Average Error | 1.561561 |
| Average Absolute Error | 4.43028 |
| Mean Squared Error | 29.053239 |
| Root Mean Squared Error | 5.390106 |
| Percent Under Error Threshold | 36.956% |
errorsByQuartile
(Quartile 1 Errors, Average Error)                    7.468757
(Quartile 1 Errors, Average Absolute Error)           7.468757
(Quartile 1 Errors, Mean Squared Error)               61.144233
(Quartile 1 Errors, Root Mean Squared Error)          7.819478
(Quartile 1 Errors, Percent Under Error Threshold)    0.0%
(Quartile 2 Errors, Average Error)                    2.739632
(Quartile 2 Errors, Average Absolute Error)           3.076405
(Quartile 2 Errors, Mean Squared Error)               14.758129
(Quartile 2 Errors, Root Mean Squared Error)          3.841631
(Quartile 2 Errors, Percent Under Error Threshold)    58.333%
(Quartile 3 Errors, Average Error)                    -0.502834
(Quartile 3 Errors, Average Absolute Error)           2.377943
(Quartile 3 Errors, Mean Squared Error)               11.941773
(Quartile 3 Errors, Root Mean Squared Error)          3.455687
(Quartile 3 Errors, Percent Under Error Threshold)    72.727%
(Quartile 4 Errors, Average Error)                    -3.139077
(Quartile 4 Errors, Average Absolute Error)           4.880194
(Quartile 4 Errors, Mean Squared Error)               29.617114
(Quartile 4 Errors, Root Mean Squared Error)          5.442161
(Quartile 4 Errors, Percent Under Error Threshold)    16.666%
dtype: object
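For Exercise 1, rather than changing power, numNeighbors, and the sample one value at a time, you may find it helpful to sweep over several combinations and compare a single summary measure. A minimal sketch (assuming, as in the output above, that getErrors returns a dictionary containing a 'Root Mean Squared Error' entry):
# Try several power / neighbor combinations on the current sample and rank them by RMSE
results = []
for p in [1, 2, 3]:                      # IDW power values to try
    for k in [3, 5, 8]:                  # numbers of neighbors to try
        interp = idw_interpolation.interpolate(censusData2020, sample, power=p, numNeighbors=k)
        interp_withActual = idw_interpolation.getRealvaluesGivenInterpolated(censusData2020, interp)
        metrics = error_calculator.getErrors(interp_withActual, percentBound)
        results.append({"power": p, "numNeighbors": k,
                        "RMSE": metrics["Root Mean Squared Error"]})

pd.DataFrame(results).sort_values("RMSE")  # lowest RMSE first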
Assignment: Census Bureau Brief and Recommendations
In your lab groups, review the existing data and findings from your spatial interpolation exercises for estimating poverty rates in Nebraska. Given the scope of this lab, we did not extend the analysis to the entire United States, so base this assignment on your findings from Nebraska.
Write a short policy brief (1-2 pages) that:
a. Summarizes your findings from the exercises, focusing on the methods used, the ethical considerations, and the development implications.
b. Discusses the challenges and limitations of using spatial interpolation methods for estimating poverty rates in Nebraska.
c. Presents recommendations for using spatial interpolation methods, sampling techniques, and performance measures in Nebraska, along with justifications for their suitability.
d. Reflects on the trade-offs, dilemmas, and potential consequences of implementing your recommendations in the context of Nebraska.
Include a one-page executive summary of your policy brief at the beginning, summarizing the key points and recommendations.
Submit your policy brief along with any code you modified or developed during the exercises.