# A tsibble: 74 x 7 [1Q]
Quarter Beer Tobacco Bricks Cement Electricity Gas
<qtr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1992 Q1 443 5777 383 1289 38332 117
2 1992 Q2 410 5853 404 1501 39774 151
3 1992 Q3 420 6416 446 1539 42246 175
4 1992 Q4 532 5825 420 1568 38498 129
5 1993 Q1 433 5724 394 1450 39460 116
6 1993 Q2 421 6036 462 1668 41356 149
7 1993 Q3 410 6570 475 1648 42949 163
8 1993 Q4 512 5675 443 1863 40974 138
9 1994 Q1 449 5311 421 1468 40162 127
10 1994 Q2 381 5717 475 1755 41199 159
# … with 64 more rows
Each graph shows y_t plotted against y_{t-k} for different values of k.
Covariance and correlation: measure extent of linear relationship between two variables (y and X).
Autocovariance and autocorrelation: measure linear relationship between lagged values of a time series y.
We measure the relationship between:
We denote the sample autocovariance at lag k by c_k and the sample autocorrelation at lag k by r_k. Then define
c_k = \frac{1}{T}\sum_{t=k+1}^T (y_t-\bar{y})(y_{t-k}-\bar{y}), \qquad r_k = c_k / c_0
Results for first 9 lags for beer data:
# Daily closing prices for Google (GOOG) over calendar year 2015.
google_2015 <- gafa_stock %>%
  filter(Symbol == "GOOG") %>%
  filter(year(Date) == 2015) %>%
  select(Date, Close)
google_2015
# A tsibble: 252 x 2 [!]
Date Close
<date> <dbl>
1 2015-01-02 522.
2 2015-01-05 511.
3 2015-01-06 499.
4 2015-01-07 498.
5 2015-01-08 500.
6 2015-01-09 493.
7 2015-01-12 490.
8 2015-01-13 493.
9 2015-01-14 498.
10 2015-01-15 499.
# … with 242 more rows
White noise data is uncorrelated across time with zero mean and constant variance.
(Technically, we require independence as well.)
r_{1} | r_{2} | r_{3} | r_{4} | r_{5} | r_{6} | r_{7} | r_{8} | r_{9} | r_{10} |
---|---|---|---|---|---|---|---|---|---|
0.014 | -0.163 | 0.163 | -0.259 | -0.198 | 0.064 | -0.139 | -0.032 | 0.199 | -0.024 |
Sampling distribution of r_k for white noise data is asymptotically N(0,1/T).
Monthly total number of pigs slaughtered in the state of Victoria, Australia, from January 2014 through December 2018 (Source: Australian Bureau of Statistics.)
These show the series is not a white noise series.
You can compute the daily changes in the Google stock price in 2018 using
Does the differenced series (computed with diff) look like white noise?
Getting started
library(tidyverse)
library(fpp3)
library(purrr)
library(gganimate)
Consider the GDP information in global_economy
. Plot the GDP per capita for each country over time. Which country has the highest GDP per capita? How has this changed over time?
# Top 10 country-years by GDP per capita.
# Fixed: `top_n()` is superseded in dplyr — use `slice_max()` — and the
# redundant `.` placeholder inside the pipe is dropped. The result is a
# tsibble, which is always re-ordered by key/index, so the displayed
# rows are unchanged.
global_economy %>%
  mutate(GDPPC = GDP / Population) %>%
  select(Country, Year, GDPPC) %>%
  slice_max(GDPPC, n = 10)
# A tsibble: 10 x 3 [1Y]
# Key: Country [2]
Country Year GDPPC
<fct> <dbl> <dbl>
1 Liechtenstein 2013 173528.
2 Liechtenstein 2014 179308.
3 Liechtenstein 2015 167591.
4 Liechtenstein 2016 164993.
5 Monaco 2007 167125.
6 Monaco 2008 180640.
7 Monaco 2013 172589.
8 Monaco 2014 185153.
9 Monaco 2015 163369.
10 Monaco 2016 168011.
# Bind the tsibbledata copy of global_economy under its usual name.
global_economy <- tsibbledata::global_economy
# Annual turnover for newspaper and book retailing, plotted alongside a
# CPI-deflated ("real") version of the same series.
print_retail <- aus_retail %>%
  filter(Industry == "Newspaper and book retailing") %>%
  group_by(Industry) %>%
  index_by(Year = year(Month)) %>%
  summarise(Turnover = sum(Turnover))
aus_economy <- global_economy %>%
  filter(Code == "AUS")
print_retail %>%
  left_join(aus_economy, by = "Year") %>%
  mutate(Adj_turnover = Turnover / CPI) %>%
  pivot_longer(c(Turnover, Adj_turnover),
               names_to = "Type", values_to = "Turnover") %>%
  ggplot(aes(x = Year, y = Turnover)) +
  geom_line() +
  facet_grid(vars(Type), scales = "free_y") +
  xlab("Years") +
  ylab(NULL) +
  ggtitle("Turnover: Australian print media industry") +
  hrbrthemes::theme_ipsum_rc()
If the data show different variation at different levels of the series, then a transformation can be useful.
Denote original observations as y_1,\dots,y_n and transformed observations as w_1, \dots, w_n.
Transformations | |
---|---|
Square root | w_t = \sqrt{y_t} |
Cube root | w_t = \sqrt[3]{y_t} |
Logarithm | w_t = \log(y_t) |
Logarithms, in particular, are useful because they are more interpretable: changes in a log value are relative (percent) changes on the original scale.
Each of these transformations is close to a member of the family of Box-Cox transformations: w_t = \left\{\begin{array}{ll} \log(y_t), & \quad \lambda = 0; \\ (y_t^\lambda-1)/\lambda , & \quad \lambda \ne 0. \end{array}\right.
# Animate the Box-Cox transformation of Turnover as lambda sweeps 0..1:
# one transformed column per lambda (spliced in with !!!), reshaped to
# long form, then one animation frame per lambda value.
# Fixed: the original ended the long pipe with right-assignment
# (`-> my.anim`), hiding the assignment target; assign up front with
# `<-`. The lambda grid is also hoisted into a named object so it is not
# written out twice.
lambdas <- seq(0, 1, 0.01)
my.anim <- food %>%
  mutate(!!!set_names(map(lambdas, ~expr(fabletools::box_cox(Turnover,
    !!.x))), lambdas)) %>%
  select(-Turnover) %>%
  pivot_longer(-Month, names_to = "lambda", values_to = "Turnover") %>%
  mutate(lambda = as.numeric(lambda)) %>%
  ggplot(aes(x = Month, y = Turnover)) + geom_line() + transition_states(1 -
    lambda, state_length = 0) + view_follow() + ggtitle("Box-CoxT(lambda = {format(1 - as.numeric(closest_state), digits = 2)})") +
  hrbrthemes::theme_ipsum_rc()
# save_animation('./img/Anim1.gif')
log1p() can be useful for data with zeros (transformations are supported by fable).
Datasets to recall and explore: global_economy, aus_livestock, vic_elec, aus_production, canadian_gas.
Trend pattern exists when there is a long-term increase or decrease in the data.
Cyclic pattern exists when data exhibit rises and falls that are not of fixed period (duration usually of at least 2 years).
Seasonal pattern exists when a series is influenced by seasonal factors (e.g., the quarter of the year, the month, or day of the week).
# US retail-trade employment since 1990; overlay the STL seasonally
# adjusted series (blue) on the raw data (gray).
us_retail_employment <- us_employment %>%
  filter(Title == "Retail Trade") %>%
  filter(year(Month) >= 1990) %>%
  select(-Series_ID)
dcmp <- model(us_retail_employment, STL(Employed))
autoplot(us_retail_employment, Employed, color = "gray") +
  autolayer(components(dcmp), season_adjust, color = "blue") +
  labs(y = "Persons (thousands)", title = "Total employment in US retail")
The general idea is a moving window. We will set .before
and .after
as follows.
# Centred 5-term moving average of Australian exports: each value
# averages the current year with two years on either side; windows that
# run off the ends of the series give NA (.complete = TRUE).
aus_exports <- global_economy %>%
  filter(Country == "Australia") %>%
  mutate(`5-MA` = slider::slide_dbl(Exports, mean,
    .before = 2, .after = 2, .complete = TRUE))
aus_exports
# A tsibble: 58 x 10 [1Y]
# Key: Country [1]
Country Code Year GDP Growth CPI Imports Exports Popul…¹ `5-MA`
<fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Austral… AUS 1960 1.86e10 NA 7.96 14.1 13.0 1.03e7 NA
2 Austral… AUS 1961 1.96e10 2.49 8.14 15.0 12.4 1.05e7 NA
3 Austral… AUS 1962 1.99e10 1.30 8.12 12.6 13.9 1.07e7 13.5
4 Austral… AUS 1963 2.15e10 6.21 8.17 13.8 13.0 1.10e7 13.5
5 Austral… AUS 1964 2.38e10 6.98 8.40 13.8 14.9 1.12e7 13.6
6 Austral… AUS 1965 2.59e10 5.98 8.69 15.3 13.2 1.14e7 13.4
7 Austral… AUS 1966 2.73e10 2.38 8.98 15.1 12.9 1.17e7 13.3
8 Austral… AUS 1967 3.04e10 6.30 9.29 13.9 12.9 1.18e7 12.7
9 Austral… AUS 1968 3.27e10 5.10 9.52 14.5 12.3 1.20e7 12.6
10 Austral… AUS 1969 3.66e10 7.04 9.83 13.3 12.0 1.23e7 12.6
# … with 48 more rows, and abbreviated variable name ¹Population
# 2x5-MA: a 2-term moving average (current and previous value) applied
# on top of the 5-MA.
aus_exports2 <- aus_exports %>%
  mutate(`2x5-MA` = slider::slide_dbl(`5-MA`, mean,
    .before = 1, .after = 0, .complete = TRUE))
aus_exports2
# A tsibble: 58 x 11 [1Y]
# Key: Country [1]
Country Code Year GDP Growth CPI Imports Exports Popul…¹ `5-MA`
<fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Austral… AUS 1960 1.86e10 NA 7.96 14.1 13.0 1.03e7 NA
2 Austral… AUS 1961 1.96e10 2.49 8.14 15.0 12.4 1.05e7 NA
3 Austral… AUS 1962 1.99e10 1.30 8.12 12.6 13.9 1.07e7 13.5
4 Austral… AUS 1963 2.15e10 6.21 8.17 13.8 13.0 1.10e7 13.5
5 Austral… AUS 1964 2.38e10 6.98 8.40 13.8 14.9 1.12e7 13.6
6 Austral… AUS 1965 2.59e10 5.98 8.69 15.3 13.2 1.14e7 13.4
7 Austral… AUS 1966 2.73e10 2.38 8.98 15.1 12.9 1.17e7 13.3
8 Austral… AUS 1967 3.04e10 6.30 9.29 13.9 12.9 1.18e7 12.7
9 Austral… AUS 1968 3.27e10 5.10 9.52 14.5 12.3 1.20e7 12.6
10 Austral… AUS 1969 3.66e10 7.04 9.83 13.3 12.0 1.23e7 12.6
# … with 48 more rows, 1 more variable: `2x5-MA` <dbl>, and abbreviated
# variable name ¹Population
y_t = f(S_t, T_t, R_t)
where y_t = data at period t
T_t = trend-cycle component at period t
S_t = seasonal component at period t
R_t = remainder component at period t
Additive decomposition: y_t = S_t + T_t + R_t.
Multiplicative decomposition: y_t = S_t \times T_t \times R_t.
y_t = S_t \times T_t \times R_t \quad\Rightarrow\quad \log y_t = \log S_t + \log T_t + \log R_t.
# (Re)build the retail-trade employment tsibble used below, and print it.
us_retail_employment <- us_employment %>%
  filter(Title == "Retail Trade") %>%
  filter(year(Month) >= 1990) %>%
  select(-Series_ID)
us_retail_employment
# A tsibble: 357 x 3 [1M]
Month Title Employed
<mth> <chr> <dbl>
1 1990 Jan Retail Trade 13256.
2 1990 Feb Retail Trade 12966.
3 1990 Mar Retail Trade 12938.
4 1990 Apr Retail Trade 13012.
5 1990 May Retail Trade 13108.
6 1990 Jun Retail Trade 13183.
7 1990 Jul Retail Trade 13170.
8 1990 Aug Retail Trade 13160.
9 1990 Sep Retail Trade 13113.
10 1990 Oct Retail Trade 13185.
# … with 347 more rows
# Classical additive decomposition: Employed = trend + seasonal + random.
USREDC <- components(
  model(us_retail_employment,
        classical_decomposition(Employed, type = "additive")))
USREDC
# A dable: 357 x 7 [1M]
# Key: .model [1]
# : Employed = trend + seasonal + random
.model Month Emplo…¹ trend seaso…² random seaso…³
<chr> <mth> <dbl> <dbl> <dbl> <dbl> <dbl>
1 "classical_decomposition… 1990 Jan 13256. NA -75.5 NA 13331.
2 "classical_decomposition… 1990 Feb 12966. NA -273. NA 13239.
3 "classical_decomposition… 1990 Mar 12938. NA -253. NA 13191.
4 "classical_decomposition… 1990 Apr 13012. NA -190. NA 13203.
5 "classical_decomposition… 1990 May 13108. NA -88.9 NA 13197.
6 "classical_decomposition… 1990 Jun 13183. NA -10.4 NA 13193.
7 "classical_decomposition… 1990 Jul 13170. 13178. -13.3 5.65 13183.
8 "classical_decomposition… 1990 Aug 13160. 13161. -9.99 8.80 13169.
9 "classical_decomposition… 1990 Sep 13113. 13141. -87.4 59.9 13201.
10 "classical_decomposition… 1990 Oct 13185. 13117. 34.6 33.8 13151.
# … with 347 more rows, and abbreviated variable names ¹Employed,
# ²seasonal, ³season_adjust
# A dable: 357 x 7 [1M]
# Key: .model [1]
# : Employed = trend + season_year + remainder
.model Month Employed trend season_year remainder season_adjust
<chr> <mth> <dbl> <dbl> <dbl> <dbl> <dbl>
1 stl 1990 Jan 13256. 13288. -33.0 0.836 13289.
2 stl 1990 Feb 12966. 13269. -258. -44.6 13224.
3 stl 1990 Mar 12938. 13250. -290. -22.1 13228.
4 stl 1990 Apr 13012. 13231. -220. 1.05 13232.
5 stl 1990 May 13108. 13211. -114. 11.3 13223.
6 stl 1990 Jun 13183. 13192. -24.3 15.5 13207.
7 stl 1990 Jul 13170. 13172. -23.2 21.6 13193.
8 stl 1990 Aug 13160. 13151. -9.52 17.8 13169.
9 stl 1990 Sep 13113. 13131. -39.5 22.0 13153.
10 stl 1990 Oct 13185. 13110. 61.6 13.2 13124.
# … with 347 more rows
Advantages
Disadvantages
# X-13ARIMA-SEATS decomposition of retail employment.
# Fixed: X_13ARIMA_SEATS() is exported from feasts, so use `::` rather
# than reaching into the namespace with `:::`.
# NOTE(review): the object is named X11_dcmp but the model spec is named
# "seats" and no x11() special is supplied, so this fits the default
# decomposition method — confirm whether x11() was intended
# (X_13ARIMA_SEATS(Employed ~ x11())).
X11_dcmp <- us_retail_employment %>%
  model(seats = feasts::X_13ARIMA_SEATS(Employed)) %>%
  components()
X11_dcmp
# A dable: 357 x 7 [1M]
# Key: .model [1]
# : Employed = f(trend, seasonal, irregular)
.model Month Employed trend seasonal irregular season_adjust
<chr> <mth> <dbl> <dbl> <dbl> <dbl> <dbl>
1 seats 1990 Jan 13256. 13261. 0.999 1.00 13266.
2 seats 1990 Feb 12966. 13243. 0.980 0.999 13235.
3 seats 1990 Mar 12938. 13236. 0.977 1.00 13238.
4 seats 1990 Apr 13012. 13233. 0.983 1.00 13235.
5 seats 1990 May 13108. 13222. 0.991 1.00 13223.
6 seats 1990 Jun 13183. 13206. 0.998 1.00 13205.
7 seats 1990 Jul 13170. 13187. 0.999 1.00 13190.
8 seats 1990 Aug 13160. 13165. 1.00 1.00 13162.
9 seats 1990 Sep 13113. 13145. 0.998 1.00 13146.
10 seats 1990 Oct 13185. 13129. 1.00 1.00 13126.
# … with 347 more rows
Advantages
Disadvantages
# SEATS decomposition of retail employment.
# NOTE(review): `feasts:::SEATS()` accesses a non-exported (and possibly
# removed) function; current feasts exposes SEATS through
# X_13ARIMA_SEATS(Employed ~ seats()) — verify this chunk still runs
# against the installed feasts version.
seats_dcmp <- us_retail_employment %>%
model(seats = feasts:::SEATS(Employed)) %>%
components()
seats_dcmp
# A dable: 357 x 7 [1M]
# Key: .model [1]
# : Employed = trend * seasonal * irregular
.model Month Employed trend seasonal irregular season_adjust
<chr> <mth> <dbl> <dbl> <dbl> <dbl> <dbl>
1 seats 1990 Jan 13256. 13261. 0.999 1.00 13266.
2 seats 1990 Feb 12966. 13243. 0.980 0.999 13235.
3 seats 1990 Mar 12938. 13236. 0.977 1.00 13238.
4 seats 1990 Apr 13012. 13233. 0.983 1.00 13235.
5 seats 1990 May 13108. 13222. 0.991 1.00 13223.
6 seats 1990 Jun 13183. 13206. 0.998 1.00 13205.
7 seats 1990 Jul 13170. 13187. 0.999 1.00 13190.
8 seats 1990 Aug 13160. 13165. 1.00 1.00 13162.
9 seats 1990 Sep 13113. 13145. 0.998 1.00 13146.
10 seats 1990 Oct 13185. 13129. 1.00 1.00 13126.
# … with 347 more rows
# Fit one STL decomposition per seasonal-window width (5, 7, ..., 55)
# and animate how the estimated components change as the window widens.
s_windows <- seq(5, 55, by = 2)
stl_defs <- purrr::map(s_windows, function(s_window) {
STL(Employed ~ season(window = s_window), robust = TRUE)
})
# The names ("season(window=05)", ...) become the .model labels below.
names(stl_defs) <- sprintf("season(window=%02d)", s_windows)
us_retail_employment %>%
model(!!!stl_defs) %>%  # splice every STL spec in as a separate model
components() %>%
as_tibble() %>%
pivot_longer(Employed:remainder, names_to = "component", names_ptypes = list(component = factor(levels = c("Employed",
"trend", "season_year", "remainder"))), values_to = "Employed") %>%
ggplot(aes(x = Month, y = Employed)) + geom_line() + facet_grid(rows = vars(component),
scales = "free_y") + labs(title = "STL decomposition of US retail employment",
subtitle = "{closest_state}") + transition_states(.model)
trend(window = ?) controls the wiggliness of the trend component.
season(window = ?) controls the variation in the seasonal component.
season(window = 'periodic') is equivalent to an infinite window.
STL() chooses season(window = 13) by default.
Default trend window = nextodd(ceiling((1.5 * period) / (1 - (1.5 / s.window))))
# A tsibble: 440 x 4 [1M]
Time Month Year Employed
<mth> <ord> <dbl> <dbl>
1 1978 Feb Feb 1978 5986.
2 1978 Mar Mar 1978 6041.
3 1978 Apr Apr 1978 6054.
4 1978 May May 1978 6038.
5 1978 Jun Jun 1978 6031.
6 1978 Jul Jul 1978 6036.
7 1978 Aug Aug 1978 6005.
8 1978 Sep Sep 1978 6024.
9 1978 Oct Oct 1978 6046.
10 1978 Nov Nov 1978 6034.
# … with 430 more rows
Panel data. Multiple time series are often described as a panel, a cross-section of time series, or a time series of cross-sections. The data structure has two [non-overlapping] indices. Let’s review, and discuss a bit, what exactly we mean.
fredr
is amazing.
# Download every FRED series that appears in us_employment, bind the
# results row-wise, and cache them to disk.
# Fixed: `c(rownames(table(x)))` is a convoluted way to obtain the
# sorted unique IDs as character — use sort(unique(as.character(...)))
# directly (same IDs, same order).
US.Employment <- map_dfr(
  sort(unique(as.character(us_employment$Series_ID))),
  ~fredr::fredr_series_observations(.))
save(US.Employment, file = "USEmployment.RData")
# Two alternative ways to restore the cached data (remote, then local).
# NOTE(review): the second load() overwrites the first, so only one of
# these is needed in practice — keep whichever source applies.
load(url("https://github.com/robertwwalker/xaringan/raw/master/CMF-Week-9/USEmployment.RData"))
load("USEmployment.RData")
# Build a Series_ID -> Title lookup from us_employment, attach the
# human-readable titles to the downloaded FRED data, and convert to a
# tsibble indexed by year-month with one series per Title.
# Fixed: right-assignment (`-> Names.List`) at the end of a long pipe
# hides the assignment target; assign up front with `<-`. The redundant
# `.` placeholder in as_tsibble() is also dropped.
Names.List <- us_employment %>%
  data.frame() %>%
  group_by(Series_ID) %>%
  summarise(Title = first(Title)) %>%
  mutate(series_id = Series_ID) %>%
  ungroup() %>%
  select(-Series_ID)
US.Employment.T <- left_join(US.Employment, Names.List, by = c(series_id = "series_id")) %>%
  mutate(YM = yearmonth(date)) %>%
  rename(Employed = value) %>%
  as_tsibble(index = YM, key = Title)
For much of the study of time series, the key issue is one known as stationarity. For now, we will do at least some hand waving to be clarified in chapters 5 and more in 9. But we want to compute things and then build out all the details. Let’s take my new retail employment data.
# Compare the freshly downloaded retail-trade series (red) against the
# packaged us_employment copy (black), both restricted to after Jan 1990.
EMPN <- US.Employment.T %>%
  filter(Title == "Retail Trade") %>%
  filter(YM > yearmonth("1990-01")) %>%
  as_tsibble(index = YM)
EMPO <- us_employment %>%
  filter(Title == "Retail Trade") %>%
  filter(Month > yearmonth("1990-01")) %>%
  as_tsibble(index = Month)
Plot1 <- ggplot(EMPN, aes(x = YM, y = Employed)) +
  geom_line(color = "red") +
  geom_line(data = EMPO, aes(x = Month, y = Employed), inherit.aes = FALSE)
Plot1
The features command is the magic tool for tidy summary and statistics for time series in this index/key format. For example, basic summary
# A tibble: 3 × 10
Title mean min max sd `0%` `25%` `50%` `75%` `100%`
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Financial … 7767. 6472 8846 640. 6472 7363 7876 8226. 8846
2 Manufactur… 14554. 11340 17870 2241. 11340 12333. 14219 17088 17870
3 Retail Tra… 14746. 12548. 16394. 915. 12548. 14336. 14962. 15387. 16394.
Learning about the time series properties
# A tibble: 3 × 8
Title acf1 acf10 diff1_…¹ diff1…² diff2…³ diff2…⁴ seaso…⁵
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Financial Activities 0.990 8.94 0.283 0.165 -0.313 0.415 0.883
2 Manufacturing 0.995 9.35 0.0466 0.128 -0.499 0.505 0.925
3 Retail Trade 0.951 7.29 0.133 0.377 -0.198 0.305 0.876
# … with abbreviated variable names ¹diff1_acf1, ²diff1_acf10,
# ³diff2_acf1, ⁴diff2_acf10, ⁵season_acf1
The 6/7 and 12/13 patterns are interesting….
# A tibble: 3 × 10
Title trend…¹ seaso…² seaso…³ seaso…⁴ spiki…⁵ linea…⁶ curva…⁷ stl_e…⁸
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Financia… 0.999 0.820 6 3 1.28e1 10603. -3014. 0.718
2 Manufact… 0.999 0.555 8 3 8.41e3 -39200. 4365. 0.612
3 Retail T… 0.983 0.834 11 3 1.11e5 13098. -6397. 0.506
# … with 1 more variable: stl_e_acf10 <dbl>, and abbreviated variable
# names ¹trend_strength, ²seasonal_strength_year, ³seasonal_peak_year,
# ⁴seasonal_trough_year, ⁵spikiness, ⁶linearity, ⁷curvature, ⁸stl_e_acf1
The details are at the bottom for other statistics.
# Render the full STL feature table as a scrollable HTML widget.
library(kableExtra)
stl_feature_tbl <- features(USET8k, Employed, feat_stl)
stl_feature_tbl %>%
  knitr::kable(format = "html") %>%
  scroll_box(width = "100%", height = "300px")
Title | trend_strength | seasonal_strength_year | seasonal_peak_year | seasonal_trough_year | spikiness | linearity | curvature | stl_e_acf1 | stl_e_acf10 |
---|---|---|---|---|---|---|---|---|---|
Construction | 0.999 | 0.963 | 8 | 2 | 2.10e+02 | 53918 | -1084.7 | 0.576 | 0.526 |
Durable Goods | 0.994 | 0.215 | 9 | 7 | 3.42e+03 | 1295 | -40394.6 | 0.750 | 1.251 |
Education and Health Services | 1.000 | 0.715 | 11 | 7 | 7.21e+03 | 217955 | 58537.7 | 0.529 | 0.580 |
Education and Health Services: Health Care | 0.999 | 0.354 | 0 | 4 | 1.59e+04 | 46025 | -436.7 | 0.495 | 0.533 |
Education and Health Services: Health Care and Social Assistance | 0.999 | 0.313 | 0 | 4 | 6.19e+04 | 63326 | 17.6 | 0.517 | 0.584 |
Financial Activities | 1.000 | 0.870 | 7 | 4 | 6.92e-01 | 78437 | -273.1 | 0.723 | 0.879 |
Goods-Producing | 0.996 | 0.812 | 9 | 2 | 1.12e+04 | 43343 | -64790.2 | 0.734 | 1.081 |
Government | 1.000 | 0.981 | 11 | 7 | 3.94e+02 | 190941 | -19815.3 | 0.599 | 0.538 |
Government: Local Government | 1.000 | 0.986 | 5 | 7 | 3.14e+02 | 96008 | -15450.2 | 0.640 | 0.692 |
Government: Local Government Education | 1.000 | 0.996 | 3 | 7 | 3.54e+01 | 54936 | -8479.9 | 0.538 | 0.480 |
Leisure and Hospitality | 0.997 | 0.607 | 7 | 4 | 5.47e+05 | 136395 | 22427.3 | 0.522 | 0.604 |
Leisure and Hospitality: Accommodation and Food Services | 0.971 | 0.473 | 8 | 4 | 5.27e+06 | 31615 | -1157.7 | 0.499 | 0.541 |
Leisure and Hospitality: Food Services and Drinking Places | 0.975 | 0.404 | 6 | 4 | 3.26e+06 | 29889 | -377.9 | 0.476 | 0.497 |
Manufacturing | 0.997 | 0.434 | 9 | 2 | 5.19e+03 | -8507 | -64155.3 | 0.772 | 1.262 |
Private Service-Providing | 1.000 | 0.543 | 0 | 4 | 1.50e+07 | 906495 | 115965.6 | 0.545 | 0.623 |
Professional and Business Services | 1.000 | 0.675 | 11 | 1 | 3.62e+03 | 187091 | 41544.7 | 0.626 | 0.810 |
Professional and Business Services: Administrative and Support Services | 0.995 | 0.808 | 11 | 1 | 1.84e+04 | 21351 | -8027.1 | 0.617 | 0.797 |
Professional and Business Services: Administrative and Waste Services | 0.995 | 0.810 | 11 | 1 | 1.93e+04 | 22521 | -8025.8 | 0.617 | 0.804 |
Professional and Business Services: Professional and Technical Services | 1.000 | 0.691 | 2 | 5 | 1.81e+02 | 29125 | -1380.0 | 0.626 | 0.719 |
Retail Trade | 1.000 | 0.881 | 0 | 4 | 5.78e+03 | 135654 | -6877.1 | 0.511 | 0.473 |
Trade, Transportation, and Utilities | 1.000 | 0.845 | 0 | 4 | 2.02e+04 | 211628 | -7668.1 | 0.566 | 0.583 |
coef_hurst
A measure of the degree to which adjacent observations depend on one another over time. Generically, this statistic takes values between zero and one with one indicating very high levels of dependence through time.
feat_spectral
Ljung-Box modifies the idea in the Box-Pierce statistic for assessing whether or not a given series [or transformation thereof] is essentially uncorrelated. In both cases, we will get to the details next week [chapter 5]. For now, the idea is simply that k squared autocorrelations will sum to a chi-squared distribution with k degrees of freedom. Large correlations reveal dependence.
# A tibble: 3 × 5
Title bp_stat bp_pvalue lb_stat lb_pvalue
<chr> <dbl> <dbl> <dbl> <dbl>
1 Financial Activities 365. 0 368. 0
2 Manufacturing 368. 0 371. 0
3 Retail Trade 337. 0 339. 0
# A tibble: 1 × 4
bp_stat bp_pvalue lb_stat lb_pvalue
<dbl> <dbl> <dbl> <dbl>
1 0.0359 0.850 0.0363 0.849
feat_pacf
# A tibble: 3 × 5
Title pacf5 diff1_pacf5 diff2_pacf5 season_pacf
<chr> <dbl> <dbl> <dbl> <dbl>
1 Financial Activities 0.987 0.712 1.00 -0.0555
2 Manufacturing 0.994 0.238 0.791 0.0348
3 Retail Trade 1.08 0.834 1.06 -0.0188
# A tibble: 1 × 4
pacf5 diff1_pacf5 diff2_pacf5 season_pacf
<dbl> <dbl> <dbl> <dbl>
1 0.00612 0.632 1.04 0.109
The stationarity issue from earlier is given much attention. Can we reasonably think of characteristics as fixed? There are three means of assessment with details to Chapter 9.
# Unit-root / stationarity diagnostics for each employment series.
unit_root_feats <- list(unitroot_kpss, unitroot_pp,
                        unitroot_ndiffs, unitroot_nsdiffs)
USET %>%
  features(Employed, features = unit_root_feats) %>%
  knitr::kable(format = "html")
Title | kpss_stat | kpss_pvalue | pp_stat | pp_pvalue | ndiffs | nsdiffs |
---|---|---|---|---|---|---|
Financial Activities | 4.63 | 0.01 | -1.193 | 0.100 | 1 | 1 |
Manufacturing | 5.68 | 0.01 | -0.938 | 0.100 | 1 | 0 |
Retail Trade | 3.91 | 0.01 | -2.636 | 0.089 | 1 | 1 |
# The same unit-root diagnostics for the monthly returns series.
FC %>%
  features(monthly.returns,
           features = list(unitroot_kpss, unitroot_pp,
                           unitroot_ndiffs, unitroot_nsdiffs))
# A tibble: 1 × 6
kpss_stat kpss_pvalue pp_stat pp_pvalue ndiffs nsdiffs
<dbl> <dbl> <dbl> <dbl> <int> <int>
1 0.0895 0.1 -16.6 0.01 0 0
# A tibble: 3 × 3
Title var_tiled_mean var_tiled_var
<chr> <dbl> <dbl>
1 Financial Activities 1.02 0.0000411
2 Manufacturing 1.03 0.0000923
3 Retail Trade 0.922 0.0136
# A tibble: 1 × 2
var_tiled_mean var_tiled_var
<dbl> <dbl>
1 0.118 2.59
# Shift features: magnitude and index of the largest level, variance,
# and distributional (KL) shifts in each series.
shift_feats <- list(shift_level_max, shift_var_max, shift_kl_max)
USET %>%
  features(Employed, features = shift_feats) %>%
  kable(format = "html")
Title | shift_level_max | shift_level_index | shift_var_max | shift_var_index | shift_kl_max | shift_kl_index |
---|---|---|---|---|---|---|
Financial Activities | 371 | 229 | 24037 | 233 | 0.299 | 227 |
Manufacturing | 1559 | 228 | 417020 | 235 | 0.522 | 227 |
Retail Trade | 777 | 226 | 788931 | 354 | 1.841 | 227 |
# The same shift features for the monthly returns series.
FC %>%
  features(monthly.returns,
           features = list(shift_level_max, shift_var_max,
                           shift_kl_max)) %>%
  kable(format = "html")
shift_level_max | shift_level_index | shift_var_max | shift_var_index | shift_kl_max | shift_kl_index |
---|---|---|---|---|---|
0.258 | 110 | 0.194 | 113 | 36.8 | 112 |
# Crossing points and longest flat spot for each employment series.
USET %>%
  features(Employed,
           features = list(n_crossing_points, longest_flat_spot)) %>%
  kable(format = "html")
Title | n_crossing_points | longest_flat_spot |
---|---|---|
Financial Activities | 5 | 40 |
Manufacturing | 11 | 52 |
Retail Trade | 31 | 10 |
# The same two features for the monthly returns series.
FC %>%
  features(monthly.returns,
           features = list(n_crossing_points, longest_flat_spot)) %>%
  kable(format = "html")
n_crossing_points | longest_flat_spot |
---|---|
129 | 8 |
What proportion of the current squared residual is explained by the prior squared residual? This reports R^2; if the variance explained is large, volatility is persistent. There is a chi-square statistic also.
Title | lambda_guerrero |
---|---|
Financial Activities | 0.948 |
Manufacturing | 1.037 |
Retail Trade | 1.186 |
lambda_guerrero |
---|
0.78 |
Let's walk through this example.
Models of Choice and Forecasting