[1] "versão 4"
Além do #rstats
Thiago Pires
[1] "versão 4"
Tidyverse
é uma coleção de pacotes desenvolvidos pela RStudio e projetados para ciência de dados. Todos estes pacotes tentam compartilhar uma mesma sintaxe e estrutura de dados.
dplyr::
utilizado na manipulação de dadosMédia, desvio-padrão e N da idade, segundo sexo
# A tibble: 2 × 4
Sex Média `Desvio-padrão` N
<chr> <dbl> <dbl> <int>
1 female 25 13.6 81
2 male 31.6 14.1 468
Percentual de mortos, segundo sexo
ggplot2::
utilizado na visualização de dadosPercentual de mortos e vivos, segundo sexo e classe
library(ggplot2)
titanic::titanic_train |>
ggplot() +
aes(Sex, ..count../sum(..count..),
group = Survived,
fill = as.factor(Survived)) +
geom_bar(position = "fill") +
facet_grid(~Pclass) +
scale_y_continuous(labels = scales::percent) +
scale_fill_manual(values = c("#003f9a", "#5099f4"),
labels = c("não", "sim")) +
labs(x = "Sexo", y = "", fill = "Sobreviveu") +
theme_light() +
theme(text = element_text(size = 24))
stringr::
e forcats::
readr::
, tidyr::
e lubridate::
para mais manipulações# A tibble: 1 × 976
Country/Regi…¹ 1/22/…² 1/23/…³ 1/24/…⁴ 1/25/…⁵ 1/26/…⁶ 1/27/…⁷ 1/28/…⁸ 1/29/…⁹
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Brazil 0 0 0 0 0 0 0 0
# … with 967 more variables: `1/30/20` <dbl>, `1/31/20` <dbl>, `2/1/20` <dbl>,
# `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>, `2/5/20` <dbl>,
# `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>, `2/9/20` <dbl>,
# `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>, `2/13/20` <dbl>,
# `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>, `2/17/20` <dbl>,
# `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>, `2/21/20` <dbl>,
# `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>, `2/25/20` <dbl>, …
# ℹ Use `colnames()` to see all variable names
# A tibble: 5 × 4
# Groups: country [1]
country date cumulate value
<chr> <date> <dbl> <dbl>
1 Brazil 2020-01-22 0 NA
2 Brazil 2020-01-23 0 0
3 Brazil 2020-01-24 0 0
4 Brazil 2020-01-25 0 0
5 Brazil 2020-01-26 0 0
ggplot2::
na visualização de casos de COVID 19covid19 |>
dplyr::mutate(moving_average = zoo::rollmean(value, 7, align = "right", fill = NA)) |>
ggplot2::ggplot() +
ggplot2::aes(date, moving_average) +
ggplot2::geom_line() +
ggplot2::theme_minimal() +
ggplot2::labs(x = "Data",
y = "Média móvel casos COVID 19") +
theme(text = element_text(size = 22))
plotly::
gráficos dinâmicoscovid19 |>
dplyr::mutate(moving_average = zoo::rollmean(value, 7, align = "right", fill = NA)) |>
plotly::plot_ly(x = ~date, y = ~moving_average,
type = "scatter", mode = "lines") |>
plotly::layout(xaxis = list(title = "Date"),
yaxis = list(title = "Média móvel casos COVID 19"),
font = list(size = 20), margin = list(t = 100)) |>
plotly::rangeslider() |>
widgetframe::frameWidget()
leaflet::
mapas dinâmicoslibrary(leaflet)
locale <- dplyr::tibble(label = "IBM Hortolândia",
lat = -22.8996401,
lng = -47.2032362,
logo = "https://www.ibm.com/brand/experience-guides/developer/8f4e3cc2b5d52354a6d43c8edba1e3c9/02_8-bar-reverse.svg")
leaflet(options = list(closePopupOnClick = FALSE)) |>
setView(lng = -46.9,
lat = -22.8,
zoom = 7) |>
addTiles() |>
addPopups(data = locale,
~lng,
~lat,
options = popupOptions(closeButton = FALSE),
popup = ~paste("<img src=",
logo, "width=100%> <br>",
label))
leaflet::
rota do Titanicevents <- bind_rows(
tibble(location = "Southampton (10-04-1912)",
lng = -1.4191, lat = 50.7894),
tibble(location = "Cherbourg (10-04-1912)",
lng = -1.6109, lat = 49.6445),
tibble(location = "Queenstown (11-04-1912)",
lng = -8.3211, lat = 51.8535),
tibble(location = "Naufrágio (14-04-1912)",
lng = -49.9408, lat = 41.7258),
tibble(location = "New York",
lng = -73.9655, lat = 40.6832))
leaflet() |>
setView(lng = -33.9,
lat = 46.8,
zoom = 3) |>
addTiles() |>
addCircleMarkers(data = events |> slice(1:3, 5),
label = ~location,
color = c(rep("blue", 3), "green")) |>
addMarkers(data = events |> slice(4),
icon = list(
iconUrl = "resources/images/sinking-ship.jpeg",
iconSize = c(50, 50)),
label = ~location) |>
addPolylines(data = events, ~lng, ~lat) |>
widgetframe::frameWidget()
DBI::
, RJDBC::
e dbplyr::
conectando com o DB2 na IBM Cloudlibrary(dbplyr)
# Ler variáveis de ambiente
readRenviron("../.Renviron")
# Conexão com o DB2
drv <-
RJDBC::JDBC("com.ibm.db2.jcc.DB2Driver",
"../jars/db2jcc4.jar")
host <- Sys.getenv("DB2_HOST")
user <- Sys.getenv("DB2_USER")
password <- Sys.getenv("DB2_PASSWORD")
uri <-
sprintf("jdbc:db2://%s/bludb:user=%s;password=%s;sslConnection=true;",
host, user, password)
db2 <-
DBI::dbConnect(drv, uri)
# Enviando tabela para o DB2
DBI::dbWriteTable(db2, "COVID19",
value = na.omit(covid19),
overwrite = TRUE)
# Fazendo consulas no DB2
dplyr::tbl(db2, "COVID19") |>
dplyr::filter(dplyr::between(DATE,
"2022-06-07",
"2022-06-15")) |>
dplyr::select(COUNTRY, DATE, VALUE)
# Source: SQL [9 x 3]
# Database: JDBCConnection
COUNTRY DATE VALUE
<chr> <chr> <dbl>
1 Brazil 2022-06-07 71045
2 Brazil 2022-06-08 49614
3 Brazil 2022-06-09 45073
4 Brazil 2022-06-10 56491
5 Brazil 2022-06-11 27796
6 Brazil 2022-06-12 11728
7 Brazil 2022-06-13 40173
8 Brazil 2022-06-14 44441
9 Brazil 2022-06-15 70290
DBI::
, RJDBC::
e dbplyr::
conectando com o DB2 na IBM Cloudtidymodels::
aprendizado supervisionadolibrary(tidymodels)
set.seed(555)
titanic <-
titanic::titanic_train |>
dplyr::mutate(Pclass = relevel(factor(Pclass, labels = c("1st", "2nd", "3rd")),
ref = "3rd"),
Survived = factor(Survived, labels = c("não", "sim")),
Sex = factor(Sex, levels = c("male", "female")))
# Separar 3/4 dos dados para treino
data_split <-
initial_split(titanic, prop = 3/4)
train_data <- training(data_split)
test_data <- testing(data_split)
# Ajuste do modelo
lr_mod <-
logistic_reg() |>
set_engine("glm")
lr_fit <-
lr_mod |>
fit(Survived ~ Sex + Pclass, data = train_data)
lr_fit |> broom::tidy() |> dplyr::mutate(exp(estimate))
# A tibble: 4 × 6
term estimate std.error statistic p.value `exp(estimate)`
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) -2.42 0.193 -12.5 8.35e-36 0.0893
2 Sexfemale 2.75 0.219 12.6 2.89e-36 15.6
3 Pclass1st 1.98 0.253 7.82 5.28e-15 7.22
4 Pclass2nd 1.19 0.261 4.56 5.01e- 6 3.29
tidymodels::
visualizando o modelonewdata <-
expand.grid(Pclass = c("1st", "2nd", "3rd"),
Sex = c("male", "female"))
pihat <-
(lr_fit |> predict(newdata, type = "prob")) |>
pull(.pred_sim)
newdata |> mutate(Pihat = pihat) |>
ggplot(aes(Sex, Pihat,
group = Pclass,
colour = Pclass)) +
geom_line() + geom_point() +
labs(x = "Sex", y = expression(pi(Survived == sim)),
colour = "Ticket Class") +
theme_minimal() +
theme(text = element_text(size = 22))
tidymodels::
avaliando o modelo# A tibble: 2 × 3
.metric .estimator .estimate
<chr> <chr> <dbl>
1 accuracy binary 0.762
2 f_meas binary 0.803
pins::
versionando recursos no cloud object storage (COS) na IBM CloudVersionando o modelo selecionado para a predição da probabilidade de sobreviver ao desastre do titanic.
# Obtendo o modelo
saved_lr_fit <- tidypredict::parse_model(lr_fit)
# Conexão com o COS
board <- pins::board_s3(bucket = Sys.getenv("COS_BUCKET"),
region = Sys.getenv("COS_REGION"),
access_key = Sys.getenv("COS_ACCESS_KEY_ID"),
secret_access_key = Sys.getenv("COS_SECRET_ACCESS_KEY"),
endpoint = Sys.getenv("COS_ENDPOINT"))
Lendo o modelo do COS e fazendo a predição
plumber::
criando api# plumber.R
#* @apiTitle Prediction Survived in Titanic Disaster
#* @param sex Sex
#* @param pclass Class
#* @post /predict
function(sex, pclass) {
board <- pins::board_s3(bucket = Sys.getenv("COS_BUCKET"),
region = Sys.getenv("COS_REGION"),
access_key = Sys.getenv("COS_ACCESS_KEY_ID"),
secret_access_key = Sys.getenv("COS_SECRET_ACCESS_KEY"),
endpoint = Sys.getenv("COS_ENDPOINT"))
model <- board |> pins::pin_read("titanic-model")
input <- data.frame(Sex = sex, Pclass = pclass)
pred <- tidypredict::tidypredict_to_column(input, model)
return(pred)
}
Executando a api
httr::
fazendo requisições[
{
"Sex": "female",
"Pclass": "1st",
"fit": 0.9097
}
]
[
{
"Sex": "male",
"Pclass": "3rd",
"fit": 0.082
}
]
quantmod::
preços de açõesdataStocks <- function(code, from, to) {
stock <-
quantmod::getSymbols(code,
src = "yahoo",
from = from,
to = to,
auto.assign = FALSE)
stock <-
stock |>
as.data.frame() |>
tibble::rownames_to_column(var = "Date") |>
dplyr::as_tibble() |>
dplyr::select(dplyr::matches("Date|Adjusted")) |>
dplyr::mutate(Date = Date |> lubridate::as_date()) |>
dplyr::rename(Adjusted = 2)
return(stock)
}
gtrendsR::
Google trendsdataStocks <- function(code, from, to) {
stock <-
quantmod::getSymbols(code,
src = "yahoo",
from = from,
to = to,
auto.assign = FALSE)
stock <-
stock |>
as.data.frame() |>
tibble::rownames_to_column(var = "Date") |>
dplyr::as_tibble() |>
dplyr::select(dplyr::matches("Date|Adjusted")) |>
dplyr::mutate(Date = Date |> lubridate::as_date()) |>
dplyr::rename(Adjusted = 2)
return(stock)
}
dataGtrends <- function(query, from, to) {
data_gtrends <-
gtrendsR::gtrends(keyword = c(query),
geo = "BR",
time = glue::glue("{from} {to}"),
onlyInterest = TRUE)
data_gtrends$interest_over_time |>
dplyr::as_tibble() |>
dplyr::mutate(Date = date |> lubridate::as_date()) |>
dplyr::select(Date, Hits = hits)
}
plotStocksGtrends <- function(stock, query, from, to) {
data_stocks <- dataStocks(stock, from, to)
data_gtrends <- dataGtrends(query, from, to)
coeff <- 50/data_stocks$Adjusted |> mean(na.rm = TRUE)
data_stocks |>
dplyr::mutate(Week = Date |> lubridate::week()) |>
dplyr::left_join(
data_gtrends |>
dplyr::mutate(Week = Date |> lubridate::week()) |>
dplyr::select(Week, Hits), by = "Week"
) |>
ggplot2::ggplot(ggplot2::aes(x = Date)) +
ggplot2::geom_smooth(ggplot2::aes(y = Adjusted, colour = "Stock Price")) +
ggplot2::geom_line(ggplot2::aes(y = Adjusted), colour = "gray") +
ggplot2::geom_smooth(ggplot2::aes(y = Hits / coeff, colour = "Google Trends")) +
ggplot2::scale_y_continuous(
name = "Adjusted Price",
sec.axis = ggplot2::sec_axis(~.*coeff, name = "Hits")) +
ggplot2::theme_minimal() +
ggplot2::labs(colour = "") +
ggplot2::theme(text = element_text(size = 20))
}
plotStocksGtrends("PETR4.SA", "Petrobras", "2020-10-05", "2021-10-05")
shiny::
criando aplicações webshiny::
app para obter preços de açõeslibrary(shiny)
library(shinythemes)
source("dataStocks.R")
start_value <- Sys.Date() - 31
end_value <- Sys.Date()
ui <- fluidPage(
theme = shinytheme("sandstone"),
navbarPage("STOCK PRICE"),
sidebarLayout(
sidebarPanel(
textInput("code", "Stock code", value = "PETR4.SA"),
dateRangeInput("date", "Date",
start = start_value,
end = end_value)
),
mainPanel(
plotly::plotlyOutput("plot")
)
)
)
server <- function(input, output, session) {
data_stock <- reactive({
validate(
need(input$code, "Please type a stock code!"),
need(input$date[1], "Please choose a start date!"),
need(input$date[2], "Please choose a end date!"))
dataStocks(input$code, input$date[1], input$date[2])
})
output$plot <- plotly::renderPlotly({
plotly::plot_ly(data_stock(),
x = ~Date,
y = ~Adjusted,
color = "orange",
mode = "lines",
type = "scatter") |>
plotly::rangeslider()
})
}
shinyApp(ui, server)
docker
Dockerfile
FROM openwhisk/dockerskeleton
ARG NOT_CRAN=true
ARG ARROW_R_DEV=true
RUN apk update && apk add R R-dev R-doc \
build-base libsodium-dev autoconf automake bash \
cmake g++ gcc make libxml2-dev
RUN R -e "install.packages(c('jsonlite', 'tidypredict', 'yaml', \
'pins', 'paws.storage'), repos = 'http://cran.rstudio.com/')"
Build
podman build -t docker.io/th1460/titanic-classifier .
Push para um registry
podman login -u <user> -p <password> docker.io/th1460/titanic-classifier
podman push docker.io/th1460/titanic-classifier
O serviço IBM Cloud Functions é uma plataforma de computação orientada a eventos, também conhecida como computação sem servidor ou como Function-as-a-Service (FaaS), que executa código em resposta a eventos ou chamadas diretas.
exec
#!/bin/bash
# run R script
chmod +x script.R # turn executable
echo "$@" > input.json # set input
./script.R # run script
script.R
#!/usr/bin/env Rscript
readRenviron(".Renviron")
# load model
board <- pins::board_s3(bucket = Sys.getenv("COS_BUCKET"),
region = Sys.getenv("COS_REGION"),
access_key = Sys.getenv("COS_ACCESS_KEY_ID"),
secret_access_key = Sys.getenv("COS_SECRET_ACCESS_KEY"),
endpoint = Sys.getenv("COS_ENDPOINT"))
model <- pins::pin_read(board, "titanic-model")
# input
input <- jsonlite::fromJSON("input.json", flatten = FALSE)
# predict
pred <- tidypredict::tidypredict_to_column(as.data.frame(input), model)
# output
jsonlite::stream_out(pred, verbose = FALSE)
Login na IBM Cloud
ibmcloud login -sso
ibmcloud target --cf
Empacotar os arquivos para o deploy
zip -r titanic.zip exec script.R .Renviron
Deploy
ibmcloud fn action create titanic-classifier titanic.zip \
--docker th1460/titanic-classifier --web true
Descobrindo qual é a url
ibmcloud fn action get titanic-classifier --url
rayshader::
mapas em 3D# Define bounding box with longitude/latitude coordinates
bbox <- list(
p2 = list(long = -43.2328217452992, lat = -22.99560928307949),
p1 = list(long = -43.133008448808454, lat = -22.930329210944166)
)
leaflet::leaflet() |>
leaflet::addTiles() |>
leaflet::addRectangles(
lng1 = bbox$p1$long, lat1 = bbox$p1$lat,
lng2 = bbox$p2$long, lat2 = bbox$p2$lat,
fillColor = "transparent"
) |>
leaflet::fitBounds(
lng1 = bbox$p1$long, lat1 = bbox$p1$lat,
lng2 = bbox$p2$long, lat2 = bbox$p2$lat,
)
rayshader::
mapas em 3Dsource("../src/define_image_size.R")
source("../src/get_arcgis_map_image.R")
image_size <- define_image_size(bbox, major_dim = 600)
# Fetch overlay image
overlay_file <- "../data/rj_overlay.png"
get_arcgis_map_image(bbox, map_type = "World_Imagery", file = overlay_file,
width = image_size$width, height = image_size$height,
sr_bbox = 4326)
overlay_img <- png::readPNG(overlay_file)
# Calculate rayshader layers
watermap <- rayshader::detect_water(elev_matrix)
# Plot 2D with texture
elev_matrix |>
rayshader::sphere_shade(texture = "imhof4") |>
rayshader::add_water(watermap, color = "imhof4") |>
rayshader::add_overlay(overlay_img, alphalayer = 0.5) |>
rayshader::plot_map()
rayshader::
mapas em 3D# Plot 3D
zscale <- 30
rgl::clear3d()
elev_matrix |>
rayshader::sphere_shade(texture = "imhof4") |>
rayshader::add_water(watermap, color = "imhof4") |>
rayshader::add_overlay(overlay_img, alphalayer = .8) |>
plot_3d(elev_matrix, zscale = zscale, windowsize = c(1500, 1200),
water = TRUE, soliddepth = -max(elev_matrix)/zscale, wateralpha = 1,
theta = 25, phi = 30, zoom = 0.65, fov = 60)
rayshader::render_snapshot("../data/rj-3d.png")
purrr::
programação funcionalFazendo loop com for
Fazendo loop com map_dbl
keras::dataset_imdb()
mineração de textoEstruturando a base de dados
keras::dataset_imdb_word_index()
mineração de textoDecodificando o review
word_index <- keras::dataset_imdb_word_index()
reverse_word_index <- names(word_index)
names(reverse_word_index) <- word_index
decoded_review <- function(x) {sapply(train_data[[x]], function(index) {
word <- if (index >= 3) reverse_word_index[[as.character(index - 3)]]
if (!is.null(word)) word else "?"
})}
data_decoded_review <-
seq(10000) |>
purrr::map(~ decoded_review(.x))
data_decoded_review <-
tibble::enframe(data_decoded_review,
name = "index",
value = "words")
data_decoded_review <-
data_decoded_review |>
dplyr::bind_cols(train_labels)
data_decoded_review |>
saveRDS("../data/data_decoded_review.RDS")
tidytext::
e wordcloud::
data_decoded_review <- readRDS("../data/data_decoded_review.RDS")
set.seed(1984)
data_decoded_review |>
tidyr::unnest(words) |>
dplyr::anti_join(tidytext::stop_words, by = c("words" = "word")) |>
dplyr::filter(! words %in% c("?", "br", "1", "2",
"3", "4", "7", "8",
"9", "10")) |>
dplyr::count(words, y, sort = TRUE) |>
dplyr::mutate(y = ifelse(y == 0, "Negativo", "Positivo")) |>
reshape2::acast(words ~ y, value.var = "n", fill = 0) |>
wordcloud::comparison.cloud(colors = c("#E24664", "#1D6CE6"),
max.words = 200,
title.size = 4,
scale = c(5, 2))
tensorflow::
deep learningimdb <- keras::dataset_imdb(num_words = 10000)
set.seed(1984)
train_labels <-
dplyr::tibble(y = imdb$train$y, id = 1:25000) |>
dplyr::group_by(y) |>
dplyr::sample_n(size = 5000) |>
dplyr::ungroup()
train_data <-
train_labels |>
dplyr::pull(id) |>
purrr::map(~ imdb$train$x[[.x]])
saveRDS(list(data = train_data,
labels = train_labels), "../data/train_imdb.RDS")
test_labels <-
dplyr::tibble(y = imdb$test$y, id = 1:25000) |>
dplyr::group_by(y) |>
dplyr::sample_n(size = 5000) |>
dplyr::ungroup()
test_data <-
test_labels |>
dplyr::pull(id) |>
purrr::map(~ imdb$test$x[[.x]])
saveRDS(list(data = test_data,
labels = test_labels), "../data/test_imdb.RDS")
tensorflow::
deep learningvectorize_sequences <- function(sequences, dimension = 10000) {
# Creates an all-zero matrix of shape (length(sequences), dimension)
results <- matrix(0, nrow = length(sequences), ncol = dimension)
for (i in 1:length(sequences))
# Sets specific indices of results[i] to 1s
results[i, sequences[[i]]] <- 1
results
}
train_imdb <- readRDS("../data/train_imdb.RDS")
test_imdb <- readRDS("../data/test_imdb.RDS")
# tranformando os reviews (inputs) em um vetor de 0s e 1s
x_train <- vectorize_sequences(train_imdb$data)
x_test <- vectorize_sequences(test_imdb$data)
# transformando os outpus em numérico
y_train <- as.numeric(train_imdb$labels |> pull(y))
y_test <- as.numeric(train_imdb$labels |> pull(y))
tensorflow::
deep learningtensorflow::
deep learning loss accuracy
0.350753 0.870200
tensorflow::
deep learning# codificação
coded_review <- function(review){
word_index <- keras::dataset_imdb_word_index()
reverse_word_index <- unlist(word_index)
words <-
(review |>
tolower() |>
strsplit(split = " "))[[1]] |>
gsub("[!,:]", "", x = _)
index <- reverse_word_index[names(reverse_word_index) %in% words]
return(list(as.numeric(index) + 3))
}
(exemplo1 <-
"The film is best, great, beautiful and wonderful!" |>
coded_review()) |> str()
bbox <- list(
p1 = list(long = -47.0708, lat = -22.9075),
p2 = list(long = -47.0348, lat = -22.8804)
)
leaflet::leaflet() |>
leaflet::addTiles()|>
leaflet::addRectangles(
lng1 = bbox$p1$long, lat1 = bbox$p1$lat,
lng2 = bbox$p2$long, lat2 = bbox$p2$lat,
fillColor = "transparent"
) |>
leaflet::fitBounds(
lng1 = bbox$p1$long, lat1 = bbox$p1$lat,
lng2 = bbox$p2$long, lat2 = bbox$p2$lat,
)
osmdata
if (!exists("has_internet_via_proxy")) {
assign("has_internet_via_proxy", TRUE, environment(curl::has_internet))
}
# Consulta
query <- osmdata::opq(bbox = unlist(bbox)) |>
osmdata::add_osm_feature(key = "highway") |>
osmdata::osmdata_sf()
# Retornando linhas
query_lines <- query$osm_lines
# Visualizando o objeto
query_lines |>
sf::st_geometry() |>
ggplot2::ggplot() +
ggplot2::geom_sf(colour = "gray") +
ggplot2::theme_minimal() +
ggplot2::theme(text = element_text(size = 15))
sfnetwork
poly_to_lines <- sf::st_cast(query$osm_polygons, "LINESTRING")
query_lines <- dplyr::bind_rows(query_lines, poly_to_lines)
highway_net <- sfnetworks::as_sfnetwork(query_lines, directed = FALSE)
# Remove nodes that have only two edges connected
highway_simple <- tidygraph::convert(highway_net, sfnetworks::to_spatial_smooth)
# Filter connected components
connected_net <- highway_simple |>
tidygraph::activate("nodes") |>
dplyr::filter(tidygraph::group_components() == 1)
# Plot
ggplot2::ggplot() +
ggplot2::geom_sf(data = sf::st_as_sf(connected_net, "edges"), col = "gray") +
ggplot2::geom_sf(data = sf::st_as_sf(connected_net, "nodes"), col = "gray") +
ggplot2::theme_minimal() +
ggplot2::theme(text = element_text(size = 15))
São 3 veículos e preciso selecionar a melhor rota. Eles devem sair e retornar a um depósito.
set.seed(711)
index_visits <- sample(1:603, 9)
visits <- connected_net |>
tidygraph::activate("nodes") |>
dplyr::slice(index_visits)
depot <- connected_net |>
tidygraph::activate("nodes") |>
dplyr::slice(23)
ggplot2::ggplot() +
ggplot2::geom_sf(data = sf::st_as_sf(connected_net, "edges"), col = "gray") +
ggplot2::geom_sf(data = sf::st_as_sf(connected_net, "nodes"), col = "gray") +
ggplot2::theme_minimal() +
ggplot2::theme(text = element_text(size = 15)) +
ggplot2::geom_sf(data = sf::st_as_sf(visits, "nodes"),
ggplot2::aes(col = "Visits"), size = 6) +
ggplot2::geom_sf(data = sf::st_as_sf(depot, "nodes"),
ggplot2::aes(col = "Depot"), size = 6) +
ggplot2::labs(x = "", y = "", colour = "") +
ggplot2::scale_color_manual(values = c("Visits" = "blue",
"Depot" = "orange"))
Cálculo da matriz de custo
connected_net <-
connected_net |>
tidygraph::activate("edges") |>
dplyr::mutate(weight = sfnetworks::edge_length())
cost_matrix_route_problem <-
sfnetworks::st_network_cost(connected_net,
from = c(23, index_visits),
to = c(23, index_visits),
weights = "weight")
mode(cost_matrix_route_problem) <- "integer"
cost_matrix_route_problem
Units: [m]
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 0 3016 3476 308 3118 3307 4848 1631 3902 4196
[2,] 3016 0 3335 2708 2424 4025 6296 2764 1059 5639
[3,] 3476 3335 0 3167 4183 4899 4506 2152 4175 4939
[4,] 308 2708 3167 0 3187 2999 4540 1323 3593 3888
[5,] 3118 2424 4183 3187 0 3165 5855 3818 3264 5128
[6,] 3307 4025 4899 2999 3165 0 4941 3055 4910 5375
[7,] 4848 6296 4506 4540 5855 4941 0 5326 7150 2709
[8,] 1631 2764 2152 1323 3818 3055 5326 0 3649 4844
[9,] 3902 1059 4175 3593 3264 4910 7150 3649 0 6479
[10,] 4196 5639 4939 3888 5128 5375 2709 4844 6479 0
ortools
e reticulate::
reticulate::use_virtualenv("../venv", required = TRUE)
constraint_solver <- reticulate::import("ortools.constraint_solver")
# Create data
create_data_model <- function() {
data <- reticulate::dict()
data['distance_matrix'] <- cost_matrix_route_problem
data['distance_matrix'] <- data['distance_matrix']$tolist()
data['num_vehicles'] <- 3L
data['depot'] <- 0L
return(data)
}
# Create the routing model
data <- create_data_model()
manager <- constraint_solver$pywrapcp$RoutingIndexManager(
length(data['distance_matrix']),
data['num_vehicles'],
data['depot']
)
routing <- constraint_solver$pywrapcp$RoutingModel(manager)
# Create the distance callback
distance_callback <- function(from_index, to_index) {
from_node <- manager$IndexToNode(from_index)
to_node <- manager$IndexToNode(to_index)
return(data['distance_matrix'][from_node][to_node])
}
transit_callback_index <- routing$RegisterTransitCallback(distance_callback)
# Set the cost of travel
routing$SetArcCostEvaluatorOfAllVehicles(transit_callback_index)
# Add a distance dimension
dimension_name <- 'Distance'
routing$AddDimension(
transit_callback_index,
0L, # no slack
15000L, # vehicle maximum travel distance
TRUE, # start cumul to zero
dimension_name)
distance_dimension <- routing$GetDimensionOrDie(dimension_name)
distance_dimension$SetGlobalSpanCostCoefficient(100L)
# Set search parameters
search_parameters <- constraint_solver$pywrapcp$DefaultRoutingSearchParameters()
search_parameters$first_solution_strategy = (
constraint_solver$routing_enums_pb2$FirstSolutionStrategy$PATH_CHEAPEST_ARC)
solution <- routing$SolveWithParameters(search_parameters)
# Save routes to a list or array
get_routes <- function(solution, routing, manager) {
routes <- list()
for (route_nbr in seq(0, routing$vehicles() - 1)) {
index <- routing$Start(route_nbr)
route <- c(manager$IndexToNode(index))
while(!routing$IsEnd(index)) {
index <- solution$Value(routing$NextVar(index))
route <- append(route, manager$IndexToNode(index))
}
routes[[route_nbr + 1]] <- route
names(routes)[route_nbr + 1] <- route_nbr
}
return(routes)
}
(routes <- get_routes(solution, routing, manager))
$`0`
[1] 0 6 9 0
$`1`
[1] 0 3 2 5 0
$`2`
[1] 0 7 1 8 4 0
real_routes <- lapply(readRDS("../data/routes.RDS"), \(x) c(23, index_visits)[x + 1])
vrp_paths <- function(x) {
results <- list()
for (i in seq(length(x))) {
from_idxs <- unique(x[[i]])
to_idxs <- c(from_idxs[2:length(from_idxs)], from_idxs[1])
paths <- mapply(sfnetworks::st_network_paths,
from = from_idxs,
to = to_idxs,
MoreArgs = list(x = connected_net, weights = "weight"))
results[[i]] <- paths
}
return(results)
}
# Plot results
vrp_paths_fun <- function(x, index, vehicle) {
vrp_edge_paths <- connected_net |>
tidygraph::activate("edges") |>
dplyr::slice(vrp_paths(x)[[vehicle]][, index]$edge_paths |> unlist())
ggplot2::geom_sf(data = sf::st_as_sf(vrp_edge_paths, "edges"),
ggplot2::aes(col = as.character(vehicle)), lwd = 1.3)
}
p <- ggplot2::ggplot() +
ggplot2::geom_sf(data = sf::st_as_sf(connected_net, "edges"), col = "gray") +
ggplot2::geom_sf(data = sf::st_as_sf(connected_net, "nodes"), col = "gray") +
ggplot2::theme_minimal()
for (j in seq(length(real_routes))) {
for (i in seq(length(real_routes[[j]]) - 1)) {
p <- p + vrp_paths_fun(real_routes, i, j)
}
}
p <- p + ggplot2::geom_sf(data = sf::st_as_sf(visits, "nodes"),
ggplot2::aes(col = "Visits"), size = 6) +
ggplot2::geom_sf(data = sf::st_as_sf(depot, "nodes"),
ggplot2::aes(col = "Depot"), size = 6) +
ggplot2::labs(x = "", y = "", colour = "") +
ggplot2::theme(text = element_text(size = 15))
plotly::ggplotly(p, tooltip = "") |>
plotly::layout(modebar = list(orientation = "v"))
th1460.github.io
github.com/th1460
medium.com/@thopr
linkedin.com/in/thop
slack: @thop