This repository has been archived by the owner on Oct 9, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.R
94 lines (73 loc) · 3.06 KB
/
main.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# LuisFillipeEmidio_RM559976_fase2_cap7
# EdersonLuizBadecadosSantos_RM560204_fase2_cap7
# CaioRodriguesCastro_RM559766_fase2_cap7
# FelipeSoaresNascimento_RM560151_fase2_cap7
# LucasFerreiraHillesheim_RM559319_fase2_cap7
# Dependencies ----
# Install any package that is not available yet, then attach all of them.
# `requireNamespace()` only checks availability; the `library()` calls below
# are the single place where packages get attached, and they stop the script
# if an installation did not succeed.
cran_repos <- getOption("repos")
if (is.null(cran_repos) || is.na(cran_repos["CRAN"]) ||
    cran_repos["CRAN"] == "@CRAN@") {
  # Without a configured mirror, `install.packages()` fails in
  # non-interactive runs (e.g. `Rscript main.R`); fall back to the
  # cloud mirror.
  cran_repos["CRAN"] <- "https://cloud.r-project.org"
}
for (pkg in c("ggplot2", "readxl", "dplyr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = cran_repos)
  }
}
# Sets R's `encoding` option (the default used when opening connections).
options(encoding = "UTF-8")
library(ggplot2)
library(dplyr)
library(readxl)
cat("# Exploratory Data Analysis (EDA)\n")
cat("\n")
## 1. Read data from an XLSX (Excel) file
# Fail early with a clear message when the workbook or the expected column is
# missing or not numeric, instead of letting the statistics run on NULL/text.
data_path <- "data.xlsx"
production_column <- "Produção (Toneladas)"
if (!file.exists(data_path)) {
  stop("Input file not found: ", data_path, call. = FALSE)
}
df <- readxl::read_xlsx(data_path)
if (!production_column %in% names(df)) {
  stop(
    "Column not found in ", data_path, ": ", production_column,
    call. = FALSE
  )
}
# `[[` extracts the column as a plain vector (no partial matching).
production <- df[[production_column]]
if (!is.numeric(production)) {
  stop("Column is not numeric: ", production_column, call. = FALSE)
}
cat("Data loaded successfully from the XLSX file: `./data.xlsx`.\n")
cat("Column selected: Produção (Toneladas)\n")
cat("\n")
## 2. Exploratory Data Analysis
# Measures of Central Tendency
mean_yield <- as.numeric(mean(production))
median_yield <- as.numeric(median(production))

# Mode: build a frequency table of the values, sort the counts in decreasing
# order and convert the label of the first entry back to a number.
value_counts <- sort(table(production), decreasing = TRUE)
mode_yield <- as.numeric(names(value_counts[1]))

cat("## Measures of Central Tendency\n\n")
cat("Mean:", mean_yield, "\n")
cat("Median:", median_yield, "\n")
cat("Mode:", mode_yield, "\n")
cat("\n")
# Measures of Dispersion
# Spread of the production values around their centre.
variance_yield <- var(production)
sd_yield <- sd(production)

# `range()` returns c(minimum, maximum); the amplitude is their difference.
range_yield <- range(production)
amplitude_yield <- range_yield[2] - range_yield[1]

cat("## Measures of Dispersion\n\n")
cat("Standard Deviation:", sd_yield, "\n")
cat("Variance:", variance_yield, "\n")
cat("Range:", amplitude_yield, "\n")
cat("\n")
# Separating Measures
# Cut points requested from `quantile()`: the three quartiles, and the
# deciles from 10% up to and including 100%.
quartile_probs <- c(0.25, 0.5, 0.75)
decile_probs <- seq(0.1, 1, by = 0.1)

quartiles_yield <- quantile(production, probs = quartile_probs)
percentiles_yield <- quantile(production, probs = decile_probs)

cat("## Separating Measures\n\n")
cat("Quartiles:", quartiles_yield, "\n")
cat("Percentiles (Deciles):", percentiles_yield, "\n")
cat("\n")
## 3. Data Visualization
## Horizontal bar chart with the share (in percent) of the total production
## contributed by each culture type.

# Total production per culture type plus its percentage of the overall total.
# Resulting columns: `Tipo de Cultura`, `total_production`, `percentage`.
total_production <- df %>%
  group_by(`Tipo de Cultura`) %>%
  summarise(total_production = sum(`Produção (Toneladas)`)) %>%
  ungroup() %>%
  mutate(percentage = total_production / sum(total_production) * 100)

# Bars are ordered by percentage; each bar carries its value as a text label.
production_plot <- ggplot(
  total_production,
  aes(
    x = reorder(`Tipo de Cultura`, percentage),
    y = percentage,
    fill = `Tipo de Cultura`
  )
) +
  geom_col() +
  # Leave extra room past the longest bar so its label is not clipped at the
  # panel edge (labels are drawn just outside the bar end via `hjust`).
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  coord_flip() +
  labs(
    title = "Distribuicao da Producao por Tipo de Cultura",
    subtitle = "Analise da producao agricola em toneladas",
    caption = "Fonte: Dados extraidos de 'data.xlsx'",
    x = "Tipo de Cultura",
    y = "Percentual de Producao (%)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  ) +
  geom_text(
    aes(label = paste(round(percentage, 2), "%")),
    hjust = -0.1,
    color = "black"
  )

# Print explicitly: a bare ggplot expression is not rendered when the file is
# executed through `source()`, only when auto-printed at top level.
print(production_plot)
# Headline insight: share of the culture type with the largest production.
# Rounded to two decimals so the figure matches the bar labels of the chart.
top_share <- round(max(total_production$percentage), 2)
cat("Insights:\n")
cat(
  "- O tipo de cultura com maior producao representa",
  top_share,
  "% da producao total.\n"
)