df <- CleandLaptopData |>
tibble()
## Due to having too many missing values in the display size, I decided to replace it with average values
mean_dps <- df |>
select(display_size) |>
filter(display_size != "Missing") |>
unlist() |>
as.numeric()
mean_dps <- as.character(round(mean(mean_dps), 2))
## To select important product specifications that can be used for analyzing
df <- df |>
select(brand, model,
processor_brand, processor_name,
ram_gb, ram_type, ssd, hdd,
os, os_bit, graphic_card_gb,
weight, display_size, Touchscreen,
latest_price, star_rating) |>
mutate(display_size = replace(
display_size, display_size == "Missing", mean_dps))
## Converting to the suitable data types
df$display_size <- as.double(df$display_size) ##double for display_size
df[, -c(13, 15, 16)] <- lapply(df[, -c(13, 15, 16)], as.factor) ##factor for others
The purpose of analyzing this time is to find the preferred components that customers gave a rating more than or equal to 4.0 stars. Instead of simply sorting from the ranking, I chose to sort from 5 brands that have the most product lists in this dataset to get enough information and bring to preferred factors accurately.
## To sort the most numbers of 5 brands in the dataset
freq_b <- table(df$brand)
sorted_b <- sort(freq_b, decreasing = TRUE)
print(names(sorted_b)[1:5])
## Results
[1] "ASUS" "DELL" "Lenovo" "HP" "acer"
# convert brands to lowercase
df$brand <- as.factor(tolower(df$brand))
## To sort by rating
df <- df[order(df$star_rating, decreasing = TRUE), ]
## To filter only top five frequent brands
top5df <- df |>
filter(brand %in% c("asus", "dell", "lenovo", "hp", "acer"))
85% of this dataset was recorded by these five brands; Asus, Dell, Lenovo, HP, and Acer accordingly.
## To find processor and ram type in products that get rating >= 4.0
pop_pcsr <- top5df |>
filter(star_rating >= 4.0) |>
select(3,4,6) |>
group_by(processor_brand, processor_name, ram_type) |>
summarise(n=n(), .groups = "drop") |>
arrange(desc(n))
Laptops that got more than or equal to 4.0 ratings from customers have processor models including Ryzen 5,7,3, and 9 from AMD. Meanwhile, processors from Intel including Core i5,i3, and i7 that are the most voted processors from filtered data.
## To analyze suitable display size data
## Average and quartile display size
fstar <- top5df |>
filter(star_rating >= 4.0)
## Average display size
avg_dis <- fstar |>
summarise(avg_dps = mean(display_size))
## Results of average display size
1 15.2
## Quantile value of display size
q_dis <- fstar |>
select(display_size)
q_dis <- quantile(q_dis, probs = c(0, .25, .5, .75, 1), na.rm = T)
## Results of all five brands
0% 25% 50% 75% 100%
13.00 15.12 15.12 15.60 17.30
For the touchscreen function, data reflects that customers do not prioritize that much. Even each laptop does not have a touchscreen for 87.90%, they still got the mentioned rating star.
## Touchscreen ratio
t_dis <- fstar |> count(Touchscreen, sort = T) |>
mutate(percent = n/sum(n)*100)
This dataset did not specify the currency of all products. For the average price is $67,144.00, meanwhile, the first to the third quartiles are $44,517.25 to $75,997.50 that still got the mentioned rating star.
## Average latest product prices
avg_p <- fstar |>
summarise(avg_p = mean(latest_price))
## Result for average latest product prices
1 67144
## Quantile value of latest product prices
q_p <- fstar |>
select(latest_price)
q_p <- quantile(q_p, probs = c(0, .25, .5, .75, 1), na.rm = T)
## Results of all five brands
0% 25% 50% 75% 100%
19990.00 44517.25 59490.00 75997.50 441990.00