From 750ec5c719328f8d14d8faa441ba38fcae88772b Mon Sep 17 00:00:00 2001
From: Arthur DANJOU
Date: Thu, 14 Nov 2024 15:22:21 +0100
Subject: [PATCH] Add TP1 of Data Analysis

---
 M1/Data Analysis/TP1/TP1.rmd              | 151 ++++++++++++++++++++++
 M1/Data Analysis/TP1/Temperature Data.csv |  16 +++
 2 files changed, 167 insertions(+)
 create mode 100644 M1/Data Analysis/TP1/TP1.rmd
 create mode 100644 M1/Data Analysis/TP1/Temperature Data.csv

diff --git a/M1/Data Analysis/TP1/TP1.rmd b/M1/Data Analysis/TP1/TP1.rmd
new file mode 100644
index 0000000..153a705
--- /dev/null
+++ b/M1/Data Analysis/TP1/TP1.rmd
@@ -0,0 +1,151 @@
+# Part 1 - Analysis of the data
+
+```{r}
+# Centring a vector on its mean: the centred values average to zero.
+x <- c(1, 2, 3, 4)
+mean(x)
+
+y <- x - mean(x)
+mean(y)
+t(y)
+
+sum(y^2)
+```
+
+```{r}
+# The data file is addressed by a relative path: run or knit this notebook
+# from its own folder instead of calling setwd() on a machine-specific
+# absolute path.
+temperatures <- read.table(
+  "Temperature Data.csv",
+  header = TRUE, sep = ";", dec = ",", row.names = 1
+)
+n <- nrow(temperatures)
+
+# Centre every column on its mean (g holds the column means).
+g <- colMeans(temperatures)
+Y <- sweep(as.matrix(temperatures), 2, g)
+
+# Diagonal weight matrix giving each row the same weight 1 / n.
+Dp <- diag(1 / n, n)
+
+V <- t(Y) %*% Dp %*% Y
+
+# Decompose V once and reuse both parts. eigen() sorts the eigenvalues in
+# decreasing order, so the first one is the largest.
+decomposition <- eigen(V, symmetric = TRUE)
+eigen_values <- decomposition$values
+vectors <- decomposition$vectors
+
+total_inertia <- sum(eigen_values)
+inertia_one <- eigen_values[1] / total_inertia
+inertia_plan <- sum(eigen_values[1:2]) / total_inertia
+c(total = total_inertia, axis_1 = inertia_one, plane_1_2 = inertia_plan)
+
+# Coordinates of the rows on the first two eigenvectors.
+P <- Y %*% vectors[, 1:2]
+
+# Draw labelled points, then unlabelled tick axes crossing at the origin.
+plot_cities <- function(coords, labels, xlab, ylab, x_ticks, y_ticks) {
+  plot(coords, pch = 19, xlab = xlab, ylab = ylab)
+  text(coords, labels, cex = 0.7, pos = 3)
+  axis(1, x_ticks, pos = 0, labels = FALSE)
+  axis(2, y_ticks, pos = 0, labels = FALSE)
+}
+
+plot_cities(P, rownames(temperatures), "PC1", "PC2", -10:10, -5:5)
+```
+
+```{r}
+# Quarter turn: each point (PC1, PC2) is sent to (-PC2, PC1), so the axis
+# labels and tick ranges are swapped to match what is actually drawn.
+France <- P %*% matrix(c(0, -1, 1, 0), 2, 2)
+plot_cities(France, rownames(temperatures), "-PC2", "PC1", -5:5, -10:10)
+```
+
+```{r}
+# Quality of representation on the first plane: squared norm of the projected
+# row over the squared norm of the centred row.
+qlt <- rowSums(P^2) / rowSums(Y^2) * 100
+# Share of each row, weighted 1 / n, in the first eigenvalue.
+ctr <- P[, 1]^2 / eigen_values[1] / n * 100
+
+results <- cbind(qlt, ctr)
+dimnames(results) <- list(
+  rownames(temperatures),
+  c("Quality of Representation (%)", "Contribution to Inertia (%)")
+)
+
+# Add the total row
+results <- rbind(results, Total = colSums(results))
+
+results
+```
+
+# Part 2 - PCA with FactoMineR
+
+```{r}
+library(FactoMineR)
+temperatures <- read.csv(
+  "Temperature Data.csv",
+  header = TRUE, sep = ";", dec = ",", row.names = 1
+)
+summary(temperatures)
+```
+
+```{r}
+# NOTE(review): habillage = 1 designates the first column (janv), because the
+# city column was turned into row names - confirm this colouring is intended.
+temperature_pca <- PCA(temperatures, graph = FALSE)
+plot.PCA(temperature_pca, axes = c(1, 2), habillage = 1, choix = "ind")
+plot.PCA(temperature_pca, axes = c(1, 2), habillage = 1, choix = "var")
+
+print("Var coords")
+round(temperature_pca$var$coord[, 1:2], 2)
+
+print("Eigen values")
+round(temperature_pca$eig, 2)
+
+print("Ind dis")
+round(temperature_pca$ind$dist, 2)
+
+print("Ind contrib")
+round(temperature_pca$ind$contrib[, 1:2], 2)
+
+print("Var contrib")
+round(temperature_pca$var$contrib[, 1:2], 2)
+```
+
+## We add new values
+```{r}
+# One row of twelve monthly values per extra city.
+new_cities <- rbind(
+  Amiens = c(
+    3.1, 3.8, 6.7, 9.5, 12.8, 15.8, 17.6, 17.6, 15.5, 11.1, 6.8, 4.2
+  ),
+  Moscow = c(
+    -9.2, -8, -2.5, 5.9, 12.8, 16.8, 18.4, 16.6, 11.2, 4.9, -1.5, -6.2
+  ),
+  Marrakech = c(
+    11.3, 12.8, 15.8, 18.1, 21.2, 24.7, 28.6, 28.6, 25, 20.9, 15.9, 12.1
+  )
+)
+colnames(new_cities) <- colnames(temperatures)
+
+# Append to a copy so re-running this chunk cannot stack duplicate rows, and
+# derive the positions of the appended rows instead of hard-coding 16:18.
+n_active <- nrow(temperatures)
+temperatures_ext <- rbind(temperatures, as.data.frame(new_cities))
+supplementary_rows <- n_active + seq_len(nrow(new_cities))
+```
+
+## We redo the PCA
+
+```{r}
+temperature_pca <- PCA(
+  temperatures_ext,
+  ind.sup = supplementary_rows, graph = FALSE
+)
+plot.PCA(temperature_pca, axes = c(1, 2), habillage = 1, choix = "ind")
+plot.PCA(temperature_pca, axes = c(1, 2), habillage = 1, choix = "var")
+```
diff --git a/M1/Data Analysis/TP1/Temperature Data.csv b/M1/Data Analysis/TP1/Temperature Data.csv
new file mode 100644
index 0000000..b7b7fcf
--- /dev/null
+++ b/M1/Data Analysis/TP1/Temperature Data.csv
@@ -0,0 +1,16 @@
+Ville;janv;fev;mars;avril;mai;juin;juil;aout;sept;oct;nov;dec
+Bordeaux;5,6;6,6;10,3;12,8;15,8;19,3;20,9;21,0;18,6;13,8;9,1;6,2
+Brest;6,1;5,8;7,8;9,2;11,6;14,4;15,6;16,0;14,7;12,0;9,0;7,0
+Clermont-Ferrand;2,6;3,7;7,5;10,3;13,8;17,3;19,4;19,1;16,2;11,2;6,6;3,6
+Grenoble;1,5;3,2;7,7;10,6;14,5;17,8;20,1;19,5;16,7;11,4;6,5;2,3 +Lille;2,4;2,9;6,0;8,9;12,4;15,3;17,1;17,1;14,7;10,4;6,1;3,5 +Lyon;2,1;3,3;7,7;10,9;14,9;18,5;20,7;20,1;16,9;11,4;6,7;3,1 +Marseille;5,5;6,6;10,0;13,0;16,8;20,8;23,3;22,8;19,9;15,0;10,2;6,9 +Montpellier;5,6;6,7;9,9;12,8;16,2;20,1;22,7;22,3;19,3;14,6;10,0;6,5 +Nantes;5,0;5,3;8,4;10,8;13,9;17,2;18,8;18,6;16,4;12,2;8,2;5,5 +Nice;7,5;8,5;10,8;13,3;16,7;20,1;22,7;22,5;20,3;16,0;11,5;8,2 +Paris;3,4;4,1;7,6;10,7;14,3;17,5;19,1;18,7;16,0;11,4;7,1;4,3 +Rennes;4,8;5,3;7,9;10,1;13,1;16,2;17,9;17,8;15,7;11,6;7,8;5,4 +Strasbourg;0,4;1,5;5,6;9,8;14,0;17,2;19,0;18,3;15,1;9,5;4,9;1,3 +Toulouse;4,7;5,6;9,2;11,6;14,9;18,7;20,9;20,9;18,3;13,3;8,6;5,5 +Vichy;2,4;3,4;7,1;9,9;13,6;17,1;19,3;18,8;16,0;11,0;6,6;3,4