Skip to content

Commit 9527d29

Browse files
committedMar 7, 2025
first commit
0 parents  commit 9527d29

4 files changed

+28249
-0
lines changed
 

‎.DS_Store

6 KB
Binary file not shown.

‎MAT2775 Projet1 rapport.pdf

966 KB
Binary file not shown.

‎Projet1_mat2775.R

+347
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,347 @@
1+
### STEP 2

## Data import
# Prefer the original download location; when that file is absent, fall back
# to a CSV of the same name in the working directory.
# NOTE(review): the commit lists StudentDepressionDataset.csv next to this
# script -- confirm the script is run from the repository root.
data_path <- "/Users/keliane/Downloads/StudentDepressionDataset.csv"
if (!file.exists(data_path)) {
  data_path <- "StudentDepressionDataset.csv"
}
data <- read.csv(data_path, header = TRUE)
head(data, 15)
str(data)
print(dim(data))

## Data cleaning
# Count the missing cells, then keep a copy with the incomplete rows removed.
# NOTE(review): the rest of the script keeps working on `data` (with
# na.rm = TRUE) and never reads `data_clean` -- confirm this is intended.
sum(is.na(data))
data_clean <- na.omit(data)
13+
14+
# Convert the categorical columns to factors.
# `$` access is kept on purpose: it tolerates column names that read.csv()
# may have lengthened while sanitising the CSV header.
ID_fct                <- as.factor(data$id)
gender_fct            <- as.factor(data$Gender)
city_fct              <- as.factor(data$City)
profession_fct        <- as.factor(data$Profession)
Sleep_duration_fct    <- as.factor(data$Sleep.Duration)
dietary_habits_fct    <- as.factor(data$Dietary.Habits)
family_history_fct    <- as.factor(data$Family.History.of.Mental.Illness)
suicidal_thoughts_fct <- as.factor(data$Have.you.ever.had.suicidal.thoughts)
degree_fct            <- as.factor(data$Degree)
depression_status_fct <- as.factor(data$Depression)

# Display each factor (values followed by its levels).
ID_fct
gender_fct
city_fct
profession_fct
Sleep_duration_fct
dietary_habits_fct
family_history_fct
suicidal_thoughts_fct
degree_fct
depression_status_fct
36+
37+
38+
39+
### STEP 3: Data analysis and exploration

## Descriptive statistics
# For each numeric column: location (mean, median), extremes (min, max),
# spread (variance, standard deviation), quartiles, and the summary() table.
# Missing values are ignored throughout.

# Age column
mean_age          <- mean(data$Age, na.rm = TRUE)
median_age        <- median(data$Age, na.rm = TRUE)
min_age           <- min(data$Age, na.rm = TRUE)
max_age           <- max(data$Age, na.rm = TRUE)
variance_age      <- var(data$Age, na.rm = TRUE)
sd_age            <- sd(data$Age, na.rm = TRUE)
quartiles_age     <- quantile(data$Age, probs = seq(0, 1, by = 0.25), na.rm = TRUE)
summary_stats_age <- summary(data$Age)

print(summary_stats_age)

# CGPA column
# NOTE(review): `mean_cgpa` is lower-case while the other CGPA variables use
# `_CGPA`; the name is kept as-is here.
mean_cgpa          <- mean(data$CGPA, na.rm = TRUE)
median_CGPA        <- median(data$CGPA, na.rm = TRUE)
min_CGPA           <- min(data$CGPA, na.rm = TRUE)
max_CGPA           <- max(data$CGPA, na.rm = TRUE)
variance_CGPA      <- var(data$CGPA, na.rm = TRUE)
sd_CGPA            <- sd(data$CGPA, na.rm = TRUE)
quartiles_CGPA     <- quantile(data$CGPA, probs = seq(0, 1, by = 0.25), na.rm = TRUE)
summary_stats_CGPA <- summary(data$CGPA)

print(summary_stats_CGPA)

# Study Satisfaction column
mean_Study_satisfaction     <- mean(data$Study.Satisfaction, na.rm = TRUE)
median_Study_satisfaction   <- median(data$Study.Satisfaction, na.rm = TRUE)
min_Study_satisfaction      <- min(data$Study.Satisfaction, na.rm = TRUE)
max_Study_satisfaction      <- max(data$Study.Satisfaction, na.rm = TRUE)
variance_Study_satisfaction <- var(data$Study.Satisfaction, na.rm = TRUE)
sd_Study_satisfaction       <- sd(data$Study.Satisfaction, na.rm = TRUE)
quartiles_Study_satisfaction <- quantile(
  data$Study.Satisfaction,
  probs = seq(0, 1, by = 0.25),
  na.rm = TRUE
)
summary_stats_Study_satisfaction <- summary(data$Study.Satisfaction)

print(summary_stats_Study_satisfaction)

# Job Satisfaction column
mean_Job_Satisfaction     <- mean(data$Job.Satisfaction, na.rm = TRUE)
median_Job_Satisfaction   <- median(data$Job.Satisfaction, na.rm = TRUE)
min_Job_Satisfaction      <- min(data$Job.Satisfaction, na.rm = TRUE)
max_Job_Satisfaction      <- max(data$Job.Satisfaction, na.rm = TRUE)
variance_Job_Satisfaction <- var(data$Job.Satisfaction, na.rm = TRUE)
sd_Job_Satisfaction       <- sd(data$Job.Satisfaction, na.rm = TRUE)
quartiles_Job_Satisfaction <- quantile(
  data$Job.Satisfaction,
  probs = seq(0, 1, by = 0.25),
  na.rm = TRUE
)
summary_stats_Job_Satisfaction <- summary(data$Job.Satisfaction)

print(summary_stats_Job_Satisfaction)

# Academic Pressure column
mean_Academic_Pressure     <- mean(data$Academic.Pressure, na.rm = TRUE)
median_Academic_Pressure   <- median(data$Academic.Pressure, na.rm = TRUE)
min_Academic_Pressure      <- min(data$Academic.Pressure, na.rm = TRUE)
max_Academic_Pressure      <- max(data$Academic.Pressure, na.rm = TRUE)
variance_Academic_Pressure <- var(data$Academic.Pressure, na.rm = TRUE)
sd_Academic_Pressure       <- sd(data$Academic.Pressure, na.rm = TRUE)
quartiles_Academic_Pressure <- quantile(
  data$Academic.Pressure,
  probs = seq(0, 1, by = 0.25),
  na.rm = TRUE
)
summary_stats_Academic_Pressure <- summary(data$Academic.Pressure)

print(summary_stats_Academic_Pressure)

# Work Pressure column
mean_Work_Pressure     <- mean(data$Work.Pressure, na.rm = TRUE)
median_Work_Pressure   <- median(data$Work.Pressure, na.rm = TRUE)
min_Work_Pressure      <- min(data$Work.Pressure, na.rm = TRUE)
max_Work_Pressure      <- max(data$Work.Pressure, na.rm = TRUE)
variance_Work_Pressure <- var(data$Work.Pressure, na.rm = TRUE)
sd_Work_Pressure       <- sd(data$Work.Pressure, na.rm = TRUE)
quartiles_Work_Pressure <- quantile(
  data$Work.Pressure,
  probs = seq(0, 1, by = 0.25),
  na.rm = TRUE
)
summary_stats_Work_Pressure <- summary(data$Work.Pressure)

print(summary_stats_Work_Pressure)

# Financial Stress column
mean_Financial_stress     <- mean(data$Financial.Stress, na.rm = TRUE)
median_Financial_stress   <- median(data$Financial.Stress, na.rm = TRUE)
min_Financial_stress      <- min(data$Financial.Stress, na.rm = TRUE)
max_Financial_stress      <- max(data$Financial.Stress, na.rm = TRUE)
variance_Financial_stress <- var(data$Financial.Stress, na.rm = TRUE)
sd_Financial_stress       <- sd(data$Financial.Stress, na.rm = TRUE)
quartiles_Financial_stress <- quantile(
  data$Financial.Stress,
  probs = seq(0, 1, by = 0.25),
  na.rm = TRUE
)
summary_stats_Financial_stress <- summary(data$Financial.Stress)

print(summary_stats_Financial_stress)
139+
140+
# Correlation matrix of the numeric variables, computed on the rows that are
# complete for all seven columns.
correlation_matrix <- cor(
  data[c(
    "Age", "CGPA", "Study.Satisfaction", "Job.Satisfaction",
    "Academic.Pressure", "Work.Pressure", "Financial.Stress"
  )],
  use = "complete.obs"
)
print(correlation_matrix)
143+
144+
145+
146+
147+
## Visualisation

# Categorical data: bar charts (geom_bar()), one per factor built earlier.
library(ggplot2)

# Gender
ggplot(data) +
  geom_bar(aes(x = gender_fct), fill = "blue") +
  labs(title = "Distribution of Gender", x = "Gender", y = "Count")

# City (x labels rotated so the city names stay readable)
ggplot(data) +
  geom_bar(aes(x = city_fct), fill = "green") +
  labs(title = "Distribution by City", x = "City", y = "Count") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Profession
ggplot(data) +
  geom_bar(aes(x = profession_fct), fill = "red") +
  labs(title = "Distribution of Profession", x = "Profession", y = "Count")

# Sleep Duration
ggplot(data) +
  geom_bar(aes(x = Sleep_duration_fct), fill = "purple") +
  labs(
    title = "Distribution of Sleep Duration",
    x = "Sleep Duration",
    y = "Count"
  )

# Dietary Habits
ggplot(data) +
  geom_bar(aes(x = dietary_habits_fct), fill = "orange") +
  labs(
    title = "Distribution of Dietary Habits",
    x = "Dietary Habits",
    y = "Count"
  )

# Family History of Mental Illness
ggplot(data) +
  geom_bar(aes(x = family_history_fct), fill = "cyan") +
  labs(
    title = "Family History of Mental Illness",
    x = "Family History",
    y = "Count"
  )

# Suicidal Thoughts
ggplot(data) +
  geom_bar(aes(x = suicidal_thoughts_fct), fill = "magenta") +
  labs(
    title = "Have you ever had suicidal thoughts?",
    x = "Suicidal Thoughts",
    y = "Count"
  )

# Degree
ggplot(data) +
  geom_bar(aes(x = degree_fct), fill = "yellow") +
  labs(title = "Distribution of Degrees", x = "Degree", y = "Count")

# Depression Status
ggplot(data) +
  geom_bar(aes(x = depression_status_fct), fill = "lightblue") +
  labs(title = "Depression Status", x = "Depression", y = "Count")
193+
194+
195+
# Continuous data: histograms (geom_histogram()), 30 bins each.

library(ggplot2)

# Age
ggplot(data) +
  geom_histogram(
    aes(x = Age),
    bins = 30, fill = "cornflowerblue", color = "black"
  ) +
  labs(title = "Histogram of Age", x = "Age", y = "Frequency")

# CGPA
ggplot(data) +
  geom_histogram(
    aes(x = CGPA),
    bins = 30, fill = "steelblue", color = "black"
  ) +
  labs(title = "Histogram of CGPA", x = "CGPA", y = "Frequency")

# Study Satisfaction
ggplot(data) +
  geom_histogram(
    aes(x = Study.Satisfaction),
    bins = 30, fill = "lightblue", color = "black"
  ) +
  labs(
    title = "Histogram of Study Satisfaction",
    x = "Study Satisfaction",
    y = "Frequency"
  )

# Job Satisfaction
ggplot(data) +
  geom_histogram(
    aes(x = Job.Satisfaction),
    bins = 30, fill = "gray", color = "black"
  ) +
  labs(
    title = "Histogram of Job Satisfaction",
    x = "Job Satisfaction",
    y = "Frequency"
  )

# Academic Pressure
ggplot(data) +
  geom_histogram(
    aes(x = Academic.Pressure),
    bins = 30, fill = "darkblue", color = "black"
  ) +
  labs(
    title = "Histogram of Academic Pressure",
    x = "Academic Pressure",
    y = "Frequency"
  )

# Work Pressure
ggplot(data) +
  geom_histogram(
    aes(x = Work.Pressure),
    bins = 30, fill = "purple", color = "black"
  ) +
  labs(
    title = "Histogram of Work Pressure",
    x = "Work Pressure",
    y = "Frequency"
  )

# Financial Stress
ggplot(data) +
  geom_histogram(
    aes(x = Financial.Stress),
    bins = 30, fill = "red", color = "black"
  ) +
  labs(
    title = "Histogram of Financial Stress",
    x = "Financial Stress",
    y = "Frequency"
  )
233+
234+
235+
236+
237+
# Relationships between variables: scatter plots (geom_point()).
# `data_frame` is rebuilt before each plot with just the two columns plotted.

# Age against CGPA
data_frame <- data.frame(
  Variable1 = data$Age,
  Variable2 = data$CGPA
)
ggplot(data_frame) +
  geom_point(aes(x = Variable1, y = Variable2), color = "darkorange") +
  labs(title = " Age vs CGPA", x = "Variable 1", y = "Variable 2") +
  theme_minimal()

# Study Satisfaction against Job Satisfaction
data_frame <- data.frame(
  Variable3 = data$Study.Satisfaction,
  Variable4 = data$Job.Satisfaction
)
ggplot(data_frame) +
  geom_point(aes(x = Variable3, y = Variable4), color = "darkorange") +
  labs(
    title = " Study Satisfaction vs Job Satisfaction",
    x = "Variable 3",
    y = "Variable 4"
  ) +
  theme_minimal()

# Academic Pressure against Work Pressure
data_frame <- data.frame(
  Variable5 = data$Academic.Pressure,
  Variable6 = data$Work.Pressure
)
ggplot(data_frame) +
  geom_point(aes(x = Variable5, y = Variable6), color = "darkorange") +
  labs(
    title = " Academic Pressure vs Work Pressure",
    x = "Variable 5",
    y = "Variable 6"
  ) +
  theme_minimal()
257+
258+
259+
# Scatter plot of Academic Pressure (x) against Financial Stress (y), with
# points coloured by Study Satisfaction and sized by Job Satisfaction.
data_frame <- data.frame(
  Variable5 = data$Academic.Pressure,
  Variable7 = data$Financial.Stress,
  Variable4 = data$Job.Satisfaction,
  Study.Satisfaction = factor(data$Study.Satisfaction)
)
# The colour aesthetic is looked up in `data_frame`, so Study.Satisfaction has
# to be one of its columns (it was missing, which made the plot fail).

# Stretch the four predefined colours over however many satisfaction levels
# are present, so the manual scale never has fewer values than levels.
n_levels <- nlevels(data_frame$Study.Satisfaction)
satisfaction_colors <- grDevices::colorRampPalette(
  c("blue", "green", "red", "purple")
)(n_levels)

# geom_point() takes no fixed colour here: a constant colour would override
# the Study Satisfaction colour mapping.
ggplot(
  data_frame,
  aes(
    x = Variable5,
    y = Variable7,
    color = Study.Satisfaction,
    size = Variable4
  )
) +
  geom_point() +
  theme_minimal() +
  scale_color_manual(values = satisfaction_colors) +
  labs(
    title = "Academic Pressure vs Financial Stress with Study Satisfaction and Job Satisfaction",
    x = "Variable 5",
    y = "Variable 7",
    color = "Study Satisfaction",
    size = "Job Satisfaction"
  )
271+
272+
273+
274+
# Outlier detection: side-by-side box plots of the seven numeric variables.
# NOTE(review): here Variable6 is Financial.Stress and Variable7 is
# Work.Pressure, the reverse of the numbering used in the scatter plots.
data_frame <- setNames(
  data[c(
    "Age", "CGPA", "Study.Satisfaction", "Job.Satisfaction",
    "Academic.Pressure", "Financial.Stress", "Work.Pressure"
  )],
  paste0("Variable", 1:7)
)
boxplot(
  data_frame,
  main = "Données aléatoires | Graphiques en boîte comparatifs",
  ylab = "Fréquence",
  col = "lightblue",
  # Outliers are drawn as enlarged red diamonds.
  outpch = 18,
  outcex = 1.7,
  outcol = "red"
)
279+
280+
281+
282+
283+
### STEP 4: STATISTICAL ANALYSIS

## Central tendency

# Means (missing values ignored)
mean_age                <- mean(data$Age, na.rm = TRUE)
mean_CGPA               <- mean(data$CGPA, na.rm = TRUE)
mean_Study_satisfaction <- mean(data$Study.Satisfaction, na.rm = TRUE)
mean_Job_Satisfaction   <- mean(data$Job.Satisfaction, na.rm = TRUE)
mean_Academic_Pressure  <- mean(data$Academic.Pressure, na.rm = TRUE)
mean_Work_Pressure      <- mean(data$Work.Pressure, na.rm = TRUE)
mean_Financial_stress   <- mean(data$Financial.Stress, na.rm = TRUE)

# Medians (missing values ignored)
median_age                <- median(data$Age, na.rm = TRUE)
median_CGPA               <- median(data$CGPA, na.rm = TRUE)
median_Study_satisfaction <- median(data$Study.Satisfaction, na.rm = TRUE)
median_Job_Satisfaction   <- median(data$Job.Satisfaction, na.rm = TRUE)
median_Academic_Pressure  <- median(data$Academic.Pressure, na.rm = TRUE)
median_Work_Pressure      <- median(data$Work.Pressure, na.rm = TRUE)
median_Financial_stress   <- median(data$Financial.Stress, na.rm = TRUE)
308+
309+
310+
# Variance and standard deviation of each numeric column (one value per
# column, missing values ignored).
variance_values <- vapply(
  data[c(
    "Age", "CGPA", "Study.Satisfaction", "Job.Satisfaction",
    "Academic.Pressure", "Work.Pressure", "Financial.Stress"
  )],
  var,
  numeric(1),
  na.rm = TRUE
)
print(variance_values)

sd_values <- vapply(
  data[c(
    "Age", "CGPA", "Study.Satisfaction", "Job.Satisfaction",
    "Academic.Pressure", "Work.Pressure", "Financial.Stress"
  )],
  sd,
  numeric(1),
  na.rm = TRUE
)
print(sd_values)

# Interquartile range of each numeric column.
iqr_values <- vapply(
  data[c(
    "Age", "CGPA", "Study.Satisfaction", "Job.Satisfaction",
    "Academic.Pressure", "Work.Pressure", "Financial.Stress"
  )],
  IQR,
  numeric(1),
  na.rm = TRUE
)
print(iqr_values)
321+
322+
323+
# Outliers: for each numeric column, the values that boxplot.stats() reports
# in its `out` component.
variables <- c(
  "Age", "CGPA", "Study.Satisfaction", "Job.Satisfaction",
  "Academic.Pressure", "Work.Pressure", "Financial.Stress"
)
# lapply() always yields a named list. sapply() would switch between a list,
# a vector and a matrix depending on how many outliers each column happens to
# contain.
outliers <- lapply(data[variables], function(x) boxplot.stats(x)$out)
print(outliers)
327+
328+
329+
330+
331+
## Symmetry and asymmetry analysis

library(e1071)
# Skewness of each numeric column. Missing values are dropped, as for every
# other statistic in this script; without na.rm = TRUE a single NA in a
# column would make its skewness NA.
skewness_values <- vapply(
  data[c(
    "Age", "CGPA", "Study.Satisfaction", "Job.Satisfaction",
    "Academic.Pressure", "Work.Pressure", "Financial.Stress"
  )],
  skewness,
  numeric(1),
  na.rm = TRUE
)
print(skewness_values)

## *COMMENTS ON RESULTS* The variables Age, CGPA, Study.Satisfaction and
## Academic.Pressure have skewness values very close to zero, which indicates
## that their distributions are relatively symmetric.
338+
339+
340+
## Correlation analysis
# Correlation matrix of the seven numeric variables, using only the rows that
# have no missing value in any of them.
correlation_matrix <- cor(
  data[c(
    "Age", "CGPA", "Study.Satisfaction", "Job.Satisfaction",
    "Academic.Pressure", "Work.Pressure", "Financial.Stress"
  )],
  use = "complete.obs"
)
print(correlation_matrix)
343+
344+
345+
346+
## Visualisation avec nuage de points
347+

‎StudentDepressionDataset.csv

+27,902
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)
Please sign in to comment.