Skip to content

Commit ac1deb4

Browse files
committedMay 14, 2025
Move Tensor_normalize_dataset and Tensor_shuffle_dataset from basic.c to utils.c
- Follows the principle of "separation of concerns" by keeping the core tensor operations separate from the dataset preprocessing utilities (see https://en.wikipedia.org/wiki/Separation_of_concerns).
1 parent cdfb434 commit ac1deb4

File tree

3 files changed

+59
-58
lines changed

3 files changed

+59
-58
lines changed
 

‎include/cten.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,6 @@ Tensor Tensor_transpose(Tensor self);
4444

4545
float Tensor_get(Tensor self, int i, int j, int k, int l);
4646
void Tensor_set(Tensor self, int i, int j, int k, int l, float value);
47-
void Tensor_normalize_dataset(const float (*X)[4], float (*X_norm)[4], int n_samples, int n_train_samples, int n_features);Tensor Tensor_detach(Tensor self);
48-
void Tensor_shuffle_dataset(const float (*X)[4], const int *y,float (*X_shuffled)[4], int *y_shuffled, int n_samples, int n_features);
4947
void Tensor_backward(Tensor self, Tensor grad);
5048
int Tensor_backward_apply(Tensor self, void (*f)(Tensor, void*), void* ctx);
5149

@@ -114,6 +112,9 @@ void cten_begin_eval();
114112
bool cten_is_eval();
115113
void cten_end_eval();
116114

115+
/* Utils */

/* Standardize each feature of X into X_norm (subtract mean, divide by standard
 * deviation); the statistics come from the first n_train_samples rows only. */
void Tensor_normalize_dataset(const float (*X)[4], float (*X_norm)[4], int n_samples, int n_train_samples, int n_features);

/* NOTE(review): this declaration used to share a line with the prototype above
 * and moved into this section with it; it does not look like a dataset utility,
 * so consider declaring it next to the other tensor operations. */
Tensor Tensor_detach(Tensor self);

/* Copy the rows of X and their labels y into X_shuffled / y_shuffled in a
 * random order, keeping each row paired with its label. */
void Tensor_shuffle_dataset(const float (*X)[4], const int *y, float (*X_shuffled)[4], int *y_shuffled, int n_samples, int n_features);
117118
void cten_assert(bool cond, const char* fmt, ...);
118119
void cten_assert_shape(const char* title, TensorShape a, TensorShape b);
119120
void cten_assert_dim(const char* title, int a, int b);

‎src/basic.c

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -8,62 +8,6 @@
88
#include <math.h>
99
#include <time.h>
1010

11-
void Tensor_normalize_dataset(const float (*X)[4], float (*X_norm)[4], int n_samples, int n_train_samples, int n_features) {
12-
float mean[4] = {0}, std[4] = {0};
13-
14-
for (int i = 0; i < n_train_samples; i++) {
15-
for (int j = 0; j < n_features; j++) {
16-
mean[j] += X[i][j];
17-
}
18-
}
19-
for (int j = 0; j < n_features; j++) {
20-
mean[j] /= n_train_samples;
21-
}
22-
23-
for (int i = 0; i < n_train_samples; i++) {
24-
for (int j = 0; j < n_features; j++) {
25-
std[j] += (X[i][j] - mean[j]) * (X[i][j] - mean[j]);
26-
}
27-
}
28-
for (int j = 0; j < n_features; j++) {
29-
std[j] = sqrtf(std[j] / n_train_samples);
30-
// Avoid division by zero
31-
if (std[j] == 0) std[j] = 1.0f;
32-
}
33-
34-
for (int i = 0; i < n_samples; i++) {
35-
for (int j = 0; j < n_features; j++) {
36-
X_norm[i][j] = (X[i][j] - mean[j]) / std[j];
37-
}
38-
}
39-
}
40-
41-
void Tensor_shuffle_dataset(const float (*X)[4], const int *y,float (*X_shuffled)[4], int *y_shuffled, int n_samples, int n_features) {
42-
int* indices = malloc(n_samples * sizeof(int));
43-
for (int i = 0; i < n_samples; i++) {
44-
indices[i] = i;
45-
}
46-
47-
// Fisher-Yates shuffle
48-
srand((unsigned)time(NULL));
49-
for (int i = n_samples - 1; i > 0; i--) {
50-
int j = rand() % (i + 1);
51-
int tmp = indices[i];
52-
indices[i] = indices[j];
53-
indices[j] = tmp;
54-
}
55-
56-
for (int i = 0; i < n_samples; i++) {
57-
int idx = indices[i];
58-
for (int j = 0; j < n_features; j++) {
59-
X_shuffled[i][j] = X[idx][j];
60-
}
61-
y_shuffled[i] = y[idx];
62-
}
63-
64-
free(indices);
65-
}
66-
6711
int TensorShape_numel(TensorShape shape) {
6812
int numel = 1;
6913
for(int i = 0; i < sizeof(TensorShape) / sizeof(shape[0]); i++) {

‎src/utils.c

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,59 @@ bool cten_elemwise_broadcast(Tensor* a, Tensor* b) {
9999
}
100100
return true;
101101
}
102+
103+
void Tensor_normalize_dataset(const float (*X)[4], float (*X_norm)[4], int n_samples, int n_train_samples, int n_features) {
104+
float mean[4] = {0}, std[4] = {0};
105+
106+
for (int i = 0; i < n_train_samples; i++) {
107+
for (int j = 0; j < n_features; j++) {
108+
mean[j] += X[i][j];
109+
}
110+
}
111+
for (int j = 0; j < n_features; j++) {
112+
mean[j] /= n_train_samples;
113+
}
114+
115+
for (int i = 0; i < n_train_samples; i++) {
116+
for (int j = 0; j < n_features; j++) {
117+
std[j] += (X[i][j] - mean[j]) * (X[i][j] - mean[j]);
118+
}
119+
}
120+
for (int j = 0; j < n_features; j++) {
121+
std[j] = sqrtf(std[j] / n_train_samples);
122+
// Avoid division by zero
123+
if (std[j] == 0) std[j] = 1.0f;
124+
}
125+
126+
for (int i = 0; i < n_samples; i++) {
127+
for (int j = 0; j < n_features; j++) {
128+
X_norm[i][j] = (X[i][j] - mean[j]) / std[j];
129+
}
130+
}
131+
}
132+
133+
/**
 * Write a randomly permuted copy of a dataset's rows and labels.
 *
 * Row i of X and label y[i] always travel together, so X_shuffled[k] and
 * y_shuffled[k] still describe the same sample after the call.
 *
 * @param X           input rows of 4 floats each (n_samples rows); not modified
 * @param y           input labels (n_samples entries); not modified
 * @param X_shuffled  output rows of 4 floats each; room for n_samples rows
 * @param y_shuffled  output labels; room for n_samples entries
 * @param n_samples   number of samples; values <= 0 write nothing
 * @param n_features  number of leading columns to copy; values larger than the
 *                    row width (4) are clamped to it
 *
 * The shuffle is done in place on the output buffers, so no temporary memory is
 * allocated and the function cannot fail. Columns at index >= n_features of
 * X_shuffled are never written.
 *
 * Randomness comes from rand(). The generator is seeded from the wall clock on
 * the first call only; re-seeding on every call would make calls within the
 * same second produce the same permutation. The seeding flag is a plain static,
 * so concurrent calls are not supported.
 */
void Tensor_shuffle_dataset(const float (*X)[4], const int *y, float (*X_shuffled)[4], int *y_shuffled, int n_samples, int n_features) {
    enum { ROW_WIDTH = 4 };
    static int seeded = 0;

    /* Rows only have ROW_WIDTH columns; never index past them. */
    if (n_features > ROW_WIDTH) {
        n_features = ROW_WIDTH;
    }

    if (!seeded) {
        srand((unsigned)time(NULL));
        seeded = 1;
    }

    /* Start from an identity copy of the dataset... */
    for (int i = 0; i < n_samples; i++) {
        for (int k = 0; k < n_features; k++) {
            X_shuffled[i][k] = X[i][k];
        }
        y_shuffled[i] = y[i];
    }

    /* ...then Fisher-Yates shuffle the copy, swapping row and label together. */
    for (int i = n_samples - 1; i > 0; i--) {
        int j = rand() % (i + 1);

        for (int k = 0; k < n_features; k++) {
            float value = X_shuffled[i][k];
            X_shuffled[i][k] = X_shuffled[j][k];
            X_shuffled[j][k] = value;
        }

        int label = y_shuffled[i];
        y_shuffled[i] = y_shuffled[j];
        y_shuffled[j] = label;
    }
}

0 commit comments

Comments
 (0)
Please sign in to comment.