Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Zstandard streaming decompression #47

Merged
merged 12 commits into from
Aug 14, 2024
Merged
6 changes: 3 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
node-version: [16.x, 18.x, 20.x]
node-version: [18.x, 20.x, 22.x]
steps:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
- uses: actions/setup-node@v4
with:
node-version: ${{ matrix.node-version }}
- run: npm ci
Expand All @@ -29,7 +29,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
- uses: actions/setup-node@v4
with:
node-version: 20
- run: npm ci
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@ dist/

# ignore codec builds
codecs/**/build/
# pixi environments
.pixi
*.egg-info
4 changes: 3 additions & 1 deletion codecs/zstd/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,18 @@ cd ../../../
${OPTIMIZE} \
-I "$CODEC_DIR/lib" \
--closure 1 \
--bind \
-fwasm-exceptions \
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see you did use wasm exceptions!

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this is listed as "legacy" now, but the alternative proposal isn't supported in any runtime at all, so I'm good with using it for now: https://webassembly.org/features/

-s ALLOW_MEMORY_GROWTH=1 \
-s MODULARIZE=1 \
-s EXPORT_ES6=1 \
-s USE_ES6_IMPORT_META=0 \
-s ENVIRONMENT="webview" \
-s MALLOC=emmalloc \
-s EXPORT_NAME="zstd_codec" \
-s EXPORT_EXCEPTION_HANDLING_HELPERS=1 \
-x c++ \
--std=c++17 \
-lembind \
-lzstd \
-L "$BUILD_DIR/lib" \
-o "zstd_codec.js"
Expand Down
106 changes: 102 additions & 4 deletions codecs/zstd/zstd_codec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,114 @@ val compress(std::string source, int level)

// Decompress the Zstandard data held in `source` and return a typed memory
// view over the decompressed bytes.
//
// The streaming API is used so that frames written without a content size
// (e.g. produced by ZSTD_compressStream) can still be decoded; see
// https://github.com/manzt/numcodecs.js/issues/46. The output buffer starts
// at the size announced by the first frame header (or a guess when that is
// unknown) and grows in ZSTD_DStreamOutSize() increments whenever it fills
// up. Decoding continues while input bytes remain, so concatenated frames
// are handled as well.
//
// On success the buffer is stored in the file-level `dest_ptr`.
// NOTE(review): presumably free_result() releases `dest_ptr` and callers must
// invoke it after copying the view -- confirm against free_result().
//
// Throws std::runtime_error (message prefixed with "zstd codec error: ") on
// any zstd failure, allocation failure, or truncated input. All resources
// allocated by this call are released before throwing.
val decompress(std::string source)
{
    // number of bytes to grow the output buffer by if more space is needed
    const size_t DEST_GROWTH_SIZE = ZSTD_DStreamOutSize();
    const size_t MAX_SIZE = static_cast<size_t>(-1);

    // setup source buffer
    const char *source_ptr = source.data();
    const size_t source_size = source.size();

    // create the decompression stream / context
    ZSTD_DStream *zds = ZSTD_createDStream();
    if (zds == nullptr) {
        throw std::runtime_error("zstd codec error: cannot create decompression stream");
    }

    ZSTD_inBuffer input = {};
    input.src = source_ptr;
    input.size = source_size;
    input.pos = 0;

    ZSTD_outBuffer output = {};
    output.dst = nullptr;
    output.size = 0;
    output.pos = 0;

    // Single exit path for every failure: release the stream and the output
    // buffer, then throw. If `dest_ptr` happens to alias the buffer being
    // freed it is reset so it cannot be freed a second time later.
    auto fail = [&](const std::string &message) {
        if (output.dst != nullptr && dest_ptr == output.dst)
            dest_ptr = nullptr;
        ZSTD_freeDStream(zds);
        free(output.dst);
        throw std::runtime_error("zstd codec error: " + message);
    };

    size_t status = ZSTD_initDStream(zds);
    if (ZSTD_isError(status)) {
        fail(ZSTD_getErrorName(status));
    }

    // choose the initial size of the destination buffer
    const unsigned long long frame_size = ZSTD_getFrameContentSize(source_ptr, source_size);
    size_t dest_size = 0;

    if (frame_size == ZSTD_CONTENTSIZE_ERROR) {
        fail("content size error");
    } else if (frame_size == ZSTD_CONTENTSIZE_UNKNOWN) {
        // guess the decompressed size from the source size (twice as large,
        // unless doubling would overflow size_t)
        dest_size = source_size;
        if (dest_size <= MAX_SIZE / 2)
            dest_size *= 2;

        // start with at least DEST_GROWTH_SIZE (default: 128 KiB)
        if (dest_size < DEST_GROWTH_SIZE)
            dest_size = DEST_GROWTH_SIZE;
    } else if (frame_size > MAX_SIZE) {
        // the announced size does not fit in size_t; casting would silently truncate
        fail("content size exceeds addressable memory");
    } else {
        dest_size = static_cast<size_t>(frame_size);
    }

    // malloc(0) may legitimately return NULL; never request zero bytes so an
    // empty frame is not misreported as an allocation failure
    if (dest_size == 0)
        dest_size = 1;

    // the output buffer is either handed over to dest_ptr on success or freed by fail()
    output.dst = malloc(dest_size);
    if (output.dst == nullptr) {
        fail("cannot allocate output buffer");
    }
    output.size = dest_size;

    // Call ZSTD_decompressStream repeatedly until the input is exhausted and
    // the last frame is complete (status == 0), or an error occurs.
    do {
        status = ZSTD_decompressStream(zds, &output, &input);
        if (ZSTD_isError(status)) {
            fail(ZSTD_getErrorName(status));
        }

        if (status > 0) {
            if (output.pos == output.size) {
                // output is full: expand it by DEST_GROWTH_SIZE
                const size_t new_size = output.size + DEST_GROWTH_SIZE;
                if (new_size < output.size || new_size < DEST_GROWTH_SIZE) {
                    fail("output buffer overflow");
                }

                void *new_dst = realloc(output.dst, new_size);
                if (new_dst == nullptr) {
                    // realloc leaves the original block untouched on failure; fail() frees it
                    fail("could not expand output buffer");
                }
                // the old output.dst is released by realloc if it succeeds
                output.dst = new_dst;
                output.size = new_size;
            } else if (input.pos == input.size) {
                // The decoder flushed everything it could (output has room) and
                // still needs more data, but no input is left: the frame is
                // incomplete and further calls cannot make progress.
                fail("truncated input");
            }
        }

        // status > 0 indicates there are additional bytes to process in this frame
        // status == 0 and input.pos < input.size suggests there may be an additional frame
    } while (status > 0 || input.pos < input.size);

    ZSTD_freeDStream(zds);

    dest_ptr = static_cast<char *>(output.dst);

    return val(typed_memory_view(output.pos, reinterpret_cast<uint8_t *>(dest_ptr)));
}

void free_result()
Expand Down
1 change: 1 addition & 0 deletions codecs/zstd/zstd_codec.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ export interface ZstdModule extends EmscriptenModule {
compress(data: BufferSource, level: number): Uint8Array;
decompress(data: BufferSource): Uint8Array;
free_result(): void;
getExceptionMessage(err: WebAssembly.Exception): [string, string];
}

declare const moduleFactory: EmscriptenModuleFactory<ZstdModule>;
Expand Down
Loading
Loading