Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compressed resource optimization #93

Merged
merged 5 commits into from
Jul 3, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions .eslintrc.js

This file was deleted.

15 changes: 13 additions & 2 deletions cmd/rwp/cmd/serve/api.go
Original file line number Diff line number Diff line change
@@ -17,7 +17,9 @@ import (
httprange "github.com/gotd/contrib/http_range"
"github.com/pkg/errors"
"github.com/readium/go-toolkit/cmd/rwp/cmd/serve/cache"
"github.com/readium/go-toolkit/pkg/archive"
"github.com/readium/go-toolkit/pkg/asset"
"github.com/readium/go-toolkit/pkg/fetcher"
"github.com/readium/go-toolkit/pkg/manifest"
"github.com/readium/go-toolkit/pkg/pub"
"github.com/readium/go-toolkit/pkg/streamer"
@@ -243,8 +245,17 @@ func (s *Server) getAsset(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusPartialContent)
}

// Stream the asset
_, rerr = res.Stream(w, start, end)
cres, ok := res.(fetcher.CompressedResource)
if ok && cres.CompressedAs(archive.CompressionMethodDeflate) && start == 0 && end == 0 && supportsDeflate(r) {
// Stream the asset in compressed format
w.Header().Set("content-encoding", "deflate")
w.Header().Set("content-length", strconv.FormatInt(cres.CompressedLength(), 10))
_, err = cres.StreamCompressed(w)
} else {
// Stream the asset
_, rerr = res.Stream(w, start, end)
}

if rerr != nil {
if errors.Is(err, syscall.EPIPE) || errors.Is(err, syscall.ECONNRESET) {
// Ignore client errors
26 changes: 26 additions & 0 deletions cmd/rwp/cmd/serve/helpers.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package serve

import (
"net/http"
"strings"

"github.com/readium/go-toolkit/pkg/manifest"
@@ -62,3 +63,28 @@ func conformsToAsMimetype(conformsTo manifest.Profiles) string {
}
return mime
}

// supportsDeflate reports whether the client that sent r is willing to receive
// a response body using the "deflate" content coding.
//
// Every Accept-Encoding header line is inspected; each one is a comma-separated
// list of codings, optionally followed by a ";q=" weight. An entry whose weight
// is zero (for example "deflate;q=0") is an explicit refusal of that coding and
// is therefore not counted as support.
func supportsDeflate(r *http.Request) bool {
	for _, v := range r.Header.Values("Accept-Encoding") {
		for _, sv := range strings.Split(v, ",") {
			if parseCoding(sv) != "deflate" {
				continue
			}
			if codingRefused(sv) {
				// The client listed deflate only to exclude it. Keep scanning
				// in case another entry accepts it.
				continue
			}
			return true
		}
	}
	return false
}

// parseCoding extracts the content-coding name from one element of an
// Accept-Encoding list: everything from the first ';' on is dropped, the
// remainder is trimmed of surrounding whitespace and lowercased. An empty
// element yields "".
func parseCoding(s string) (coding string) {
	p := strings.IndexRune(s, ';')
	if p == -1 {
		p = len(s)
	}
	coding = strings.ToLower(strings.TrimSpace(s[:p]))
	return
}

// codingRefused reports whether one element of an Accept-Encoding list carries
// a quality value of zero ("q=0", "q=0.0", ...), which marks the coding as not
// acceptable. Elements without a q parameter are never refused.
func codingRefused(s string) bool {
	// Split always returns at least one element (the coding itself), so
	// slicing from index 1 is safe and leaves only the parameters.
	for _, param := range strings.Split(s, ";")[1:] {
		eq := strings.IndexByte(param, '=')
		if eq == -1 || !strings.EqualFold(strings.TrimSpace(param[:eq]), "q") {
			continue
		}
		q := strings.TrimSpace(param[eq+1:])
		rest := strings.TrimPrefix(q, "0")
		if rest == q {
			// Empty, or does not start with '0': cannot be a zero weight.
			return false
		}
		// Zero is "0" optionally followed by '.' and nothing but zeros.
		return rest == "" || (rest[0] == '.' && strings.Trim(rest[1:], "0") == "")
	}
	return false
}
3 changes: 2 additions & 1 deletion pkg/archive/archive.go
Original file line number Diff line number Diff line change
@@ -55,9 +55,10 @@ type Entry interface {
Path() string // Absolute path to the entry in the archive.
Length() uint64 // Uncompressed data length.
CompressedLength() uint64 // Compressed data length.
CompressedAs(compressionMethod CompressionMethod) bool // Whether the entry is compressed using the given method.
Read(start int64, end int64) ([]byte, error) // Reads the whole content of this entry, or a portion when [start] or [end] are specified.
Stream(w io.Writer, start int64, end int64) (int64, error) // Streams the whole content of this entry to a writer, or a portion when [start] or [end] are specified.
// Close()
StreamCompressed(w io.Writer) (int64, error) // Streams the compressed content of this entry to a writer.
}

// Represents an immutable archive.
8 changes: 8 additions & 0 deletions pkg/archive/archive_exploded.go
Original file line number Diff line number Diff line change
@@ -26,6 +26,10 @@ func (e explodedArchiveEntry) CompressedLength() uint64 {
return 0
}

// CompressedAs always reports false, whatever compression method is asked
// about: this entry type never declares itself as compressed.
func (e explodedArchiveEntry) CompressedAs(compressionMethod CompressionMethod) bool {
return false
}

func (e explodedArchiveEntry) Read(start int64, end int64) ([]byte, error) {
if end < start {
return nil, errors.New("range not satisfiable")
@@ -82,6 +86,10 @@ func (e explodedArchiveEntry) Stream(w io.Writer, start int64, end int64) (int64
return n, nil
}

// StreamCompressed always fails for this entry type: nothing is written to w
// and the returned count is -1 together with an "entry is not compressed"
// error.
func (e explodedArchiveEntry) StreamCompressed(w io.Writer) (int64, error) {
	notCompressed := errors.New("entry is not compressed")
	return -1, notCompressed
}

// An archive exploded on the file system as a directory.
type explodedArchive struct {
directory string // Directory, already cleaned!
19 changes: 19 additions & 0 deletions pkg/archive/archive_zip.go
Original file line number Diff line number Diff line change
@@ -31,6 +31,13 @@ func (e gozipArchiveEntry) CompressedLength() uint64 {
return e.file.CompressedSize64
}

// CompressedAs reports whether the entry is stored in the ZIP using the given
// compression method. Only CompressionMethodDeflate can ever yield true; any
// other method is answered with false regardless of how the entry is stored.
func (e gozipArchiveEntry) CompressedAs(compressionMethod CompressionMethod) bool {
	return compressionMethod == CompressionMethodDeflate && e.file.Method == zip.Deflate
}

// This is a special mode to minimize the number of reads from the underlying reader.
// It's especially useful when trying to stream the ZIP from a remote file, e.g.
// cloud storage. It's only enabled when trying to read the entire file and compression
@@ -145,6 +152,18 @@ func (e gozipArchiveEntry) Stream(w io.Writer, start int64, end int64) (int64, e
return n, nil
}

// StreamCompressed copies the entry's raw (still compressed) bytes to w and
// returns the number of bytes copied.
//
// It fails with a count of -1 when the entry is not deflate-compressed or when
// the raw reader cannot be opened. An error from the copy itself is returned
// alongside the number of bytes written so far.
func (e gozipArchiveEntry) StreamCompressed(w io.Writer) (int64, error) {
	if e.file.Method != zip.Deflate {
		return -1, errors.New("not a compressed resource")
	}

	raw, err := e.file.OpenRaw()
	if err != nil {
		return -1, err
	}

	written, err := io.Copy(w, raw)
	return written, err
}

// An archive from a zip file using go's stdlib
type gozipArchive struct {
zip *zip.Reader
10 changes: 10 additions & 0 deletions pkg/archive/compression.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package archive

import "archive/zip"

// CompressionMethod identifies how an archive entry's data is compressed.
// Its values are taken directly from the archive/zip method constants.
type CompressionMethod uint16

// Compression methods known to this package.
const (
	// CompressionMethodStore mirrors zip.Store.
	CompressionMethodStore = CompressionMethod(zip.Store)
	// CompressionMethodDeflate mirrors zip.Deflate.
	CompressionMethodDeflate = CompressionMethod(zip.Deflate)
)
19 changes: 19 additions & 0 deletions pkg/fetcher/fetcher_archive.go
Original file line number Diff line number Diff line change
@@ -152,6 +152,25 @@ func (r *entryResource) Stream(w io.Writer, start int64, end int64) (int64, *Res
return -1, Other(err)
}

// CompressedAs implements CompressedResource.
// It forwards the question to the underlying archive entry and returns its
// answer unchanged.
func (r *entryResource) CompressedAs(compressionMethod archive.CompressionMethod) bool {
return r.entry.CompressedAs(compressionMethod)
}

// CompressedLength implements CompressedResource.
// It returns the compressed length reported by the underlying archive entry,
// converted to a signed 64-bit integer.
func (r *entryResource) CompressedLength() int64 {
	size := r.entry.CompressedLength()
	return int64(size)
}

// StreamCompressed implements CompressedResource.
// It asks the underlying archive entry to stream its compressed content to w.
// On success the entry's byte count is returned; on failure the count is -1 and
// the entry's error is wrapped with Other.
func (r *entryResource) StreamCompressed(w io.Writer) (int64, *ResourceError) {
	written, err := r.entry.StreamCompressed(w)
	if err != nil {
		return -1, Other(err)
	}
	return written, nil
}

// Length implements Resource
func (r *entryResource) Length() (int64, *ResourceError) {
return int64(r.entry.Length()), nil
29 changes: 29 additions & 0 deletions pkg/fetcher/resource.go
Original file line number Diff line number Diff line change
@@ -3,12 +3,14 @@ package fetcher
import (
"encoding/json"
"encoding/xml"
"errors"
"fmt"
"io"
"net/http"
"os"
"strings"

"github.com/readium/go-toolkit/pkg/archive"
"github.com/readium/go-toolkit/pkg/manifest"
"github.com/readium/xmlquery"
"golang.org/x/text/encoding/unicode"
@@ -365,6 +367,33 @@ func (r ProxyResource) ReadAsXML(prefixes map[string]string) (*xmlquery.Node, *R
return r.Res.ReadAsXML(prefixes)
}

// CompressedAs implements CompressedResource.
// The question is forwarded to the wrapped resource when it is itself a
// CompressedResource; otherwise the answer is false.
func (r ProxyResource) CompressedAs(compressionMethod archive.CompressionMethod) bool {
	if inner, isCompressed := r.Res.(CompressedResource); isCompressed {
		return inner.CompressedAs(compressionMethod)
	}
	return false
}

// CompressedLength implements CompressedResource.
// It returns the wrapped resource's compressed length when that resource is a
// CompressedResource, and -1 when it is not.
func (r ProxyResource) CompressedLength() int64 {
	if inner, isCompressed := r.Res.(CompressedResource); isCompressed {
		return inner.CompressedLength()
	}
	return -1
}

// StreamCompressed implements CompressedResource.
// It delegates to the wrapped resource when that resource is a
// CompressedResource. Otherwise nothing is written and a count of -1 is
// returned with a "resource is not compressed" error.
func (r ProxyResource) StreamCompressed(w io.Writer) (int64, *ResourceError) {
	if inner, isCompressed := r.Res.(CompressedResource); isCompressed {
		return inner.StreamCompressed(w)
	}
	return -1, Other(errors.New("resource is not compressed"))
}

/**
* Transforms the bytes of [resource] on-the-fly.
*
13 changes: 13 additions & 0 deletions pkg/fetcher/traits.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package fetcher

import (
"io"

"github.com/readium/go-toolkit/pkg/archive"
)

// CompressedResource is implemented by resources that may be able to hand out
// their content in its stored, still-compressed form.
type CompressedResource interface {
// CompressedAs reports whether the resource is compressed using the given method.
CompressedAs(compressionMethod archive.CompressionMethod) bool
// CompressedLength returns the length of the compressed data.
// NOTE(review): implementations in this change return -1 when the resource
// is not available in compressed form — confirm this is the intended contract.
CompressedLength() int64
// StreamCompressed writes the compressed content to w and returns the number
// of bytes written, or an error when the content cannot be streamed compressed.
StreamCompressed(w io.Writer) (int64, *ResourceError)
}
53 changes: 43 additions & 10 deletions pkg/parser/epub/deobfuscator.go
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ import (
"strings"

"github.com/pkg/errors"
"github.com/readium/go-toolkit/pkg/archive"
"github.com/readium/go-toolkit/pkg/fetcher"
)

@@ -32,15 +33,23 @@ type DeobfuscatingResource struct {
identifier string
}

func (d DeobfuscatingResource) Read(start, end int64) ([]byte, *fetcher.ResourceError) {
// obfuscation looks up the encryption algorithm declared in the properties of
// the wrapped resource's link and returns it together with the length
// registered for that algorithm in algorithm2length. When the link has no
// encryption properties the algorithm is the empty string, and when the
// algorithm is not in the table the returned length is 0.
func (d DeobfuscatingResource) obfuscation() (string, int64) {
algorithm := ""
penc := d.Res.Link().Properties.Encryption()
if penc != nil {
algorithm = penc.Algorithm
}

v, ok := algorithm2length[algorithm]
if ok {
if !ok {
return algorithm, 0
}
return algorithm, v
}

func (d DeobfuscatingResource) Read(start, end int64) ([]byte, *fetcher.ResourceError) {
algorithm, v := d.obfuscation()
if v > 0 {
data, err := d.ProxyResource.Read(start, end)
if err != nil {
return nil, err
@@ -62,14 +71,8 @@ func (d DeobfuscatingResource) Read(start, end int64) ([]byte, *fetcher.Resource
}

func (d DeobfuscatingResource) Stream(w io.Writer, start int64, end int64) (int64, *fetcher.ResourceError) {
algorithm := ""
penc := d.Res.Link().Properties.Encryption()
if penc != nil {
algorithm = penc.Algorithm
}

v, ok := algorithm2length[algorithm]
if ok {
algorithm, v := d.obfuscation()
if v > 0 {
if start >= v {
// We're past the obfuscated part, just proxy it
return d.ProxyResource.Stream(w, start, end)
@@ -141,6 +144,36 @@ func (d DeobfuscatingResource) Stream(w io.Writer, start int64, end int64) (int6
return d.ProxyResource.Stream(w, start, end)
}

// CompressedAs implements CompressedResource.
// A resource with a non-zero obfuscation length is never reported as
// compressed; any other resource defers to the embedded ProxyResource.
func (d DeobfuscatingResource) CompressedAs(compressionMethod archive.CompressionMethod) bool {
	if _, obfuscatedLen := d.obfuscation(); obfuscatedLen > 0 {
		return false
	}
	return d.ProxyResource.CompressedAs(compressionMethod)
}

// CompressedLength implements CompressedResource.
// It returns -1 for a resource with a non-zero obfuscation length and otherwise
// the compressed length reported by the embedded ProxyResource.
func (d DeobfuscatingResource) CompressedLength() int64 {
	if _, obfuscatedLen := d.obfuscation(); obfuscatedLen > 0 {
		return -1
	}
	return d.ProxyResource.CompressedLength()
}

// StreamCompressed implements CompressedResource.
// A resource with a non-zero obfuscation length cannot be streamed in its
// compressed form, so the call fails without writing to w. Any other resource
// is streamed by the embedded ProxyResource.
//
// On failure the returned count is -1, matching the other Stream and
// StreamCompressed implementations, which all use -1 to signal that nothing
// meaningful was written.
func (d DeobfuscatingResource) StreamCompressed(w io.Writer) (int64, *fetcher.ResourceError) {
	_, v := d.obfuscation()
	if v > 0 {
		return -1, fetcher.Other(errors.New("cannot stream compressed resource when obfuscated"))
	}

	return d.ProxyResource.StreamCompressed(w)
}

func (d DeobfuscatingResource) getHashKeyAdobe() []byte {
hexbytes, _ := hex.DecodeString(
strings.Replace(
Loading