Skip to content

Commit c132136

Browse files
committed Feb 26, 2025
Add WIP image analyzer
1 parent f9184ca commit c132136

File tree

4 files changed

+595
-10
lines changed

4 files changed

+595
-10
lines changed
 

‎cmd/analyzer/main.go

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package main
2+
3+
import (
4+
"encoding/json"
5+
"os"
6+
7+
"github.com/readium/go-toolkit/pkg/analyzer"
8+
"github.com/readium/go-toolkit/pkg/manifest"
9+
)
10+
11+
func main() {
12+
if len(os.Args) < 2 {
13+
panic("usage: " + os.Args[0] + " <test dir image name>")
14+
}
15+
16+
r, err := os.OpenRoot("./test")
17+
if err != nil {
18+
panic(err)
19+
}
20+
defer r.Close()
21+
fs := r.FS()
22+
23+
link, _, err := analyzer.Image(fs, manifest.Link{
24+
Href: manifest.MustNewHREFFromString(os.Args[1], false),
25+
}, true)
26+
if err != nil {
27+
panic(err)
28+
}
29+
30+
bin, _ := json.Marshal(link)
31+
println(string(bin))
32+
}

‎go.mod

+10-7
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,19 @@
11
module github.com/readium/go-toolkit
22

3-
go 1.22.0
4-
5-
toolchain go1.23.5
3+
go 1.24.0
64

75
require (
86
github.com/CAFxX/httpcompression v0.0.9
97
github.com/agext/regexp v1.3.0
108
github.com/andybalholm/cascadia v1.3.3
9+
github.com/azr/phash v0.2.0
10+
github.com/bbrks/go-blurhash v1.1.1
1111
github.com/deckarep/golang-set v1.8.0
12+
github.com/disintegration/imaging v1.6.2
1213
github.com/go-viper/mapstructure/v2 v2.2.1
1314
github.com/gorilla/mux v1.8.1
1415
github.com/gotd/contrib v0.21.0
16+
github.com/kettek/apng v0.0.0-20220823221153-ff692776a607
1517
github.com/pdfcpu/pdfcpu v0.9.1
1618
github.com/pkg/errors v0.9.1
1719
github.com/readium/xmlquery v0.0.0-20230106230237-8f493145aef4
@@ -21,14 +23,17 @@ require (
2123
github.com/trimmer-io/go-xmp v1.0.0
2224
github.com/vmihailenco/go-tinylfu v0.2.2
2325
github.com/zeebo/xxh3 v1.0.2
26+
go4.org v0.0.0-20230225012048-214862532bf5
2427
golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c
28+
golang.org/x/image v0.23.0
2529
golang.org/x/net v0.34.0
26-
golang.org/x/text v0.21.0
30+
golang.org/x/text v0.22.0
2731
)
2832

2933
require (
3034
github.com/andybalholm/brotli v1.1.1 // indirect
3135
github.com/antchfx/xpath v1.3.3 // indirect
36+
github.com/azr/gift v1.1.2 // indirect
3237
github.com/cespare/xxhash/v2 v2.3.0 // indirect
3338
github.com/davecgh/go-spew v1.1.1 // indirect
3439
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
@@ -39,9 +44,7 @@ require (
3944
github.com/klauspost/cpuid/v2 v2.2.9 // indirect
4045
github.com/pmezard/go-difflib v1.0.0 // indirect
4146
github.com/spf13/pflag v1.0.6 // indirect
42-
golang.org/x/image v0.23.0 // indirect
43-
golang.org/x/sys v0.29.0 // indirect
44-
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect
47+
golang.org/x/sys v0.30.0 // indirect
4548
gopkg.in/yaml.v2 v2.4.0 // indirect
4649
gopkg.in/yaml.v3 v3.0.1 // indirect
4750
)

‎go.sum

+223-3
Large diffs are not rendered by default.

‎pkg/analyzer/image.go

+330
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,330 @@
1+
package analyzer
2+
3+
import (
4+
"bytes"
5+
"crypto/md5"
6+
"crypto/sha256"
7+
"encoding/base64"
8+
"encoding/binary"
9+
"image"
10+
"image/gif"
11+
_ "image/png"
12+
"io"
13+
"io/fs"
14+
"time"
15+
16+
"github.com/azr/phash"
17+
"github.com/bbrks/go-blurhash"
18+
"github.com/disintegration/imaging"
19+
"github.com/kettek/apng"
20+
"github.com/pkg/errors"
21+
"github.com/readium/go-toolkit/pkg/manifest"
22+
"github.com/readium/go-toolkit/pkg/mediatype"
23+
"go4.org/media/heif"
24+
"golang.org/x/image/riff"
25+
"golang.org/x/image/webp"
26+
)
27+
28+
// ImageProperties collects the facts gathered about a single image file by
// Image.
type ImageProperties struct {
	// Size is the file size as reported by the file's Stat call.
	Size uint64
	// ModTime is the modification time as reported by the file's Stat call.
	ModTime time.Time
	// Width and Height are the image dimensions read from the image metadata.
	Width, Height uint32
	// Animated is set when the image was found to contain more than one frame.
	Animated bool
	// Hashes holds the cryptographic and (optional) perceptual hashes.
	Hashes struct {
		// Sha256 and Md5 are raw digests of the file contents. PhashDCT is
		// the 64-bit DCT perceptual hash stored as 8 big-endian bytes; it is
		// left empty when visual hashing was not requested.
		Sha256, Md5, PhashDCT []byte
		// BlurHash is the BlurHash string; empty when not computed.
		BlurHash string
	}
}
41+
42+
func (p *ImageProperties) EnhanceLink(link *manifest.Link) {
43+
link.Height = uint(p.Height)
44+
link.Width = uint(p.Width)
45+
link.Size = uint(p.Size)
46+
if link.Properties == nil {
47+
link.Properties = manifest.Properties{}
48+
}
49+
50+
// TODO: more sophisticated handling of pre-existing values, conversion to dedicated struct like encryption is
51+
hashes := []map[string]string{
52+
{
53+
"algorithm": "sha256",
54+
"value": base64.StdEncoding.EncodeToString(p.Hashes.Sha256),
55+
},
56+
{
57+
"algorithm": "md5",
58+
"value": base64.StdEncoding.EncodeToString(p.Hashes.Md5),
59+
},
60+
}
61+
if len(p.Hashes.PhashDCT) > 0 {
62+
hashes = append(hashes, map[string]string{
63+
"algorithm": "phash-dct",
64+
"value": base64.StdEncoding.EncodeToString(p.Hashes.PhashDCT),
65+
})
66+
}
67+
if len(p.Hashes.BlurHash) > 0 {
68+
hashes = append(hashes, map[string]string{
69+
"algorithm": "https://blurha.sh",
70+
"value": p.Hashes.BlurHash,
71+
})
72+
}
73+
link.Properties["hash"] = hashes
74+
link.Properties["animated"] = p.Animated
75+
}
76+
77+
func Image(system fs.FS, link manifest.Link, visualHash bool) (*manifest.Link, *ImageProperties, error) {
78+
path := link.Href.String()
79+
file, err := system.Open(path)
80+
if err != nil {
81+
return nil, nil, err
82+
}
83+
defer file.Close()
84+
85+
reopen := func() error {
86+
if of, ok := file.(io.ReadSeeker); ok {
87+
of.Seek(0, 0)
88+
} else {
89+
file.Close()
90+
file, err = system.Open(path)
91+
if err != nil {
92+
return err
93+
}
94+
}
95+
return nil
96+
}
97+
98+
stat, err := file.Stat()
99+
if err != nil {
100+
return nil, nil, err
101+
}
102+
if stat.IsDir() {
103+
return nil, nil, errors.New("must be a file, not a directory")
104+
}
105+
106+
p := &ImageProperties{
107+
Size: uint64(stat.Size()),
108+
ModTime: stat.ModTime(),
109+
}
110+
if p.Size == 0 {
111+
return nil, nil, errors.New("file is empty")
112+
}
113+
114+
var mt *mediatype.MediaType
115+
if link.MediaType != nil {
116+
mt = link.MediaType
117+
} else {
118+
mt = mediatype.OfFileOnly(file)
119+
if mt == nil {
120+
return nil, nil, errors.New("file has unknown media type")
121+
}
122+
}
123+
if !mt.IsBitmap() {
124+
return nil, nil, errors.New("file is not a bitmap image")
125+
}
126+
// Reopen because the sniffer may have read the file
127+
err = reopen()
128+
if err != nil {
129+
return nil, nil, errors.Wrap(err, "failed reopening file")
130+
}
131+
132+
// Gather image width/height, and weed out unsuppored formats
133+
var iconfig image.Config
134+
if mt.Equal(&mediatype.AVIF) {
135+
var hf *heif.File
136+
if of, ok := file.(io.ReaderAt); ok {
137+
hf = heif.Open(of)
138+
} else {
139+
// Fall back to reading the file into memory
140+
buf, err := fs.ReadFile(system, path)
141+
if err != nil {
142+
return nil, nil, errors.Wrap(err, "failed reading AVIF file into memory")
143+
}
144+
hf = heif.Open(bytes.NewReader(buf))
145+
}
146+
pi, err := hf.PrimaryItem()
147+
if err != nil {
148+
return nil, nil, errors.Wrap(err, "failed decoding supposed AVIF file metadata")
149+
}
150+
w, h, ok := pi.VisualDimensions()
151+
if !ok {
152+
return nil, nil, errors.New("failed reading AVIF image dimensions")
153+
}
154+
iconfig.Width = w
155+
iconfig.Height = h
156+
} else if mt.Equal(&mediatype.JXL) {
157+
magicBytes := make([]byte, 12)
158+
_, err = io.ReadFull(file, magicBytes)
159+
if err != nil {
160+
return nil, nil, errors.Wrap(err, "failed reading JXL file for magic numbers")
161+
}
162+
jxlCodestream := []byte{0xFF, 0x0A}
163+
jxlBmff := []byte{0x00, 0x00, 0x00, 0x0C, 0x4A, 0x58, 0x4C, 0x20, 0x0D, 0x0A, 0x87, 0x0A}
164+
if !bytes.Equal(magicBytes[:2], jxlCodestream) && !bytes.Equal(magicBytes, jxlBmff) {
165+
return nil, nil, errors.New("supposed JXL file is invalid")
166+
}
167+
return nil, nil, errors.New("JXL file format is currently unsupported")
168+
} else {
169+
var format string
170+
iconfig, format, err = image.DecodeConfig(file)
171+
if err != nil {
172+
return nil, nil, errors.Wrap(err, "failed decoding image metadata")
173+
}
174+
175+
// Special case for animated PNG which gets registered by the apng package
176+
if format == "apng" {
177+
if !mt.Equal(&mediatype.PNG) {
178+
return nil, nil, errors.New("file mediatype not equal to decoded image format")
179+
}
180+
} else {
181+
imt := mediatype.OfExtension(format)
182+
if imt == nil {
183+
return nil, nil, errors.New("failed determining mediatype from image format \"" + format + "\"")
184+
}
185+
if !mt.Equal(imt) {
186+
return nil, nil, errors.New("file mediatype not equal to decoded image format")
187+
}
188+
}
189+
}
190+
p.Width = uint32(iconfig.Width)
191+
p.Height = uint32(iconfig.Height)
192+
if p.Width == 0 || p.Height == 0 {
193+
return nil, nil, errors.New("image has zero width or height")
194+
}
195+
196+
// Decoder the image so the animation can be checked, and the perceptual hash calculated
197+
err = reopen()
198+
if err != nil {
199+
return nil, nil, errors.Wrap(err, "failed reopening file")
200+
}
201+
hashVisually := func(img image.Image) {
202+
if !visualHash {
203+
return
204+
}
205+
// First downsize the image because:
206+
// - Phash/DCT already does this, down to 32x32px
207+
// - Blurhash encoding with a large image is very slow
208+
if img.Bounds().Dx() > 128 {
209+
img = imaging.Resize(img, 128, 0, imaging.Lanczos)
210+
}
211+
212+
// Create phash and put it in a byte array
213+
p.Hashes.PhashDCT = make([]byte, 8)
214+
binary.BigEndian.PutUint64(p.Hashes.PhashDCT, phash.DTC(img))
215+
216+
// Create the blurhash
217+
blurhash, _ := blurhash.Encode(5, 5, img)
218+
p.Hashes.BlurHash = blurhash
219+
}
220+
if mt.Equal(&mediatype.GIF) {
221+
gi, err := gif.DecodeAll(file)
222+
if err != nil {
223+
return nil, nil, errors.Wrap(err, "failed decoding GIF file")
224+
}
225+
if len(gi.Image) > 1 {
226+
p.Animated = true
227+
}
228+
hashVisually(gi.Image[0])
229+
} else if mt.Equal(&mediatype.PNG) {
230+
pi, err := apng.DecodeAll(file)
231+
if err != nil {
232+
return nil, nil, errors.Wrap(err, "failed decoding (A)PNG file")
233+
}
234+
if len(pi.Frames) > 1 {
235+
p.Animated = true
236+
}
237+
hashVisually(pi.Frames[0].Image)
238+
} else if mt.Equal(&mediatype.AVIF) {
239+
// Not sure how to determine if an AVIF is animated!
240+
if visualHash {
241+
return nil, nil, errors.New("AVIF perceptual hash is not yet supported")
242+
}
243+
} else if mt.Equal(&mediatype.WEBP) {
244+
var wi image.Image
245+
if _, ok := file.(io.ReadSeeker); ok {
246+
p.Animated, err = isWEBPAnimated(file)
247+
if err != nil {
248+
return nil, nil, errors.Wrap(err, "failed checking if WEBP file is animated")
249+
}
250+
if visualHash {
251+
if p.Animated {
252+
return nil, nil, errors.New("perceptual hash of animated WEBP is not yet supported")
253+
}
254+
err = reopen()
255+
if err != nil {
256+
return nil, nil, errors.Wrap(err, "failed reopening file")
257+
}
258+
wi, err = webp.Decode(file)
259+
}
260+
} else {
261+
// Only read the file once into memory since we need to read it two times in a row
262+
buf := make([]byte, p.Size)
263+
_, err = io.ReadFull(file, buf)
264+
if err != nil {
265+
return nil, nil, errors.Wrap(err, "failed reading WEBP file into memory")
266+
}
267+
r := bytes.NewReader(buf)
268+
p.Animated, err = isWEBPAnimated(r)
269+
if err != nil {
270+
return nil, nil, errors.Wrap(err, "failed checking if WEBP file is animated")
271+
}
272+
if visualHash {
273+
if p.Animated {
274+
return nil, nil, errors.New("perceptual hash of animated WEBP is not yet supported")
275+
}
276+
r.Seek(0, 0)
277+
wi, err = webp.Decode(r)
278+
}
279+
}
280+
if err != nil {
281+
return nil, nil, errors.Wrap(err, "failed decoding WEBP file")
282+
}
283+
if visualHash {
284+
hashVisually(wi)
285+
}
286+
} else if visualHash {
287+
// Any other format can be generically decoded since it doesn't support animation
288+
img, _, err := image.Decode(file)
289+
if err != nil {
290+
return nil, nil, errors.Wrap(err, "failed decoding image file")
291+
}
292+
hashVisually(img)
293+
}
294+
295+
// Now compute the cryptographic hashes
296+
err = reopen()
297+
if err != nil {
298+
return nil, nil, errors.Wrap(err, "failed reopening file")
299+
}
300+
s2hash := sha256.New()
301+
mdhash := md5.New()
302+
mw := io.MultiWriter(s2hash, mdhash)
303+
if _, err := io.Copy(mw, file); err != nil {
304+
panic(err)
305+
}
306+
p.Hashes.Sha256 = s2hash.Sum(nil)
307+
p.Hashes.Md5 = mdhash.Sum(nil)
308+
309+
p.EnhanceLink(&link)
310+
return &link, p, nil
311+
}
312+
313+
func isWEBPAnimated(file io.Reader) (bool, error) {
314+
_, data, err := riff.NewReader(file)
315+
if err != nil {
316+
return false, errors.Wrap(err, "failed reading RIFF data from WEBP file")
317+
}
318+
id, _, _, err := data.Next()
319+
var frames uint32
320+
for err == nil {
321+
if id == riff.FourCC([4]byte{'A', 'N', 'M', 'F'}) {
322+
frames++
323+
}
324+
id, _, _, err = data.Next()
325+
}
326+
if err != io.EOF {
327+
return false, errors.Wrap(err, "failed reading RIFF chunks from WEBP file")
328+
}
329+
return frames > 1, nil
330+
}

0 commit comments

Comments
 (0)
Please sign in to comment.