diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..c44943df --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "test/readium-test-files"] + path = test/readium-test-files + url = git@github.com:readium/readium-test-files.git diff --git a/decoder/adobe_fonts.go b/decoder/adobe_fonts.go new file mode 100644 index 00000000..c07ac530 --- /dev/null +++ b/decoder/adobe_fonts.go @@ -0,0 +1,67 @@ +package decoder + +import ( + "bytes" + "errors" + "io" + "io/ioutil" + "strconv" + "strings" + "unicode" + + "github.com/feedbooks/r2-streamer-go/models" +) + +func init() { + decoderList = append(decoderList, List{decoderAlgorithm: "http://ns.adobe.com/pdf/enc#RC", decoder: DecodeAdobeFont}) +} + +// DecodeAdobeFont deobfuscates a font for the algorithm http://ns.adobe.com/pdf/enc#RC by XORing at most its first 1024 bytes with the 16-byte key parsed from the hexadecimal publication identifier; NOTE(review): the previous comment cited the idpf spec http://www.idpf.org/epub/20/spec/FontManglingSpec.html, confirm it covers this algorithm +func DecodeAdobeFont(publication models.Publication, link models.Link, reader io.ReadSeeker) (io.ReadSeeker, error) { + var count int + + key := getAdobeHashKey(publication) + if string(key) == "" { + return nil, errors.New("can't find hash key") + } + + buff, _ := ioutil.ReadAll(reader) + if len(buff) > 1024 { + count = 1024 + } else { + count = len(buff) + } + + j := 0 + for i := 0; i < count; i++ { + buff[i] = buff[i] ^ key[j] + + j++ + if j == 16 { + j = 0 + } + } + readerSeeker := bytes.NewReader(buff) + return readerSeeker, nil +} + +func getAdobeHashKey(publication models.Publication) []byte { + var stringKey []rune + var key []byte + + id := strings.Replace(publication.Metadata.Identifier, "urn:uuid:", "", -1) + id = strings.Replace(id, "-", "", -1) + for _, c := range id { + if !unicode.IsSpace(c) { + stringKey = append(stringKey, c) + } + } + + for i := 0; i < 16; i++ { + byteHex := stringKey[i*2 : i*2+2] + byteNumer, _ := strconv.ParseInt(string(byteHex), 16, 32) + key = append(key, byte(byteNumer)) + } + + return key +} diff --git a/decoder/decoder.go b/decoder/decoder.go new file mode 100644 index 00000000..57969f11 --- 
/dev/null +++ b/decoder/decoder.go @@ -0,0 +1,30 @@ +package decoder + +import ( + "errors" + "fmt" + "io" + + "github.com/feedbooks/r2-streamer-go/models" +) + +// List pairs an encryption algorithm identifier with the function that decodes resources encrypted with it +type List struct { + decoderAlgorithm string + decoder (func(models.Publication, models.Link, io.ReadSeeker) (io.ReadSeeker, error)) +} + +var decoderList []List + +// Decode hands the resource to the first registered decoder whose algorithm equals link.CryptAlgorithm and returns an error when none matches +func Decode(publication models.Publication, link models.Link, reader io.ReadSeeker) (io.ReadSeeker, error) { + + fmt.Println(link.CryptAlgorithm) + for _, decoderFunc := range decoderList { + if link.CryptAlgorithm == decoderFunc.decoderAlgorithm { + return decoderFunc.decoder(publication, link, reader) + } + } + + return nil, errors.New("can't find fetcher") +} diff --git a/decoder/fonts_test.go b/decoder/fonts_test.go new file mode 100644 index 00000000..c3f8b0f0 --- /dev/null +++ b/decoder/fonts_test.go @@ -0,0 +1,52 @@ +package decoder + +import ( + "bytes" + "io/ioutil" + "os" + "testing" + + "github.com/feedbooks/r2-streamer-go/models" + "github.com/feedbooks/r2-streamer-go/parser" + . 
"github.com/smartystreets/goconvey/convey" +) + +var testPublication models.Publication +var testFonts []byte + +func init() { + + testPublication, _ = parser.Parse("../test/readium-test-files/functional/smoke-tests/SmokeTestFXL") + ft, _ := os.Open("../test/readium-test-files/functional/smoke-tests/SmokeTestFXL/fonts/cut-cut.woff") + testFonts, _ = ioutil.ReadAll(ft) +} + +func TestAdobeFonts(t *testing.T) { + + f, _ := os.Open("../test/readium-test-files/functional/smoke-tests/SmokeTestFXL/fonts/cut-cut.adb.woff") + + Convey("Given cut-cut.adb.woff fonts", t, func() { + fd, _ := DecodeAdobeFont(testPublication, models.Link{}, f) + buff, _ := ioutil.ReadAll(fd) + Convey("The adobe fonts is deobfuscated", func() { + So(bytes.Equal(buff, testFonts), ShouldBeTrue) + }) + + }) + +} + +func TestIdpfFonts(t *testing.T) { + + f, _ := os.Open("../test/readium-test-files/functional/smoke-tests/SmokeTestFXL/fonts/cut-cut.obf.woff") + + Convey("Given cut-cut.obf.woff fonts", t, func() { + fd, _ := DecodeIdpfFont(testPublication, models.Link{}, f) + buff, _ := ioutil.ReadAll(fd) + Convey("The idpf fonts is deobfuscated", func() { + So(bytes.Equal(buff, testFonts), ShouldBeTrue) + }) + + }) + +} diff --git a/decoder/idpf_fonts.go b/decoder/idpf_fonts.go new file mode 100644 index 00000000..a2178e00 --- /dev/null +++ b/decoder/idpf_fonts.go @@ -0,0 +1,62 @@ +package decoder + +import ( + "bytes" + "crypto/sha1" + "errors" + "fmt" + "io" + "io/ioutil" + "unicode" + + "github.com/feedbooks/r2-streamer-go/models" +) + +func init() { + decoderList = append(decoderList, List{decoderAlgorithm: "http://www.idpf.org/2008/embedding", decoder: DecodeIdpfFont}) +} + +// DecodeIdpfFont deobfuscates a font using idpf spec http://www.idpf.org/epub/20/spec/FontManglingSpec.html by XORing at most its first 1040 bytes with the 20-byte SHA-1 hash of the publication identifier stripped of whitespace +func DecodeIdpfFont(publication models.Publication, link models.Link, reader io.ReadSeeker) (io.ReadSeeker, error) { + var count int + + key := getHashKey(publication) + fmt.Println(key) + if string(key) == "" { + 
return nil, errors.New("can't find hash key") + } + + buff, _ := ioutil.ReadAll(reader) + if len(buff) > 1040 { + count = 1040 + } else { + count = len(buff) + } + + j := 0 + for i := 0; i < count; i++ { + buff[i] = buff[i] ^ key[j] + + j++ + if j == 20 { + j = 0 + } + } + readerSeeker := bytes.NewReader(buff) + return readerSeeker, nil +} + +func getHashKey(publication models.Publication) []byte { + var stringKey []rune + + for _, c := range publication.Metadata.Identifier { + if !unicode.IsSpace(c) { + stringKey = append(stringKey, c) + } + } + + h := sha1.New() + io.WriteString(h, string(stringKey)) + + return h.Sum(nil) +} diff --git a/fetcher/epub.go b/fetcher/epub.go index 0eff5731..e22841b2 100644 --- a/fetcher/epub.go +++ b/fetcher/epub.go @@ -6,6 +6,7 @@ import ( "io" "io/ioutil" + "github.com/feedbooks/r2-streamer-go/decoder" "github.com/feedbooks/r2-streamer-go/models" ) @@ -18,6 +19,7 @@ func FetchEpub(publication models.Publication, publicationResource string) (io.R var mediaType string var reader *zip.ReadCloser var assetFd io.ReadCloser + var link models.Link for _, data := range publication.Internal { if data.Name == "epub" { @@ -32,9 +34,18 @@ func FetchEpub(publication models.Publication, publicationResource string) (io.R } } + for _, linkRes := range publication.Resources { + if publicationResource == linkRes.Href { + link = linkRes + } + } buff, _ := ioutil.ReadAll(assetFd) assetFd.Close() readerSeeker := bytes.NewReader(buff) + readerSeekerDecode, err := decoder.Decode(publication, link, readerSeeker) + if err != nil { + return readerSeekerDecode, mediaType, nil + } return readerSeeker, mediaType, nil } diff --git a/fetcher/epub_dir.go b/fetcher/epub_dir.go index 530ce1e1..a6f62131 100644 --- a/fetcher/epub_dir.go +++ b/fetcher/epub_dir.go @@ -6,6 +6,7 @@ import ( "os" "path" + "github.com/feedbooks/r2-streamer-go/decoder" "github.com/feedbooks/r2-streamer-go/models" ) @@ -18,6 +19,7 @@ func FetchEpubDir(publication models.Publication, 
publicationResource string) (i var mediaType string var basePath string var rootFile string + var link models.Link for _, data := range publication.Internal { if data.Name == "basepath" { @@ -34,5 +36,15 @@ func FetchEpubDir(publication models.Publication, publicationResource string) (i fmt.Println(err) } + for _, linkRes := range publication.Resources { + if publicationResource == linkRes.Href { + link = linkRes + } + } + + readerSeekerDecode, err := decoder.Decode(publication, link, fd) + if err == nil { + return readerSeekerDecode, mediaType, nil + } return fd, mediaType, nil } diff --git a/models/publication.go b/models/publication.go index 3a526d13..d3e223f7 100644 --- a/models/publication.go +++ b/models/publication.go @@ -31,16 +31,17 @@ type Internal struct { // Link object used in collections and links type Link struct { - Href string `json:"href"` - TypeLink string `json:"type,omitempty"` - Rel []string `json:"rel,omitempty"` - Height int `json:"height,omitempty"` - Width int `json:"width,omitempty"` - Title string `json:"title,omitempty"` - Properties *Properties `json:"properties,omitempty"` - Duration string `json:"duration,omitempty"` - Templated bool `json:"templated,omitempty"` - Children []Link `json:"children,omitempty"` + Href string `json:"href"` + TypeLink string `json:"type,omitempty"` + Rel []string `json:"rel,omitempty"` + Height int `json:"height,omitempty"` + Width int `json:"width,omitempty"` + Title string `json:"title,omitempty"` + Properties *Properties `json:"properties,omitempty"` + Duration string `json:"duration,omitempty"` + Templated bool `json:"templated,omitempty"` + Children []Link `json:"children,omitempty"` + CryptAlgorithm string `json:"-"` } // PublicationCollection is used as an extension points for other collections in a Publication diff --git a/parser/epub.go b/parser/epub.go index 209e1c78..4e2bef68 100644 --- a/parser/epub.go +++ b/parser/epub.go @@ -2,6 +2,7 @@ package parser import ( "errors" + "path" 
"path/filepath" "strconv" "strings" @@ -34,16 +35,19 @@ func EpubParser(filePath string) (models.Publication, error) { fileExt := filepath.Ext(filePath) if fileExt == "" { book, err = epub.OpenDir(filePath) + if err != nil { + return models.Publication{}, errors.New("can't open or parse epub file with err : " + err.Error()) + } publication.Internal = append(publication.Internal, models.Internal{Name: "type", Value: "epub_dir"}) publication.Internal = append(publication.Internal, models.Internal{Name: "basepath", Value: filePath}) } else { book, err = epub.Open(filePath) + if err != nil { + return models.Publication{}, errors.New("can't open or parse epub file with err : " + err.Error()) + } publication.Internal = append(publication.Internal, models.Internal{Name: "type", Value: "epub"}) publication.Internal = append(publication.Internal, models.Internal{Name: "epub", Value: book.ZipReader()}) } - if err != nil { - return models.Publication{}, errors.New("can't open or parse epub file with err : " + err.Error()) - } if book.Container.Rootfile.Version != "" { epubVersion = book.Container.Rootfile.Version @@ -106,6 +110,8 @@ func EpubParser(filePath string) (models.Publication, error) { } fillCalibreSerieInfo(&publication, book) + fillEncryptionInfo(&publication, book) + return publication, nil } @@ -558,3 +564,33 @@ func fillCalibreSerieInfo(publication *models.Publication, book *epub.Book) { } } + +func fillEncryptionInfo(publication *models.Publication, book *epub.Book) { + + for _, encInfo := range book.Encryption.EncryptedData { + resURI := encInfo.CipherData.CipherReference.URI + for i, l := range publication.Resources { + if resURI == FilePath(*publication, l.Href) { + publication.Resources[i].CryptAlgorithm = encInfo.EncryptionMethod.Algorithm + } + } + for i, l := range publication.Spine { + if resURI == FilePath(*publication, l.Href) { + publication.Spine[i].CryptAlgorithm = encInfo.EncryptionMethod.Algorithm + } + } + } +} + +// FilePath joins the directory of the "rootfile" value stored in publication.Internal with publicationResource and returns the complete 
path for the ressource +func FilePath(publication models.Publication, publicationResource string) string { + var rootFile string + + for _, data := range publication.Internal { + if data.Name == "rootfile" { + rootFile = data.Value.(string) + } + } + + return path.Join(path.Dir(rootFile), publicationResource) +} diff --git a/test/readium-test-files b/test/readium-test-files new file mode 160000 index 00000000..321967f5 --- /dev/null +++ b/test/readium-test-files @@ -0,0 +1 @@ +Subproject commit 321967f52980ed905eee7ace96d3f09ed4263728