diff --git a/AtomDB/.idea/compiler.xml b/AtomDB/.idea/compiler.xml
index d49be18..71ba321 100644
--- a/AtomDB/.idea/compiler.xml
+++ b/AtomDB/.idea/compiler.xml
@@ -6,8 +6,104 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/AtomDB/.idea/misc.xml b/AtomDB/.idea/misc.xml
index 2289c27..d7c3eea 100644
--- a/AtomDB/.idea/misc.xml
+++ b/AtomDB/.idea/misc.xml
@@ -8,7 +8,7 @@
-
+
diff --git a/AtomDB/.mvn/maven.config b/AtomDB/.mvn/maven.config
new file mode 100644
index 0000000..e69de29
diff --git a/AtomDB/Docs/.Todo.md.un~ b/AtomDB/Docs/.Todo.md.un~
new file mode 100644
index 0000000..67f3e75
Binary files /dev/null and b/AtomDB/Docs/.Todo.md.un~ differ
diff --git a/AtomDB/Docs/Compaction.md b/AtomDB/Docs/Compaction.md
new file mode 100644
index 0000000..e8b1a0a
--- /dev/null
+++ b/AtomDB/Docs/Compaction.md
@@ -0,0 +1,53 @@
+The idea behind compaction.
+
+* It is about creating overlap-free and compact SSTs.
+    * A compact SST is one that holds all the keys that lie between its min and max key.
+    * By making it compact we reduce overlap: if an SST covers the key range 10-50 and holds every element in that range, a search only needs to access that one SST (see the sketch below).
+    * If there is overlap, we end up with SSTs covering e.g. 10-70 and 30-70; a search then needs to access 2 SSTs.
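+
+A minimal sketch of the cost difference, using a hypothetical `Range` record and int keys for brevity:
+
+```java
+import java.util.List;
+
+class OverlapCost {
+    record Range(int minKey, int maxKey) {
+        boolean contains(int key) { return minKey <= key && key <= maxKey; }
+    }
+
+    // every SST whose range covers the key must be probed
+    static long sstsToSearch(List<Range> ssts, int key) {
+        return ssts.stream().filter(r -> r.contains(key)).count();
+    }
+
+    public static void main(String[] args) {
+        System.out.println(sstsToSearch(List.of(new Range(10, 50)), 42));                    // 1 SST
+        System.out.println(sstsToSearch(List.of(new Range(10, 70), new Range(30, 70)), 42)); // 2 SSTs
+    }
+}
+```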
+
+
+
+Plan:
+table, search engine, and benchmark.
+The benchmark should yield super-fast results.
+Then we optimize and clean our existing code.
+Then we work on compaction, planning at the smallest scale how things will move, and then implement.
+Then optimize and make the code clean.
+Benchmark and optimize.
+Write improved unit tests, integration tests, crash tests, and performance tests under different loads.
+
+
+
+LevelDB compaction:
+1. A 4 MB file goes to level 0.
+2. Pick one file from level L and all overlapping files from level L+1 (a sketch of this file-picking step follows the list).
+3. When compacting level 0 -> 1, we take all the overlapping files from level 0 as well, since this is a very special level (its files may overlap one another).
+4. We create a new output SST for every 2 MB.
+5. We also switch to a new SST when the output has grown enough to cover 10 level-(L+2) files (so that we won't pick up too many files from L+2 for the next compaction).
+6. We remember the greatest key of level L so that next time we pick files starting after that key.
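+
+A rough sketch of the file-picking step (2-3), using a hypothetical `SstFile` record; this shows the idea, not LevelDB's actual code:
+
+```java
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+class CompactionPick {
+    record SstFile(byte[] smallest, byte[] largest) {}
+
+    // one file from level L plus all overlapping files from level L+1
+    static List<SstFile> pickInputs(SstFile seed, List<SstFile> nextLevel,
+                                    Comparator<byte[]> cmp) {
+        List<SstFile> inputs = new ArrayList<>();
+        inputs.add(seed);
+        for (SstFile f : nextLevel) {
+            // key ranges [a,b] and [c,d] overlap iff a <= d && c <= b
+            boolean overlaps = cmp.compare(seed.smallest(), f.largest()) <= 0
+                            && cmp.compare(f.smallest(), seed.largest()) <= 0;
+            if (overlaps) inputs.add(f);
+        }
+        return inputs;
+    }
+}
+```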
+
+
+TODO:
+1. Stable compaction
+2. Value update and delete.
+3. Scheduling compaction in a background thread.
+4. Table recreation and manifest file.
+5. Search improvement (cache of blocks)
+6. Improve overall code and clean up.
+7. Unit tests & integration tests
+8. Benchmark
+9. Maven deploy
+10. Great readme, explaining how to install and use it, benchmarks, limitations, ideas, motivation and future work, plus a pictorial representation of the architecture and the SST.
+11. Handle architectural shortcomings, for example the checksum check.
+
+For updates, we can iterate in sorted order from latest -> oldest,
+and in sstPersist we can use a set; with this we keep only one entry per key and old values are discarded (a minimal sketch follows).
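+
+A minimal sketch of that dedup, assuming entries are fed newest to oldest and that a `TreeSet` over the db's byte[] comparator stands in for "a set":
+
+```java
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeSet;
+
+class Dedup {
+    // entries must arrive newest -> oldest; the first occurrence of a key wins
+    static List<Map.Entry<byte[], byte[]>> dedupNewestFirst(
+            List<Map.Entry<byte[], byte[]>> newestFirst, Comparator<byte[]> cmp) {
+        TreeSet<byte[]> seen = new TreeSet<>(cmp);
+        List<Map.Entry<byte[], byte[]>> out = new ArrayList<>();
+        for (Map.Entry<byte[], byte[]> e : newestFirst) {
+            if (seen.add(e.getKey())) {   // false -> older value for this key, discard
+                out.add(e);
+            }
+        }
+        return out;
+    }
+}
+```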
+
+https://github.com/facebook/rocksdb/wiki/Leveled-Compaction
+https://blog.senx.io/demystifying-leveldb/
+https://stackoverflow.com/questions/61684116/compaction-causes-write-to-hang-until-finished
+https://tonyz93.blogspot.com/2016/11/leveldb-source-reading-3-compaction.html
+https://www.speedb.io/blog-posts/understanding-leveled-compaction
+https://github.com/google/leveldb/blob/main/db/version_set.cc
+https://www.google.com/search?q=what+is+a+weak+key+map+guava+java&sca_esv=be2d3384baa617c2&sca_upv=1&rlz=1C1CHBF_enIN1024IN1024&biw=1536&bih=695&sxsrf=ACQVn0_daYOv836fgUD-zntx6kJ9qE1WNg%3A1712689998040&ei=TpMVZtiVAsanseMPvvmVkA8&ved=0ahUKEwjY9MW367WFAxXGU2wGHb58BfIQ4dUDCBA&uact=5&oq=what+is+a+weak+key+map+guava+java&gs_lp=Egxnd3Mtd2l6LXNlcnAaAhgCIiF3aGF0IGlzIGEgd2VhayBrZXkgbWFwIGd1YXZhIGphdmEyBxAhGAoYoAEyBxAhGAoYoAEyBxAhGAoYoAEyBxAhGAoYoAFI9RxQhQRYoRtwBHgBkAEAmAH6AaABjxOqAQUwLjUuN7gBA8gBAPgBAZgCEKACqxPCAgoQABhHGNYEGLADwgIEECMYJ8ICChAhGAoYoAEYiwPCAgQQIRgVmAMAiAYBkAYIkgcFNC41LjegB-cu&sclient=gws-wiz-serp
+https://stackoverflow.com/questions/48139062/behaviour-of-caffeine-cache-asmap-views
diff --git a/AtomDB/Docs/Overview.md b/AtomDB/Docs/Overview.md
new file mode 100644
index 0000000..64484e2
--- /dev/null
+++ b/AtomDB/Docs/Overview.md
@@ -0,0 +1,192 @@
+AtomDB Release 1.0
+
+# Objective
+1. Redesign the WAL for faster reads and writes, if required.
+2. Memtable: store one extra memtable, on the premise that the latest written data is queried frequently.
+3. 1 disk seek per request.
+    1. With the 10% rule we can get by with one disk seek and a load of reads, PER SST.
+    2. To improve towards 1 disk seek per request (see the sketch at the end of this list):
+        1. Bloom
+            1. Global bloom filter to discard any non-existent key.
+            2. Level-wise bloom filter.
+            3. File-wise bloom filter. (Here we are fine with binary search on the 10% keys.)
+        2. Upper and lower bound check (SK and LK)
+        3. Then binary search from the latest file to the older files
+        4. Can keep the crc32c of keys in memory (future)
+4. Cache all the open file descriptors.
+5. Minimize the number of files being accessed; we need a better predictor and cache.
+6. Improve reads and writes.
+7. Improve compaction.
+8. Build two SSTs in memory and write them out as one.
+9. Let's implement the WAL, MEM, SST, TABLE, COMPACTION, and CACHE correctly, and then add multithreading, lock-free structures, virtual threads, SIMD, etc.
+10. Can use a TreeSet to find the overlapping intervals, and posix_fadvise for compaction.
+11. Can use posix_fadvise for writing SSTs.
+12. Can use posix_fadvise for sequential reads.
+13. For random reads use mmap.
+14. Implement Manifest file https://github.com/google/leveldb/blob/main/doc/impl.md#manifest
+15. We will have 3 caches:
+    1. The primary cache, the smallest, which caches k-v pairs directly.
+    2. File blocks.
+    3. File descriptors, evicted based on the available descriptors.
+16. Reading of SST metadata when the DB restarts. Thinking needed.
+17. Batch writing and reading.
+18. MMap the whole file for compaction.
+19. We need not mandatorily store the 10% keys in memory; we can flush based on available memory and reread when required, though this will have some performance impact.
+20. Finalise how big the key and value can be.
+21. The idea is to make files obsolete and then delete them periodically.
+22. Benchmark partial mapping against full mapping of a file.
+23. https://github.com/google/jimfs can be used for testing; it's an in-memory file system.
+24. Performance tuning can be done with:
+    1. using a better hash class for the table in the SST chunks
+    2. using full mmap for reading files.
+25. Remove the contributors list.
+26. Wherever possible, convert to the latest Java features: make classes records and use sealed types.
+27. Can we utilize async file I/O to read blocks ahead while searching? A search might touch 2-3 files; we can load them asynchronously, and the reads won't go to waste since the blocks will be cached and used by someone else.
+28. Before release, benchmark the different writers and readers; see if BufferedWriter is of any help when we have mmapped the file, since mmap does the same thing.
+29. Can we use the Cleaner and Cleanable APIs for unmapping? Does unmap trigger GC? And can't we just let all the mapping and unmapping accumulate and call System.gc() when we close the class?
+30. Is the common prefix adding any value? Need to benchmark.
+31. Observations
+    1. For 50,000 entries, with key and value both 1024 bytes:
+    2. common prefix saved ~50,000 bytes while compression ate 700,000 bytes.
+    3. So the common prefix saved space but compression didn't. (Mind that the Random class was used to generate the byte data.)
+32. Try to get rid of the Guava library; otherwise, use Guava extensively.
+33. If we are mandatorily going to store the pointers, then we need not store SK and LK, since they can be obtained from the pointed-to key list.
+34. See if all the classes follow the single-responsibility principle.
+35. We should cache the checksum & location block.
+36. All position variables should be int, and file size should be limited accordingly.
+37. We should mmap only the blocks rather than the whole file, since we have already read the pointers and the header.
+38. We can write checksums in sorted order so that we can retrieve them together (and cache them) and perform binary search.
+39. Can we share an mmapped file with other readers, or is it already shared internally?
+
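+A sketch of the layered check in Objective 3, with hypothetical `LevelView`/`SstView` interfaces over Guava's `BloomFilter`; the real read path will differ:
+
+```java
+import com.google.common.hash.BloomFilter;
+import java.util.Comparator;
+import java.util.List;
+
+class ReadPath {
+    interface SstView {
+        BloomFilter<byte[]> bloom();
+        byte[] smallestKey();                // SK
+        byte[] largestKey();                 // LK
+        byte[] binarySearch(byte[] key);     // over the 10% keys
+    }
+    interface LevelView {
+        BloomFilter<byte[]> bloom();
+        List<SstView> newestFirst();
+    }
+
+    static byte[] get(byte[] key, BloomFilter<byte[]> globalBloom,
+                      List<LevelView> levels, Comparator<byte[]> cmp) {
+        if (!globalBloom.mightContain(key)) return null;         // global bloom
+        for (LevelView level : levels) {
+            if (!level.bloom().mightContain(key)) continue;      // level-wise bloom
+            for (SstView sst : level.newestFirst()) {            // latest file first
+                if (!sst.bloom().mightContain(key)) continue;    // file-wise bloom
+                if (cmp.compare(key, sst.smallestKey()) < 0
+                 || cmp.compare(key, sst.largestKey()) > 0) continue; // SK/LK bounds
+                byte[] v = sst.binarySearch(key);
+                if (v != null) return v;
+            }
+        }
+        return null;
+    }
+}
+```
+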
+# Bottlenecks
+1. Multiple SST reads. (Partially solved by Objective 3)
+2. Heavy compaction.
+3. No proper caching for recently retrieved blocks or k-v pairs.
+4. Multiple disk seeks and reads for a single request. **(Many SSTs)**
+5. Multiple jumps in a **single SST**
+6. N SSTs -> N bloom filter reads, and in the worst case N SST binary searches. (Can't we unify things?)
+7. Compacting random files without any pattern.
+8. Do not compress plain primitives; they are not compressible.
+
+
+
+
+### TODO:
+- [ ] **IMP** We can implement shared keys in the SST block (see the sketch after this list), but we need to understand the cost to write the SST, the compaction cost, and whether it actually helps reading.
+    - Basically, when we have n keys in sorted order, some keys share a prefix which can be taken out as common.
+    - We need not store the prefix itself, since we already have the smallest key in memory; we just need the prefix length.
+    - This helps with comparing as well as storing, since we now store less.
+    - Before we implement it, we need to find the cost:
+        - writing computation cost.
+        - compaction reading and writing cost. (Very important)
+        - reading cost: how many disk seeks and reads.
+    - Put this in ChatGPT: "I have a list of strings, I want to take its common prefix out."
+- [ ] Thoughts on this:
+    - If a file doesn't generate positive results for reads, it can be taken for compaction.
+      Such a file has very sparse data; for reading efficiency we need concentrated data.
+- [ ] Read and write improvement.
+- [ ] Compaction improvement.
+- [ ] Cache.
+- [ ] Use of LevelDB interfaces.
+- [ ] Faster log writer and reader; is there a way to convert the log into an SST?
+- [ ] Table, basically all the metadata storage.
+- [ ] Compress only blocks or big chunks.
+- [x] LZ4 in use.
+- [ ] Recently read block cache.
+- [ ] Have a look at design and patterns.
+- [ ] Read the data-intensive book.
+- [ ] How to write an efficient WAL.
+- [ ] Multithreading, lock-free database.
+- [ ] Separate thread for compaction.
+- [ ] We will use the latest Java.
+- [ ] Learn the new features of Java.
+- [x] Crc32c comparison; Java's built-in is super fast.
+    - Benchmarked: Java's implementation uses some low-level machinery for the computation.
+- [x] Use of a 64-bit checksum to find whether a key is present in the 10% block.
+    - xxHash is a 64-bit hash, and a bit slower than crc32c.
+    - Using crc32c seems viable, as its error rate is lower compared to the bloom filter.
+    - A bloom filter is not native to our system, where we don't have much flexibility.
+    - We don't know the creation and restoration cost of the bloom filter.
+- [x] Check what works best, the hashes or the bloom filter.
+    - Hashes seem good, and crc32c in particular seems perfect.
+- [ ] Compaction: group SST files that are close to each other based on overlaps. The idea is to find an SST with a big range and compact it together with other SSTs with small ranges.
+- [ ] Providing ACID.
+- [ ] OPTIMIZATION THINK: create a tree of the sparse keys to find the correct file.
+- [ ] OPTIMIZATION THINK: in a block we can store partial keys; for example, the smallest key defines the common prefix and further keys carry only suffixes.
+- [ ] Can we make AtomDB into an SQL DB?
+- [ ] Need to have a look at the RocksDB and LevelDB docs.
+    - https://github.com/facebook/rocksdb
+    - https://www.youtube.com/watch?v=jvmvezq8Twg
+- [ ] Read about virtual threads.
+- [ ] Guava's bloom filter is just not it; need to find another one.
+- [ ] Benchmark our system with plain FileChannel, mmap, and posix_fadvise. Write the ChannelBackedWriter in a very abstract way.
+- [ ] Before writing, have a look at LevelDB and the other project for unit and integration test cases, as well as how they used the Options class.
+- [ ] What if we manage the blocks of pages on our own, just like WiredTiger (MongoDB)?
+    - https://source.wiredtiger.com/develop/arch-index.html
+- [ ] posix_fadvise seems a viable option for compaction, where we hint to the OS that we will read the SSTs sequentially.
+    - Search on ChatGPT, database basics channel: "posix_fadvise vs mmap".
+- [ ] Before writing a new SST, check whether compaction is needed and whether the file overlaps with others, so those files can be compacted directly.
+- [ ] Need to add a magic number at the end of the SST to mark its end.
+- [ ] Think about what we can cache.
+- [ ] What really is a cache-oblivious data structure?
+- Can't we apply common-prefix encoding to values?
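+
+The sketch referenced in the **IMP** item above: a minimal common-prefix-length computation (class name illustrative):
+
+```java
+class SharedPrefix {
+    // for sorted keys: store only the length of the prefix shared with the
+    // block's smallest key (already in memory) plus the key's suffix bytes
+    static int commonPrefixLength(byte[] smallest, byte[] key) {
+        int n = Math.min(smallest.length, key.length);
+        int i = 0;
+        while (i < n && smallest[i] == key[i]) i++;
+        return i;
+    }
+}
+```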
+
+
+#### Information links
+* LZ4
+ * https://github.com/lz4/lz4-java/blob/master/src/java/net/jpountz/lz4/LZ4DecompressorWithLength.java
+* Design patterns
+ * https://medium.com/@apusingh1967/every-senior-java-developer-must-know-how-and-when-to-use-generics-lower-and-upper-bound-89372c10d282
+* Different Architectures.
+* Bloom filter
+ * https://github.com/google/guava/blob/master/guava/src/com/google/common/hash/BloomFilter.java
+* Vector in java
+ * https://jbaker.io/2022/06/09/vectors-in-java/
+ * https://www.youtube.com/watch?v=ZQFzMfHIxng
+* JMH (Java Microbenchmark Harness) & other benchmarks
+ * https://jenkov.com/tutorials/java-performance/jmh.html
+ * https://github.com/OpenHFT/JLBH?tab=readme-ov-file
+* Lock-free Programming
+ * https://preshing.com/20120612/an-introduction-to-lock-free-programming/
+ * https://www.linkedin.com/advice/0/what-some-examples-lock-free-wait-free-data-structures
+ * https://www.cs.cmu.edu/~410-s05/lectures/L31_LockFree.pdf
+ * https://medium.com/@tylerneely/fear-and-loathing-in-lock-free-programming-7158b1cdd50c
+ * http://15418.courses.cs.cmu.edu/spring2013/article/46
+ * https://www.youtube.com/watch?v=c1gO9aB9nbs
+ * https://www.youtube.com/watch?v=lVBvHbJsg5Y
+ * https://www.1024cores.net/home/lock-free-algorithms/introduction
+ * https://lwn.net/Articles/844224/
+* RISC vs CISC
+ * https://cs.stanford.edu/people/eroberts/courses/soco/projects/risc/risccisc/
+* MMap
+ * https://news.ycombinator.com/item?id=25701959
+ * https://sasha-f.medium.com/why-mmap-is-faster-than-system-calls-24718e75ab37
+ * https://news.ycombinator.com/item?id=29936104
+ * https://www.youtube.com/watch?v=1BRGU_AS25c
+* Posix_fadvise
+ * https://github.com/hopshadoop/hops/blob/master/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/nativeio/NativeIO.java
+* LevelDB explanation
+ * https://segmentfault.com/a/1190000040286395/en (Very well written)
+ * https://docs.riak.com/riak/kv/latest/setup/planning/backend/leveldb/index.html#:~:text=Comparison%20of%20eLevelDB%20and%20Bitcask&text=Bitcask%20stores%20keys%20in%20memory,LevelDB%20will%20need%20two%20seeks. (Compaction)
+ * https://axlgrep.github.io/tech/leveldb-sst-file.html
+ * https://chenju2k6.github.io/blog/2018/11/leveldb
+ * https://rocksdb.org/blog/
+* Compaction
+ * https://smalldatum.blogspot.com/2018/08/name-that-compaction-algorithm.html
+ * https://github.com/facebook/rocksdb/wiki/Compaction
+* System programming
+ * https://www.cs.cornell.edu/courses/cs4414/2023sp/
+* Write optimization
+ * https://www.reddit.com/r/cpp_questions/comments/18g513s/resources_for_learning_writing_optimized_code/
+* Cache oblivious
+ * https://www.reddit.com/r/programming/comments/90o1g/cache_oblivious_algorithms/
+ * https://www.reddit.com/r/programming/comments/8smna/algorithmconscious_cacheoblivious/
+ * https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.44.5650
+ * https://www.reddit.com/r/programming/comments/fvqz4/cacheoblivious_algorithms/
+ * http://blog.netduma.com/2013/01/cache-money-hoes-attached-code.html
+ * https://rcoh.me/posts/cache-oblivious-datastructures/
+* Page cache
+ * https://manybutfinite.com/post/page-cache-the-affair-between-memory-and-files/
+* Testing learning
+ * https://junit.org/junit5/docs/current/user-guide/
+ * https://javadoc.io/doc/org.mockito/mockito-core/latest/org/mockito/Mockito.html
\ No newline at end of file
diff --git a/AtomDB/Docs/Read efficiency b/AtomDB/Docs/Read efficiency
new file mode 100644
index 0000000..e69de29
diff --git a/AtomDB/Docs/SST_Architecture b/AtomDB/Docs/SST_Architecture
new file mode 100644
index 0000000..8351f02
--- /dev/null
+++ b/AtomDB/Docs/SST_Architecture
@@ -0,0 +1 @@
+we store the cluster kvs as compressed and k
\ No newline at end of file
diff --git a/AtomDB/Docs/Table.md b/AtomDB/Docs/Table.md
new file mode 100644
index 0000000..27e010e
--- /dev/null
+++ b/AtomDB/Docs/Table.md
@@ -0,0 +1,8 @@
+# Table
+
+## Role
+* Holding files of different levels
+* Assigning files their level-specific names
+* Removing obsolete files
+* Writing a log of operations on files, like deleting, compacting, and creating
+* Reading the log file (manifest file) to understand what process was executing and which files are in proper shape
\ No newline at end of file
diff --git a/AtomDB/Docs/Threads b/AtomDB/Docs/Threads
new file mode 100644
index 0000000..e69de29
diff --git a/AtomDB/Docs/Todo.md b/AtomDB/Docs/Todo.md
new file mode 100644
index 0000000..5d64f0d
--- /dev/null
+++ b/AtomDB/Docs/Todo.md
@@ -0,0 +1,121 @@
+# Todo
+
+Make a WAL as our first SST and then use compaction.
+
+# Current SST architecture
+* Current SST structure
+* BS -> binary search position pointer
+* CH -> checksum
+* EN -> number of entries in the SST
+* VID -> version id of that SST
+* LEN -> length
+* SK -> smallest key
+* LK -> largest key
+* LEV -> level of that SST
+* K -> key
+* V -> value
+* MAR -> MARKER
+* P -> pointer to key
+
+```
+[
+ {VID | LEV | BS | EN | SK_LEN | SK | SK_CH | LK_LEN | LK | LK_CH} = header
+ {K_LEN | K | MAR | V_LEN | V | CH} = middleBlock, multiple such block
+ {K_LEN | K | MAR | CH} = middleBlock, when key is deleted
+ {P1,P2,P3....Pn} = Key positions for binary search
+ {Bloom filter} = Bloom filter
+]
+```
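+
+As an illustration only, the header above could be mirrored by a record like this (field names are this doc's abbreviations, not the actual class):
+
+```java
+record SstHeader(long versionId,        // VID
+                 int level,             // LEV
+                 long binarySearchPos,  // BS: where the pointer list starts
+                 long entries,          // EN
+                 byte[] smallestKey,    // SK (stored with SK_LEN and SK_CH)
+                 byte[] largestKey) {   // LK (stored with LK_LEN and LK_CH)
+}
+```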
+
+// todo shrink some values to their native size; in some places long is used
+// even though the thing is an int
+
+// todo make all ByteBuffers direct
+
+// todo change all Arrays.compare to the db comparator
+
+// todo improve error messages
+
+# The MVP
+- [ ] Unit tests
+- [ ] Integration tests
+- [ ] Benchmark
+- [ ] Maven release
+- [ ] GitHub readme page
+- [ ] Document for me, about every aspect of the code
+- [ ] Future plans
+
+# Extras
+- [ ] Write a blog about it
+
+
+# Branch Optimization Roadmap
+1. Batch write
+2. Increase size of ByteBuffer
+3. Use of MappedByteBuffer
+4. Batch read of header (2 reads will retrieve the whole header). Adding of a long
+5. Replace long with int
+6. Performing partial binary search
+7. Increase ByteBuffer size (400 KB is the max item size in DynamoDB)
+8. Use a single byte for the marker.
+9. Keep an extra memtable in memory, since recently written data is frequently used.
+10. Utilise threads for compaction.
+11. Can use binary search to find the right file among a level's files.
+
+# Think tank for the **Branch Optimization Roadmap**
+1. Fast key search in the segment. A segment is the block divided by 10 (the 10% rule).
+    1. LevelDB uses restart points in the SST file.
+    2. Cache-oblivious data structures
+    3. Fractal tree
+2. Best way to do compaction: how many files to take, which files to take, when to trigger, how to reduce the cost.
+3. Better caching mechanism
+    1. Guava's LoadingCache
+    2. LRU
+    3. The Linux caching mechanism can be studied
+4. Better and faster compression.
+5. Lock-free database.
+    1. Can look at Guava's algorithms
+6. Lots of abstractions.
+7. Design patterns to be used.
+8. Can utilise fork/join for reads.
+9. Using Java 21 virtual threads.
+10. Use of heuristics: basically creating a tree and guessing where the items might exist. Need to do the math and cost estimation for this.
+
+
+## Elaboration of features used in the Optimization Roadmap
+### 1. Performing partial binary search
+First we calculate how many reads are required to fully retrieve the pointers.
+Then NumberOfPointers / TotalNumberOfReadsToReadWholePointerList = partials.
+Now take the key at every partial boundary and store those keys together (compress them together).
+While reading, we read all the keys we stored together and find the given key among them.
+Once we know which range it falls in, we can calculate which section of pointers to retrieve,
+and now it's one read to get the required pointers; from there it's the usual routine.
+With this we avoid pulling all the pointers into memory and save multiple reads.
+The catch is that the keys at the partial boundaries shouldn't be too long, as that would increase the read cost.
+A sketch of the two-stage lookup follows the numbers below.
+
+Overheads for 50,000 entries
+
+Current architecture:
+97 × ConstantPointerRead + 16 × ConstantKSizeRead + 16 × KeyRead = 129
+
+New:
+97 × KeyWrite
+97 × KeyRead + 1 × ConstantRead + 9 × ConstantKSizeRead + 9 × KeyRead = 116
+
+The 97 × KeyRead will be a batch read and can also be compressed.
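+
+The sketch of the two-stage lookup (names `sampledKeys`/`readSection` are illustrative; sections are assumed evenly sized):
+
+```java
+import java.util.Arrays;
+import java.util.Comparator;
+
+class PartialPointerSearch {
+    // sampledKeys holds the key at every partial boundary and stays in memory;
+    // readSection() stands in for ONE positioned disk read of a pointer section
+    static long[] pointerSectionFor(byte[] key, byte[][] sampledKeys,
+                                    int sectionSize, Comparator<byte[]> cmp) {
+        int idx = Arrays.binarySearch(sampledKeys, key, cmp);
+        // a negative result encodes the insertion point; the key falls in the
+        // section that starts at the preceding sampled key
+        int section = idx >= 0 ? idx : Math.max(0, -(idx + 1) - 1);
+        return readSection(section, sectionSize);
+        // ...the usual binary search then runs over just this one section
+    }
+
+    static long[] readSection(int section, int sectionSize) {
+        return new long[sectionSize];   // illustrative stub for the single read
+    }
+}
+```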
+
+# Enhancement
+1. Allow keys and values to be of any variable size, or at least some big number like 100 MB.
+2. Find a bloom filter whose storage and retrieval are in our hands.
+3. Handle key and value together overflowing the 4096-byte ByteBuffer.
+4. More information (metadata) should be included in the SST.
+5. We can make blocks of 4-5 keys or values, depending on ByteBuffer size and mmap, so that we bulk-retrieve.
+
+# Think tank
+1. We have the WAL, and then we have the level-one SST: basically writing the same data twice. Can we improve this?
+2. Store all the values apart from the keys; when binary search is done, disk needle movement improves,
+since the needle need not jump across huge gaps once the values between its jumps are removed.
+3. We currently need to get the whole key byte array into memory; look at how comparison is done in the code.
+It compares byte by byte, so a partial key could be retrieved and checked.
+4. Map and unmap as required (a sketch follows). MappedByteBuffer https://github.com/dain/leveldb/blob/130db6965ebba2c19106c5355bee0c8dc59f57db/leveldb/src/main/java/org/iq80/leveldb/impl/MMapLogWriter.java#L214
+   invokeCleaner to unmap the map. https://github.com/dain/leveldb/blob/130db6965ebba2c19106c5355bee0c8dc59f57db/leveldb/src/main/java/org/iq80/leveldb/util/ByteBufferSupport.java
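+
+A sketch of explicit unmapping on JDK 9+ via `sun.misc.Unsafe#invokeCleaner` (the linked ByteBufferSupport does the JDK 8 equivalent through reflection); assumes the `jdk.unsupported` module is available:
+
+```java
+import java.lang.reflect.Field;
+import java.nio.MappedByteBuffer;
+
+final class Unmapper {
+    static void unmap(MappedByteBuffer buffer) {
+        try {
+            Field f = Class.forName("sun.misc.Unsafe").getDeclaredField("theUnsafe");
+            f.setAccessible(true);
+            sun.misc.Unsafe unsafe = (sun.misc.Unsafe) f.get(null);
+            unsafe.invokeCleaner(buffer); // releases the mapping without waiting for GC
+        } catch (ReflectiveOperationException e) {
+            throw new IllegalStateException("unmap failed", e);
+        }
+    }
+}
+```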
diff --git a/AtomDB/pom.xml b/AtomDB/pom.xml
index 82a3135..5cb2ce3 100644
--- a/AtomDB/pom.xml
+++ b/AtomDB/pom.xml
@@ -9,8 +9,8 @@
     <version>1.0-SNAPSHOT</version>
-        <maven.compiler.source>17</maven.compiler.source>
-        <maven.compiler.target>17</maven.compiler.target>
+        <maven.compiler.source>22</maven.compiler.source>
+        <maven.compiler.target>22</maven.compiler.target>
@@ -44,5 +44,91 @@
             <version>5.8.2</version>
             <scope>test</scope>
         </dependency>
+        <dependency>
+            <groupId>org.lz4</groupId>
+            <artifactId>lz4-java</artifactId>
+            <version>1.8.0</version>
+        </dependency>
+        <dependency>
+            <groupId>com.github.ben-manes.caffeine</groupId>
+            <artifactId>caffeine</artifactId>
+            <version>3.1.8</version>
+        </dependency>
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-core</artifactId>
+            <version>4.8.1</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.mockito</groupId>
+            <artifactId>mockito-junit-jupiter</artifactId>
+            <version>4.8.1</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.powermock</groupId>
+            <artifactId>powermock-module-junit4</artifactId>
+            <version>2.0.9</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.powermock</groupId>
+            <artifactId>powermock-api-mockito2</artifactId>
+            <version>2.0.9</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>com.google.jimfs</groupId>
+            <artifactId>jimfs</artifactId>
+            <version>1.3.0</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>com.google.errorprone</groupId>
+            <artifactId>error_prone_core</artifactId>
+            <version>2.28.0</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.12.1</version>
+                <configuration>
+                    <release>22</release>
+                    <annotationProcessorPaths>
+                        <path>
+                            <groupId>com.google.errorprone</groupId>
+                            <artifactId>error_prone_core</artifactId>
+                            <version>2.23.0</version>
+                        </path>
+                    </annotationProcessorPaths>
+                    <compilerArgs>
+                        <arg>-Xplugin:ErrorProne</arg>
+                        <arg>-XDcompilePolicy=simple</arg>
+                        <arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED</arg>
+                        <arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED</arg>
+                        <arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.main=ALL-UNNAMED</arg>
+                        <arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.model=ALL-UNNAMED</arg>
+                        <arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED</arg>
+                        <arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.processing=ALL-UNNAMED</arg>
+                        <arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED</arg>
+                        <arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED</arg>
+                        <arg>-J--add-opens=jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED</arg>
+                        <arg>-J--add-opens=jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED</arg>
+                    </compilerArgs>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+</project>
\ No newline at end of file
diff --git a/AtomDB/src/main/java/Compaction/Compaction.java b/AtomDB/src/main/java/Compaction/Compaction.java
deleted file mode 100644
index 16608e8..0000000
--- a/AtomDB/src/main/java/Compaction/Compaction.java
+++ /dev/null
@@ -1,146 +0,0 @@
-package Compaction;
-
-/**
- *
- * level 0 -> 4, 4, 4 (around 12 in size)
- * multiplier(0) = 0 * 2 + 2 = 2; size = 2 * 4 = 8
- * level 1 -> 8 * 10 (around 100)
- * multiplier(1) = 1 * 2 + 2 = 4; size = 4 * 8 = 32
- * level 2 -> 32 * 32 (around 1000)
- * multiplier(2) = 2 * 2 + 2 = 6; size = 6 * 32 = 192
- * level 3 -> 192 * 52 (around 10000)
- *
- *
- *
- */
-
-
-import Table.Table;
-import Tools.Validate;
-import db.DBOptions;
-
-import java.io.File;
-import java.time.Instant;
-import java.util.List;
-
-import Level.Level;
-import util.Util;
-
-/**
- * todo
- * need to get the lowest-rank sst file from the level to compact,
- * otherwise some files don't get compacted
- *
- */
-
-public class Compaction {
- // todo find a optimized solution
- private final static int[] LEVEL_FILES_TO_COMPACT = {
- (int) (2),
- (int) (3),
- (int) (3),
- (int) (3),
- (int) (3),
- (int) (3),
- (int) (3),
- (int) (3)
- };
-
- private Table table;
- private DBOptions dbOptions;
- public Compaction(DBOptions dbOptions, Table table) {
- this.table = table;
- this.dbOptions = dbOptions;
- }
-
- private void compactionMaybe0(Level level) throws Exception {
- List levelFiles = table.getLevelList(level);
- switch (level) {
- case LEVEL_ZERO -> {
- if (levelFiles.size() > 3) {
- doCompaction(level, levelFiles);
- compactionMaybe0(level.next());
- }
- }
- case LEVEL_ONE -> {
- if (levelFiles.size() >= 10/*count >= 10*/) { // normally 10
- doCompaction(level, levelFiles);
- compactionMaybe0(level.next());
- }
- }
- case LEVEL_TWO -> {
- if (levelFiles.size() >= 32/*count >= 10*/) {
- doCompaction(level, levelFiles);
- compactionMaybe0(level.next());
- }
- }
- case LEVEL_THREE -> {
- if (levelFiles.size() >= 52/*count >= 10*/) {
- doCompaction(level, levelFiles);
- compactionMaybe0(level.next());
- }
- }
- }
- }
-
- public void compactionMaybe() throws Exception {
- compactionMaybe0(Level.LEVEL_ZERO);
- }
-
- // todo can be made good
- private String doCompaction(Level level, List<String> levelFiles) throws Exception {
- int numberOfFiles = LEVEL_FILES_TO_COMPACT[(int) Level.toID(level)];
-
- // important because the list is in decreasing order
- // eg 10, 7, 5, 4, 2, 1
- // you need to compact the oldest to newest, so u need maybe 1, 2
- // thats why this
- List<String> filesToCompact = levelFiles.subList(
- levelFiles.size() - numberOfFiles, levelFiles.size());
-
- String createdFileName = performCompaction(filesToCompact, level);
-
-
-
- table.removeFiles(level, filesToCompact);
- deleteCompactedFiles(filesToCompact);
- return createdFileName;
- }
-
- private void deleteCompactedFiles(List<String> levelFiles) {
- for (String file : levelFiles) {
- boolean delete = new File(file).delete();
- if (delete) {
- System.out.println("deleted " + file);
- } else {
- System.out.println("not deleted " + file);
- }
- }
- }
-
-
- private String performCompaction(List<String> files,
- Level level) throws Exception {
-
- // need to be in sync with table creating file path
- var file = new File(dbOptions.getDBfolder() + File.separator +
- level.value() + "_" + (Instant.now().toString().replace(':', '_'))
- + ".inMaking");
-
- Util.requireTrue(file.createNewFile(), "unable to create file");
-
- var compactor = new Compactor(files, file, level);
-// System.out.println("running compaction and will be written on " + file);
- compactor.compact();
-
-// // debug
-// var vali = new Validate(file);
-// vali.isValid();
-
- String newSST = table.getNewSST(level.next());
- Util.requireTrue(file.renameTo(new File(newSST)), "unable to rename file");
-
- table.addSST(level.next(), newSST);
- return newSST;
- }
-}
diff --git a/AtomDB/src/main/java/Compaction/Compactor.java b/AtomDB/src/main/java/Compaction/Compactor.java
deleted file mode 100644
index 05fa961..0000000
--- a/AtomDB/src/main/java/Compaction/Compactor.java
+++ /dev/null
@@ -1,147 +0,0 @@
-package Compaction;
-
-
-import Constants.DBConstant;
-import Level.Level;
-import com.google.common.hash.BloomFilter;
-import com.google.common.hash.Funnels;
-import db.DBComparator;
-import sst.Header;
-import sst.MiddleBlock;
-import sst.ValueUnit;
-import util.Util;
-
-import java.io.*;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-import java.util.*;
-
-/**
- * todo
- * deletion is limited
- * the key and the deletion marker doesn't get removed fully
- *
- * Solution
- * when coming across a key which is deleted, we can do a search in the levels below
- * and in the other files of that same level, and check if the key is present.
- * if present, keep the marker; if not, delete it here.
- * further, this can be improved with a bloom filter
- */
-
-public class Compactor {
- private final List<String> files;
- private final ByteBuffer byteBuffer = ByteBuffer.allocateDirect(4096);
- private final File compactionFile;
- private final Level level;
-
- public Compactor(List<String> files, File compactionFile, Level level) {
- Objects.requireNonNull(files);Objects.requireNonNull(compactionFile);
- this.level = level;
- this.files = files;
- this.compactionFile = compactionFile;
- }
-
- //todo breakDown compact method
- public void compact() {
-
- // debug
- long a, b;
- a = System.nanoTime();
-
- try(FileOutputStream outputStream = new FileOutputStream(compactionFile);
- FileChannel channel = outputStream.getChannel();
- ) {
- List<Helper> helperList = getHelperList(files);
- byte[][] firstAndLast = getSmallestANDLargest(helperList);
- var header = new Header(DBConstant.SST_VERSION,
- firstAndLast[0],
- firstAndLast[1],
- level.next(),
- compactionFile.toPath().toString());
- header.writeHeader(channel, byteBuffer);
-
- PriorityQueue<Helper> qu = new PriorityQueue<>(helperList);
- qu.forEach(Helper::iterate);
-
- BloomFilter<byte[]> filter = BloomFilter.create(
- Funnels.byteArrayFunnel(),
- helperList.stream().mapToLong(e -> e.getEntries()).sum(),
- 0.01);
-
- List<Long> pointers = new ArrayList<>();
- long numberOfEntries = 0;
- byte[] previousKey = null;
- while (!qu.isEmpty()) {
- Helper helper = qu.remove();
-
- Map.Entry<byte[], ValueUnit> next = helper.next();
-
- if (Arrays.compare(previousKey, next.getKey()) != 0) {
- pointers.add(channel.position());
-
- // bloom
- filter.put(next.getKey());
-
- numberOfEntries++;
- MiddleBlock.writeBlock(channel, byteBuffer, next);
- }
-
- previousKey = next.getKey();
-
- if (helper.hasNext()) {
- qu.add(helper);
- } else {
- helper.close();
- }
- }
- long bs = channel.position();
-
- MiddleBlock.writePointers(channel, byteBuffer, pointers);
-
- // bloom
- MiddleBlock.writeBloom(outputStream, filter);
-
- header.writeBS(channel, byteBuffer, bs);
- Util.requireEquals(pointers.size(), numberOfEntries, "entry number misMatch with arrayList");
- header.writeEntries(channel, byteBuffer, pointers.size());
- header.close();
- } catch (Exception e) {
- throw new RuntimeException("while compacting_file=" + compactionFile, e);
- }
-
- // debug
- b = System.nanoTime();
- System.out.println("took ="+(b - a) + " nano for level="+level+" sst to create from compact");
- }
-
-
- private byte[][] getSmallestANDLargest(List<Helper> helperList) {
- byte[] smallest = helperList.get(0).getSmallestKey();
- byte[] largest = helperList.get(0).getlargestKey();
-
- for (Helper helper : helperList) {
- if (DBComparator.byteArrayComparator
- .compare(smallest, helper.getSmallestKey()) > 0) {
- smallest = helper.getSmallestKey();
- }
- if (DBComparator.byteArrayComparator
- .compare(largest, helper.getlargestKey()) < 0) {
- largest = helper.getlargestKey();
- }
- }
-
- return new byte[][] {
- smallest, largest
- };
- }
-
- private List<Helper> getHelperList(List<String> files) throws Exception {
- List<Helper> list = new ArrayList<>(files.size());
- for (String file : files) {
- list.add(
- new Helper(file)
- );
- }
- return list;
- }
-}
diff --git a/AtomDB/src/main/java/Compaction/Helper.java b/AtomDB/src/main/java/Compaction/Helper.java
deleted file mode 100644
index ad4cd8e..0000000
--- a/AtomDB/src/main/java/Compaction/Helper.java
+++ /dev/null
@@ -1,130 +0,0 @@
-package Compaction;
-
-import db.DBComparator;
-import sst.Header;
-import sst.MiddleBlock;
-import sst.ValueUnit;
-import util.Util;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-import java.util.*;
-
-public class Helper implements Comparable<Helper>, Iterator<Map.Entry<byte[], ValueUnit>>, AutoCloseable {
- private final FileChannel channel;
- private final ByteBuffer byteBuffer;
- private final List<Long> pointers;
- private final Header header;
- private int pointerIndex;
- private final long fileRankInLevel;
- private final short fileLevel;
- private boolean startedIter = false;
- public Helper(String file) throws Exception {
- this.channel = new FileInputStream(file).getChannel();
- this.byteBuffer = ByteBuffer.allocate(4096);
- System.out.printf("helper file="+file);
- // file rank and level
- String[] pieces = file.trim().split(File.separator);
- pieces = pieces[pieces.length - 1].trim().split("_");
- this.fileLevel = Short.parseShort(pieces[0].trim());
- this.fileRankInLevel = Long.parseLong(pieces[1].replace(".sst", ""));
-
- this.header = Header.getHeader(file, channel, byteBuffer);
- // todo can be retrived from cache
- this.pointers = MiddleBlock.readPointers(channel,
- byteBuffer,
- header.getBinarySearchLocation(),
- new ArrayList<>((int) header.getEntries()),
- header.getEntries());
-
- Util.requireEquals(header.getEntries(), pointers.size(), "pointers and entries from header doesn't match");
- }
-
- public long getEntries() {
- return header.getEntries();
- }
-
- public byte[] getSmallestKey() {
- return header.getSmallestKey();
- }
-
- public byte[] getlargestKey() {
- return header.getLargestKey();
- }
-
- @Override
- public int compareTo(Helper provided) {
- if (provided.hasNext()) {
- if (this.hasNext()) {
- int val = 0;
-
- try {
- val = DBComparator.byteArrayComparator.compare(
- this.peekKey(), provided.peekKey());
-
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
-
- if (val == 0) {
- if (this.fileLevel == provided.fileLevel) {
- // purposely
- return Long.compare(provided.fileRankInLevel, this.fileRankInLevel);
- }
- return Short.compare(this.fileLevel, provided.fileLevel);
- }
- return val;
- } else {
- return 1; // giving more value for provided
- }
- } else {
- return this.hasNext() ? -1 : 0;
- }
- }
-
-
- public void iterate() {
- Util.requireFalse(startedIter, "iterator started in between the current run");
- startedIter = true;
- pointerIndex = 0;
- }
-
- public byte[] peekKey() throws IOException {
- if (hasNext())
- return MiddleBlock.readKey(channel, byteBuffer, pointers.get(pointerIndex));
- return null;
- }
-
- @Override
- public boolean hasNext() {
- return pointerIndex < pointers.size();
- }
-
- @Override
- public Map.Entry<byte[], ValueUnit> next() {
- try {
- long bloomLine = header.getBinarySearchLocation() + Long.BYTES * header.getEntries();
- if (channel.position() == bloomLine) {
- throw new RuntimeException("Bloom filter line access");
- }
-
- return MiddleBlock.readKeyValue(channel,
- byteBuffer,
- pointers.get(pointerIndex++));
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- @Override
- public void close() throws Exception {
- channel.close();
- header.close();
- if (startedIter) {
- Util.requireEquals(pointerIndex, pointers.size(), " iterator mistake");
- }
- }
-}
diff --git a/AtomDB/src/main/java/Constants/DBConstant.java b/AtomDB/src/main/java/Constants/DBConstant.java
deleted file mode 100644
index 6107217..0000000
--- a/AtomDB/src/main/java/Constants/DBConstant.java
+++ /dev/null
@@ -1,8 +0,0 @@
-package Constants;
-
-public class DBConstant {
- public static final int INITIAL_BUFFER_SIZE = 4096;
-
- // todo can be made int
- public static final long SST_VERSION = 123456789L;
-}
diff --git a/AtomDB/src/main/java/Constants/Operations.java b/AtomDB/src/main/java/Constants/Operations.java
deleted file mode 100644
index 680c47a..0000000
--- a/AtomDB/src/main/java/Constants/Operations.java
+++ /dev/null
@@ -1,43 +0,0 @@
-package Constants;
-
-import util.BytesConverter;
-
-import java.util.Arrays;
-
-public enum Operations {
- WRITE,
- READ,
- UPDATE,
- DELETE;
-
- private static final byte[] write = BytesConverter.bytes("WRIT");
- private static final byte[] read = BytesConverter.bytes("READ");
- private static final byte[] update = BytesConverter.bytes("UPDA");
- private static final byte[] delete = BytesConverter.bytes("DELE");
-
- public static Operations getOP(byte[] array) throws Exception {
- if (Arrays.compare(array, write) == 0) return WRITE;
- if (Arrays.compare(array, read) == 0) return READ;
- if (Arrays.compare(array, update) == 0) return UPDATE;
- if (Arrays.compare(array, delete) == 0) return DELETE;
- throw new Exception("not of Constants.Operations type");
- }
-
- public byte[] value() {
- return switch (this) {
- case WRITE -> write;
- case READ -> read;
- case UPDATE -> update;
- case DELETE -> delete;
- };
- }
-
- public static int bytesLength() {
- if (read.length != write.length ||
- write.length != update.length ||
- update.length != delete.length) {
- throw new RuntimeException("operation byte length not same");
- }
- return read.length;
- }
-}
diff --git a/AtomDB/src/main/java/Logs/FileChannelLogReader.java b/AtomDB/src/main/java/Logs/FileChannelLogReader.java
deleted file mode 100644
index ea74c24..0000000
--- a/AtomDB/src/main/java/Logs/FileChannelLogReader.java
+++ /dev/null
@@ -1,121 +0,0 @@
-package Logs;
-
-import Constants.DBConstant;
-import db.DB;
-import db.DBOptions;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.File;
-import java.io.RandomAccessFile;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-import java.time.Instant;
-import java.time.LocalDateTime;
-import java.util.*;
-
-import static util.BytesConverter.bytes;
-/*
-* todo
-* log reader and log writer both have lot of common things
-* try to reduce the duplication of code
-* */
-
-public class FileChannelLogReader implements LogReader{
- private static final Logger logger = LoggerFactory.getLogger(FileChannelLogReader.class);
- private static String LOG = "LOG";
- private File logFile = null;
- private ByteBuffer byteBuffer;
- private static final byte[] delimiter = bytes(System.lineSeparator());
- private static final int DateTimeLength = bytes(LocalDateTime.now().toString()).length;
-
- public FileChannelLogReader(DBOptions dbOptions, String currentLogFile) throws Exception {
- var folder = new File(dbOptions.getDBfolder());
- List<File> logFiles = new ArrayList<>();
-
- for (File file : Objects.requireNonNull(folder.listFiles())) {
- if (file.getName().contains(LOG) && !file.getName().equals(currentLogFile)) {
- logFiles.add(file);
- }
- }
- if (logFiles.size() == 0) throw new Exception("No log file found");
- logFile = getLatestLog(logFiles);
- byteBuffer = ByteBuffer.allocate(DBConstant.INITIAL_BUFFER_SIZE);
- }
-
- // todo
- // need to move this method to the class where the creation of
- // log name is done
- private File getLatestLog(List<File> logFiles) throws Exception {
- Instant latest = Instant.MIN;
- File foundLog = null;
- for (File file : logFiles) {
- String log = file.getName().replace(LOG + "-", "")
- .replace('_', ':');
- Instant dateTime = Instant.parse(log);
-
- if (latest.isBefore(dateTime)) {
- latest = dateTime;
- foundLog = file;
- }
- }
-
- if (latest.equals(Instant.MIN)) {
- throw new Exception("no log file found");
- }
- if (logFiles.size() > 2) {
- logger.debug("Multiple Log File found " + logFiles);
- logger.debug("using " + (foundLog.getName()));
- System.out.println("Multiple Log File found " + logFiles);
- System.out.println("using " + (foundLog.getName()));
- }
-
- return foundLog;
- }
-
- @Override
- public void readWAL(DB db) throws Exception {
- LogBlock current = null;
- try (RandomAccessFile reader =
- new RandomAccessFile(logFile, "r")) {
- FileChannel channel = reader.getChannel();
-
- for (long i = 0; i < channel.size(); i += current.totalBytesRead()) {
- current = LogBlock.read(channel, byteBuffer);
- switch (current.getOperations()) {
- case WRITE -> db.put(current.getKey(), current.getValue());
- case DELETE -> { // todo need to improve
- db.delete(current.getKey());
-// byte[] value = db.get(current.getKey());
-// if (value != null) {
-// if (Arrays.compare(value, current.getValue()) == 0) {
-// db.delete(current.getKey());
-// } else {
-// throw new Exception("previous value mismatch for the key");
-// }
-// } else {
-// throw new Exception("key not found in db");
-// }
- }
-
- case UPDATE -> {
- db.put(current.getKey(), current.getValue());
- // todo below code is wrong, but will work for current time
-// if (db.put(current.getKey(), current.getValue()) == null) {
-// throw new Exception("updation before any key entered");
-// }
- }
- }
- }
- channel.close();
-
- } catch (Exception e) {
- throw e;
- }
- logger.debug("deleting log="+logFile.getName());
- System.out.println("deleting log="+logFile.getName());
- if (!logFile.delete()) {
- throw new Exception(logFile + " not deleted");
- }
- }
-}
diff --git a/AtomDB/src/main/java/Logs/FileChannelLogWriter.java b/AtomDB/src/main/java/Logs/FileChannelLogWriter.java
deleted file mode 100644
index 3c775c8..0000000
--- a/AtomDB/src/main/java/Logs/FileChannelLogWriter.java
+++ /dev/null
@@ -1,85 +0,0 @@
-package Logs;
-
-import Constants.DBConstant;
-import Constants.Operations;
-import db.DBOptions;
-import util.Util;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.RandomAccessFile;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-import java.time.Instant;
-import java.time.LocalDateTime;
-
-import static util.BytesConverter.bytes;
-
-public class FileChannelLogWriter implements LogWriter {
- private FileChannel channel;
- private static String LOG = "LOG";
- private String currentFileName;
- private ByteBuffer byteBuffer;
- private static final byte[] delimiter = bytes(System.lineSeparator());
- private static final int DateTimeLength = 8 + 4;
- private final DBOptions dbOptions;
-
- public FileChannelLogWriter(DBOptions options) throws IOException {
- byteBuffer = ByteBuffer.allocate(DBConstant.INITIAL_BUFFER_SIZE);
- dbOptions = options;
- createLogFile();
- }
-
- public void deleteAndCreateNewLogFile() throws IOException {
- deleteLogFile();
- createLogFile();
- }
-
- private void deleteLogFile() throws IOException {
- closeLogFile();
- if (!new File(dbOptions.getDBfolder() +
- File.separator + currentFileName).delete()) {
- throw new IOException("Log file not deleted");
- }
- }
-
- private void createLogFile() throws IOException {
- currentFileName = LOG + "-" + Instant.now().toString()
- .replace(':', '_');
-
- channel = new RandomAccessFile(dbOptions.getDBfolder() +
- File.separator + currentFileName,
- "rw").getChannel();
- channel.force(false);
- }
-
- @Override
- public String getCurrentFileName() {
- return currentFileName;
- }
-
- public void closeLogFile() throws IOException {
- channel.close();
- }
-
- @Override
- public void logOP(byte[] key, byte[] value, Operations operations) throws Exception {
- int length = key.length + value.length + delimiter.length * 2
- + DateTimeLength + Operations.bytesLength() + Long.BYTES * 3;
-
- byteBuffer = Util.getExtendedBufferIfNeeded(length, byteBuffer);
- byteBuffer.clear();
-
- LogBlock.write(channel, new LogBlock(
- Instant.now(),
- operations,
- key,
- value
- ), byteBuffer);
- }
-
- @Override
- public void close() throws IOException {
- closeLogFile();
- }
-}
diff --git a/AtomDB/src/main/java/Logs/LogBlock.java b/AtomDB/src/main/java/Logs/LogBlock.java
deleted file mode 100644
index 4fa6e73..0000000
--- a/AtomDB/src/main/java/Logs/LogBlock.java
+++ /dev/null
@@ -1,167 +0,0 @@
-package Logs;
-
-import Checksum.CheckSum;
-import Constants.Operations;
-import util.BytesConverter;
-import util.SizeOf;
-import util.Util;
-
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-import java.time.Instant;
-import java.util.Arrays;
-public class LogBlock {
- private byte[] key, value;
- private Instant time;
- private Operations operations;
-
- private long checksum;
- public LogBlock(Instant time, Operations operations, byte[] key, byte[] value) {
- this.time = time;
- this.key = key;
- this.value = value;
- this.operations = operations;
- this.checksum = CheckSum.logBlock(time, operations.value(), key, value);
- }
-
- public LogBlock(byte[] timeInBytes, Operations operations, byte[] key, byte[] value) {
- this.time = Instant.ofEpochSecond(
- BytesConverter.toLong(timeInBytes, 0, 8),
- BytesConverter.toInt(timeInBytes, 8, 12)
- );
- this.key = key;
- this.value = value;
- this.operations = operations;
- this.checksum = CheckSum.logBlock(time, operations.value(), key, value);
- }
-
- // total bytes read for extraction of this block
- //
- public long totalBytesRead() {
- return SizeOf.InstantTimeLength +
- SizeOf.OperationsLength +
- SizeOf.LongLength + // key length
- key.length +
- SizeOf.LongLength + // value length
- value.length +
- SizeOf.LongLength; // checksum
- }
-
- public long getChecksum() {
- return checksum;
- }
-
- public byte[] getKey() {
- return key;
- }
-
- public byte[] getValue() {
- return value;
- }
-
- public Instant getTime() {
- return time;
- }
-
- public Operations getOperations() {
- return operations;
- }
-
- public static void write(FileChannel channel,
- LogBlock block,
- ByteBuffer buffer) throws IOException {
-
- long checksum = CheckSum.logBlock(block.time, block.operations.value(), block.key, block.value);
- buffer.clear();
- // storage
- // storing time
- Util.putTime(block.time, buffer);
- buffer.put(block.operations.value())
- .putLong(block.key.length)
- .put(block.key)
- .putLong(block.value.length)
- .put(block.value)
- .putLong(checksum);
- buffer.flip();
- channel.write(buffer);
- }
-
- /*
- * todo
- * maybe we can convert this into one read and then parse
- * */
- public static LogBlock read(FileChannel channel, ByteBuffer buffer) throws Exception {
- // time readd
- byte[] time = getByteArray(channel, buffer, SizeOf.InstantTimeLength);
-
- // operation read
- Operations op = Operations.getOP(
- getByteArray(channel, buffer, SizeOf.OperationsLength));
-
- // key read
- // todo need to come up with better name
- byte[] key = getByteArray(channel, buffer);
-
- //value read
- byte[] value = getByteArray(channel, buffer);
-
- // checksum read
- readNextArrayWithN(channel, buffer, SizeOf.LongLength);
- long checksum = buffer.getLong();
-
- LogBlock logBlock = new LogBlock(time, op, key, value);
-
- if (checksum != logBlock.checksum)
- throw new Exception("checksum error for logBlock=" + logBlock);
-// System.out.println("read block " + logBlock);
- return logBlock;
- }
-
- private static byte[] getByteArray(FileChannel channel, ByteBuffer buffer) throws Exception {
- int length = readNextArray(channel, buffer);
- return getByteArrayFromBuffer(buffer, length);
- }
-
- private static byte[] getByteArray(FileChannel channel,
- ByteBuffer buffer, int length) throws Exception {
- readNextArrayWithN(channel, buffer, length);
- return getByteArrayFromBuffer(buffer, length);
- }
-
- private static int readNextArray(FileChannel channel, ByteBuffer buffer) throws IOException {
- buffer.clear();
- buffer.limit(SizeOf.LongLength);
- channel.read(buffer);
- buffer.flip();
- int length = (int) buffer.getLong();
- buffer.clear();
- readNextArrayWithN(channel, buffer, length);
- return length;
- }
-
- private static void readNextArrayWithN(FileChannel channel, ByteBuffer buffer, int len) throws IOException {
- buffer.clear();
- buffer.limit(len);
- channel.read(buffer);
- buffer.flip();
- }
-
- private static byte[] getByteArrayFromBuffer(ByteBuffer buffer, int len) throws Exception {
- if (buffer.limit() == 0) throw new Exception("ByteBuffer empty");
- byte[] bytes = new byte[len];
- buffer.get(bytes);
- return bytes;
- }
-
- @Override
- public String toString() {
- return "Logs.LogBlock{" +
- "key=" + Arrays.toString(key) +
- ", value=" + Arrays.toString(value) +
- ", time=" + time +
- ", operations=" + operations +
- ", checksum=" + checksum +
- '}';
- }
-}
diff --git a/AtomDB/src/main/java/Logs/LogReader.java b/AtomDB/src/main/java/Logs/LogReader.java
deleted file mode 100644
index d17d8ab..0000000
--- a/AtomDB/src/main/java/Logs/LogReader.java
+++ /dev/null
@@ -1,11 +0,0 @@
-package Logs;
-
-import db.DB;
-
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.Map;
-
-public interface LogReader {
- void readWAL(DB db) throws Exception;
-}
diff --git a/AtomDB/src/main/java/Logs/LogWriter.java b/AtomDB/src/main/java/Logs/LogWriter.java
deleted file mode 100644
index c544adb..0000000
--- a/AtomDB/src/main/java/Logs/LogWriter.java
+++ /dev/null
@@ -1,15 +0,0 @@
-package Logs;
-
-import Constants.Operations;
-
-import java.io.IOException;
-
-public interface LogWriter {
- void logOP(byte[] key, byte[] value, Operations operations) throws Exception;
-
- void close() throws IOException;
-
- String getCurrentFileName();
-
- public void deleteAndCreateNewLogFile() throws IOException;
-}
diff --git a/AtomDB/src/main/java/Mem/Memtable.java b/AtomDB/src/main/java/Mem/Memtable.java
deleted file mode 100644
index 68ac189..0000000
--- a/AtomDB/src/main/java/Mem/Memtable.java
+++ /dev/null
@@ -1,14 +0,0 @@
-package Mem;
-
-public interface Memtable {
-
- byte[] put(byte[] key, byte[] value) throws Exception;
-
- byte[] get(byte[] key) throws Exception;
-
- void flush();
-
- byte[] delete(byte[] key) throws Exception;
-
- boolean delete(byte[] key, byte[] value) throws Exception;
-}
diff --git a/AtomDB/src/main/java/Mem/MemtableManager.java b/AtomDB/src/main/java/Mem/MemtableManager.java
deleted file mode 100644
index d29f868..0000000
--- a/AtomDB/src/main/java/Mem/MemtableManager.java
+++ /dev/null
@@ -1,141 +0,0 @@
-package Mem;
-
-import Constants.Operations;
-import Logs.*;
-import db.DB;
-import db.DBComparator;
-import db.DBOptions;
-import sst.SSTManager;
-import sst.ValueUnit;
-import util.BytesConverter;
-import util.SizeOf;
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.Map;
-import java.util.Objects;
-import java.util.concurrent.ConcurrentSkipListMap;
-
-public class MemtableManager implements Memtable{
- private ConcurrentSkipListMap<byte[], ValueUnit> memtable;
- private final int sizeLimit = 4 * SizeOf.MBinBytes;//4096 *4;
- private int currentSize = 0;
- private LogWriter logWriter;
- private final DBOptions dbOptions;
- // todo need to remove
- private SSTManager sstManager;
- public MemtableManager(DBOptions options, SSTManager sstManager) throws Exception {
- this.dbOptions = options;
- this.sstManager = sstManager;
- }
-
- public void start(DB db) throws Exception {
- fresh();
- if(!dbOptions.isNew())
- reconstructMem(db);
- }
-
- // todo
- // if reconstruction fails, the whole program shouldn't be stopped
- private void reconstructMem(DB db) throws Exception {
- LogReader logReader = new FileChannelLogReader(dbOptions,
- logWriter.getCurrentFileName());
- logReader.readWAL(db);
- }
-
- private void fresh() throws IOException {
- memtable = new ConcurrentSkipListMap<>(DBComparator.byteArrayComparator);
- //todo
- // need to created logwriter with new only and use close() to close
- if (logWriter != null )
- logWriter.deleteAndCreateNewLogFile();
- else logWriter = new FileChannelLogWriter(dbOptions);
-// logWriter = new Logs.FileChannelLogWriter(dbOptions);
- currentSize = 0;
- }
-
- private void tryMakingSST() throws Exception {
- if (currentSize > sizeLimit) {
- System.out.println("creating sst, size= " + memtable.size());
- sstManager.createSST(Collections.unmodifiableSortedMap(memtable));
- fresh();
- }
- }
-
- //todo improve
- @Override
- public byte[] put(byte[] key, byte[] value) throws Exception {
- Objects.requireNonNull(key, "null not allowed");
- Objects.requireNonNull(value, "null not allowed");
-
- ValueUnit valueToReturn;
- if (memtable.containsKey(key)) {
- logWriter.logOP(key, value, Operations.UPDATE);
- } else {
- logWriter.logOP(key, value, Operations.WRITE);
- }
-
- valueToReturn = memtable.put(key, new ValueUnit(value, ValueUnit.ADDED));;
- currentSize += key.length + value.length;
- tryMakingSST();
- return valueToReturn != null ? valueToReturn.getValue() : null;
- }
-
- //todo fix the sstable get
- @Override
- public byte[] get(byte[] key) throws Exception {
- if (memtable.get(key) == null) {
- return sstManager.search(key);
- }
-// System.out.println("memtable");
- return memtable.get(key).getValue();
- }
-
- // todo fix api return type
- @Override
- public byte[] delete(byte[] key) throws Exception {
- logWriter.logOP(key,
- BytesConverter.bytes("DELETED_KEY_VALUE"),
- Operations.DELETE);
-
- // todo could be expensive
- byte[] foundValue = sstManager.search(key);
-
- if (foundValue == null) {
- ValueUnit value = memtable.remove(key);
- if (value != null && value.getValue() != null) {
- currentSize -= value.getValue().length;
- }
- return null;
- }
-
- // kv there in sst files,
- // adding a marker
- memtable.put(key, new ValueUnit(ValueUnit.DELETE));
-
- return null;
- }
-
- @Override
- public void flush() {
- // todo
- }
-
- @Override
- public boolean delete(byte[] key, byte[] value) throws Exception {
- // todo
-// sst.ValueUnit foundValue = memtable.get(key);
-// if (foundValue == null || Arrays.compare(foundValue.getValue(), value) != 0) return false;
-// logWriter.logOP(key, bytes, Constants.Operations.DELETE);
-// memtable.remove(key, value);
- return true;
- }
-
- public void close() throws IOException {
- //debug
-// System.out.println(memtable.entrySet().stream().map(each ->
-// Map.entry(new String(each.getKey()), new String(each.getValue().getValue())))
-// .toList());
- logWriter.close();
- }
-}
diff --git a/AtomDB/src/main/java/Table/Cache.java b/AtomDB/src/main/java/Table/Cache.java
deleted file mode 100644
index 5a2937a..0000000
--- a/AtomDB/src/main/java/Table/Cache.java
+++ /dev/null
@@ -1,71 +0,0 @@
-package Table;
-
-import db.DBOptions;
-import util.SizeOf;
-
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-
-// todo
-// this is very lose cache behaviour
-
-public class Cache {
- private final DBOptions dbOptions;
- private int capacity;
- private Map<String, CacheValue> cache;
- private long currentSize = 0;
-
- public record CacheValue(byte[] smallKey, byte[] largeKey, List<Long> pointers, long size) {
- public CacheValue(byte[] smallKey, byte[] largeKey, List<Long> pointers) {
- this(smallKey, largeKey, pointers,
- smallKey.length +
- largeKey.length +
- (long) pointers.size() * Long.BYTES
- );
- }
- };
-
- public Cache(DBOptions dbOptions, int capacity) {
- this.dbOptions = dbOptions;
- this.capacity = capacity;
- initCache();
- }
-
- private void initCache() {
- cache = new LinkedHashMap<>(30, 0.75f, true){
- protected boolean removeEldestEntry(Map.Entry<String, CacheValue> eldest) {
- if (currentSize > capacity) {
- currentSize -= ((CacheValue)eldest.getValue()).size;
- return true;
- }
- return false;
- }
- };
- }
-
- public Cache(DBOptions dbOptions) {
- this.dbOptions = dbOptions;
- this.capacity = 100 * SizeOf.MBinBytes;
- initCache();
- }
-
- public int getSize() {
- return capacity;
- }
-
- public void setSize(int size) {
- this.capacity = size;
- }
-
- public void put(String filename, byte[] small, byte[] large, List<Long> pointers) {
- CacheValue cacheValue = new CacheValue(small, large, pointers);
- currentSize += cacheValue.size;
- cache.put(filename, cacheValue);
- }
-
- public CacheValue get(String fileName) {
-// System.out.println("from cache cahce");
- return cache.getOrDefault(fileName, null);
- }
-}
diff --git a/AtomDB/src/main/java/Table/FileHelper.java b/AtomDB/src/main/java/Table/FileHelper.java
deleted file mode 100644
index b9c376d..0000000
--- a/AtomDB/src/main/java/Table/FileHelper.java
+++ /dev/null
@@ -1,32 +0,0 @@
-package Table;
-
-import com.google.common.hash.BloomFilter;
-import com.google.common.hash.Funnels;
-import sst.Header;
-
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.channels.FileChannel;
-
-public class FileHelper {
- private static ByteBuffer buffer = ByteBuffer.allocateDirect(4096);
-
- public static BloomFilter<byte[]> readBloom(String pathToFile) {
- try(FileInputStream inputStream = new FileInputStream(pathToFile);
- FileChannel channel = inputStream.getChannel();) {
-
- Header header = Header.getHeader(pathToFile, channel, buffer);
- long bloomStart = header.getBinarySearchLocation() + header.getEntries() * Long.BYTES;
- channel.position(bloomStart);
- BloomFilter<byte[]> bloomFilter = BloomFilter.readFrom(inputStream, Funnels.byteArrayFunnel());
- return bloomFilter;
- } catch (IOException e) {
- throw new RuntimeException(e);
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-}
diff --git a/AtomDB/src/main/java/Table/Table.java b/AtomDB/src/main/java/Table/Table.java
deleted file mode 100644
index 8a69e56..0000000
--- a/AtomDB/src/main/java/Table/Table.java
+++ /dev/null
@@ -1,100 +0,0 @@
-package Table;
-
-import Level.Level;
-import com.google.common.hash.BloomFilter;
-import db.DBOptions;
-
-import java.io.File;
-import java.nio.channels.FileChannel;
-import java.util.*;
-
-public class Table {
- private Map<Level, List<String>> table;
- private int currentFileName = 0;
- private final DBOptions dbOptions;
-
- private Map<String, BloomFilter<byte[]>> bloomMap;
- public Table(DBOptions dbOptions) {
- this.dbOptions = dbOptions;
- table = Map.of(Level.LEVEL_ZERO, createList(),
- Level.LEVEL_ONE, createList(),
- Level.LEVEL_TWO, createList(),
- Level.LEVEL_THREE, createList(),
- Level.LEVEL_FOUR, createList(),
- Level.LEVEL_FIVE, createList(),
- Level.LEVEL_SIX, createList(),
- Level.LEVEL_SEVEN, createList());
- bloomMap = new HashMap<>();
- fillLevels();
- }
-
- // todo can be made to read every file where inside there is level marked
- private void fillLevels() {
- String[] fileNames = new File(dbOptions.getDBfolder()).list();
-
- if (fileNames.length == 0) return; // new db
- int max = Integer.MIN_VALUE;
- for (String fileName : fileNames) {
- if (!fileName.contains(".sst")) continue;
-
- // todo make it neat
- int got = Integer.parseInt(fileName.trim().split("_")[1].trim().replace(".sst", ""));
- max = Math.max(got, max);
-
- Level level = Level.fromID(fileName.charAt(0) - 48);
- String file = dbOptions.getDBfolder() + File.separator + fileName;
- table.get(level).add(file);
- bloomMap.put(file, FileHelper.readBloom(file));
- }
- // todo same as above
- currentFileName = max;
- }
-
- public String getNewSST(Level level) {
- return dbOptions.getDBfolder() + File.separator +
- level.value() + "_" + (++currentFileName) + ".sst";
- }
-
- public void addSST(Level level, String sst) {
- table.get(level).add(sst);
- bloomMap.put(sst, FileHelper.readBloom(sst));
- }
-
- public List<String> getLevelList(Level value) {
- return List.copyOf(table.get(value));
- }
-
- private List<String> createList() {
- // todo improve this
- return new ArrayList<>() {
- public boolean add(String mt) {
- int index = Collections.binarySearch(this, mt, (s1, s2) -> {
- String[] pi = s1.trim().split(File.separator);
- var thisPi = pi[pi.length - 1].trim().split("_");
-
- pi = s2.trim().split(File.separator);
- var providedPi = pi[pi.length - 1].trim().split("_");
-
- if (!thisPi[0].equals(providedPi[0])) throw new RuntimeException("level mismatch");
- long a = Long.parseLong(providedPi[1].trim().replace(".sst", ""));
- long b = Long.parseLong(thisPi[1].trim().replace(".sst", ""));
- return Long.compare(a, b);
- });
- if (index < 0) index = ~index;
- super.add(index, mt);
- return true;
- }
- };
- }
-
- public void removeFiles(Level level, List<String> filesToCompact) {
- table.get(level).removeAll(filesToCompact);
- for (String s : filesToCompact) {
- bloomMap.remove(s);
- }
- }
-
- public BloomFilter<byte[]> getBloom(String file) {
- return bloomMap.get(file);
- }
-}
diff --git a/AtomDB/src/main/java/db/DB.java b/AtomDB/src/main/java/db/DB.java
deleted file mode 100644
index 240556c..0000000
--- a/AtomDB/src/main/java/db/DB.java
+++ /dev/null
@@ -1,16 +0,0 @@
-package db;
-
-import java.io.IOException;
-
-public interface DB{
-
- byte[] put(byte[] key, byte[] value) throws Exception;
-
- byte[] get(byte[] key) throws Exception;
-
- byte[] delete(byte[] key) throws Exception;
-
- void close() throws IOException;
-
- void destroy();
-}
diff --git a/AtomDB/src/main/java/db/DBComparator.java b/AtomDB/src/main/java/db/DBComparator.java
deleted file mode 100644
index a035c3b..0000000
--- a/AtomDB/src/main/java/db/DBComparator.java
+++ /dev/null
@@ -1,22 +0,0 @@
-package db;
-
-import java.util.Arrays;
-import java.util.Comparator;
-
-public class DBComparator {
- public static final Comparator<byte[]> byteArrayComparator = DBComparator::compare;
-
- private static int compare(byte[] left, byte[] right) {
-// if (left == null) return -1;
-// if (right == null) return 1;
-//
- int minLength = Math.min(left.length, right.length);
- for (int i = 0; i < minLength; i++) {
- int result = left[i] - right[i];
- if (result != 0) {
- return result;
- }
- }
- return left.length - right.length;
- }
-}
diff --git a/AtomDB/src/main/java/db/DBImpl.java b/AtomDB/src/main/java/db/DBImpl.java
deleted file mode 100644
index a925d50..0000000
--- a/AtomDB/src/main/java/db/DBImpl.java
+++ /dev/null
@@ -1,80 +0,0 @@
-package db;
-
-import Mem.MemtableManager;
-import sst.SSTManager;
-import Table.Table;
-import util.Util;
-
-import java.io.File;
-import java.io.IOException;
-
-// todo shrink some value to its native size, like some places long is used
-// even though that thing is int
-
-// todo make all bytebuffer direct
-
-// todo change all arrays.compare to db comparator
-public class DBImpl implements DB{
- private MemtableManager memtable;
- private SSTManager sstManager;
- private Table table;
- private DBOptions dbOptions;
-
- public DBImpl(DBOptions dbOptions) throws Exception {
- this.dbOptions = dbOptions;
-
- createDBFolder(dbOptions);
-
- this.table = new Table(dbOptions);
- this.sstManager = new SSTManager(dbOptions, table);
- this.memtable = new MemtableManager(dbOptions, sstManager);
- this.memtable.start(this);
- }
-
- // todo needs to be removed
- public Table getTable() {
- return table;
- }
-
- private void createDBFolder(DBOptions dbOptions) throws IOException {
- File dbFolder = new File(dbOptions.getDBfolder());
- if (!dbFolder.isDirectory()) {
- if (!dbFolder.mkdirs()) {
- throw new IOException("unable to create db folder");
- }
- }
- }
-
- @Override
- public byte[] put(byte[] key, byte[] value) throws Exception {
- return this.memtable.put(key, value);
- }
-
- @Override
- public byte[] get(byte[] key) throws Exception {
- return this.memtable.get(key);
- }
-
- @Override
- public byte[] delete(byte[] key) throws Exception {
- return this.memtable.delete(key);
- }
-
-
- @Override
- public void close() throws IOException {
- this.memtable.close();
- }
-
- @Override
- public void destroy() {
- var file = new File(this.dbOptions.getDBfolder());
- Util.requireTrue(file.exists(), "folder="+file.toPath()+" does not exist");
- Util.requireTrue(file.isDirectory(), "file="+file.toPath()+" is not a folder");
-
- for (File listFile : file.listFiles()) {
- Util.requireTrue(listFile.delete(), "unable to delete file="+listFile.toPath());
- }
- Util.requireTrue(file.delete(), "unable to delete folder="+file.toPath());
- }
-}
diff --git a/AtomDB/src/main/java/db/DBOptions.java b/AtomDB/src/main/java/db/DBOptions.java
deleted file mode 100644
index 751236a..0000000
--- a/AtomDB/src/main/java/db/DBOptions.java
+++ /dev/null
@@ -1,21 +0,0 @@
-package db;
-
-import java.io.File;
-
-public final class DBOptions {
- private final String DBfolder;
- private boolean verifyChecksum;
- private final boolean isDBNew;
- public DBOptions(String DBfolder) {
- this.DBfolder = DBfolder;
- this.isDBNew = !new File(DBfolder).isDirectory();
- }
-
- public String getDBfolder() {
- return DBfolder;
- }
-
- public boolean isNew() {
- return isDBNew;
- }
-}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Benchmark.java b/AtomDB/src/main/java/org/g2n/atomdb/Benchmark.java
new file mode 100644
index 0000000..bb76ffa
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Benchmark.java
@@ -0,0 +1,689 @@
+package org.g2n.atomdb;
+
+import org.g2n.atomdb.db.DBImpl;
+import org.g2n.atomdb.db.DbOptions;
+import org.xerial.snappy.Snappy;
+
+import java.io.*;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.*;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.Supplier;
+import java.util.stream.IntStream;
+
+import static org.g2n.atomdb.util.BytesConverter.bytes;
+
+public class Benchmark {
+
+ public static void main(String[] args) throws Exception {
+ var inputString = "qwertyuiopasdfghjklzxcvbnm<>?:}{+_)(*&^%$#@!)}1234567890`~".repeat(5);
+ System.out.println("Warm Up with 50k");
+ //searchBenchMark(500000, "benchmarkWithRandomKVBytesWithCompaction");
+ //searchBenchMark(500000, "benchmarkWithRandomKVBytesWithoutCompaction");
+ //searchBenchMark(500000, "IssueDB");
+
+// benchmark(inputString, 500000);
+// correctnessCheck(inputString, 500000);
+
+ // benchmark(inputString, 1000);
+// benchmark(inputString, 10000);
+// benchmark(inputString, 100000);
+// benchmark(inputString, 1000_000);
+// benchmarkWriting(inputString, 1000_000);
+// initialTest(inputString, 50000);
+// benchmark(inputString, 15000);
+// benchmarkWithRandomKVBytes(1000000, 50, 500); //500000
+
+ benchmarkWithRandomKVBytes(getRandomKV(1000000, () -> 50, () -> 500));
+
+// benchmarkWithRandomLengthKVBytes(1000_000);
+// benchmarkRandomRead(inputString, 1000_000, "asd"); //1000000
+ }
+
+ private static void benchmarkWithRandomLengthKVBytes(int totalEntryCount) throws Exception {
+ var dbName = "benchmarkWithRandomKVBytes";
+ var rand = new Random();
+ var map = getRandomKV(totalEntryCount, () -> rand.nextInt(50, 100), () -> rand.nextInt(50, 1000));
+
+ System.out.println("Number of threads: " + Thread.activeCount());
+ long beforeUsedMem = Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+ var opt = new DbOptions();
+ var db = new DBImpl(new File(dbName), opt);
+ long startTime , endTime, readingTime, writingTime;
+ try {
+ System.out.println("Writing... " + totalEntryCount);
+ startTime = System.nanoTime();
+ map.entrySet().forEach(each -> {
+ try {
+ db.put(each.getKey(), each.getValue());
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ });
+ endTime = System.nanoTime();
+
+ writingTime = endTime - startTime;
+
+ var list = new ArrayList<>(map.keySet());
+ Collections.shuffle(list);
+ System.out.println("Reading... ");
+ startTime = System.nanoTime();
+ list.forEach(each -> {
+ try {
+ db.get(each);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ });
+ endTime = System.nanoTime();
+
+ readingTime = endTime - startTime;
+ System.out.println("writing time=" + writingTime + " , reading time=" + readingTime);
+ long afterUsedMem=Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+ long actualMemUsed=afterUsedMem-beforeUsedMem;
+ System.out.println("memory utilised="+actualMemUsed);
+ System.out.println("Number of threads: " + Thread.activeCount());
+ } catch (Exception e) {
+ e.printStackTrace();
+ } finally {
+ db.close();
+ Files.walk(Path.of(dbName ))
+ .sorted(Comparator.reverseOrder())
+ .map(Path::toFile)
+ .forEach(File::delete);
+ }
+ }
+
+ private static void benchmarkWithRandomKVBytes(int totalEntryCount, int keyBytesLength, int valueBytesLength) throws Exception {
+ var dbName = "benchmarkWithRandomKVBytes";
+ var map = getRandomKV(totalEntryCount, () -> keyBytesLength, () -> valueBytesLength);
+
+ System.out.println("Number of threads: " + Thread.activeCount());
+ long beforeUsedMem = Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+ var opt = new DbOptions();
+ var db = new DBImpl(new File(dbName), opt);
+ long startTime , endTime, readingTime, writingTime;
+ try {
+ System.out.println("Writing... " + totalEntryCount);
+ startTime = System.nanoTime();
+ AtomicInteger i = new AtomicInteger();
+ map.entrySet().forEach(each -> {
+ try {
+ if (i.get() % 10000 == 0) {
+ System.out.println("progress="+i);
+ }
+ i.getAndIncrement();
+ db.put(each.getKey(), each.getValue());
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ });
+ endTime = System.nanoTime();
+
+ writingTime = endTime - startTime;
+
+ var list = new ArrayList<>(map.keySet());
+ Collections.shuffle(list);
+
+
+ System.out.println("Reading... ");
+ startTime = System.nanoTime();
+ list.forEach(each -> {
+ try {
+ db.get(each);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ });
+ endTime = System.nanoTime();
+
+ readingTime = endTime - startTime;
+ System.out.println("writing time=" + writingTime/1000_000_000.0 + " , reading time=" + readingTime/1000_000_000.0);
+ long afterUsedMem=Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+ long actualMemUsed=afterUsedMem-beforeUsedMem;
+ System.out.println("memory utilised="+actualMemUsed);
+ System.out.println("Number of threads: " + Thread.activeCount());
+ } catch (Exception e) {
+ e.printStackTrace();
+ } finally {
+ db.close();
+ Files.walk(Path.of(dbName ))
+ .sorted(Comparator.reverseOrder())
+ .map(Path::toFile)
+ .forEach(File::delete);
+ }
+ }
+
+ private static void benchmarkWithRandomKVBytes(Map<byte[], byte[]> map) throws Exception {
+ var dbName = "benchmarkWithRandomKVBytes";
+
+ System.out.println("Number of threads: " + Thread.activeCount());
+ long beforeUsedMem = Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+ var opt = new DbOptions();
+ var db = new DBImpl(new File(dbName), opt);
+ long startTime , endTime, readingTime, writingTime;
+ try {
+ System.out.println("Writing... " + map.size());
+ startTime = System.nanoTime();
+ AtomicInteger i = new AtomicInteger();
+ map.entrySet().forEach(each -> {
+ try {
+ if (i.get() % 10000 == 0) {
+ System.out.println("progress="+i);
+ }
+ i.getAndIncrement();
+ db.put(each.getKey(), each.getValue());
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ });
+ endTime = System.nanoTime();
+
+ writingTime = endTime - startTime;
+
+ var list = new ArrayList<>(map.keySet());
+ Collections.shuffle(list);
+
+
+ System.out.println("Reading... ");
+ startTime = System.nanoTime();
+ list.forEach(each -> {
+ try {
+ db.get(each);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ });
+ endTime = System.nanoTime();
+
+ readingTime = endTime - startTime;
+ System.out.println("writing time=" + writingTime/1000_000_000.0 + " , reading time=" + readingTime/1000_000_000.0);
+ long afterUsedMem=Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+ long actualMemUsed=afterUsedMem-beforeUsedMem;
+ System.out.println("memory utilised="+actualMemUsed);
+ System.out.println("Number of threads: " + Thread.activeCount());
+ } catch (Exception e) {
+ e.printStackTrace();
+ } finally {
+ db.close();
+ Files.walk(Path.of(dbName ))
+ .sorted(Comparator.reverseOrder())
+ .map(Path::toFile)
+ .forEach(File::delete);
+ }
+ }
+
+ private static Map<byte[], byte[]> getRandomKV(int totalEntryCount, Supplier<Integer> keyBytesLength, Supplier<Integer> valueBytesLength) {
+ // total entries
+ System.out.println("random generation");
+ var rand = new Random();
+ Map<byte[], byte[]> map = new HashMap<>(totalEntryCount);
+ for (int i = 0; i < totalEntryCount; i++) {
+ var key = new byte[keyBytesLength.get()];
+ var value = new byte[valueBytesLength.get()];
+ rand.nextBytes(key); rand.nextBytes(value);
+ map.put(key, value);
+ }
+ // end
+ return map;
+ }
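+
+ // Note: a HashMap keyed by byte[] compares keys by identity, not content, so
+ // content-duplicate random keys are not deduplicated here; the DB later sees
+ // such duplicates as overwrites and may end up with slightly fewer distinct
+ // entries than totalEntryCount.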
+
+ private static void benchmarkRandomRead(String inputString, long totalEntryCount, String dbName) throws Exception {
+ System.out.println("Number of threads: " + Thread.activeCount());
+ long beforeUsedMem = Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+ var opt = new DbOptions();
+ var db = new DBImpl(new File(dbName), opt);
+ long startTime , endTime, readingTime, writingTime;
+ try {
+ System.out.println("Writing... " + totalEntryCount);
+ startTime = System.nanoTime();
+ for (int i = 0; i < totalEntryCount; i++) {
+ db.put(bytes(i + ""), bytes(i + "_" + inputString));
+ }
+ endTime = System.nanoTime();
+
+ writingTime = endTime - startTime;
+ System.out.println("Reading... ");
+ startTime = System.nanoTime();
+ List<Integer> integers = new ArrayList<>(IntStream.range(0, (int) totalEntryCount).boxed().toList());
+ Collections.shuffle(integers);
+ Collections.shuffle(integers);
+ for (int i : integers) {
+ db.get(bytes(i + ""));
+ }
+ endTime = System.nanoTime();
+
+ readingTime = endTime - startTime;
+ System.out.println("writing time=" + writingTime + " , reading time=" + readingTime);
+ long afterUsedMem=Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+ long actualMemUsed=afterUsedMem-beforeUsedMem;
+ System.out.println("memory utilised="+actualMemUsed);
+ System.out.println("Number of threads: " + Thread.activeCount());
+ } catch (Exception e) {
+ e.printStackTrace();
+ } finally {
+ db.close();
+ Files.walk(Path.of(dbName ))
+ .sorted(Comparator.reverseOrder())
+ .map(Path::toFile)
+ .forEach(File::delete);
+ }
+ }
+
+ public static void initialTest(String inputString, long totalEntryCount) throws Exception {
+ var opt = new DbOptions();
+ var db = new DBImpl(new File(Benchmark.class.getName() + "DB"), opt);
+ var key = "somegood things";
+ System.out.println("compressed keysize around ="+Snappy.compress(bytes(key)).length);
+ System.out.println("compressed valueSize ="+Snappy.compress(bytes(inputString)).length);
+
+ try {
+ for (int i = 0; i < totalEntryCount; i++) {
+ if (i % 17 == 0 ) {
+ db.put(bytes(key + i), bytes(inputString));
+ continue;
+ }
+ db.put(bytes(i + ""), bytes(i + "_" + inputString));
+ }
+ for (int i = 0; i < totalEntryCount; i++) {
+ if (i % 17 == 0 ) {
+ byte[] bytes = db.get(bytes(key + i));
+ byte[] bytes1 = bytes(inputString);
+ if (Arrays.compare(bytes, bytes1) != 0) {
+ System.out.println("moye moye");
+ }
+ }
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ } finally {
+ db.close();
+ db.destroy();
+ }
+ }
+
+ public static void benchmarkWriting(String inputString, long totalEntryCount) throws Exception {
+ System.out.println("Number of threads: " + Thread.activeCount());
+ long beforeUsedMem = Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+ var opt = new DbOptions();
+ var db = new DBImpl(new File(Benchmark.class.getName() + "DB"), opt);
+ long startTime , endTime, readingTime, writingTime;
+ try {
+ System.out.println("Writing... " + totalEntryCount);
+ startTime = System.nanoTime();
+ for (int i = 0; i < totalEntryCount; i++) {
+ db.put(bytes(i + ""), bytes(i + "_" + inputString));
+ }
+ endTime = System.nanoTime();
+
+ writingTime = endTime - startTime;
+ System.out.println("writing time=" + writingTime);
+ long afterUsedMem=Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+ long actualMemUsed=afterUsedMem-beforeUsedMem;
+ System.out.println("memory utilised="+actualMemUsed);
+ System.out.println("Number of threads: " + Thread.activeCount());
+ } catch (Exception e) {
+ e.printStackTrace();
+ } finally {
+ db.close();
+ db.destroy();
+ }
+ }
+
+ public static void benchmark(String inputString, long totalEntryCount) throws Exception {
+ System.out.println("Number of threads: " + Thread.activeCount());
+ long beforeUsedMem = Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+ var opt = new DbOptions();
+ var db = new DBImpl(new File(Benchmark.class.getName() + "DB"), opt);
+ long startTime , endTime, readingTime, writingTime;
+ try {
+ System.out.println("Writing... " + totalEntryCount);
+ startTime = System.nanoTime();
+ for (int i = 0; i < totalEntryCount; i++) {
+ db.put(bytes(i + ""), bytes(i + "_" + inputString));
+ }
+ endTime = System.nanoTime();
+
+ writingTime = endTime - startTime;
+ System.out.println("Writing ="+ writingTime);
+ System.out.println("Reading... ");
+ startTime = System.nanoTime();
+ for (int i = 0; i < totalEntryCount; i++) {
+ System.out.println("reading="+i);
+ db.get(bytes(i + ""));
+ }
+ endTime = System.nanoTime();
+
+ readingTime = endTime - startTime;
+ System.out.println("writing time=" + writingTime + " , reading time=" + readingTime);
+ long afterUsedMem=Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+ long actualMemUsed=afterUsedMem-beforeUsedMem;
+ System.out.println("memory utilised="+actualMemUsed);
+ System.out.println("Number of threads: " + Thread.activeCount());
+ } catch (Exception e) {
+ e.printStackTrace();
+ } finally {
+ db.close();
+ Files.walk(Path.of(Benchmark.class.getName() + "DB" ))
+ .sorted(Comparator.reverseOrder())
+ .map(Path::toFile)
+ .forEach(File::delete);
+ }
+ }
+
+ public static void correctnessCheck(String inputString, long totalEntryCount) throws Exception {
+ System.out.println("Number of threads: " + Thread.activeCount());
+ long beforeUsedMem = Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+ var opt = new DbOptions();
+ var db = new DBImpl(new File(Benchmark.class.getName() + "DB"), opt);
+ long startTime , endTime, readingTime, writingTime;
+ try {
+ System.out.println("Writing... " + totalEntryCount);
+ startTime = System.nanoTime();
+ for (int i = 0; i < totalEntryCount; i++) {
+ db.put(bytes(i + ""), bytes(inputString));
+ }
+ endTime = System.nanoTime();
+
+ writingTime = endTime - startTime;
+ System.out.println("Writing ="+ writingTime);
+ System.out.println("Reading... ");
+ startTime = System.nanoTime();
+ for (int i = 0; i < totalEntryCount; i++) {
+ System.out.println("reading="+i);
+ byte[] bytes = db.get(bytes(i + ""));
+ if (bytes == null) {
+ throw new RuntimeException("value is null for key+"+i);
+ }
+ if (Arrays.compare(bytes, bytes(inputString)) != 0) {
+ throw new RuntimeException("value is not same for key+"+i);
+ }
+ }
+ endTime = System.nanoTime();
+
+ readingTime = endTime - startTime;
+ System.out.println("writing time=" + writingTime + " , reading time=" + readingTime);
+ long afterUsedMem=Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+ long actualMemUsed=afterUsedMem-beforeUsedMem;
+ System.out.println("memory utilised="+actualMemUsed);
+ System.out.println("Number of threads: " + Thread.activeCount());
+ } catch (Exception e) {
+ e.printStackTrace();
+ } finally {
+ db.close();
+ Files.walk(Path.of(Benchmark.class.getName() + "DB" ))
+ .sorted(Comparator.reverseOrder())
+ .map(Path::toFile)
+ .forEach(File::delete);
+ }
+ }
+
+ public static void searchBenchMark(long totalEntryCount, String DBName) throws Exception {
+ System.out.println("Number of threads: " + Thread.activeCount());
+ long beforeUsedMem = Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+ var opt = new DbOptions();
+ var db = new DBImpl(new File(DBName), opt);
+ long startTime , endTime, readingTime, writingTime;
+ try {
+ System.out.println("Reading... ");
+ startTime = System.nanoTime();
+ for (int i = 0; i < totalEntryCount; i++) {
+ db.get(bytes(i + ""));
+ }
+ endTime = System.nanoTime();
+
+ readingTime = endTime - startTime;
+ System.out.println("reading time=" + (readingTime/1000_000_000.0));
+ long afterUsedMem=Runtime.getRuntime().totalMemory()-Runtime.getRuntime().freeMemory();
+ long actualMemUsed=afterUsedMem-beforeUsedMem;
+ System.out.println("memory utilised="+actualMemUsed);
+ System.out.println("Number of threads: " + Thread.activeCount());
+ } catch (Exception e) {
+ e.printStackTrace();
+ } finally {
+ db.close();
+ //org.g2n.atomdb.db.destroy();
+ }
+ }
+}
+
+// firefly
+//Warm Up with 50k
+//Writing... 50000
+//Reading...
+//writing time=1199177900 , reading time=1393644400
+//Writing... 1000
+//Reading...
+//writing time=20145900 , reading time=35852900
+//Writing... 10000
+//Reading...
+//writing time=256371600 , reading time=234508600
+//Writing... 100000
+//Reading...
+//writing time=1272501300 , reading time=2226460200
+//Writing... 1000000
+//Reading...
+//writing time=15237828200 , reading time=22578177500
+// 15 and 22
+
+//leveldb
+//Warm Up with 50k
+//Writing... 50000
+//Reading...
+//writing time=477759600 , reading time=152400700
+//Writing... 1000
+//Reading...
+//writing time=9359800 , reading time=1390100
+//Writing... 10000
+//Reading...
+//writing time=79710100 , reading time=14241500
+//Writing... 100000
+//Reading...
+//writing time=780837900 , reading time=276792700
+//Writing... 1000000
+//Reading...
+//writing time=10130438600 , reading time=2697346200
+// 10 sec and 2.69 sec
+
+//atomDB
+//Warm Up with 50k
+//Writing... 50000
+//Reading...
+//writing time=961983000 , reading time=7546111700
+//Writing... 1000
+//Reading...
+//writing time=4872700 , reading time=414700
+//Writing... 10000
+//Reading...
+//writing time=44763700 , reading time=4071100
+//Writing... 10000_0
+//Reading...
+//writing time=3879929800 , reading time=18500214900
+//Writing... 10000_00
+//Reading...
+//writing time=95125721700 , reading time=204819288000
+//95 sec and 3.4 minutes
+
+//writing time=75009226700 , reading time=159444240000
+// 75 sec and 2.65 minutes with snappy
+
+// levelDB writing 10.13 sec and 2.69 sec reading
+// 15 and 22 firefly
+
+//writing time=93463754000 , reading time=167366800200 without snappy
+// 93 sec and 167
+//writing time=54990968100 , reading time=162650142200 with snappy
+//54 sec and 162 secs
+
+// levelDB writing 10.13 sec and 2.69 sec reading
+// 15 and 22 firefly
+
+/**
+ * Optimized champion: without compaction, with a 0.8-sparse binary search,
+ * i.e. we keep roughly 2^(log(n) * 0.8) keys in memory.
+ * Writing takes ~5 sec for all cases, reading ~2 sec, and random reading ~23 sec.
+ * We need to work on random searches, as that is the real-world scenario.
+ * Warm Up with 50k
+ * Number of threads: 2
+ * Writing... 500000
+ * Writing =3381345200
+ * Reading...
+ * writing time=3381345200 , reading time=1014120400
+ * memory utilised=181201096
+ * Number of threads: 2
+ * Number of threads: 2
+ * Writing... 1000
+ * Writing =19922100
+ * Reading...
+ * writing time=19922100 , reading time=751600
+ * memory utilised=8125712
+ * Number of threads: 2
+ * Number of threads: 2
+ * Writing... 10000
+ * Writing =48545900
+ * Reading...
+ * writing time=48545900 , reading time=7901400
+ * memory utilised=25168168
+ * Number of threads: 2
+ * Number of threads: 2
+ * Writing... 100000
+ * Writing =776112400
+ * Reading...
+ * writing time=776112400 , reading time=133118300
+ * memory utilised=36181552
+ * Number of threads: 2
+ * Number of threads: 2
+ * Writing... 1000000
+ * Writing =5863752200
+ * Reading...
+ * writing time=5863752200 , reading time=2089110800
+ * memory utilised=415614712
+ * Number of threads: 2
+ * Number of threads: 2
+ * Writing... 1000000
+ * Reading...
+ * writing time=5419208300 , reading time=23297950100
+ * memory utilised=196422816
+ * Number of threads: 2
+ */
+
+
+/**
+ * Optimized branch, with the full key -> pointer map in memory. Reads take less than 2 sec, and less than 20 sec for random reads.
+ * Warm Up with 50k
+ * Number of threads: 2
+ * Writing... 500000
+ * Writing =3738520700
+ * Reading...
+ * writing time=3738520700 , reading time=856879800
+ * memory utilised=270531856
+ * Number of threads: 2
+ * Number of threads: 2
+ * Writing... 1000
+ * Writing =5127700
+ * Reading...
+ * writing time=5127700 , reading time=705500
+ * memory utilised=7937344
+ * Number of threads: 2
+ * Number of threads: 2
+ * Writing... 10000
+ * Writing =48499300
+ * Reading...
+ * writing time=48499300 , reading time=7948100
+ * memory utilised=25168168
+ * Number of threads: 2
+ * Number of threads: 2
+ * Writing... 100000
+ * Writing =604777900
+ * Reading...
+ * writing time=604777900 , reading time=152224100
+ * memory utilised=106527624
+ * Number of threads: 2
+ * Number of threads: 2
+ * Writing... 1000000
+ * Writing =5834791600
+ * Reading...
+ * writing time=5834791600 , reading time=1988543400
+ * memory utilised=265655992
+ * Number of threads: 2
+ * Number of threads: 2
+ * Writing... 1000000
+ * Reading...
+ * writing time=5660492600 , reading time=19979041500
+ * memory utilised=321943400
+ * Number of threads: 2
+ */
+
+
+/**
+ * Important
+ * benchmarkWithRandomKVBytes(DBProvider.get(DB.LEVELDB_NATIVE),1000000, 50, 500);
+ * writing time=93.0521691 , reading time=16.5716184
+ * memory utilised=708003848
+ * Number of threads: 4
+ *
+ * benchmarkWithRandomKVBytes(DBProvider.get(DB.LEVELDB),1000000, 50, 500);
+ * memory utilised=676719848
+ * writing time=75.4093294 , reading time=22.438489
+ * memory utilised=676719848
+ * Number of threads: 2
+ *
+ * benchmarkWithRandomKVBytes(DBProvider.get(DB.FIREFLYDB),1000000, 50, 500);
+ * writing time=14.2588117 , reading time=32.790273
+ * memory utilised=496074880
+ * Number of threads: 2
+ *
+ * ATOMDB
+ * writing time=17.8601132 , reading time=22.0283714
+ * memory utilised=889973640
+ * Number of threads: 7
+ * numberOfFilesRequiredToSearch=1 numberOfTimesThisHappened=985518
+ * numberOfFilesRequiredToSearch=2 numberOfTimesThisHappened=9936
+ *
+ writing time=25.6880718 , reading time=6.0737237
+ memory utilised=634179168
+ Number of threads: 10
+ numberOfFilesRequiredToSearch=1 numberOfTimesThisHappened=966622
+ numberOfFilesRequiredToSearch=2 numberOfTimesThisHappened=28525
+ numberOfFilesRequiredToSearch=3 numberOfTimesThisHappened=303
+ numberOfFilesRequiredToSearch=4 numberOfTimesThisHappened=4
+
+ writing time=22.1107311 , reading time=6.3042568
+ memory utilised=311031168
+ Number of threads: 8
+ numberOfFilesRequiredToSearch=1 numberOfTimesThisHappened=975473
+ numberOfFilesRequiredToSearch=2 numberOfTimesThisHappened=19890
+ numberOfFilesRequiredToSearch=3 numberOfTimesThisHappened=91
+
+ writing time=16.721739 , reading time=6.9919566 (last)
+ memory utilised=777919944
+ Number of threads: 9
+ numberOfFilesRequiredToSearch=1 numberOfTimesThisHappened=956064
+ numberOfFilesRequiredToSearch=2 numberOfTimesThisHappened=38839
+ numberOfFilesRequiredToSearch=3 numberOfTimesThisHappened=543
+ numberOfFilesRequiredToSearch=4 numberOfTimesThisHappened=8
+
+ writing time=19.1802351 , reading time=6.2431539 (Executors service)
+ memory utilised=182085760
+ Number of threads: 9
+ numberOfFilesRequiredToSearch=1 numberOfTimesThisHappened=975473
+ numberOfFilesRequiredToSearch=2 numberOfTimesThisHappened=19890
+ numberOfFilesRequiredToSearch=3 numberOfTimesThisHappened=91
+
+ writing time=19.640476 , reading time=5.8344966(Inline completableFuture)
+ memory utilised=631710240
+ Number of threads: 9
+ numberOfFilesRequiredToSearch=1 numberOfTimesThisHappened=975473
+ numberOfFilesRequiredToSearch=2 numberOfTimesThisHappened=19890
+ numberOfFilesRequiredToSearch=3 numberOfTimesThisHappened=91
+
+ writing time=17.7529537 , reading time=6.2738548
+ memory utilised=586745584
+ Number of threads: 7
+ numberOfFilesRequiredToSearch=1 numberOfTimesThisHappened=956467
+ numberOfFilesRequiredToSearch=2 numberOfTimesThisHappened=38370
+ numberOfFilesRequiredToSearch=3 numberOfTimesThisHappened=608
+ numberOfFilesRequiredToSearch=4 numberOfTimesThisHappened=8
+ numberOfFilesRequiredToSearch=5 numberOfTimesThisHappened=1
+ */
\ No newline at end of file
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Checksum/AtomChecksum.java b/AtomDB/src/main/java/org/g2n/atomdb/Checksum/AtomChecksum.java
new file mode 100644
index 0000000..8a722b6
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Checksum/AtomChecksum.java
@@ -0,0 +1,8 @@
+package org.g2n.atomdb.Checksum;
+
+public interface AtomChecksum {
+
+ long compute(byte[] arr);
+
+ long compute(byte[] key, byte[] value);
+}
diff --git a/AtomDB/src/main/java/Checksum/CheckSum.java b/AtomDB/src/main/java/org/g2n/atomdb/Checksum/CheckSumStatic.java
similarity index 88%
rename from AtomDB/src/main/java/Checksum/CheckSum.java
rename to AtomDB/src/main/java/org/g2n/atomdb/Checksum/CheckSumStatic.java
index fe01025..d12ef21 100644
--- a/AtomDB/src/main/java/Checksum/CheckSum.java
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Checksum/CheckSumStatic.java
@@ -1,13 +1,13 @@
-package Checksum;
+package org.g2n.atomdb.Checksum;
import org.xerial.snappy.PureJavaCrc32C;
import java.time.Instant;
-import static util.BytesConverter.bytes;
+import static org.g2n.atomdb.util.BytesConverter.bytes;
// todo synchronize
-public class CheckSum {
+public class CheckSumStatic {
private static PureJavaCrc32C pureJavaCrc32C = new PureJavaCrc32C();
// private static Object obj = new Object();
@@ -28,6 +28,7 @@ public static long logBlock(Instant time, byte[] operation,
pureJavaCrc32C.reset();
//Instant time storage
+ //todo directly take bytes instead of instant object
pureJavaCrc32C.update(bytes(time.getEpochSecond()));
pureJavaCrc32C.update(time.getNano());
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Checksum/Crc32cChecksum.java b/AtomDB/src/main/java/org/g2n/atomdb/Checksum/Crc32cChecksum.java
new file mode 100644
index 0000000..c613bf3
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Checksum/Crc32cChecksum.java
@@ -0,0 +1,46 @@
+package org.g2n.atomdb.Checksum;
+
+import java.nio.ByteBuffer;
+import java.util.zip.CRC32C;
+import java.util.zip.Checksum;
+
+
+public class Crc32cChecksum implements AtomChecksum {
+ private static final ThreadLocal<CRC32C> crc32cThreadLocal = ThreadLocal.withInitial(CRC32C::new);
+ private static final int BUFFER_CAPACITY = 1024;
+
+ private static final ThreadLocal<ByteBuffer> byteBufferThreadLocal = ThreadLocal.withInitial(() ->
+ ByteBuffer.allocateDirect(BUFFER_CAPACITY)
+ );
+
+
+ @Override
+ public long compute(byte[] arr) {
+ var crc32c = crc32cThreadLocal.get();
+ ByteBuffer buffer = prepareBuffer(arr);
+ crc32c.reset();
+ crc32c.update(buffer);
+ return crc32c.getValue();
+ }
+
+ @Override
+ public long compute(byte[] key, byte[] value) {
+ var crc32c = crc32cThreadLocal.get();
+ crc32c.reset();
+ crc32c.update(prepareBuffer(key));
+ crc32c.update(prepareBuffer(value));
+ return crc32c.getValue();
+ }
+
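+ // Reuses a thread-local direct buffer to avoid a fresh allocation per call; if
+ // the input outgrows the buffer, the larger replacement is cached for later calls.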
+ private ByteBuffer prepareBuffer(byte[] arr) {
+ ByteBuffer buffer = byteBufferThreadLocal.get();
+ buffer.clear();
+ if (arr.length > buffer.capacity()) {
+ buffer = ByteBuffer.allocateDirect(arr.length);
+ byteBufferThreadLocal.set(buffer);
+ }
+ buffer.put(arr);
+ buffer.flip();
+ return buffer;
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Compaction/Compactor.java b/AtomDB/src/main/java/org/g2n/atomdb/Compaction/Compactor.java
new file mode 100644
index 0000000..add7337
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Compaction/Compactor.java
@@ -0,0 +1,327 @@
+package org.g2n.atomdb.Compaction;
+
+import org.g2n.atomdb.Level.Level;
+import org.g2n.atomdb.Mem.ImmutableMem;
+import org.g2n.atomdb.Table.Table;
+import org.g2n.atomdb.Table.SSTInfo;
+import org.g2n.atomdb.db.DBComparator;
+import org.g2n.atomdb.db.DbOptions;
+import org.g2n.atomdb.db.KVUnit;
+import org.g2n.atomdb.sstIo.SSTKeyRange;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.*;
+import java.util.concurrent.*;
+
+/**
+ * 3. Choose the one which has many deleted entries. (We can count the number of deleted entries in an org.g2n.atomdb.sst and store it in the header.)
+ * 4. We can keep a hit count per org.g2n.atomdb.sst, which tells us how optimized the org.g2n.atomdb.sst is. If it has many successful hits we might not consider
+ * the file for compaction, and instead choose the one with fewer successful hits. (A successful hit means finding and returning data.)
+ */
+public class Compactor implements AutoCloseable {
+ private final Table table;
+ private final DbOptions dbOptions;
+ private final SSTPersist sstPersist;
+ private final Set<Level> ongoingCompactions = ConcurrentHashMap.newKeySet();
+ private final ExecutorService executors = Executors.newCachedThreadPool();
+ private static final Logger logger = LoggerFactory.getLogger(Compactor.class.getName());
+
+ public Compactor(Table table, DbOptions dbOptions) {
+ this.table = table;
+ this.dbOptions = dbOptions;
+ this.sstPersist = new SSTPersist(table);
+ }
+
+ public void persistLevel0(ImmutableMem memtable) throws IOException {
+ sstPersist.writeSingleFile(Level.LEVEL_ZERO, memtable.getNumberOfEntries(), memtable.getKeySetIterator());
+ }
+
+ /**
+ *
+ * TODO:
+ * Compaction needs more thought.
+ * We have to make sure that we always compact the old files first, and otherwise pick only files that don't overlap with older files in the same level.
+ *
+ * Consider that level 1 has two files named A and B, where B is the new file and A is the old one.
+ * B has the keys 23 and 29 in it.
+ * A has the key 23 in it.
+ * Suppose we choose files containing 29 to compact, so we search for files containing 29 and compact them to level 2.
+ * Now the issue starts: when we look up key 23 we first check level 1, and since file A contains 23 we return that value.
+ * But the newest value for 23 was in B, which has now been compacted to level 2.
+ *
+ * So we have to make sure that all the old files are compacted first, and if we can't compact the old files, we should choose files that do not overlap
+ * with the old files.
+ */
+
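+ // A finished compaction at level L re-checks L+1 (see the CompletableFuture
+ // chain below), so one trigger can cascade until each level is within its size limit.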
+ public synchronized void tryCompaction(Level level) {
+ if (shouldSkipCompaction(level)) {
+ return;
+ }
+ Collection<SSTInfo> overlapping = getOverlappingSSTs(level);
+
+ if (overlapping.size() <= 1) {
+ System.out.println("WOW".repeat(100));
+ return;
+ }
+ CompletableFuture.supplyAsync(() -> performCompaction(level, overlapping), executors).thenApply(returnedLevel -> {
+ if (Level.LEVEL_SEVEN != returnedLevel) {
+ tryCompaction(returnedLevel.nextLevel());
+ }
+ return CompletableFuture.completedFuture(null);
+ });
+ }
+
+ private Collection<SSTInfo> alternative3(Level level) {
+ System.out.println("Alternative 3");
+ SortedSet<SSTInfo> currentLevelSSTSet = table.getSSTInfoSet(level);
+ SortedSet<SSTInfo> nextLevelSSTSet = table.getSSTInfoSet(level.nextLevel());
+
+ var set = new TreeSet<SSTInfo>();
+ for (SSTInfo sst1: currentLevelSSTSet) {
+ for (SSTInfo sst2: currentLevelSSTSet) {
+ if (sst1.equals(sst2)) continue;
+ int overlap = calculateOverlap(sst1.getSstKeyRange(), sst2.getSstKeyRange());
+ if (overlap > 0) {
+ set.add(sst1);
+ set.add(sst2);
+ }
+ }
+ }
+ var finalSet = new TreeSet<SSTInfo>();
+ for (SSTInfo currLevelSST : set) {
+ for (SSTInfo nextLevelSST : nextLevelSSTSet) {
+ int overlap = calculateMaxNumOfKeysOverlap(currLevelSST.getSstKeyRange(), nextLevelSST.getSstKeyRange());
+ if (overlap > 0) {
+ finalSet.add(nextLevelSST);
+ }
+ }
+ }
+ set.addAll(finalSet);
+ System.out.println("number of selected files=" + set.size());
+ return set;
+ }
+
+ private Collection<SSTInfo> alternative2(Level level) {
+ System.out.println("Alternative 2");
+ SortedSet<SSTInfo> currentLevelSSTSet = table.getSSTInfoSet(level);
+ SortedSet<SSTInfo> nextLevelSSTSet = table.getSSTInfoSet(level.nextLevel());
+
+
+ for (SSTInfo currLevelSST : currentLevelSSTSet.reversed()) {
+ var map = new HashMap<SSTInfo, Integer>();
+ for (SSTInfo nextLevelSST : nextLevelSSTSet) {
+ int overlap = calculateMaxNumOfKeysOverlap(currLevelSST.getSstKeyRange(), nextLevelSST.getSstKeyRange());
+ if (overlap > 0) {
+// finalSet.add(nextLevelSST);
+ map.put(nextLevelSST, overlap);
+ }
+ }
+ if (!map.isEmpty()) {
+ int avg = map.values().stream().mapToInt(each -> each).sum() / map.size();
+ var myset = new TreeSet<SSTInfo>();
+ myset.add(currLevelSST);
+ map.entrySet().stream().filter(each -> each.getValue() >= avg).forEach(each -> myset.add(each.getKey()));
+ return myset;
+ }
+ }
+
+ var finalSet = new TreeSet<SSTInfo>();
+ for (SSTInfo currLevelSST : currentLevelSSTSet) {
+ for (SSTInfo nextLevelSST : currentLevelSSTSet) {
+ if (currLevelSST.equals(nextLevelSST)) continue;
+ int overlap = calculateMaxNumOfKeysOverlap(currLevelSST.getSstKeyRange(), nextLevelSST.getSstKeyRange());
+ if (overlap > 0) {
+ finalSet.add(nextLevelSST);
+ }
+ }
+ }
+
+ return finalSet;
+ }
+
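+ // Scores how strongly two key ranges overlap, byte-wise: over the overlapping
+ // interval [left, right] it weights the first differing byte (roughly 1..256,
+ // since bytes are signed) plus 256 per trailing byte beyond the shared length.
+ // Rough illustration: left={10, 20}, right={10, 50} differs at index 1 and
+ // scores 128 - min(20, 50) = 108.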
+ private int calculateMaxNumOfKeysOverlap(SSTKeyRange r1, SSTKeyRange r2) {
+ Comparator<byte[]> comparator = DBComparator.byteArrayComparator;
+ if (comparator.compare(r1.end(), r2.start()) <= 0 || comparator.compare(r2.end(), r1.start()) <= 0) {
+ return 0; // No overlap
+ }
+
+ byte[] left = (comparator.compare(r1.start(), r2.start()) > 0) ? r1.start() : r2.start();
+ byte[] right = (comparator.compare(r1.end(), r2.end()) < 0) ? r1.end() : r2.end();
+
+ int keysOverlap = 0;
+ int minLength = Math.min(left.length, right.length);
+ for (int i = 0; i < minLength; i++) {
+ int result = left[i] - right[i];
+ if (result != 0) {
+ keysOverlap += 128 + ( -1 * Math.min(left[i], right[i]));
+ break;
+ }
+ }
+ if (minLength < left.length) {
+ keysOverlap += (left.length - minLength) * 256; // weight extra trailing bytes positively
+ }
+ if (minLength < right.length) {
+ keysOverlap += (right.length - minLength) * 256;
+ }
+ return keysOverlap;
+ }
+
+
+ private Collection<SSTInfo> getOverlappingSSTsAlternative(Level level) {
+ System.out.println("Finding overlapping files for level " + level);
+ SortedSet<SSTInfo> currentLevelSSTSet = table.getSSTInfoSet(level);
+ SortedSet<SSTInfo> nextLevelSSTSet = table.getSSTInfoSet(level.nextLevel());
+
+ var map = new HashMap<Map.Entry<SSTInfo, SSTInfo>, Integer>();
+ var sum = 0;
+ var count = 0;
+ for (SSTInfo currLevelSST : currentLevelSSTSet) {
+ for (SSTInfo nextLevelSST : nextLevelSSTSet) {
+ int overlap = calculateOverlap(currLevelSST.getSstKeyRange(), nextLevelSST.getSstKeyRange());
+ if (overlap > 0) {
+ map.put(Map.entry(currLevelSST, nextLevelSST), overlap);
+ sum = sum + overlap;
+ count++;
+ }
+ }
+ }
+
+ for (SSTInfo sst1: currentLevelSSTSet) {
+ for (SSTInfo sst2: currentLevelSSTSet) {
+ if (sst1.equals(sst2)) continue;
+ int overlap = calculateOverlap(sst1.getSstKeyRange(), sst2.getSstKeyRange());
+ if (overlap < 0) System.exit(123123);
+ if (overlap > 0) {
+ map.put(Map.entry(sst1, sst2), overlap);
+ sum = sum + overlap;
+ count++;
+ }
+ }
+ }
+
+
+ var avg = sum / count;
+ var set = new TreeSet<SSTInfo>();
+ for (Map.Entry<Map.Entry<SSTInfo, SSTInfo>, Integer> entryIntegerEntry : map.entrySet()) {
+ if (entryIntegerEntry.getValue() >= avg) {
+ set.add(entryIntegerEntry.getKey().getKey());
+ set.add(entryIntegerEntry.getKey().getValue());
+ }
+ }
+
+ return set;
+ }
+
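+ // Here "overlap" is the comparator distance between the interval endpoints
+ // (first differing byte, else length difference), i.e. a crude lexicographic
+ // magnitude rather than a key count; e.g. start="b", end="d" scores |'d' - 'b'| = 2.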
+ private static int calculateOverlap(SSTKeyRange r1, SSTKeyRange r2) {
+ // Check if ranges overlap
+ Comparator<byte[]> comparator = DBComparator.byteArrayComparator;
+ if (comparator.compare(r1.end(), r2.start()) <= 0 || comparator.compare(r2.end(), r1.start()) <= 0) {
+ return 0; // No overlap
+ }
+
+ // Calculate overlap
+ byte[] overlapStart = (comparator.compare(r1.start(), r2.start()) > 0) ? r1.start() : r2.start();
+ byte[] overlapEnd = (comparator.compare(r1.end(), r2.end()) < 0) ? r1.end() : r2.end();
+
+ return Math.abs(comparator.compare(overlapEnd, overlapStart));
+ }
+
+ private Collection<SSTInfo> getOverlappingSSTs(Level level) {
+ SortedSet<SSTInfo> currentLevelSSTSet = table.getSSTInfoSet(level);
+ SortedSet<SSTInfo> nextLevelSSTSet = table.getSSTInfoSet(level.nextLevel());
+
+ byte[] lastCompactedKey = table.getLastCompactedKey(level);
+
+ Collection<SSTInfo> overlapping = (lastCompactedKey != null)
+ ? findOverlapping(lastCompactedKey, currentLevelSSTSet, nextLevelSSTSet)
+ : Collections.emptyList();
+
+ if (overlapping.size() > 1) {
+ System.out.println("we took A, size="+overlapping.size());
+ return overlapping;
+ }
+ Collection<SSTInfo> overlapsByBoundaryKeys = findOverlapsByBoundaryKeys(currentLevelSSTSet, nextLevelSSTSet);
+ System.out.println("we took B, size="+overlapsByBoundaryKeys.size());
+ return overlapsByBoundaryKeys;
+ }
+
+ private Collection<SSTInfo> findOverlapsByBoundaryKeys(SortedSet<SSTInfo> currentLevelSet, SortedSet<SSTInfo> nextLevelSet) {
+ for (Iterator<SSTInfo> it = ((TreeSet<SSTInfo>) currentLevelSet).descendingIterator(); it.hasNext(); ) {
+ SSTInfo sstInfo = it.next();
+ byte[] greatestKey = sstInfo.getSstKeyRange().getGreatest();
+ byte[] smallestKey = sstInfo.getSstKeyRange().getSmallest();
+
+ var overlapping = findOverlapping(greatestKey, currentLevelSet, nextLevelSet);
+ if (overlapping.size() > 1) {
+ return overlapping;
+ }
+
+ overlapping = findOverlapping(smallestKey, currentLevelSet, nextLevelSet);
+ if (overlapping.size() > 1) {
+ return overlapping;
+ }
+ }
+ return Collections.emptyList();
+ }
+
+ private Collection<SSTInfo> findOverlapping(byte[] common, SortedSet<SSTInfo> currentLevelSet, SortedSet<SSTInfo> nextLevelSet) {
+ if (common == null) {
+ return Collections.emptyList();
+ }
+ Collection<SSTInfo> overlappingFiles = new ArrayList<>();
+ addFilesContainingKey(overlappingFiles, common, nextLevelSet);
+ addFilesContainingKey(overlappingFiles, common, currentLevelSet);
+ return overlappingFiles;
+ }
+
+ private void addFilesContainingKey(Collection<SSTInfo> collection, byte[] key, SortedSet<SSTInfo> levelFileSet) {
+ levelFileSet.stream()
+ .filter(sstInfo -> sstInfo.getSstKeyRange().inRange(key))
+ .forEach(collection::add);
+ }
+
+ private boolean shouldSkipCompaction(Level level) {
+ int size = table.getCurrentLevelSize(level);
+ return size <= level.limitingSize() ||
+ ongoingCompactions.contains(level) ||
+ ongoingCompactions.contains(level.nextLevel());
+ }
+
+ private Level performCompaction(Level level, Collection<SSTInfo> overlappingFiles) {
+ ongoingCompactions.add(level);
+ long start = System.nanoTime();
+ System.out.println(level + " org.g2n.atomdb.Compaction Started " + Thread.currentThread().getName());
+ try {
+ var iterator = new MergedClusterIterator(Collections.unmodifiableCollection(overlappingFiles));
+ sstPersist.writeManyFiles(level.nextLevel(), iterator, getAverageNumOfEntriesInSST(overlappingFiles));
+ overlappingFiles.forEach(table::removeSST);
+ } catch (Exception e) {
+ logger.error("Error during compaction for level {}: {}", level, e.getMessage());
+ e.printStackTrace();
+ System.exit(123);
+// throw new RuntimeException(e);
+// return level;
+ } finally {
+ ongoingCompactions.remove(level);
+ }
+ System.out.println(level + " org.g2n.atomdb.Compaction Ended " + Thread.currentThread().getName() + " took=" + (System.nanoTime() - start) / 1_000_000_000.0 + " Seconds");
+ return level;
+ }
+
+ private int getAverageNumOfEntriesInSST(Collection<SSTInfo> overlappingFiles) {
+ return (int) overlappingFiles.stream()
+ .map(SSTInfo::getNumberOfEntries)
+ .mapToInt(Integer::intValue)
+ .average()
+ .orElseThrow(() -> new IllegalStateException("Unexpected error during average calculation."));
+ }
+
+
+ @Override
+ public void close() throws Exception {
+ executors.close(); // close() already initiates an orderly shutdown and waits for termination
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Compaction/IndexedCluster.java b/AtomDB/src/main/java/org/g2n/atomdb/Compaction/IndexedCluster.java
new file mode 100644
index 0000000..9f18e09
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Compaction/IndexedCluster.java
@@ -0,0 +1,235 @@
+package org.g2n.atomdb.Compaction;
+
+import org.g2n.atomdb.Compression.DataCompressionStrategy;
+import org.g2n.atomdb.Compression.Lz4Compression;
+import org.g2n.atomdb.db.DBComparator;
+import org.g2n.atomdb.db.ExpandingByteBuffer;
+import org.g2n.atomdb.db.KVUnit;
+import org.g2n.atomdb.sstIo.MMappedReader;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Queue;
+import java.util.zip.CRC32C;
+
+public class IndexedCluster {
+ private static final int NOT_CALCULATED_YET = -1;
+ private final int MAX_NUM_OF_ENTRIES_IN_CLUSTER;
+ private final CRC32C checksum;
+ private final DataCompressionStrategy compression;
+ private int totalKVSize = 0;
+ private int commonPrefix;
+ private final List<KVUnit> entries;
+ private static final long DUMMY_CHECKSUM = Long.MIN_VALUE;
+ public static final int DUMMY_LOCATION = Integer.MIN_VALUE;
+
+ public IndexedCluster(int clusterSize) {
+ this.MAX_NUM_OF_ENTRIES_IN_CLUSTER = clusterSize;
+ this.entries = new ArrayList<>(clusterSize);
+ this.commonPrefix = NOT_CALCULATED_YET;
+ this.checksum = new CRC32C(); // todo using naked crc32c
+ this.compression = Lz4Compression.getInstance();
+ }
+
+ public void add(KVUnit kv) {
+ if (entries.size() >= MAX_NUM_OF_ENTRIES_IN_CLUSTER) {
+ throw new IllegalStateException("IndexedCluster is full");
+ }
+
+ if (!entries.isEmpty() && DBComparator.byteArrayComparator.compare(kv.getKey(), entries.getLast().getKey()) <= 0) {
+ throw new IllegalArgumentException("Got a key that is not greater than the last key");
+ }
+
+ entries.add(kv);
+ totalKVSize += kv.getUnitSize();
+ calculateCommonPrefix(kv.getKey());
+ }
+
+ public List<KVUnit> getEntries() {
+ return Collections.unmodifiableList(entries);
+ }
+
+ private void calculateCommonPrefix(byte[] key) {
+ if (commonPrefix == NOT_CALCULATED_YET) {
+ commonPrefix = key.length;
+ } else {
+ int newPrefixLength = 0;
+ var first = entries.getFirst().getKey();
+ int minLength = Math.min(commonPrefix, key.length);
+ for (int i = 0; i < minLength; i++) {
+ if (first[i] != key[i]) {
+ break; // Stop when a difference is found
+ }
+ newPrefixLength++;
+ }
+ commonPrefix = newPrefixLength;
+ }
+ }
+
+ public byte[] getFirstKey() {
+ return entries.getFirst().getKey();
+ }
+
+ public byte[] getLastKey() {
+ return entries.getLast().getKey();
+ }
+
+ // On-disk cluster layout, for n = MAX_NUM_OF_ENTRIES_IN_CLUSTER:
+ // n long checksums + (n + 1) int block locations + 1 int commonPrefix + up to n compressed KV blocks
+ // (e.g. n = 10: 10 checksums, 11 locations, 1 commonPrefix, 10 KV blocks)
+ public void storeAsBytes(ExpandingByteBuffer writer) throws IOException {
+ if (entries.isEmpty()) {
+ throw new RuntimeException("Indexed Cluster entries are empty");
+ }
+ List<Long> checksums = new ArrayList<>(MAX_NUM_OF_ENTRIES_IN_CLUSTER);
+ List<byte[]> kvs = new ArrayList<>(MAX_NUM_OF_ENTRIES_IN_CLUSTER);
+ List<Integer> locations = new ArrayList<>(MAX_NUM_OF_ENTRIES_IN_CLUSTER);
+
+ int position = 0;
+ for (KVUnit entry : entries) {
+ checksums.add(getChecksum(entry.getKey()));
+ locations.add(position);
+
+ var block = createBlock(entry);
+ byte[] compressed = compression.compress(block.array());
+
+ kvs.add(compressed);
+
+ position += compressed.length;
+ }
+ locations.add(position); // will be used to get the last block data.
+
+ if (checksums.size() != MAX_NUM_OF_ENTRIES_IN_CLUSTER) {
+ fillDummyData(checksums, locations);
+ }
+ checksums.forEach(writer::putLong);
+ locations.forEach(writer::putInt);
+ writer.putInt(commonPrefix);
+ kvs.forEach(writer::put);
+ }
+
+ private ByteBuffer createBlock(KVUnit entry) {
+ byte[] key = entry.getKey();
+ byte[] value = entry.getValue();
+ byte isDelete = entry.getDeletedStatus().value();
+
+ int requiredSize = getRequiredSize(entry);
+ ByteBuffer buffer = ByteBuffer.allocate(requiredSize);
+
+ checksum.reset();
+ checksum.update(key, commonPrefix, key.length - commonPrefix);
+ checksum.update(isDelete);
+ buffer.putInt(key.length - commonPrefix)
+ .put(key, commonPrefix, key.length - commonPrefix)
+ .put(isDelete);
+
+ if (!entry.isDeleted()) {
+ buffer.putInt(value.length)
+ .put(value);
+ checksum.update(value);
+ }
+
+ buffer.putLong(checksum.getValue());
+
+ if (buffer.remaining() != 0) {
+ throw new RuntimeException("Math gone wrong");
+ }
+
+ buffer.flip();
+ return buffer;
+ }
+
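+ // Pads a partially filled cluster out to the fixed n-slot layout above; readers
+ // stop at the first DUMMY_LOCATION sentinel (see getTotalSizeToReadForKVs).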
+ private void fillDummyData(List<Long> checksums, List<Integer> locations) {
+ for (int i = checksums.size(); i < MAX_NUM_OF_ENTRIES_IN_CLUSTER; i++) {
+ checksums.add(DUMMY_CHECKSUM);
+ locations.add(DUMMY_LOCATION);
+ }
+ }
+
+ private long getChecksum(byte[] key) {
+ checksum.reset();
+ checksum.update(key);
+ return checksum.getValue();
+ }
+
+ private int getRequiredSize(KVUnit unit) {
+ byte[] key = unit.getKey();
+ byte[] value = unit.getValue();
+ return Integer.BYTES + key.length + Byte.BYTES
+ + (!unit.isDeleted() ? Integer.BYTES + value.length : 0)
+ + Long.BYTES // checksum
+ - commonPrefix;
+ }
+
+ public int getTotalSize() {
+ return totalKVSize;
+ }
+
+ public int getNumberOfEntries() {
+ return entries.size();
+ }
+
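+ // Read path for one cluster: skip the fixed checksum table, read the n + 1
+ // block locations and the shared prefix length, then decompress block by
+ // block, rebuilding each key from the prefix bytes carried in pointer.key().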
+ public static void fillQueue(MMappedReader reader, Pointer pointer, byte numberOfKeysInSingleCluster, Queue<KVUnit> queue) throws IOException {
+ reader.position((int) (pointer.position() + Long.BYTES * numberOfKeysInSingleCluster)); // skip checksums
+ List<Integer> locations = getLocationList(getBytes(reader, Integer.BYTES * (numberOfKeysInSingleCluster + 1)), numberOfKeysInSingleCluster);
+ int commonPrefix = reader.getInt();
+ ByteBuffer bytes = getBytes(reader, getTotalSizeToReadForKVs(locations));
+ DataCompressionStrategy decompressor = Lz4Compression.getInstance();
+
+ for (int i = 0; bytes.hasRemaining(); i++) {
+ var block = decompressBlock(locations, i, bytes, decompressor);
+ queue.add(parseKV(pointer, block, commonPrefix));
+ }
+ }
+
+ private static KVUnit parseKV(Pointer pointer, ByteBuffer block, int commonPrefix) {
+ int keyLength = block.getInt();
+
+ byte[] key = new byte[keyLength + commonPrefix];
+ System.arraycopy(pointer.key(), 0, key, 0, commonPrefix);
+ block.get(key, commonPrefix, keyLength);
+
+ byte isDeleted = block.get();
+
+ if (KVUnit.DeletionStatus.DELETED == KVUnit.DeletionStatus.of(isDeleted)) {
+ return new KVUnit(key);
+ } else {
+ int valueLength = block.getInt();
+ byte[] value = new byte[valueLength];
+ block.get(value);
+ return new KVUnit(key, value);
+ }
+ }
+
+ private static ByteBuffer decompressBlock(List<Integer> locations, int index, ByteBuffer bytes, DataCompressionStrategy decompressor) throws IOException {
+ byte[] block = new byte[locations.get(index + 1) - locations.get(index)];
+ bytes.get(block);
+ byte[] decompress = decompressor.decompress(block);
+ return ByteBuffer.wrap(decompress);
+ }
+
+ private static int getTotalSizeToReadForKVs(List<Integer> locations) {
+ for (int i = 0; i < locations.size(); i++) {
+ if (locations.get(i) == DUMMY_LOCATION) {
+ return locations.get(i - 1);
+ }
+ }
+ return locations.getLast();
+ }
+
+ private static ByteBuffer getBytes(MMappedReader reader, int size) throws IOException {
+ byte[] bytes = new byte[size];
+ reader.read(bytes);
+ return ByteBuffer.wrap(bytes);
+ }
+
+ private static List<Integer> getLocationList(ByteBuffer wrap, int sizeOfCluster) {
+ List<Integer> locations = new ArrayList<>();
+ for (int i = 0; i < sizeOfCluster + 1; i++) {
+ locations.add(wrap.getInt());
+ }
+ return locations;
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Compaction/IndexedClusterIterator.java b/AtomDB/src/main/java/org/g2n/atomdb/Compaction/IndexedClusterIterator.java
new file mode 100644
index 0000000..67e4985
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Compaction/IndexedClusterIterator.java
@@ -0,0 +1,100 @@
+package org.g2n.atomdb.Compaction;
+
+import org.g2n.atomdb.Constants.DBConstant;
+import org.g2n.atomdb.Table.SSTInfo;
+import org.g2n.atomdb.db.KVUnit;
+import org.g2n.atomdb.sstIo.MMappedReader;
+
+import java.io.IOException;
+import java.util.ArrayDeque;
+import java.util.Objects;
+
+class IndexedClusterIterator implements AutoCloseable {
+ private final MMappedReader reader;
+ private final int clusterEndPoint;
+ private final SSTInfo sstInfo;
+ private final byte numberOfKeysInSingleCluster;
+ private final ArrayDeque<KVUnit> queue;
+ private int retrievedClusterCount = 0;
+
+ public IndexedClusterIterator(SSTInfo sstInfo) throws IOException {
+ this.sstInfo = Objects.requireNonNull(sstInfo, "SSTInfo cannot be null");
+ this.reader = new MMappedReader(sstInfo.getSst());
+ this.clusterEndPoint = (int) Math.abs(sstInfo.getPointers().get(sstInfo.getPointers().size() - 1).position());
+ this.numberOfKeysInSingleCluster = sstInfo.getNumberOfKeysInSingleCluster();
+ this.queue = new ArrayDeque<>(DBConstant.CLUSTER_SIZE);
+ }
+
+ public boolean hasNext() {
+ return !queue.isEmpty() || !isFileLimitReached();
+ }
+
+ private boolean isFileLimitReached() {
+ // todo which one comes the first, we can optimize this
+ return reader.position() == clusterEndPoint || retrievedClusterCount == sstInfo.getPointers().size();
+ }
+
+ public byte[] nextClusterSmallestKey() {
+ ensureNotAtEnd("No more elements IndexedClusterIterator");
+ return sstInfo.getPointers().get(retrievedClusterCount).key();
+ }
+
+ public KVUnit getNextKVUnit() {
+ ensureHasNext("Attempt to fetch KVUnit without available clusters");
+ if (queue.isEmpty()) {
+ loadNextClusterToQueue();
+ }
+ return queue.getFirst();
+ }
+
+ private void loadNextClusterToQueue() {
+ ensureNotAtEnd("Cannot load cluster, end of file reached");
+ try {
+ IndexedCluster.fillQueue(reader, sstInfo.getPointers().get(retrievedClusterCount++), numberOfKeysInSingleCluster, queue);
+ } catch (IOException e) {
+ throw new IllegalStateException("Error while reading the next cluster", e);
+ }
+ }
+
+ public KVUnit pollNextKVUnit() {
+ if (queue.isEmpty()) {
+ loadNextClusterToQueue();
+ }
+ return queue.pollFirst();
+ }
+
+ private void ensureHasNext(String errorMessage) {
+ if (!hasNext()) {
+ throw new IllegalStateException(errorMessage);
+ }
+ }
+
+ private void ensureNotAtEnd(String errorMessage) {
+ if (isFileLimitReached()) {
+ throw new IllegalStateException(errorMessage);
+ }
+ }
+
+ public SSTInfo getSSTInfo() {
+ return sstInfo;
+ }
+
+ private void validateStateBeforeClose() {
+ if (reader.position() != clusterEndPoint) {
+ throw new IllegalStateException("File not read completely");
+ }
+ if (!queue.isEmpty()) {
+ throw new IllegalStateException("Queue not empty, current size: " + queue.size());
+ }
+ }
+
+ @Override
+ public void close() {
+ validateStateBeforeClose();
+ try {
+ reader.close();
+ } catch (IOException e) {
+ throw new IllegalStateException("Error closing reader", e);
+ }
+ }
+}
\ No newline at end of file
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Compaction/MergedClusterIterator.java b/AtomDB/src/main/java/org/g2n/atomdb/Compaction/MergedClusterIterator.java
new file mode 100644
index 0000000..750697a
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Compaction/MergedClusterIterator.java
@@ -0,0 +1,109 @@
+package org.g2n.atomdb.Compaction;
+
+import org.g2n.atomdb.Table.SSTInfo;
+import org.g2n.atomdb.db.DBComparator;
+import org.g2n.atomdb.db.KVUnit;
+
+import java.io.IOException;
+import java.util.*;
+
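+/**
+ * K-way merge over several SSTs: each next() peeks the head of every underlying
+ * IndexedClusterIterator, emits the smallest key, and on a duplicate key keeps
+ * the value from the newer file while discarding the older one. This is the
+ * read side of a compaction run.
+ */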
+public class MergedClusterIterator implements Iterator<KVUnit>, AutoCloseable {
+ private final List<IndexedClusterIterator> clusterIterators;
+ private final int totalEntryCount;
+ private int entriesServed = 0;
+
+ public MergedClusterIterator(Collection<SSTInfo> sstInfoCollection) throws IOException {
+ Objects.requireNonNull(sstInfoCollection, "SSTInfo collection cannot be null");
+ this.totalEntryCount = sstInfoCollection.stream().mapToInt(SSTInfo::getNumberOfEntries).sum();
+ this.clusterIterators = initializeIterators(sstInfoCollection);
+ }
+
+ private List<IndexedClusterIterator> initializeIterators(Collection<SSTInfo> sstInfoCollection) throws IOException {
+ List<IndexedClusterIterator> iterators = new ArrayList<>(sstInfoCollection.size());
+ for (SSTInfo sstInfo : sstInfoCollection) {
+ iterators.add(new IndexedClusterIterator(sstInfo));
+ }
+ iterators.sort((a, b) -> {
+ try {
+ return DBComparator.byteArrayComparator.compare(a.nextClusterSmallestKey(), b.nextClusterSmallestKey());
+ } catch (Exception e) {
+ throw new IllegalStateException("Error comparing cluster smallest keys", e);
+ }
+ });
+ return iterators;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return !clusterIterators.isEmpty();
+ }
+
+ @Override
+ public KVUnit next() {
+ if (!hasNext()) {
+ throw new NoSuchElementException("No more KV units available");
+ }
+ return fetchNextKVUnit();
+ }
+
+
+ private KVUnit fetchNextKVUnit() {
+ KVUnit unit = null;
+ IndexedClusterIterator curr = null;
+ var toRemove = new ArrayList<IndexedClusterIterator>();
+
+ for (IndexedClusterIterator iterator : clusterIterators) {
+ if (!iterator.hasNext()) {
+ iterator.close();
+ toRemove.add(iterator);
+ continue;
+ }
+
+ KVUnit candidate = iterator.getNextKVUnit();
+ if (curr == null) {
+ unit = candidate;
+ curr = iterator;
+ continue;
+ }
+
+ int compare = DBComparator.byteArrayComparator.compare(candidate.getKey(), unit.getKey());
+
+ if (compare < 0) {
+ unit = candidate;
+ curr = iterator;
+ } else if (compare == 0) {
+ // same key present in two SSTs: the newer file wins, the older value is discarded
+ if (iterator.getSSTInfo().compareTo(curr.getSSTInfo()) < 0) {
+ curr.pollNextKVUnit(); // stale value from the older file
+ unit = candidate;
+ curr = iterator;
+ } else {
+ iterator.pollNextKVUnit(); // stale value from the older file
+ }
+ }
+ }
+ if (curr == null) {
+ clusterIterators.removeAll(toRemove);
+ throw new NoSuchElementException("All SST iterators are exhausted");
+ }
+ KVUnit kvUnit = curr.pollNextKVUnit();
+ removeExhaustedIterator(curr, toRemove);
+ entriesServed++;
+ return kvUnit;
+ }
+
+ private void removeExhaustedIterator(IndexedClusterIterator curr, ArrayList<IndexedClusterIterator> toRemove) {
+ if (!curr.hasNext()) {
+ curr.close();
+ toRemove.add(curr);
+ }
+ clusterIterators.removeAll(toRemove);
+ }
+
+
+ public double approximateRemainingEntries() {
+ return totalEntryCount - entriesServed;
+ }
+
+ @Override
+ public void close() {
+ for (IndexedClusterIterator iterator : clusterIterators) {
+ iterator.close();
+ }
+ clusterIterators.clear();
+ }
+}
\ No newline at end of file
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Compaction/Pointer.java b/AtomDB/src/main/java/org/g2n/atomdb/Compaction/Pointer.java
new file mode 100644
index 0000000..f6b97a4
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Compaction/Pointer.java
@@ -0,0 +1,70 @@
+package org.g2n.atomdb.Compaction;
+
+import org.g2n.atomdb.db.DBComparator;
+import org.g2n.atomdb.db.ExpandingByteBuffer;
+import org.g2n.atomdb.sstIo.ChannelBackedWriter;
+import org.g2n.atomdb.sstIo.MMappedReader;
+
+import java.nio.MappedByteBuffer;
+import java.util.Arrays;
+
+// todo make the position int.
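+/**
+ * Index entry for one cluster: the smallest key of the cluster plus the byte
+ * offset where the cluster starts. The trailing pointer of an SST carries a
+ * negated offset marking the end of the cluster region, so readers take abs().
+ */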
+public record Pointer(byte[] key, long position) implements Comparable<Pointer> {
+
+ public void storeAsBytes(ChannelBackedWriter writer) {
+ // todo can't we compress the keys ?
+ writer.putLong(position)
+ .putInt(key.length)
+ .putBytes(key);
+ }
+
+ public void storeAsBytes(ExpandingByteBuffer writer) {
+ // todo can't we compress the keys ?
+ writer.putLong(position)
+ .putInt(key.length)
+ .put(key);
+ }
+
+ public static Pointer getPointer(MMappedReader reader) {
+ long position = reader.getLong();
+ int size = reader.getInt();
+ var key = new byte[size];
+ reader.getBytes(key);
+ return new Pointer(key, position);
+ }
+
+ public static Pointer readBytesToObj(MappedByteBuffer buffer) {
+ long pos = buffer.getLong();
+ var key = new byte[buffer.getInt()];
+ buffer.get(key);
+ return new Pointer(key, pos);
+ }
+// @Override
+// public boolean equals(Object o) {
+// if (this == o) return true;
+// if (o == null || getClass() != o.getClass()) return false;
+// Pointer pointer = (Pointer) o;
+// return position == pointer.position && Arrays.equals(key, pointer.key);
+// }
+//
+// @Override
+// public int hashCode() {
+// int result = Objects.hash(position);
+// result = 31 * result + Arrays.hashCode(key);
+// return result;
+// }
+
+
+ @Override
+ public int compareTo(Pointer pointer) {
+ return DBComparator.byteArrayComparator.compare(this.key, pointer.key);
+ }
+
+ @Override
+ public String toString() {
+ return "Pointer{" +
+ "key=" + new String(key) +
+ ", position=" + position +
+ '}';
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Compaction/PointerList.java b/AtomDB/src/main/java/org/g2n/atomdb/Compaction/PointerList.java
new file mode 100644
index 0000000..5a96aaa
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Compaction/PointerList.java
@@ -0,0 +1,70 @@
+package org.g2n.atomdb.Compaction;
+
+import com.google.common.base.Preconditions;
+import org.g2n.atomdb.db.ExpandingByteBuffer;
+import org.g2n.atomdb.sstIo.MMappedReader;
+import org.g2n.atomdb.sstIo.ChannelBackedWriter;
+
+import java.util.*;
+
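+/**
+ * Append-only list of cluster pointers; add() asserts that pointers arrive in
+ * non-decreasing key order, mirroring the order in which clusters are written.
+ */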
+public class PointerList {
+ private final List<Pointer> pointers;
+ private Pointer previous;
+
+ public PointerList(int numberOfEntries) {
+ this.pointers = new ArrayList<>(numberOfEntries);
+ }
+
+ public PointerList(){
+ this.pointers = new ArrayList<>(5000);
+ }
+
+
+ public void add(Pointer pointer) {
+ if (previous != null) {
+ Preconditions.checkArgument(previous.compareTo(pointer) <= 0);
+ }
+ previous = pointer;
+ pointers.add(pointer);
+ }
+
+ public Pointer get(int index) {
+ return pointers.get(index);
+ }
+
+ public Pointer getFirst() { return pointers.getFirst();}
+ public Pointer getLast() {return pointers.getLast();}
+
+ public void storeAsBytes(ChannelBackedWriter writer) {
+ pointers.forEach(each -> each.storeAsBytes(writer));
+ }
+
+ public void storeAsBytes(ExpandingByteBuffer writer) {
+ pointers.forEach(each -> each.storeAsBytes(writer));
+ }
+
+ public static PointerList getPointerList(MMappedReader reader, int entries) {
+ var pointers = new PointerList(entries);
+ for (int i = 0; i < entries; i++) {
+ pointers.add(Pointer.getPointer(reader));
+ }
+ return pointers;
+ }
+
+ public int size() {
+ return pointers.size();
+ }
+
+ @Override
+ public String toString() {
+ return "PointerList{" +
+ "firstPointer=" + pointers.getFirst().toString() +
+ "secondLast=" + pointers.get(pointers.size() - 2).toString() +
+ ", LastPointer=" + pointers.getLast().toString() +
+ '}';
+ }
+
+ public List<Pointer> getList() {
+ return Collections.unmodifiableList(pointers);
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Compaction/SSTPersist.java b/AtomDB/src/main/java/org/g2n/atomdb/Compaction/SSTPersist.java
new file mode 100644
index 0000000..40d4556
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Compaction/SSTPersist.java
@@ -0,0 +1,121 @@
+package org.g2n.atomdb.Compaction;
+
+import org.g2n.atomdb.Constants.DBConstant;
+import org.g2n.atomdb.Level.Level;
+import org.g2n.atomdb.Table.SSTInfo;
+import org.g2n.atomdb.Table.Table;
+import com.google.common.hash.BloomFilter;
+import com.google.common.hash.Funnels;
+import org.g2n.atomdb.db.ExpandingByteBuffer;
+import org.g2n.atomdb.db.KVUnit;
+import org.g2n.atomdb.sstIo.SSTHeader;
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.foreign.Arena;
+import java.nio.channels.FileChannel;
+import java.nio.file.StandardOpenOption;
+import java.util.Iterator;
+import java.util.function.BooleanSupplier;
+
+import static java.nio.channels.FileChannel.MapMode.READ_WRITE;
+
+/*
+*
+* TODO:
+* 1) we can actually keep record the numberOfEntries in sst for that level, which can improve the bloom filter.
+*
+* */
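+/**
+ * Serializes a run of KVUnits into the on-disk SST layout produced by
+ * writeOptimized1: [header][clusters][bloom filter][pointer list][end marker],
+ * with the header rewritten last once all section positions are known.
+ */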
+public class SSTPersist {
+ private final Table table;
+ private final ThreadLocal<ExpandingByteBuffer> bufferThreadLocal = ThreadLocal.withInitial(ExpandingByteBuffer::new);
+
+ public SSTPersist(Table table) {
+ this.table = table;
+ }
+
+ public void writeSingleFile(Level level, int maxEntries, Iterator<KVUnit> iterator) throws IOException {
+ var sstInfo = writeOptimized1(table.getNewSST(level), level, maxEntries, iterator, () -> true, Integer.MAX_VALUE);
+ table.addSST(level, sstInfo);
+ }
+
+ public void save(File file) throws IOException {
+ var buffer = bufferThreadLocal.get();
+ buffer.flip();
+ try (
+ var fileChannel = FileChannel.open(file.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE);
+ var arena = Arena.ofConfined()
+ ) {
+ var fileSegment = fileChannel.map(READ_WRITE, 0, buffer.remaining(), arena);
+ fileSegment.asByteBuffer().put(buffer.getBuffer());
+ } finally {
+ buffer.clear();
+ }
+ }
+
+ private IndexedCluster getNextCluster(Iterator<KVUnit> customIterator, BloomFilter<byte[]> filter) {
+ var cluster = new IndexedCluster(DBConstant.CLUSTER_SIZE);
+ for (int i = 0; i < DBConstant.CLUSTER_SIZE && customIterator.hasNext(); i++) {
+ KVUnit current = customIterator.next();
+ cluster.add(current);
+ filter.put(current.getKey());
+ }
+ return cluster;
+ }
+
+ public void writeManyFiles(Level level, MergedClusterIterator iterator, int avgNumberOfEntriesInSST) throws IOException {
+ SSTInfo sstInfo = null;
+
+ while (iterator.hasNext()) {
+ int finalAvgNumberOfEntriesInSST = avgNumberOfEntriesInSST;
+ BooleanSupplier piggyBackingPredicate = () -> finalAvgNumberOfEntriesInSST * 0.10 >= iterator.approximateRemainingEntries();
+
+ sstInfo = writeOptimized1(table.getNewSST(level), level, avgNumberOfEntriesInSST, iterator, piggyBackingPredicate, DBConstant.COMPACTED_SST_FILE_SIZE);
+ avgNumberOfEntriesInSST = (sstInfo.getNumberOfEntries() + avgNumberOfEntriesInSST) / 2;
+ table.addSST(level, sstInfo);
+ }
+ if (sstInfo != null) {
+ table.saveLastCompactedKey(level, sstInfo.getSstKeyRange().getGreatest());
+ }
+ }
+
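+ // The piggy-backing predicate lets the file currently being written absorb a
+ // small merge tail (<= ~10% of the average SST entry count, see writeManyFiles)
+ // instead of spilling it into a tiny extra SST.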
+ private SSTInfo writeOptimized1(File file, Level level, int avgNumberOfEntriesInSST, Iterator<KVUnit> iterator, BooleanSupplier piggyBackingPredicate, int compactedSstFileSize) throws IOException {
+ var sstHeader = SSTHeader.getDefault(level);
+ var writer = bufferThreadLocal.get();
+ writer.clear();
+ writer.position(SSTHeader.TOTAL_HEADER_SIZE);
+ var filter = BloomFilter.create(Funnels.byteArrayFunnel(), avgNumberOfEntriesInSST, 0.01);
+
+ // middle block
+ var pointers = new PointerList(avgNumberOfEntriesInSST);
+ IndexedCluster indexedCluster = null;
+ int totalKVSize = 0;
+ int numberOfEntries = 0;
+ while (iterator.hasNext() && (totalKVSize < compactedSstFileSize || piggyBackingPredicate.getAsBoolean())) {
+ indexedCluster = getNextCluster(iterator, filter);
+ totalKVSize += indexedCluster.getTotalSize();
+ numberOfEntries += indexedCluster.getNumberOfEntries();
+ pointers.add(new Pointer(indexedCluster.getFirstKey(), writer.position()));
+// System.out.println("pointer position"+ writer.position());
+ indexedCluster.storeAsBytes(writer);
+ }
+
+ pointers.add(new Pointer(indexedCluster.getLastKey(), Math.negateExact(writer.position())));
+ sstHeader.setEntries(numberOfEntries);
+ sstHeader.setFilterPosition(writer.position());
+
+ // footer
+ filter.writeTo(writer);
+ sstHeader.setPointersPosition(writer.position());
+ pointers.storeAsBytes(writer);
+ writer.putLong(DBConstant.MARK_FILE_END); // todo need to confirm this while reading the file.
+ var lastLeftPosition = writer.position();
+
+ // header
+ writer.position(0);
+ sstHeader.writeSSTHeaderData(writer);
+ sstHeader.check();
+ writer.position(lastLeftPosition);
+
+ save(file);
+ return new SSTInfo(file, sstHeader, pointers, filter);
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Compaction/Validator.java b/AtomDB/src/main/java/org/g2n/atomdb/Compaction/Validator.java
new file mode 100644
index 0000000..0d2a405
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Compaction/Validator.java
@@ -0,0 +1,309 @@
+package org.g2n.atomdb.Compaction;
+
+import org.g2n.atomdb.Compression.Lz4Compression;
+import org.g2n.atomdb.Level.Level;
+import org.g2n.atomdb.Table.SSTFileHelper;
+import org.g2n.atomdb.Table.SSTInfo;
+import org.g2n.atomdb.db.KVUnit;
+import org.g2n.atomdb.sstIo.MMappedReader;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import static org.g2n.atomdb.Compaction.IndexedCluster.DUMMY_LOCATION;
+
+public class Validator {
+
+ public static void validateSSTBasedOnSearch(SSTInfo sstInfo, byte[] key) throws IOException {
+ System.out.println("TEST".repeat(30));
+ System.out.println("SST: "+ sstInfo.getSst().getName());
+ System.out.println(new String(key));
+
+ // exists in plain search
+ KVUnit kvUnit = linearSearch(sstInfo, key);
+ if (kvUnit == null) {
+ System.out.println("Key not found, not moving forward");
+ return;
+ }
+
+ if (!sstInfo.getSstKeyRange().inRange(key)) {
+ System.out.println(Arrays.toString(sstInfo.getSstKeyRange().getSmallest()));
+ System.out.println(Arrays.toString(sstInfo.getSstKeyRange().getGreatest()));
+ throw new RuntimeException("Key is not in range, even though it was found in it.");
+ }
+
+ Pointer found = null;
+ for (Pointer pointer : sstInfo.getPointers().getList()) {
+ if (Arrays.compare(pointer.key(), key) <= 0) {
+ found = pointer;
+ }
+ }
+ if (found == null) {
+ System.out.println("Key not found");
+ return;
+ }
+
+ if (found.equals(sstInfo.getPointers().getLast())) {
+ found = sstInfo.getPointers().get(sstInfo.getPointers().size() - 2);
+ System.out.println("adjusting for last pointer");
+ }
+
+ System.out.println("Found: pointerList="+ sstInfo.getPointers().size()+" position going to look is=" + found.position());
+ var reader = new MMappedReader(sstInfo.getSst());
+ reader.position((int) found.position());
+ Cluster cluster = readSimpleCluster(reader, found, sstInfo.getNumberOfKeysInSingleCluster());
+ for (KVUnit unit : cluster.units()) {
+ if (Arrays.compare(key, unit.getKey()) == 0) {
+ System.out.println("Key found");
+ return;
+ }
+ }
+ System.out.println("Key not found");
+
+ }
+
+ private static KVUnit linearSearch(SSTInfo sstInfo, byte[] key) throws IOException {
+ System.out.println("Performing linear search");
+ System.out.println("sstInfo="+sstInfo.getSst().getName());
+
+ var reader = new MMappedReader(sstInfo.getSst());
+ var clusterEndPoint = (int) Math.abs(sstInfo.getPointers().get(sstInfo.getPointers().size() - 1).position());
+ var numberOfKeysInSingleCluster = sstInfo.getNumberOfKeysInSingleCluster();
+ int numberOfRetrievedClusterCount = 0;
+
+ List<Cluster> clusterList = new ArrayList<>();
+ while (!(reader.position() == clusterEndPoint || numberOfRetrievedClusterCount == sstInfo.getPointers().size())) {
+ Cluster cluster = readSimpleCluster(reader, sstInfo.getPointers().get(numberOfRetrievedClusterCount++), numberOfKeysInSingleCluster);
+ clusterList.add(cluster);
+ }
+
+ for (Cluster cluster : clusterList) {
+ for (KVUnit unit : cluster.units()) {
+ if (Arrays.compare(key, unit.getKey()) == 0) {
+ System.out.println("Linear search: Key found");
+ return unit;
+ }
+ }
+ }
+ System.out.println("Linear search: Key not found");
+ return null;
+ }
+
+ public static void validateSST(SSTInfo sstInfo) throws IOException {
+ System.out.println("TEST".repeat(30));
+ System.out.println("sstInfo="+sstInfo.getSst().getName());
+ System.out.println("Number of entries="+sstInfo.getNumberOfEntries());
+
+ if (sstInfo.getLevel() != Level.LEVEL_ZERO) {
+ if (sstInfo.getNumberOfEntries() < 100) {
+ throw new RuntimeException("Number of entries is too little");
+ }
+ }
+
+ verifySSTInfo(sstInfo);
+
+ var reader = new MMappedReader(sstInfo.getSst());
+ var clusterEndPoint = (int) Math.abs(sstInfo.getPointers().get(sstInfo.getPointers().size() - 1).position());
+ var numberOfKeysInSingleCluster = sstInfo.getNumberOfKeysInSingleCluster();
+ int numberOfRetrievedClusterCount = 0;
+
+ System.out.println("Cluster reading");
+ System.out.println("clusterEndPoint="+clusterEndPoint);
+ System.out.println("numberOfKeysInSingleCluster="+numberOfKeysInSingleCluster);
+ System.out.println("number of pointers in sst="+sstInfo.getPointers().size());
+ System.out.println("start position="+reader.position());
+
+ List<Cluster> clusterList = new ArrayList<>();
+ while (!(reader.position() == clusterEndPoint || numberOfRetrievedClusterCount == sstInfo.getPointers().size())) {
+ Cluster cluster = readSimpleCluster(reader, sstInfo.getPointers().get(numberOfRetrievedClusterCount++), numberOfKeysInSingleCluster);
+ clusterList.add(cluster);
+ }
+
+// var inputString = "qwertyuiopasdfghjklzxcvbnm<>?:}{+_)(*&^%$#@!)}1234567890`~".repeat(5).getBytes();
+// byte[] preKey = new byte[] {0};
+// System.out.println("Cluster iteration");
+// System.out.println("Number of cluster retrieved="+clusterList.size());
+// for (Cluster cluster : clusterList) {
+// for (KVUnit unit : cluster.getUnits()) {
+// if (Arrays.compare(preKey, unit.getKey()) > 0) {
+// System.out.println("Cluster is invalid");
+// throw new RuntimeException("Cluster is invalid");
+// }
+// preKey = unit.getKey();
+//
+// if (Arrays.compare(inputString, unit.getValue()) != 0) {
+// System.out.println("Cluster is invalid");
+// System.out.println("Expected: " + new String(inputString) + "\nbut got: " + new String(unit.getValue()));
+// throw new RuntimeException("Cluster is invalid");
+// }
+// System.out.print("Key="+new String(unit.getKey())+" ");
+// }
+// }
+// System.out.println("");
+
+ if (Arrays.compare(clusterList.getFirst().units().getFirst().getKey(), sstInfo.getSstKeyRange().getSmallest()) != 0) {
+ throw new RuntimeException("Smallest key is invalid");
+ }
+
+ if (Arrays.compare(clusterList.getLast().units().getLast().getKey(), sstInfo.getSstKeyRange().getGreatest()) != 0) {
+ throw new RuntimeException("Greatest key is invalid");
+ }
+
+ System.out.println("\nDone with the validation");
+ }
+
+ private static void verifySSTInfo(SSTInfo sstInfo) {
+ SSTInfo retrievedSSTInfo = SSTFileHelper.getSSTInfo(sstInfo.getSst());
+ if (retrievedSSTInfo.equals(sstInfo)) {
+ System.out.println("SSTInfo is valid");
+ } else {
+ System.out.println("SSTInfo is invalid");
+ }
+
+ if (Arrays.compare(retrievedSSTInfo.getSstKeyRange().getSmallest(), sstInfo.getSstKeyRange().getSmallest()) == 0) {
+ System.out.println("getSmallest is valid");
+ } else {
+ System.out.println("getSmallest is invalid");
+ }
+
+ if (Arrays.compare(retrievedSSTInfo.getSstKeyRange().getGreatest(), sstInfo.getSstKeyRange().getGreatest()) == 0) {
+ System.out.println("getGreatest is valid");
+ } else {
+ System.out.println("getGreatest is invalid");
+ }
+
+ if (Arrays.compare(retrievedSSTInfo.getSstKeyRange().getSmallest(), retrievedSSTInfo.getSstKeyRange().getGreatest()) < 0) {
+ System.out.println("key range ordering is valid");
+ } else {
+ System.out.println("key range ordering is invalid");
+ }
+
+ if (retrievedSSTInfo.getNumberOfEntries() == sstInfo.getNumberOfEntries()) {
+ System.out.println("getNumberOfEntries is valid");
+ } else {
+ System.out.println("getNumberOfEntries is invalid");
+ }
+
+ if (retrievedSSTInfo.getLevel().equals(sstInfo.getLevel())) {
+ System.out.println("getLevel is valid");
+ } else {
+ System.out.println("getLevel is invalid");
+ }
+
+ if (retrievedSSTInfo.getFilterPosition() == sstInfo.getFilterPosition()) {
+ System.out.println("getFilterPosition is valid");
+ } else {
+ System.out.println("getFilterPosition is invalid");
+ }
+
+ if (retrievedSSTInfo.getPointersPosition() == sstInfo.getPointersPosition()) {
+ System.out.println("getPointersPosition is valid");
+ } else {
+ System.out.println("getPointersPosition is invalid");
+ }
+
+ if (retrievedSSTInfo.getFileTorsoSize() == sstInfo.getFileTorsoSize()) {
+ System.out.println("getFileTorsoSize is valid");
+ } else {
+ System.out.println("getFileTorsoSize is invalid");
+ }
+
+ if (retrievedSSTInfo.getNumberOfKeysInSingleCluster() == sstInfo.getNumberOfKeysInSingleCluster()) {
+ System.out.println("getNumberOfKeysInSingleCluster is valid");
+ } else {
+ System.out.println("getNumberOfKeysInSingleCluster is invalid");
+ }
+
+ if (retrievedSSTInfo.getShortestCommonPrefixUsed() == sstInfo.getShortestCommonPrefixUsed()) {
+ System.out.println("getShortestCommonPrefixUsed is valid");
+ } else {
+ System.out.println("getShortestCommonPrefixUsed is invalid");
+ }
+
+ int count = retrievedSSTInfo.getPointers().getList().size();
+ for (int i = 0; i < count; i++) {
+ Pointer retrievedPointer = retrievedSSTInfo.getPointers().get(i);
+ Pointer expectedPointer = sstInfo.getPointers().get(i);
+ if (Arrays.compare(retrievedPointer.key(), expectedPointer.key()) != 0 || retrievedPointer.position() != expectedPointer.position()) {
+ System.out.println("Pointer " + i + " is invalid");
+ System.out.println("Expected: " + sstInfo.getPointers().get(i) + " but got: " + retrievedSSTInfo.getPointers().get(i));
+ throw new RuntimeException("Pointer is invalid");
+ }
+ }
+ }
+
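+ // Reads one cluster following the on-disk layout written by IndexedCluster:
+ // a block of Long.BYTES * keys (skipped here, presumably per-key checksums),
+ // keys+1 int offsets, the common-prefix length, then one LZ4 block per entry.
+ // The shared prefix is rebuilt from the pointer's key before each entry is decoded.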
+ public static Cluster readSimpleCluster(MMappedReader reader, Pointer pointer, byte numberOfKeysInSingleCluster) throws IOException {
+ reader.position((int) (pointer.position() + Long.BYTES * numberOfKeysInSingleCluster));
+ List<Integer> locations = getLocationList(getBytes(reader, Integer.BYTES * (numberOfKeysInSingleCluster + 1)), numberOfKeysInSingleCluster);
+ int commonPrefix = reader.getInt();
+ ByteBuffer bytes = getBytes(reader, getTotalSizeToReadForKVs(locations));
+
+ List<KVUnit> units = new ArrayList<>();
+ for (int i = 0; i < numberOfKeysInSingleCluster && bytes.hasRemaining(); i++) {
+ byte[] block = new byte[locations.get(i + 1) - locations.get(i)];
+ bytes.get(block);
+ byte[] decompress = Lz4Compression.getInstance().decompress(block);
+ var wrap = ByteBuffer.wrap(decompress);
+ int keyLength = wrap.getInt();
+
+ byte[] key = new byte[keyLength + commonPrefix];
+ System.arraycopy(pointer.key(), 0, key, 0, commonPrefix);
+ wrap.get(key, commonPrefix, keyLength);
+
+ var isDeleted = KVUnit.DeletionStatus.of(wrap.get());
+ if (KVUnit.DeletionStatus.DELETED == isDeleted) {
+ units.add(new KVUnit(key));
+ } else {
+ int valueLength = wrap.getInt();
+ byte[] value = new byte[valueLength];
+ wrap.get(value);
+ units.add(new KVUnit(key, value));
+ }
+ }
+ return new Cluster(units);
+ }
+
+ private static int getTotalSizeToReadForKVs(List<Integer> locations) {
+ for (int i = 0; i < locations.size(); i++) {
+ if (locations.get(i) == DUMMY_LOCATION) {
+ return locations.get(i - 1);
+ }
+ }
+ return locations.getLast();
+ }
+
+ private static ByteBuffer getBytes(MMappedReader reader, int size) throws IOException {
+ byte[] bytes = new byte[size];
+ reader.read(bytes);
+ return ByteBuffer.wrap(bytes);
+ }
+
+ private static List<Integer> getLocationList(ByteBuffer wrap, int sizeOfCluster) {
+ List<Integer> locations = new ArrayList<>();
+ for (int i = 0; i < sizeOfCluster + 1; i++) {
+ locations.add(wrap.getInt());
+ }
+ return locations;
+ }
+
+ record Cluster(List<KVUnit> units) {
+
+ @Override
+ public List<KVUnit> units() {
+ return Collections.unmodifiableList(units);
+ }
+
+ public byte[] getSmallestKeyInCluster() {
+ return units.getFirst().getKey();
+ }
+
+ public byte[] getGreatestKeyInCluster() {
+ return units.getLast().getKey();
+ }
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Compression/CompressionStrategyFactory.java b/AtomDB/src/main/java/org/g2n/atomdb/Compression/CompressionStrategyFactory.java
new file mode 100644
index 0000000..176ff6c
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Compression/CompressionStrategyFactory.java
@@ -0,0 +1,11 @@
+package org.g2n.atomdb.Compression;
+
+public class CompressionStrategyFactory {
+ public static DataCompressionStrategy GetCompressionStrategy(boolean compressionDisabled) {
+ if (compressionDisabled) {
+ return new NoCompression();
+ }
+ return new SnappyCompression();
+ }
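+
+ // Usage sketch (hypothetical caller):
+ // DataCompressionStrategy codec = CompressionStrategyFactory.GetCompressionStrategy(false);
+ // byte[] packed = codec.compress(raw);
+ // byte[] restored = codec.decompress(packed); // equals raw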
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Compression/DataCompressionStrategy.java b/AtomDB/src/main/java/org/g2n/atomdb/Compression/DataCompressionStrategy.java
new file mode 100644
index 0000000..51da6e0
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Compression/DataCompressionStrategy.java
@@ -0,0 +1,8 @@
+package org.g2n.atomdb.Compression;
+
+import java.io.IOException;
+
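+/**
+ * Block-compression contract: decompress(compress(x)) must reproduce x exactly.
+ */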
+public interface DataCompressionStrategy {
+ byte[] compress(byte[] arr) throws IOException;
+ byte[] decompress(byte[] arr) throws IOException;
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Compression/Lz4Compression.java b/AtomDB/src/main/java/org/g2n/atomdb/Compression/Lz4Compression.java
new file mode 100644
index 0000000..8e474df
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Compression/Lz4Compression.java
@@ -0,0 +1,32 @@
+package org.g2n.atomdb.Compression;
+
+import net.jpountz.lz4.*;
+
+import java.io.IOException;
+
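+/**
+ * LZ4 codec built on jpountz's "WithLength" wrappers, which prepend the original
+ * length to the compressed block so decompression needs no external size hint.
+ * Note that LZ4Factory.nativeInstance() needs the native LZ4 library on the path;
+ * LZ4Factory.fastestInstance() would fall back to the pure-Java implementation.
+ */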
+public class Lz4Compression implements DataCompressionStrategy {
+ private final LZ4CompressorWithLength lz4CompressorWithLength;
+ private final LZ4DecompressorWithLength lz4DecompressorWithLength;
+
+ public static DataCompressionStrategy getInstance() {
+ return new Lz4Compression();
+ }
+
+ private Lz4Compression() {
+ LZ4Factory lz4Factory = LZ4Factory.nativeInstance();
+ LZ4Compressor lz4FastCompressor = lz4Factory.fastCompressor();
+ this.lz4CompressorWithLength = new LZ4CompressorWithLength(lz4FastCompressor);
+ LZ4FastDecompressor decompressor = lz4Factory.fastDecompressor();
+ this.lz4DecompressorWithLength = new LZ4DecompressorWithLength(decompressor);
+ }
+
+ @Override
+ public byte[] compress(byte[] arr) throws IOException {
+ return lz4CompressorWithLength.compress(arr);
+ }
+
+ @Override
+ public byte[] decompress(byte[] arr) throws IOException {
+ return lz4DecompressorWithLength.decompress(arr);
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Compression/NoCompression.java b/AtomDB/src/main/java/org/g2n/atomdb/Compression/NoCompression.java
new file mode 100644
index 0000000..3a4a6e0
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Compression/NoCompression.java
@@ -0,0 +1,13 @@
+package org.g2n.atomdb.Compression;
+
+public class NoCompression implements DataCompressionStrategy {
+ @Override
+ public byte[] compress(byte[] arr) {
+ return arr;
+ }
+
+ @Override
+ public byte[] decompress(byte[] arr) {
+ return arr;
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Compression/SnappyCompression.java b/AtomDB/src/main/java/org/g2n/atomdb/Compression/SnappyCompression.java
new file mode 100644
index 0000000..f5b1c1e
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Compression/SnappyCompression.java
@@ -0,0 +1,17 @@
+package org.g2n.atomdb.Compression;
+
+import org.xerial.snappy.Snappy;
+
+import java.io.IOException;
+
+public class SnappyCompression implements DataCompressionStrategy {
+ @Override
+ public byte[] compress(byte[] arr) throws IOException {
+ return Snappy.compress(arr);
+ }
+
+ @Override
+ public byte[] decompress(byte[] arr) throws IOException {
+ return Snappy.uncompress(arr);
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Constants/DBConstant.java b/AtomDB/src/main/java/org/g2n/atomdb/Constants/DBConstant.java
new file mode 100644
index 0000000..91586cf
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Constants/DBConstant.java
@@ -0,0 +1,19 @@
+package org.g2n.atomdb.Constants;
+
+import org.g2n.atomdb.util.SizeOf;
+
+public class DBConstant {
+ public static final int MEMTABLE_SIZE = 2 * SizeOf.MB;
+ public static final int COMPACTED_SST_FILE_SIZE = 2 * SizeOf.MB;
+ public static final int PAGE_SIZE = COMPACTED_SST_FILE_SIZE / 2; //4096;
+ public static final int WRITER_BUFFER_SIZE = COMPACTED_SST_FILE_SIZE + SizeOf.MB;
+ public static final byte SST_VERSION = 11;
+ public static final double SPARSE_BINARY_KEY_PERCENTAGE = 0.8;
+ public static final String OBSOLETE = "OBSOLETE";
+ public static final long MARK_FILE_END = 1234567890L;
+ public static final byte CRC32C_CHECKSUM_TYPE = 32;
+ public static final byte LZ4_COMPRESSION_TYPE = 4;
+ public static final byte CLUSTER_SIZE = 10; // keys per cluster; one pointer kept per cluster, so roughly 10% of keys are indexed in memory.
+ public static final byte SHORTEST_COMMON_PREFIX_USED = 1;
+ public static final long KEY_VALUE_CACHE_SIZE = 100 * 1024 * 1024;
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Constants/Operations.java b/AtomDB/src/main/java/org/g2n/atomdb/Constants/Operations.java
new file mode 100644
index 0000000..65178ce
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Constants/Operations.java
@@ -0,0 +1,34 @@
+package org.g2n.atomdb.Constants;
+
+import org.g2n.atomdb.util.BytesConverter;
+
+public enum Operations {
+ WRITE,
+ READ,
+ UPDATE,
+ DELETE;
+
+ private static final byte write = BytesConverter.bytes("W")[0];
+ private static final byte read = BytesConverter.bytes("R")[0];
+ private static final byte update = BytesConverter.bytes("U")[0];
+ private static final byte delete = BytesConverter.bytes("D")[0];
+
+ public static Operations getOperation(byte given) {
+ if (given == write) return WRITE;
+ if (given == delete) return DELETE;
+ throw new RuntimeException("not of org.g2n.atomdb.Constants.Operations type");
+ }
+
+ public byte value() {
+ return switch (this) {
+ case WRITE -> write;
+ case READ -> read;
+ case UPDATE -> update;
+ case DELETE -> delete;
+ };
+ }
+
+ public static int bytesLength() {
+ return Byte.BYTES;
+ }
+}
diff --git a/AtomDB/src/main/java/Level/Level.java b/AtomDB/src/main/java/org/g2n/atomdb/Level/Level.java
similarity index 52%
rename from AtomDB/src/main/java/Level/Level.java
rename to AtomDB/src/main/java/org/g2n/atomdb/Level/Level.java
index 9fa6434..b38d4fd 100644
--- a/AtomDB/src/main/java/Level/Level.java
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Level/Level.java
@@ -1,4 +1,6 @@
-package Level;
+package org.g2n.atomdb.Level;
+
+import org.g2n.atomdb.util.SizeOf;
public enum Level {
LEVEL_ZERO,
@@ -19,7 +21,7 @@ public enum Level {
LEVEL_SIX,
LEVEL_SEVEN};
- public Level next() {
+ public Level nextLevel() {
return switch (this) {
case LEVEL_ZERO -> LEVEL_ONE;
case LEVEL_ONE -> LEVEL_TWO;
@@ -31,15 +33,32 @@ public Level next() {
};
}
- public Integer value() {
- return this.ordinal();
+ public Byte value() {
+ return (byte) this.ordinal();
+ }
+
+ public static Level fromID(byte id) {
+ return levels[id];
}
public static Level fromID(int id) {
return levels[id];
}
- public static long toID(Level level) {
+ public static byte toID(Level level) {
return level.value();
}
+
+ public long limitingSize() {
+ return switch (this) {
+ case LEVEL_ZERO -> 10L * SizeOf.MB;
+ case LEVEL_ONE -> 100L * SizeOf.MB;
+ case LEVEL_TWO -> 1000L * SizeOf.MB;
+ case LEVEL_THREE -> 10000L * SizeOf.MB;
+ case LEVEL_FOUR -> 100000L * SizeOf.MB;
+ case LEVEL_FIVE -> 1000000L * SizeOf.MB;
+ case LEVEL_SIX -> 10000000L * SizeOf.MB;
+ case LEVEL_SEVEN -> 100000000L * SizeOf.MB;
+ };
+ }
}
\ No newline at end of file
diff --git a/AtomDB/src/main/java/Main.java b/AtomDB/src/main/java/org/g2n/atomdb/Main.java
similarity index 70%
rename from AtomDB/src/main/java/Main.java
rename to AtomDB/src/main/java/org/g2n/atomdb/Main.java
index f591e6c..6a3c61e 100644
--- a/AtomDB/src/main/java/Main.java
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Main.java
@@ -1,22 +1,21 @@
-import com.google.common.base.Stopwatch;
-import db.DB;
-import db.DBImpl;
-import db.DBOptions;
-import org.xerial.snappy.Snappy;
+package org.g2n.atomdb;
+import org.g2n.atomdb.db.DB;
+import org.g2n.atomdb.db.DBImpl;
+import org.g2n.atomdb.db.DbOptions;
+
+import java.io.File;
import java.time.Instant;
import java.util.Arrays;
import java.util.Scanner;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-
-import static util.BytesConverter.bytes;
+import static org.g2n.atomdb.util.BytesConverter.bytes;
+// todo, think/idea: what if we store all the keys in memory, key -> file
public class Main {
// public static void main1(String[] args) throws Exception {
// int total = 10;
-// db.DBOptions opt = new db.DBOptions("ExampleDB");
-// Mem.MemtableManager memtableManager = new Mem.MemtableManager(opt);
+// org.g2n.atomdb.db.DbOptions opt = new org.g2n.atomdb.db.DbOptions("ExampleDB");
+// org.g2n.atomdb.Mem.SkipListMemtable memtableManager = new org.g2n.atomdb.Mem.SkipListMemtable(opt);
//
// for (int i = 0; i < total; i++) {
// memtableManager.put(bytes(i + ""),
@@ -38,7 +37,7 @@ public class Main {
//
// memtableManager.close();
//
-// Logs.FileChannelLogReader fileChannelLogReader = new Logs.FileChannelLogReader(opt);
+// org.g2n.atomdb.Logs.FileChannelLogReader fileChannelLogReader = new org.g2n.atomdb.Logs.FileChannelLogReader(opt);
// Map map = new ConcurrentSkipListMap<>(Arrays::compare);
// fileChannelLogReader.readWAL(map);
//
@@ -61,8 +60,8 @@ public static void main(String[] args) throws Exception {
String value = Instant.now().toString().repeat(10);
long a, b;
a = System.nanoTime();
- DBOptions opt = new DBOptions("Thread"+Thread.currentThread());
- DB db = new DBImpl(opt);
+ DbOptions opt = new DbOptions();
+ DB db = new DBImpl(new File("Thread"+Thread.currentThread()), opt);
diskAccessByThreadsSpeedTest(db, total, value);
b = System.nanoTime();
System.out.println(Thread.currentThread() + " took "+ (b -a));
@@ -80,8 +79,8 @@ public static void diskAccessByThreadsSpeedTest(DB db, int total, String value)
public static void main1(String[] args) throws Exception {
int total = 10_000;
- DBOptions opt = new DBOptions("ExampleDB");
- DB db = new DBImpl(opt);
+ DbOptions opt = new DbOptions();
+ DB db = new DBImpl(new File("ExampleDB"), opt);
// String value = "the big value".repeat(40);
String value = Instant.now().toString().repeat(10);
System.out.println("User Input");
@@ -127,13 +126,13 @@ public static void main1(String[] args) throws Exception {
}
// compressed value
// for (int i = 0; i < total; i++) {
-// db.put(bytes(i + ""),
+// org.g2n.atomdb.db.put(bytes(i + ""),
// Snappy.compress(bytes(i + value))
// );
// }
// compressed key & value
// for (int i = 0; i < total; i++) {
-// db.put(Snappy.compress(bytes(i + "")),
+// org.g2n.atomdb.db.put(Snappy.compress(bytes(i + "")),
// Snappy.compress(bytes(i + value))
// );
// }
@@ -166,7 +165,7 @@ else if (!Arrays.equals(foundValue, bytes(i + value)))
// compressed value
// for (int i = 0; i < total; i++) {
-// byte[] foundValue = db.get(bytes(i + ""));
+// byte[] foundValue = org.g2n.atomdb.db.get(bytes(i + ""));
// if (foundValue == null) System.out.println("value found null key=" + i);
// else if (!Arrays.equals(Snappy.uncompress(foundValue), bytes(i + value)))
// System.out.println("value found different");
@@ -174,7 +173,7 @@ else if (!Arrays.equals(foundValue, bytes(i + value)))
// compressed key & value
// for (int i = 0; i < total; i++) {
-// byte[] foundValue = db.get(Snappy.compress(bytes(i + "")));
+// byte[] foundValue = org.g2n.atomdb.db.get(Snappy.compress(bytes(i + "")));
// if (foundValue == null) System.out.println("value found null key=" + i);
// else if (!Arrays.equals(Snappy.uncompress(foundValue), bytes(i + value)))
// System.out.println("value found different");
@@ -187,8 +186,8 @@ else if (!Arrays.equals(foundValue, bytes(i + value)))
}
/**
* previous code
- * creating sst, size= 6092
- * sst.Header{versionId=123456789, entries=6092, sKey=0, lKey=999, binarySearchLocation=-9223372036854775808}
+ * creating org.g2n.atomdb.sst, size= 6092
+ * org.g2n.atomdb.sst.Header{versionId=123456789, entries=6092, sKey=0, lKey=999, binarySearchLocation=-9223372036854775808}
* written
* bs=4146492
* done writing took=8805milli
@@ -197,8 +196,8 @@ else if (!Arrays.equals(foundValue, bytes(i + value)))
* done reading, took=58728milli
*
* pointers writing optimization
- * creating sst, size= 6092
- * sst.Header{versionId=123456789, entries=6092, sKey=0, lKey=999, binarySearchLocation=-9223372036854775808}
+ * creating org.g2n.atomdb.sst, size= 6092
+ * org.g2n.atomdb.sst.Header{versionId=123456789, entries=6092, sKey=0, lKey=999, binarySearchLocation=-9223372036854775808}
* written
* bs=4146492
* done writing took=4279milli
@@ -207,8 +206,8 @@ else if (!Arrays.equals(foundValue, bytes(i + value)))
* done reading, took=64191milli
*
* reading pointers for binarysearch optimization
- * creating sst, size= 6092
- * sst.Header{versionId=123456789, entries=6092, sKey=0, lKey=999, binarySearchLocation=-9223372036854775808}
+ * creating org.g2n.atomdb.sst, size= 6092
+ * org.g2n.atomdb.sst.Header{versionId=123456789, entries=6092, sKey=0, lKey=999, binarySearchLocation=-9223372036854775808}
* written
* bs=4146492
* done writing took=3806milli
@@ -247,3 +246,66 @@ else if (!Arrays.equals(foundValue, bytes(i + value)))
* reading 3899
* writing 3899
*/
+
+// firefly
+//Warm Up with 50k
+//Writing... 50000
+//Reading...
+//writing time=1199177900 , reading time=1393644400
+//Writing... 1000
+//Reading...
+//writing time=20145900 , reading time=35852900
+//Writing... 10000
+//Reading...
+//writing time=256371600 , reading time=234508600
+//Writing... 100000
+//Reading...
+//writing time=1272501300 , reading time=2226460200
+//Writing... 1000000
+//Reading...
+//writing time=15237828200 , reading time=22578177500
+// 15 and 22
+
+//leveldb
+//Warm Up with 50k
+//Writing... 50000
+//Reading...
+//writing time=477759600 , reading time=152400700
+//Writing... 1000
+//Reading...
+//writing time=9359800 , reading time=1390100
+//Writing... 10000
+//Reading...
+//writing time=79710100 , reading time=14241500
+//Writing... 100000
+//Reading...
+//writing time=780837900 , reading time=276792700
+//Writing... 1000000
+//Reading...
+//writing time=10130438600 , reading time=2697346200
+
+//atomDB
+//Warm Up with 50k
+//Writing... 50000
+//Reading...
+//writing time=961983000 , reading time=7546111700
+//Writing... 1000
+//Reading...
+//writing time=4872700 , reading time=414700
+//Writing... 10000
+//Reading...
+//writing time=44763700 , reading time=4071100
+//Writing... 10000_0
+//Reading...
+//writing time=3879929800 , reading time=18500214900
+//Writing... 10000_00
+//Reading...
+//writing time=95125721700 , reading time=204819288000
+//95 sec and 3.4 minutes
+//https://github.com/fusesource/leveldbjni
+//https://www.reddit.com/r/developersIndia/comments/1aj1s2d/i_built_fireflydb_a_fast_keyvalue_storage_engine/
+//https://github.com/godcrampy/fireflydb
+
+
+// todo ideas
+// instead of compressing the key and value separately, we could compress them together while storing
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Mem/ImmutableMem.java b/AtomDB/src/main/java/org/g2n/atomdb/Mem/ImmutableMem.java
new file mode 100644
index 0000000..15e70a9
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Mem/ImmutableMem.java
@@ -0,0 +1,12 @@
+package org.g2n.atomdb.Mem;
+
+import org.g2n.atomdb.db.KVUnit;
+
+import java.util.Iterator;
+
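+/**
+ * Read-only view of a memtable that has been frozen via of() and is waiting to
+ * be flushed to an SST.
+ */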
+public interface ImmutableMem<K, V> extends Memtable<K, V> {
+ static ImmutableMem<byte[], KVUnit> of(MutableMem<byte[], KVUnit> memtable) {
+ return new ImmutableMemTable(memtable.getReadOnlyMap(), memtable.getMemTableSize());
+ }
+ Iterator<V> getKeySetIterator();
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Mem/ImmutableMemTable.java b/AtomDB/src/main/java/org/g2n/atomdb/Mem/ImmutableMemTable.java
new file mode 100644
index 0000000..1c99ab9
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Mem/ImmutableMemTable.java
@@ -0,0 +1,51 @@
+package org.g2n.atomdb.Mem;
+
+import org.g2n.atomdb.db.KVUnit;
+
+import java.util.Iterator;
+import java.util.SortedMap;
+
+public class ImmutableMemTable implements ImmutableMem<byte[], KVUnit> {
+ private final SortedMap<byte[], KVUnit> map;
+ private final long totalMemSize;
+
+ public ImmutableMemTable(SortedMap<byte[], KVUnit> unmodifiableSortedMap, long totalMemSize) {
+ this.map = unmodifiableSortedMap;
+ this.totalMemSize = totalMemSize;
+ }
+
+ @Override
+ public KVUnit get(byte[] key) {
+ return map.get(key);
+ }
+
+ @Override
+ public long getMemTableSize() {
+ return totalMemSize;
+ }
+
+ @Override
+ public int getNumberOfEntries() {
+ return map.size();
+ }
+
+ @Override
+ public byte[] getFirstKey() {
+ return map.firstKey();
+ }
+
+ @Override
+ public byte[] getLastKey() {
+ return map.lastKey();
+ }
+
+ @Override
+ public boolean isFull() {
+ return false;
+ }
+
+ @Override
+ public Iterator<KVUnit> getKeySetIterator() {
+ return map.values().iterator();
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Mem/Memtable.java b/AtomDB/src/main/java/org/g2n/atomdb/Mem/Memtable.java
new file mode 100644
index 0000000..f241ce4
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Mem/Memtable.java
@@ -0,0 +1,17 @@
+package org.g2n.atomdb.Mem;
+
+import org.g2n.atomdb.db.KVUnit;
+
+public interface Memtable<K, V> {
+ V get(K key);
+
+ long getMemTableSize();
+
+ int getNumberOfEntries();
+
+ K getFirstKey();
+
+ K getLastKey();
+
+ boolean isFull();
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Mem/MutableMem.java b/AtomDB/src/main/java/org/g2n/atomdb/Mem/MutableMem.java
new file mode 100644
index 0000000..64942a8
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Mem/MutableMem.java
@@ -0,0 +1,10 @@
+package org.g2n.atomdb.Mem;
+
+import java.util.SortedMap;
+
+public interface MutableMem<K, V> extends Memtable<K, V> {
+ void put(V kv);
+ void delete(V kv);
+
+ SortedMap<K, V> getReadOnlyMap();
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Mem/SkipListMemtable.java b/AtomDB/src/main/java/org/g2n/atomdb/Mem/SkipListMemtable.java
new file mode 100644
index 0000000..0eb0c79
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Mem/SkipListMemtable.java
@@ -0,0 +1,65 @@
+package org.g2n.atomdb.Mem;
+
+import org.g2n.atomdb.db.DbOptions;
+import org.g2n.atomdb.db.KVUnit;
+
+import java.util.Collections;
+import java.util.SortedMap;
+import java.util.concurrent.ConcurrentSkipListMap;
+
+public class SkipListMemtable implements MutableMem<byte[], KVUnit> {
+ private final ConcurrentSkipListMap<byte[], KVUnit> map;
+ private final int maxSize;
+ private int currentSize;
+
+ public SkipListMemtable(DbOptions options) {
+ this.maxSize = options.memtableSize;
+ map = new ConcurrentSkipListMap<>(options.comparator);
+ currentSize = 0;
+ }
+
+ @Override
+ public void put(KVUnit kvUnit) {
+ KVUnit previous = map.put(kvUnit.getKey(), kvUnit);
+ if (previous != null) {
+ currentSize -= previous.getUnitSize(); // the replaced entry no longer counts towards the size
+ }
+ currentSize += kvUnit.getUnitSize();
+ }
+
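+ /**
+ * A delete is recorded as a put of a tombstone KVUnit (one carrying the
+ * deletion marker); the dead entry is meant to be dropped later during compaction.
+ */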
+ @Override
+ public void delete(KVUnit kvUnit) {
+ put(kvUnit);
+ }
+
+ @Override
+ public KVUnit get(byte[] key) {
+ return map.get(key);
+ }
+
+ public long getMemTableSize() {
+ return currentSize;
+ }
+
+ @Override
+ public int getNumberOfEntries() {
+ return map.size();
+ }
+
+ @Override
+ public byte[] getFirstKey() {
+ return map.firstKey();
+ }
+
+ @Override
+ public byte[] getLastKey() {
+ return map.lastKey();
+ }
+
+ @Override
+ public boolean isFull() {
+ return currentSize >= maxSize;
+ }
+
+ @Override
+ public SortedMap<byte[], KVUnit> getReadOnlyMap() {
+ return Collections.unmodifiableSortedMap(map);
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Table/SSTFileHelper.java b/AtomDB/src/main/java/org/g2n/atomdb/Table/SSTFileHelper.java
new file mode 100644
index 0000000..59e78a0
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Table/SSTFileHelper.java
@@ -0,0 +1,31 @@
+package org.g2n.atomdb.Table;
+
+import org.g2n.atomdb.Compaction.PointerList;
+import org.g2n.atomdb.Constants.DBConstant;
+import com.google.common.hash.BloomFilter;
+import com.google.common.hash.Funnels;
+import org.g2n.atomdb.sstIo.*;
+
+import java.io.File;
+
+public class SSTFileHelper {
+
+ public static SSTInfo getSSTInfo(File file) {
+ try(var reader = new MMappedReader(file)) {
+ System.out.println("File="+file.getName());
+ var header = SSTHeader.getHeader(reader);
+ System.out.println(header);
+ reader.position(header.getFilterPosition());
+ BloomFilter<byte[]> bloomFilter = BloomFilter.readFrom(reader, Funnels.byteArrayFunnel());
+ reader.position(header.getPointersPosition());
+ PointerList list = PointerList.getPointerList(reader, 1 + (int) (Math.ceil((header.getNumberOfEntries() * 1.0) / DBConstant.CLUSTER_SIZE)));
+ if (reader.getLong() != DBConstant.MARK_FILE_END) {
+ System.out.println(list.size());
+ throw new Exception("File read wrong "+ reader.position());
+ }
+ return new SSTInfo(file, header, list, bloomFilter);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Table/SSTInfo.java b/AtomDB/src/main/java/org/g2n/atomdb/Table/SSTInfo.java
new file mode 100644
index 0000000..742634e
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Table/SSTInfo.java
@@ -0,0 +1,93 @@
+package org.g2n.atomdb.Table;
+
+import org.g2n.atomdb.Compaction.PointerList;
+import org.g2n.atomdb.Level.Level;
+import com.google.common.base.Preconditions;
+import com.google.common.hash.BloomFilter;
+import org.g2n.atomdb.sstIo.SSTHeader;
+import org.g2n.atomdb.sstIo.SSTKeyRange;
+
+import java.io.File;
+
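+/**
+ * In-memory descriptor of one SST file: header, cluster pointer list, bloom
+ * filter and key range. Ordering is newest-file-first within a level, which is
+ * what lets MergedClusterIterator resolve duplicate keys to the fresh value.
+ */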
+public class SSTInfo extends SSTHeader implements Comparable<SSTInfo> {
+ private final File sst;
+ private final PointerList pointers;
+ private final BloomFilter<byte[]> filter;
+ private final int number;
+ private final SSTKeyRange sstKeyRange;
+ private final int sstHashCode;
+
+
+ public SSTInfo(File sst, SSTHeader header, PointerList pointers, BloomFilter<byte[]> filter) {
+ super(header);
+ Preconditions.checkArgument(sst.exists());
+ this.number = Integer.parseInt(sst.getName().trim().split("_")[1].trim().replace(".sst", ""));
+ Preconditions.checkArgument(Level.fromID(sst.getName().charAt(0) - 48).equals(getLevel()));
+ this.sst = sst;
+ this.sstHashCode = sst.getAbsolutePath().hashCode();
+ this.pointers = pointers;
+ this.filter = filter;
+ this.sstKeyRange = new SSTKeyRange(pointers.getFirst().key(), pointers.getLast().key());
+ }
+
+ public SSTKeyRange getSstKeyRange() {
+ return sstKeyRange;
+ }
+
+ public File getSst() {
+ return sst;
+ }
+
+ public PointerList getPointers() {
+ return pointers;
+ }
+
+ public boolean mightContainElement(byte[] key) {
+ return filter.mightContain(key);
+ }
+
+ @Override
+ public int compareTo(SSTInfo sstInfo) {
+ if (this.getLevel().equals(sstInfo.getLevel())) {
+ return Integer.compare(sstInfo.number, this.number); // newer files have greater numbers and should come first.
+ }
+ return Byte.compare(this.getLevel().value(), sstInfo.getLevel().value());
+ }
+
+ public static File newFile(String filePath, Level level, long number) {
+ return new File(filePath + File.separator +
+ level.value() + "_" + number + ".org.g2n.atomdb.sst");
+ }
+
+ public int getFileTorsoSize() {
+ // todo change this name
+ return (int) (getPointersPosition() - pointers.getLast().position());
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ SSTInfo sstInfo = (SSTInfo) o;
+ return sst.getAbsolutePath().equals(sstInfo.sst.getAbsolutePath());
+ }
+
+ @Override
+ public int hashCode() {
+ return sstHashCode;
+ }
+
+ @Override
+ public String toString() {
+ return "SSTInfo{" +
+ "sst=" + sst +
+ ", pointers=" + pointers.toString() +
+ ", filter=" + filter +
+ ", number=" + number +
+ ", sstKeyRange=" + sstKeyRange.toString() +
+ ", sstHashCode=" + sstHashCode +
+ ", SSTHeader=" + super.toString() +
+ '}';
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/Table/Table.java b/AtomDB/src/main/java/org/g2n/atomdb/Table/Table.java
new file mode 100644
index 0000000..d7aed76
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Table/Table.java
@@ -0,0 +1,138 @@
+package org.g2n.atomdb.Table;
+
+import org.g2n.atomdb.Constants.DBConstant;
+import org.g2n.atomdb.Level.Level;
+import com.google.common.base.Preconditions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.g2n.atomdb.search.Search;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.*;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.regex.Pattern;
+
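+/**
+ * In-memory catalog of all live SSTs, grouped per level together with each
+ * level's accumulated torso size and last-compacted key; rebuilt from the db
+ * folder on startup by fillLevels().
+ */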
+public class Table {
+ private static final Logger logger = LoggerFactory.getLogger(Table.class);
+ private final Search search;
+ private final Map<Level, Integer> tableSize;
+ private final Map<Level, byte[]> lastCompactedKV;
+ private Map<Level, SortedSet<SSTInfo>> table;
+ // todo this should be set at start
+ private final AtomicLong currentFileName = new AtomicLong(0);
+ private final File dbFolder;
+ private final String fileSeparatorForSplit = Pattern.quote(File.separator);
+ public Table(File dbFolder, Search search) {
+ Preconditions.checkArgument(dbFolder.exists());
+ this.dbFolder = dbFolder;
+ this.search = search;
+ table = Map.of(Level.LEVEL_ZERO, new TreeSet<SSTInfo>(),
+ Level.LEVEL_ONE, new TreeSet<SSTInfo>(),
+ Level.LEVEL_TWO, new TreeSet<SSTInfo>(),
+ Level.LEVEL_THREE, new TreeSet<SSTInfo>(),
+ Level.LEVEL_FOUR, new TreeSet<SSTInfo>(),
+ Level.LEVEL_FIVE, new TreeSet<SSTInfo>(),
+ Level.LEVEL_SIX, new TreeSet<SSTInfo>(),
+ Level.LEVEL_SEVEN, new TreeSet<SSTInfo>());
+ tableSize = new HashMap<>() ;
+ for (Level value : Level.values()) {
+ tableSize.put(value, 0);
+ }
+ fillLevels();
+ lastCompactedKV = new HashMap<>();
+ }
+
+ private void fillLevels() {
+ long max = Long.MIN_VALUE;
+ for (File file : dbFolder.listFiles()) {
+ if (!file.getName().contains(".org.g2n.atomdb.sst") || file.getName().contains(DBConstant.OBSOLETE)) {
+ continue;
+ }
+ var split = file.getName().strip().replace(".sst", "").split("_");
+
+ max = Math.max(Long.parseLong(split[1]), max);
+ Level level = Level.fromID(split[0].charAt(0) - 48);
+
+ var sstInfo = SSTFileHelper.getSSTInfo(file);
+ addSST(level, sstInfo);
+ }
+ currentFileName.set(max != Long.MIN_VALUE ? max : 0);
+ }
+
+ public File getNewSST(Level level) throws IOException {
+ Preconditions.checkNotNull(level);
+ File file = SSTInfo.newFile(dbFolder.getAbsolutePath(), level, currentFileName.incrementAndGet());
+ if (!file.createNewFile()) {
+ throw new RuntimeException("Unable to create fileToWrite");
+ }
+ return file;
+ }
+
+ public synchronized void addSST(Level level, SSTInfo sstInfo) {
+ Preconditions.checkNotNull(level);
+ Preconditions.checkNotNull(sstInfo);
+ table.get(level).add(sstInfo);
+ tableSize.put(level, tableSize.get(level) + sstInfo.getFileTorsoSize());
+ search.addSSTInfo(sstInfo);
+ }
+
+ public synchronized void removeSST(SSTInfo sstInfo) {
+ Preconditions.checkNotNull(sstInfo.getLevel());
+ Preconditions.checkNotNull(sstInfo);
+ table.get(sstInfo.getLevel()).remove(sstInfo);
+ tableSize.put(sstInfo.getLevel(), tableSize.get(sstInfo.getLevel()) - sstInfo.getFileTorsoSize());
+ try {
+ search.removeSSTInfo(sstInfo);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ // todo
+// File obsolete = FileUtil.makeFileObsolete(sstInfo.getSst());
+// if (obsolete == null) {
+// throw new RuntimeException("unable to rename");
+// }
+// if (!obsolete.delete()) {
+// throw new RuntimeException("Unable to delete files");
+// }
+ }
+
+// private List createList() {
+// // todo improve this
+// return new ArrayList<>() {
+// public boolean add(String mt) {
+// int index = Collections.binarySearch(this, mt, (s1, s2) -> {
+// String[] pi = s1.trim().split(fileSeparatorForSplit);
+// var thisPi = pi[pi.length - 1].trim().split("_");
+//
+// pi = s2.trim().split(fileSeparatorForSplit);
+// var providedPi = pi[pi.length - 1].trim().split("_");
+//
+// if (!thisPi[0].equals(providedPi[0])) throw new RuntimeException("level mismatch");
+// long a = Long.parseLong(providedPi[1].trim().replace(".sst", ""));
+// long b = Long.parseLong(thisPi[1].trim().replace(".sst", ""));
+// return Long.compare(a, b);
+// });
+// if (index < 0) index = ~index;
+// super.add(index, mt);
+// return true;
+// }
+// };
+// }
+
+ public SortedSet<SSTInfo> getSSTInfoSet(Level level) {
+ return table.get(level);
+ }
+
+ public int getCurrentLevelSize(Level level) {
+ return tableSize.get(level);
+ }
+
+ public byte[] getLastCompactedKey(Level level) {
+ return lastCompactedKV.get(level);
+ }
+
+ public synchronized void saveLastCompactedKey(Level level, byte[] last) {
+ lastCompactedKV.put(level, last);
+ }
+}
diff --git a/AtomDB/src/main/java/Tools/DBHas.java b/AtomDB/src/main/java/org/g2n/atomdb/Tools/DBHas.java
similarity index 81%
rename from AtomDB/src/main/java/Tools/DBHas.java
rename to AtomDB/src/main/java/org/g2n/atomdb/Tools/DBHas.java
index 17d33f7..43c236d 100644
--- a/AtomDB/src/main/java/Tools/DBHas.java
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Tools/DBHas.java
@@ -1,10 +1,10 @@
-package Tools;
+package org.g2n.atomdb.Tools;
-import db.DBComparator;
-import sst.Header;
-import sst.MiddleBlock;
-import sst.ValueUnit;
-import util.BytesConverter;
+import org.g2n.atomdb.db.DBComparator;
+import org.g2n.atomdb.sst.Header;
+import org.g2n.atomdb.sst.MiddleBlock;
+import org.g2n.atomdb.sst.ValueUnit;
+import org.g2n.atomdb.util.BytesConverter;
import java.io.File;
import java.io.FileInputStream;
@@ -18,7 +18,7 @@
/**
* basically this class just takes a folder and a key
* searches all files for that key
- * reports the file name if the key is found in it, either it is with deleted marker or not
+ * reports the file name if the key is found in it, whether or not it carries the deleted marker
*/
public class DBHas {
@@ -26,10 +26,10 @@ public class DBHas {
private static final ByteBuffer byteBuffer = ByteBuffer.allocateDirect(4096);
public static void main(String[] args) throws Exception {
- var file = new File("UpdationDeletionTestDB"); // db folder
- byte[] key = BytesConverter.bytes(32856 + ""); // key to search
+ var file = new File("UpdationDeletionTestDB"); // org.g2n.atomdb.db folder
+ byte[] key = BytesConverter.bytes(32856 + ""); // key to org.g2n.atomdb.search
for (File listFile : file.listFiles()) {
- if (!listFile.getName().contains(".sst")) continue;
+ if (!listFile.getName().contains(".org.g2n.atomdb.sst")) continue;
ValueUnit found = trySearch(listFile.toPath().toString(), key);
if (found == null) {
System.out.println(listFile.getName()+" not found");
@@ -47,7 +47,7 @@ private static ValueUnit trySearch(String file, byte[] key) throws Exception {
try(FileInputStream inputStream = new FileInputStream(file);
FileChannel channel = inputStream.getChannel();
) {
- Header header = Header.getHeader(file, channel, byteBuffer);
+ Header header = null; //Header.getHeader(fileToWrite, channel, byteBuffer);
// bound check
if (Arrays.compare(key, header.getSmallestKey()) < 0) {
@@ -74,7 +74,7 @@ private static ValueUnit trySearch(String file, byte[] key) throws Exception {
return entry.getValue();
}
}catch (Exception e) {
- throw new RuntimeException("while accessing file=" + file, e);
+ throw new RuntimeException("while accessing fileToWrite=" + file, e);
}
}
private static Map.Entry performBinarySearch(FileChannel channel, List pointers, byte[] key) throws Exception {
diff --git a/AtomDB/src/main/java/Tools/TestOfChannels.java b/AtomDB/src/main/java/org/g2n/atomdb/Tools/TestOfChannels.java
similarity index 86%
rename from AtomDB/src/main/java/Tools/TestOfChannels.java
rename to AtomDB/src/main/java/org/g2n/atomdb/Tools/TestOfChannels.java
index 612d485..44bed72 100644
--- a/AtomDB/src/main/java/Tools/TestOfChannels.java
+++ b/AtomDB/src/main/java/org/g2n/atomdb/Tools/TestOfChannels.java
@@ -1,13 +1,13 @@
-package Tools;
+package org.g2n.atomdb.Tools;
-import Checksum.CheckSum;
-import Constants.DBConstant;
-import Level.Level;
-import db.DBComparator;
-import sst.Header;
-import sst.ValueUnit;
-import util.BytesConverter;
-import util.SizeOf;
+import org.g2n.atomdb.Checksum.CheckSumStatic;
+import org.g2n.atomdb.Constants.DBConstant;
+import org.g2n.atomdb.Level.Level;
+import org.g2n.atomdb.db.DBComparator;
+import org.g2n.atomdb.sst.Header;
+import org.g2n.atomdb.sst.ValueUnit;
+import org.g2n.atomdb.util.BytesConverter;
+import org.g2n.atomdb.util.SizeOf;
import java.io.File;
import java.io.FileOutputStream;
@@ -44,7 +44,7 @@ public static void main(String[] args) throws Exception {
public static void createSST(SortedMap map, ByteBuffer byteBuffer) throws Exception {
String tempFileName = Instant.now().toString().replace(':', '_') + Level.LEVEL_ZERO;
- var header = new Header(map, DBConstant.SST_VERSION, Level.LEVEL_ZERO, tempFileName);
+ var header = new Header(map.firstKey(), map.lastKey(), map.size(), DBConstant.SST_VERSION, Level.LEVEL_ZERO, tempFileName);
try(FileOutputStream outputStream = new FileOutputStream(tempFileName);
FileChannel channel = outputStream.getChannel();) {
@@ -72,7 +72,7 @@ public static void createSST(SortedMap map, ByteBuffer byteBu
public static void createSSTRand(SortedMap map) throws Exception {
String tempFileName = Instant.now().toString().replace(':', '_') + Level.LEVEL_ZERO;
- var header = new Header(map, DBConstant.SST_VERSION, Level.LEVEL_ZERO, tempFileName);
+ var header = new Header(map.firstKey(), map.lastKey(), map.size(), DBConstant.SST_VERSION, Level.LEVEL_ZERO, tempFileName);
try(RandomAccessFile channel = new RandomAccessFile(
tempFileName, "rw")) {
@@ -110,10 +110,10 @@ public static void writeHeader(Header header, FileChannel channel, ByteBuffer by
.putLong(header.getEntries())
.putLong(header.getSmallestKey().length)
.put(header.getSmallestKey())
- .putLong(CheckSum.compute(header.getSmallestKey()))
+ .putLong(CheckSumStatic.compute(header.getSmallestKey()))
.putLong(header.getLargestKey().length)
.put(header.getLargestKey())
- .putLong(CheckSum.compute(header.getLargestKey()))
+ .putLong(CheckSumStatic.compute(header.getLargestKey()))
.flip();
channel.position(0); // moved to 0 position
channel.write(byteBuffer);
@@ -126,10 +126,10 @@ public static void writeHeaderRand(Header header, RandomAccessFile channel) thro
channel.writeLong(header.getEntries());
channel.writeLong(header.getSmallestKey().length);
channel.write(header.getSmallestKey());
- channel.writeLong(CheckSum.compute(header.getSmallestKey()));
+ channel.writeLong(CheckSumStatic.compute(header.getSmallestKey()));
channel.writeLong(header.getLargestKey().length);
channel.write(header.getLargestKey());
- channel.writeLong(CheckSum.compute(header.getLargestKey()));
+ channel.writeLong(CheckSumStatic.compute(header.getLargestKey()));
}
public static void writeBS(Header header, FileChannel channel, ByteBuffer byteBuffer, long binarySearchLocation) throws IOException {
@@ -151,9 +151,9 @@ public static void writeBlock(FileChannel channel, ByteBuffer byteBuffer, Map.En
if (entry.getValue().getIsDelete() != ValueUnit.DELETE) {
byteBuffer.putLong(entry.getValue().getValue().length)
.put(entry.getValue().getValue())
- .putLong(CheckSum.compute(entry.getKey(), entry.getValue().getValue()));
+ .putLong(CheckSumStatic.compute(entry.getKey(), entry.getValue().getValue()));
} else {
- byteBuffer.putLong(CheckSum.compute(entry.getKey()));
+ byteBuffer.putLong(CheckSumStatic.compute(entry.getKey()));
}
byteBuffer.flip();
channel.write(byteBuffer);
@@ -167,9 +167,9 @@ public static void writeBlockRand(RandomAccessFile channel, Map.Entry myPointers = new ArrayList<>((int) header.getEntries());
@@ -113,7 +113,7 @@ public static Map.Entry readKeyValue(FileChannel channel, Byt
verifyChecksum(byteBuffer, channel, key, value);
- return Map.entry(key, new ValueUnit(value, isDelete));
+ return Map.entry(key, new ValueUnit(value, (byte) isDelete));
}
private static void verifyChecksum(ByteBuffer byteBuffer, FileChannel channel, byte[] key, byte[] value) throws Exception {
@@ -122,8 +122,8 @@ private static void verifyChecksum(ByteBuffer byteBuffer, FileChannel channel, b
channel.read(byteBuffer);
byteBuffer.flip();
long checksum = byteBuffer.getLong();
- if (CheckSum.compute(key, value) != checksum) {
- throw new Exception("Checksum not matching");
+ if (CheckSumStatic.compute(key, value) != checksum) {
+ throw new Exception("AtomChecksum not matching");
}
}
@@ -133,8 +133,8 @@ private static void verifyChecksum0(ByteBuffer byteBuffer, FileChannel channel,
channel.read(byteBuffer);
byteBuffer.flip();
long checksum = byteBuffer.getLong();
- if (CheckSum.compute(key) != checksum) {
- throw new Exception("Checksum not matching");
+ if (CheckSumStatic.compute(key) != checksum) {
+ throw new Exception("AtomChecksum not matching");
}
}
}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/db/DB.java b/AtomDB/src/main/java/org/g2n/atomdb/db/DB.java
new file mode 100644
index 0000000..4b9dffc
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/db/DB.java
@@ -0,0 +1,14 @@
+package org.g2n.atomdb.db;
+
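+/**
+ * Minimal key/value store contract.
+ * Illustrative usage (a sketch; assumes the DBImpl and DbOptions classes
+ * introduced elsewhere in this change):
+ *
+ *   DB db = new DBImpl(new File("exampleDB"), new DbOptions());
+ *   db.put(key, value);
+ *   byte[] v = db.get(key); // null when the key is absent or deleted
+ *   db.delete(key);
+ *   db.close();
+ */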
+public interface DB{
+
+ void put(byte[] key, byte[] value) throws Exception;
+
+ byte[] get(byte[] key) throws Exception;
+
+ void delete(byte[] key) throws Exception;
+
+ void close() throws Exception;
+
+ void destroy();
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/db/DBComparator.java b/AtomDB/src/main/java/org/g2n/atomdb/db/DBComparator.java
new file mode 100644
index 0000000..9d3a541
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/db/DBComparator.java
@@ -0,0 +1,20 @@
+package org.g2n.atomdb.db;
+
+import java.util.Arrays;
+import java.util.Comparator;
+
+public class DBComparator {
+ public static final Comparator<byte[]> byteArrayComparator = DBComparator::compare;
+
+ private static int compare(byte[] left, byte[] right) {
+ return Arrays.compare(left, right);
+// int minLength = Math.min(left.length, right.length);
+// for (int i = 0; i < minLength; i++) {
+// int result = left[i] - right[i];
+// if (result != 0) {
+// return result;
+// }
+// }
+// return left.length - right.length;
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/db/DBImpl.java b/AtomDB/src/main/java/org/g2n/atomdb/db/DBImpl.java
new file mode 100644
index 0000000..ef0b473
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/db/DBImpl.java
@@ -0,0 +1,124 @@
+package org.g2n.atomdb.db;
+
+import org.g2n.atomdb.Compaction.Compactor;
+import org.g2n.atomdb.Constants.Operations;
+import org.g2n.atomdb.Level.Level;
+import org.g2n.atomdb.Logs.WALManager;
+import org.g2n.atomdb.Mem.ImmutableMem;
+import org.g2n.atomdb.Mem.SkipListMemtable;
+import org.g2n.atomdb.Table.Table;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.g2n.atomdb.search.Search;
+import org.g2n.atomdb.util.Util;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Objects;
+
+public class DBImpl implements DB{
+ private final Logger logger = LoggerFactory.getLogger(getClass());
+ private final File dbFolder;
+ private final WALManager walManager;
+ private final Compactor compactor;
+ private final Search search;
+ private final DbOptions options;
+ private SkipListMemtable memtable;
+ private Table table;
+
+ public DBImpl(File dbFolder, DbOptions dbOptions) throws Exception {
+ createDB(dbFolder);
+ this.dbFolder = dbFolder;
+ this.walManager = new WALManager(dbFolder.getAbsolutePath());
+ this.memtable = new SkipListMemtable(dbOptions);
+ this.search = new Search();
+ this.table = new Table(dbFolder, search);
+ this.compactor = new Compactor(table, dbOptions);
+ this.options = dbOptions;
+ walManager.restore(this);
+ }
+
+ private void createDB(File dbFolder) {
+ if (dbFolder.isDirectory() || dbFolder.mkdirs()) {
+ new File(dbFolder, "ATOM_DB");
+ } else {
+ throw new RuntimeException("Unable to create org.g2n.atomdb.db folder");
+ }
+ }
+
+
+ @Override
+ public void put(byte[] key, byte[] value) throws Exception {
+ var kvUnit = new KVUnit(key, value);
+
+ walManager.log(Operations.WRITE, kvUnit);
+ memtable.put(kvUnit);
+
+ if (memtable.isFull()){
+ handleMemtableFull();
+ }
+ }
+
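+ /*
+ * Rollover: freeze the full memtable, persist it as a level-0 SST, keep it
+ * searchable as the secondary memtable, rotate the WAL, start a fresh
+ * memtable, then give level 0 a chance to compact.
+ */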
+ private void handleMemtableFull() throws Exception {
+ var immutableMem = ImmutableMem.of(memtable);
+
+ compactor.persistLevel0(immutableMem);
+ search.addSecondaryMemtable(immutableMem);
+
+ walManager.rotateLog();
+ memtable = new SkipListMemtable(options); // todo: we could maintain more than one immutable memtable
+
+ compactor.tryCompaction(Level.LEVEL_ZERO);
+ }
+
+ @Override
+ public byte[] get(byte[] key) throws Exception {
+ Objects.requireNonNull(key);
+ var kvUnit = memtable.get(key);
+ if (kvUnit == null) {
+ kvUnit = search.findKey(key);
+ }
+ return kvUnit == null || kvUnit.isDeleted() ? null : kvUnit.getValue();
+ }
+
+ @Override
+ public void delete(byte[] key) throws Exception {
+ KVUnit kvUnit = new KVUnit(key);
+ walManager.log(Operations.DELETE, kvUnit);
+ memtable.delete(kvUnit);
+ }
+
+ @Override
+ public void close() throws Exception {
+ try {
+ walManager.close();
+ search.close();
+ compactor.close();
+ } catch (IOException e) {
+ logger.error("Failed to close resources: {}", e.getMessage(), e);
+ throw e;
+ }
+ }
+
+ @Override
+ public void destroy() {
+ validateFolder(dbFolder);
+ deleteFolderContents(dbFolder);
+ }
+
+ private void validateFolder(File folder) {
+ Util.requireTrue(folder.exists(), "Folder " + folder.getPath() + " does not exist");
+ Util.requireTrue(folder.isDirectory(), "File " + folder.getPath() + " is not a folder");
+ }
+
+ private void deleteFolderContents(File folder) {
+ for (File file : Objects.requireNonNull(folder.listFiles())) {
+ if (!file.delete()) {
+ logger.warn("Unable to delete fileToWrite: {}", file.getAbsolutePath());
+ }
+ }
+ if (!folder.delete()) {
+ logger.warn("Unable to delete folder: {}", folder.getPath());
+ }
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/db/DbOptions.java b/AtomDB/src/main/java/org/g2n/atomdb/db/DbOptions.java
new file mode 100644
index 0000000..c9dbd8b
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/db/DbOptions.java
@@ -0,0 +1,30 @@
+package org.g2n.atomdb.db;
+
+import org.g2n.atomdb.Compression.DataCompressionStrategy;
+import org.g2n.atomdb.Compression.Lz4Compression;
+import org.g2n.atomdb.Constants.DBConstant;
+import sun.misc.Unsafe;
+
+import java.lang.reflect.Field;
+import java.util.Comparator;
+
+public final class DbOptions {
+
+ public int pageSize;
+ public int memtableSize = DBConstant.MEMTABLE_SIZE;
+ public Comparator<byte[]> comparator = DBComparator.byteArrayComparator;
+
+ public DataCompressionStrategy dataCompressionStrategy = Lz4Compression.getInstance();
+
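+ // Probes the OS page size via sun.misc.Unsafe purely as a tuning hint;
+ // falls back to 4096 when the field is inaccessible.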
+ public DbOptions() {
+ try {
+ Field f = Unsafe.class.getDeclaredField("theUnsafe");
+ f.setAccessible(true);
+ Unsafe unsafe = (Unsafe) f.get(null);
+ pageSize = unsafe.pageSize();
+ } catch (IllegalAccessException | NoSuchFieldException ignored) {
+ pageSize = 4096;
+ }
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/db/ExpandingByteBuffer.java b/AtomDB/src/main/java/org/g2n/atomdb/db/ExpandingByteBuffer.java
new file mode 100644
index 0000000..62d5723
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/db/ExpandingByteBuffer.java
@@ -0,0 +1,110 @@
+package org.g2n.atomdb.db;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+
+public class ExpandingByteBuffer extends OutputStream {
+ private static final int DEFAULT_CAPACITY = 1024;
+
+ private ByteBuffer buffer;
+
+ public ExpandingByteBuffer() {
+ this.buffer = ByteBuffer.allocateDirect(DEFAULT_CAPACITY);
+ }
+
+ public ExpandingByteBuffer put(byte[] data) {
+ makeSpaceIfNeeded(data.length);
+ buffer.put(data);
+ return this;
+ }
+
+ public ExpandingByteBuffer put(byte data) {
+ makeSpaceIfNeeded(1);
+ buffer.put(data);
+ return this;
+ }
+
+ public ExpandingByteBuffer putInt(int data) {
+ makeSpaceIfNeeded(4);
+ buffer.putInt(data);
+ return this;
+ }
+
+ public ExpandingByteBuffer putLong(long data) {
+ makeSpaceIfNeeded(Long.BYTES);
+ buffer.putLong(data);
+ return this;
+ }
+
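+ // Grows geometrically (2x), or just enough for the pending write if that is
+ // larger, copying the existing contents into a fresh direct buffer.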
+ private void makeSpaceIfNeeded(int requiredSize) {
+ if (buffer.remaining() < requiredSize) {
+ int newCapacity = Math.max(buffer.capacity() * 2, buffer.position() + requiredSize);
+ ByteBuffer newBuffer = ByteBuffer.allocateDirect(newCapacity);
+
+ buffer.flip();
+ newBuffer.put(buffer);
+
+ buffer = newBuffer;
+ }
+ }
+
+ public ByteBuffer getBuffer() {
+ // Returns the buffer in its current state
+ return buffer;
+ }
+
+ public byte[] toByteArray() {
+ // Prepare buffer for reading and convert to byte array
+ buffer.flip();
+ byte[] byteArray = new byte[buffer.remaining()];
+ buffer.get(byteArray);
+ return byteArray;
+ }
+
+ public void clear() {
+ // Reset the buffer
+ buffer.clear();
+ }
+
+ public ExpandingByteBuffer flip() {
+ buffer.flip();
+ return this;
+ }
+
+ public ExpandingByteBuffer put(byte[] src, int offset, int length) {
+ makeSpaceIfNeeded(length);
+ buffer.put(src, offset, length);
+ return this;
+ }
+
+ public int position() {
+ return this.buffer.position();
+ }
+
+ public ExpandingByteBuffer position(int position) {
+ this.buffer.position(position);
+ return this;
+ }
+
+ public long remaining() {
+ return this.buffer.remaining();
+ }
+
+ @Override
+ public void write(int b) throws IOException {
+ makeSpaceIfNeeded(Byte.BYTES);
+ buffer.put((byte) b);
+ }
+
+ @Override
+ public void write(byte[] b) throws IOException {
+ makeSpaceIfNeeded(b.length);
+ buffer.put(b);
+ }
+
+ @Override
+ public void write(byte[] b, int off, int len) throws IOException {
+ makeSpaceIfNeeded(len); // len bytes will be written; off is an index into b, not a count
+ buffer.put(b, off, len);
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/db/GlobalByteBuffer.java b/AtomDB/src/main/java/org/g2n/atomdb/db/GlobalByteBuffer.java
new file mode 100644
index 0000000..77f0e7c
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/db/GlobalByteBuffer.java
@@ -0,0 +1,16 @@
+package org.g2n.atomdb.db;
+
+import java.nio.ByteBuffer;
+
+public class GlobalByteBuffer {
+ private static final int DEFAULT_BUFFER_SIZE = 1024;
+ private static final ThreadLocal<ByteBuffer> bufferThreadLocal = ThreadLocal.withInitial(() ->
+ ByteBuffer.allocateDirect(DEFAULT_BUFFER_SIZE)
+ );
+
+ public static ByteBuffer getBuffer() {
+ ByteBuffer buffer = bufferThreadLocal.get();
+ buffer.clear();
+ return buffer;
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/db/KVUnit.java b/AtomDB/src/main/java/org/g2n/atomdb/db/KVUnit.java
new file mode 100644
index 0000000..df60344
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/db/KVUnit.java
@@ -0,0 +1,114 @@
+package org.g2n.atomdb.db;
+
+import java.nio.MappedByteBuffer;
+import java.util.Arrays;
+import java.util.Objects;
+
+public class KVUnit implements Comparable<KVUnit> {
+ private final byte[] key;
+ private final byte[] value;
+ private final DeletionStatus isDeleted;
+ private final int unitSize;
+
+
+ public enum DeletionStatus {
+ DELETED, NOT_DELETED;
+
+ public static boolean isDeleted(byte isDeleted) {
+ return switch (isDeleted) {
+ case 1 -> true;
+ case 0 -> false;
+ default -> throw new IllegalStateException("Unexpected value: " + isDeleted);
+ };
+ }
+
+ public byte value() {
+ return switch (this) {
+ case DELETED -> 1;
+ case NOT_DELETED -> 0;
+ };
+ }
+
+ public static DeletionStatus of(byte given) {
+ if (given == DELETED.value()) return DELETED;
+ if (given == NOT_DELETED.value()) return NOT_DELETED;
+ throw new RuntimeException("not of DeletionStatus type");
+ }
+ }
+
+ public KVUnit(byte[] key) {
+ this(key, DeletionStatus.DELETED, null);
+ }
+
+ public KVUnit(byte[] key, byte[] value) {
+ this(key, DeletionStatus.NOT_DELETED, value);
+ }
+
+ private KVUnit(byte[] key, DeletionStatus isDeleted, byte[] value) {
+ this.key = Objects.requireNonNull(key, "Key cannot be null");
+ this.isDeleted = isDeleted;
+ this.value = value;
+ this.unitSize = calculateUnitSize();
+ }
+
+ public byte[] getKey() {
+ return key;
+ }
+
+ public byte[] getValue() {
+ return value;
+ }
+
+ private int calculateUnitSize() {
+ return key.length + (value != null ? value.length : 1 /*isDeleted marker*/);
+ }
+
+ public int getUnitSize() {
+ return unitSize;
+ }
+
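+ /*
+ * Serialized layout (written by uploadKV, parsed by read):
+ * [restLen:int][keyLen:int][key][tombstone:byte][valueLen:int][value]
+ * restLen counts every byte after itself; the valueLen/value pair is
+ * omitted for deleted entries.
+ */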
+ public void uploadKV(ExpandingByteBuffer buffer) {
+ buffer.putInt(Integer.BYTES + key.length + Byte.BYTES + (value != null ? Integer.BYTES + value.length : 0))
+ .putInt(key.length)
+ .put(key)
+ .put(isDeleted.value());
+ if (DeletionStatus.NOT_DELETED == isDeleted) {
+ buffer.putInt(value.length).put(value);
+ }
+ }
+
+ public static KVUnit read(MappedByteBuffer reader) {
+ int totalKvLength = reader.getInt(); // consumed to advance past the length prefix
+ int kLength = reader.getInt();
+ var k = new byte[kLength];
+ reader.get(k);
+ var isDeleted = DeletionStatus.of(reader.get());
+ if (DeletionStatus.NOT_DELETED == isDeleted) {
+ int vLength = reader.getInt();
+ var v = new byte[vLength];
+ reader.get(v);
+ return new KVUnit(k, v);
+ }
+ return new KVUnit(k);
+ }
+
+ public boolean isDeleted() {
+ return isDeleted == DeletionStatus.DELETED;
+ }
+
+ public DeletionStatus getDeletedStatus() {
+ return this.isDeleted;
+ }
+
+ @Override
+ public String toString() {
+ String keyString = key.length > 10 ? Arrays.toString(Arrays.copyOf(key, 10)) + "..." : Arrays.toString(key);
+ String valueString = value != null ? (value.length > 10 ? Arrays.toString(Arrays.copyOf(value, 10)) + "..." : Arrays.toString(value)) : "null";
+ return String.format("KVUnit{key=%s, value=%s, isDeleted=%s}", keyString, valueString, isDeleted);
+ }
+
+ @Override
+ public int compareTo(KVUnit kvUnit) {
+ return DBComparator.byteArrayComparator.compare(this.key, kvUnit.key);
+ }
+}
\ No newline at end of file
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/search/Checksums.java b/AtomDB/src/main/java/org/g2n/atomdb/search/Checksums.java
new file mode 100644
index 0000000..4f90db0
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/search/Checksums.java
@@ -0,0 +1,3 @@
+package org.g2n.atomdb.search;
+
+public record Checksums(long[] checksums) { }
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/search/Finder.java b/AtomDB/src/main/java/org/g2n/atomdb/search/Finder.java
new file mode 100644
index 0000000..8f0a822
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/search/Finder.java
@@ -0,0 +1,127 @@
+package org.g2n.atomdb.search;
+
+import org.g2n.atomdb.Compaction.Pointer;
+import org.g2n.atomdb.Compaction.PointerList;
+import org.g2n.atomdb.Compression.Lz4Compression;
+import org.g2n.atomdb.Constants.DBConstant;
+import com.github.benmanes.caffeine.cache.Cache;
+import org.g2n.atomdb.db.DBComparator;
+import org.g2n.atomdb.db.KVUnit;
+import org.g2n.atomdb.sstIo.MMappedReader;
+import org.g2n.atomdb.sstIo.ChannelBackedReader;
+import java.io.File;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * we should move this code into its respective classes.
+ */
+public class Finder implements AutoCloseable{
+ private final File file;
+ private final PointerList pointerList;
+ private final MMappedReader reader;
+ private final Cache<Pointer, Checksums> checksumsCache;
+
+ public Finder(File file, PointerList pointerList, Cache<Pointer, Checksums> checksumsCache) throws IOException {
+ this.file = file;
+ // todo
+ // we need not map the whole file; rather, map only the required portion, i.e. we don't need the header and pointers region
+ this.reader = new MMappedReader(file);
+ this.pointerList = pointerList;
+ this.checksumsCache = checksumsCache;
+ }
+
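+ /*
+ * Cluster layout assumed by find()/getLocation(), starting at a pointer's position:
+ * [CLUSTER_SIZE key checksums, one long each]
+ * [CLUSTER_SIZE + 1 block offsets, one int each]
+ * [common prefix length, one int]
+ * [LZ4-compressed entries: keyLen, key, tombstone, valueLen, value]
+ */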
+ public KVUnit find(byte[] key, long keyChecksum) throws IOException {
+ Pointer pointer = getPointer(key);
+ reader.position((int) pointer.position());
+
+ Checksums check = checksumsCache.get(pointer, position -> {
+ var checksums = new long[DBConstant.CLUSTER_SIZE];
+ for (int i = 0; i < DBConstant.CLUSTER_SIZE; i++) {
+ checksums[i] = reader.getLong();
+ }
+ return new Checksums(checksums);
+ });
+
+ int index = getIndex(check, keyChecksum);
+ if (index == -1) {
+ return null;
+ }
+
+ return getLocation(key, keyChecksum, index, (int) pointer.position());
+ }
+
+ private int getIndex(Checksums check, long keyChecksum) {
+ long[] checksums = check.checksums();
+ for (int i = 0; i < checksums.length; i++) {
+ if (keyChecksum == checksums[i]) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ private KVUnit getLocation(byte[] key, long keyChecksum, int index, int initialPosition) throws IOException {
+ if (index == -1) {
+ return null;
+ }
+
+ // directly moving to location block
+ reader.position( initialPosition + Long.BYTES * DBConstant.CLUSTER_SIZE + index * Integer.BYTES);
+ int keyLocation = reader.getInt();
+ int nextKeyLocation = reader.getInt();
+ reader.position(initialPosition + Long.BYTES * DBConstant.CLUSTER_SIZE + (DBConstant.CLUSTER_SIZE + 1) * Integer.BYTES);
+ // todo use the common prefix, to maybe validate.
+ int commonPrefix = reader.getInt();
+ reader.position((int) (initialPosition +
+ Long.BYTES * DBConstant.CLUSTER_SIZE +
+ (DBConstant.CLUSTER_SIZE + 1) * Integer.BYTES
+ + Integer.BYTES + keyLocation));
+
+ int blockSizeToRead = nextKeyLocation - keyLocation;
+ var block = new byte[blockSizeToRead];
+ reader.getBytes(block);
+ byte[] decompress = Lz4Compression.getInstance().decompress(block);
+ var wrap = ByteBuffer.wrap(decompress);
+ int keyLength = wrap.getInt();
+ wrap.position(wrap.position() + keyLength);
+ byte isDeleted = wrap.get();
+ if (KVUnit.DeletionStatus.isDeleted(isDeleted)) return new KVUnit(key);
+ int valueLength = wrap.getInt();
+ byte[] bytes = new byte[valueLength];
+ wrap.get(bytes);
+ return new KVUnit(key, bytes);
+ }
+
+ private Pointer getPointer(byte[] key) {
+ int index = getCluster(key);
+ if (index == pointerList.size() - 1) {
+ index--; // the last pointer presumably marks the file end; step back to the final cluster
+ }
+ return pointerList.get(index);
+ }
+
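+ /**
+ * Binary search over pointer keys. On an exact hit returns that index; on a
+ * miss returns h, the index of the greatest pointer key smaller than the
+ * given key (the cluster that could contain it), or -1 if the key precedes
+ * every pointer.
+ */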
+ private int getCluster(byte[] key) {
+ int l = 0, h = pointerList.size() - 1;
+ while(l <= h) {
+ int mid = (l + h) >>> 1;
+ Pointer midPointer = pointerList.get(mid);
+ int compare = DBComparator.byteArrayComparator.compare(midPointer.key(), key);
+ if (compare < 0){
+ l = mid + 1;
+ }
+ else if (compare > 0) {
+ h = mid - 1;
+ }
+ else {
+ return mid;
+ }
+ }
+ return h;
+ }
+
+ @Override
+ public void close() throws Exception {
+ reader.close();
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/search/Search.java b/AtomDB/src/main/java/org/g2n/atomdb/search/Search.java
new file mode 100644
index 0000000..42261cf
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/search/Search.java
@@ -0,0 +1,134 @@
+package org.g2n.atomdb.search;
+
+import org.g2n.atomdb.Checksum.Crc32cChecksum;
+import org.g2n.atomdb.Compaction.Pointer;
+import org.g2n.atomdb.Compaction.Validator;
+import org.g2n.atomdb.Constants.DBConstant;
+import org.g2n.atomdb.Mem.ImmutableMem;
+import org.g2n.atomdb.Mem.ImmutableMemTable;
+import org.g2n.atomdb.Table.SSTInfo;
+import com.github.benmanes.caffeine.cache.Cache;
+import com.github.benmanes.caffeine.cache.Caffeine;
+import com.github.benmanes.caffeine.cache.LoadingCache;
+import org.g2n.atomdb.db.DBComparator;
+import org.g2n.atomdb.db.KVUnit;
+import org.g2n.atomdb.util.MaxMinAvg;
+
+import java.io.IOException;
+import java.util.*;
+import java.util.concurrent.ConcurrentSkipListSet;
+
+/**
+ * we should have a class which tells us, for a given key:
+ * 1. whether the data exists
+ * 2. whether the data is a tombstone (deleted)
+ * 3. the value itself
+ * Note: commit 1beafb0b371cf3b51732a84d4d82acd7b4926ba1 was faster:
+ * https://github.com/theuntamed839/AtomDB/commit/1beafb0b371cf3b51732a84d4d82acd7b4926ba1
+ */
+
+public class Search implements AutoCloseable{
+
+ private final LoadingCache<byte[], KVUnit> kvCache;
+ private final LoadingCache<SSTInfo, Finder> readerCache;
+ private final Cache<Pointer, Checksums> checksumsCache;
+ private final HashMap<Integer, Integer> removeMeAfterTestMap;
+ private ImmutableMem secondaryMem;
+ private final SortedSet<SSTInfo> fileList = new ConcurrentSkipListSet<>();
+ MaxMinAvg maker = new MaxMinAvg();
+
+ public Search() {
+ this.kvCache = Caffeine.newBuilder()
+ .maximumWeight(DBConstant.KEY_VALUE_CACHE_SIZE)
+ .weigher((byte[] k, KVUnit v) -> k.length + v.getUnitSize())
+ .build(this::findKey);
+ this.readerCache = Caffeine.newBuilder()
+ .maximumSize(900)
+ .build(this::getFinder);
+ this.checksumsCache = Caffeine.newBuilder()
+ .maximumWeight(500 * 1024 * 1024)
+ .weigher((Pointer pos, Checksums check) -> DBConstant.CLUSTER_SIZE * Long.BYTES)
+ .build();
+ this.secondaryMem = new ImmutableMemTable(new TreeMap<>(DBComparator.byteArrayComparator), 0);
+ this.removeMeAfterTestMap = new HashMap<>();
+ }
+
+ public void addSSTInfo(SSTInfo info) {
+ fileList.add(info);
+ }
+
+ public void removeSSTInfo(SSTInfo info) throws Exception {
+ fileList.remove(info);
+ Finder exists = readerCache.getIfPresent(info);
+ if (exists != null) {
+ exists.close();
+ }
+ readerCache.invalidate(info);
+ }
+
+ private Finder getFinder(SSTInfo sst) throws IOException {
+ return new Finder(sst.getSst(), sst.getPointers(), checksumsCache);
+ }
+
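+ /*
+ * Lookup order: the secondary (immutable) memtable is consulted first; then
+ * every SST whose key range covers the key and whose membership filter says
+ * the key might be present is probed via its cached Finder.
+ */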
+ public KVUnit findKey(byte[] key) throws IOException {
+ KVUnit kvUnit = secondaryMem.get(key);
+ if (kvUnit != null) {
+ return kvUnit;
+ }
+ Crc32cChecksum crc32cChecksum = new Crc32cChecksum();
+ long keyChecksum = crc32cChecksum.compute(key);
+
+ int fileRequiredToSearch = 0;
+
+ for (SSTInfo sstInfo : fileList) {
+ if (sstInfo.getSstKeyRange().inRange(key) && sstInfo.mightContainElement(key)) {
+ fileRequiredToSearch++;
+// System.out.println("trying sstInfo="+sstInfo);
+// Validator.validateSSTBasedOnSearch(sstInfo, key);
+ Finder finder = readerCache.get(sstInfo);
+ var unit = finder.find(key, keyChecksum);
+ if (unit != null) {
+ removeMeAfterTestMap.put(fileRequiredToSearch, removeMeAfterTestMap.getOrDefault(fileRequiredToSearch, 0) + 1);
+ return unit;
+ }
+ }
+ }
+ removeMeAfterTestMap.put(fileRequiredToSearch, removeMeAfterTestMap.getOrDefault(fileRequiredToSearch, 0) + 1);
+// System.out.println(this.getClass().getName() + " :Key not found in any file");
+// fileList.forEach(each -> {
+// try {
+// Validator.validateSSTBasedOnSearch(each, key);
+// } catch (IOException e) {
+// throw new RuntimeException(e);
+// }
+// });
+ return null;
+ }
+
+ public void addSecondaryMemtable(ImmutableMem mem) {
+ this.secondaryMem = mem;
+ for (Iterator it = mem.getKeySetIterator(); it.hasNext(); ) {
+ byte[] key = it.next().getKey();
+ kvCache.invalidate(key);
+ }
+ }
+
+
+ @Override
+ public void close() throws Exception {
+ for (Map.Entry<Integer, Integer> entry : removeMeAfterTestMap.entrySet()) {
+ System.out.println("numberOfFilesRequiredToSearch="+entry.getKey()+" numberOfTimesThisHappened="+entry.getValue());
+ }
+ for (Finder value : readerCache.asMap().values()) {
+ value.close();
+ }
+ kvCache.invalidateAll();
+ readerCache.invalidateAll();
+ kvCache.cleanUp();
+ readerCache.cleanUp();
+ }
+
+ public void printActiveFiles() {
+ fileList.forEach(each -> System.out.println("Active fileToWrite="+each.getSst().getName()));
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/sst/Header.java b/AtomDB/src/main/java/org/g2n/atomdb/sst/Header.java
new file mode 100644
index 0000000..250fa43
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/sst/Header.java
@@ -0,0 +1,177 @@
+package org.g2n.atomdb.sst;
+
+import org.g2n.atomdb.Checksum.CheckSumStatic;
+import org.g2n.atomdb.Level.Level;
+import org.g2n.atomdb.db.DBComparator;
+import org.g2n.atomdb.sstIo.ReaderInterface;
+import org.g2n.atomdb.sstIo.PrimitiveWriter;
+import org.g2n.atomdb.util.SizeOf;
+import org.g2n.atomdb.util.Util;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+
+/**
+ * closing the header is important as it provides easy debugging and safety
+ */
+
+public class Header implements AutoCloseable{
+ private static final long BS_POSITION = 2;
+ private static final int HEADER_POSITION = 0;
+ private final byte versionId;
+ private String fileName;
+ private int entries = Integer.MIN_VALUE;
+ private final byte[] sKey;
+ private final byte[] lKey;
+ private long binarySearchLocation = Long.MIN_VALUE;
+ private final Level level;
+ /**
+ * isWritten guards against headers that are created but never persisted.
+ * A header created for writing starts out false and flips to true once
+ * write() has actually run; a header created while reading is marked true
+ * immediately, since nothing needs to be persisted.
+ *
+ * On close() this flag is checked together with entries and
+ * binarySearchLocation: if the flag is still false, or either field still
+ * holds its MIN_VALUE sentinel, the header never served its purpose and an
+ * exception is thrown.
+ */
+ private boolean isWritten = false;
+
+ public Header(byte versionId, Level level, long bs, int entries, byte[] sKey, byte[] lKey, long checksum) {
+ this.versionId = versionId;
+ this.level = level;
+ this.binarySearchLocation = bs;
+ this.entries = entries;
+ this.sKey = sKey;
+ this.lKey = lKey;
+ validation(checksum);
+ }
+
+ public Header(byte[] firstKey, byte[] lastKey, int size, byte sstVersion, Level levelZero, String tempFileName) {
+ this.versionId = sstVersion;
+ this.entries = size;
+ this.sKey = firstKey;
+ this.lKey = lastKey;
+ this.level = levelZero;
+ this.fileName = tempFileName;
+ this.isWritten = true; // this constructor is used when reading, so no write is required; mark as already written
+ }
+
+ private void validation(long checksum) {
+ if (checksum != getKeysChecksum()) {
+ throw new RuntimeException("Mismatch of checksum");
+ }
+ Util.requireTrue(
+ DBComparator.byteArrayComparator.compare(
+ sKey, lKey
+ ) < 0, "found SmallVal>LargerVAl " + this);
+ }
+
+ @Override
+ public String toString() {
+ return "org.g2n.atomdb.sst.Header{" +
+ "versionId=" + versionId +
+ ", entries=" + entries +
+ ", sKey=" + new String(sKey) +
+ ", lKey=" + new String(lKey) +
+ ", binarySearchLocation=" + binarySearchLocation +
+ ", org.g2n.atomdb.Level.org.g2n.atomdb.Level=" + level +
+ ", FileName=" + fileName +
+ '}';
+ }
+
+ public long getVersionId() {
+ return versionId;
+ }
+
+ public Level getLevel() {
+ return level;
+ }
+
+ public int getEntries() {
+ return entries;
+ }
+
+ public byte[] getSmallestKey() {
+ return sKey;
+ }
+
+ public byte[] getLargestKey() {
+ return lKey;
+ }
+
+ public long getBinarySearchLocation() {
+ return binarySearchLocation;
+ }
+
+ public static Header getHeader(ReaderInterface sstReader) throws Exception {
+ var byteBuffer = sstReader.readSize(new byte[18], HEADER_POSITION, 18);
+ byte verId = byteBuffer.get();
+ byte levelID = byteBuffer.get();
+ long bs = byteBuffer.getLong();
+ int entries = byteBuffer.getInt();
+ int nextBlockLength = byteBuffer.getInt();
+
+ byteBuffer = sstReader.readSize(new byte[nextBlockLength], nextBlockLength);
+
+ // todo key and value sizes are int
+ int sLength = byteBuffer.getInt();
+ byte[] sKey = new byte[sLength];
+ byteBuffer.get(sKey);
+
+ int lLength = byteBuffer.getInt();
+ byte[] lKey = new byte[lLength];
+ byteBuffer.get(lKey);
+
+ long checkSum = byteBuffer.getLong();
+
+ return new Header(verId, Level.fromID((int) levelID), bs, entries, sKey, lKey, checkSum);
+ }
+
+ public void write(PrimitiveWriter writer) throws Exception{
+// VID | LEV | BS | EN | Block_LEN | [ SK_LEN | SK | LK_LEN | LK | CH ]
+ // todo what about size exceeding bytebuffer length as well as the mappedBuffer length
+ writer.putByte(versionId)
+ .putByte(Level.toID(level))
+ .putLong(binarySearchLocation)
+ .putInt(entries)
+ .putInt(SizeOf.IntLength + sKey.length + SizeOf.IntLength + lKey.length + SizeOf.LongLength)
+ .putInt(sKey.length)
+ .putBytes(sKey)
+ .putInt(lKey.length)
+ .putBytes(lKey)
+ .putLong(getKeysChecksum());
+ isWritten = true;
+ }
+
+ private long getKeysChecksum() {
+ return CheckSumStatic.compute(sKey, lKey);
+ }
+
+ public void writeBS(FileChannel channel, ByteBuffer byteBuffer, long binarySearchLocation) throws IOException {
+ Util.requireEquals(this.binarySearchLocation, Long.MIN_VALUE, "overwriting of binary search position, file=" + fileName);
+ this.binarySearchLocation = binarySearchLocation;
+ byteBuffer.clear();
+ byteBuffer.putLong(binarySearchLocation)
+ .flip();
+ channel.write(byteBuffer, BS_POSITION);
+ }
+
+ public void writeBS(PrimitiveWriter writer, long binarySearchLocation) throws IOException {
+ Util.requireEquals(this.binarySearchLocation, Long.MIN_VALUE, "overwriting of binary search position, file=" + fileName);
+ this.binarySearchLocation = binarySearchLocation;
+// writer.writeAtPositionInIsolation(BS_POSITION, binarySearchLocation);
+ }
+
+
+ @Override
+ public void close() throws Exception {
+ Util.requireNotEquals(this.entries, Long.MIN_VALUE, "entries never set, file=" + fileName);
+ Util.requireNotEquals(this.binarySearchLocation, Long.MIN_VALUE, "binary search position never set, file=" + fileName);
+ Util.requireTrue(isWritten, "header purpose is not served as the header is not written");
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/sst/KeyUnit.java b/AtomDB/src/main/java/org/g2n/atomdb/sst/KeyUnit.java
new file mode 100644
index 0000000..de2fe61
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/sst/KeyUnit.java
@@ -0,0 +1,35 @@
+package org.g2n.atomdb.sst;
+
+import org.g2n.atomdb.Checksum.CheckSumStatic;
+import org.g2n.atomdb.util.Util;
+
+public class KeyUnit {
+ public static final byte DELETE = 1;
+ public static final byte ADDED = 0;
+ private final long keyChecksum;
+ private byte[] key;
+ private byte isDelete;
+
+ private int valueSize;
+ public KeyUnit(byte[] key, long checkSum, byte isDelete, int valueSize) {
+ Util.requireTrue(CheckSumStatic.compute(key) == checkSum, "checksum mismatch key");
+ this.key = key;
+ this.keyChecksum = checkSum;
+ this.isDelete = isDelete;
+ this.valueSize = valueSize;
+ }
+
+ public byte[] getKey() {
+ return key;
+ }
+
+ public byte getIsDelete() {
+ return isDelete;
+ }
+
+ public int getValueSize() {
+ return valueSize;
+ }
+
+ public long getKeyChecksum() {return keyChecksum; }
+}
diff --git a/AtomDB/src/main/java/sst/MiddleBlock.java b/AtomDB/src/main/java/org/g2n/atomdb/sst/MiddleBlock.java
similarity index 58%
rename from AtomDB/src/main/java/sst/MiddleBlock.java
rename to AtomDB/src/main/java/org/g2n/atomdb/sst/MiddleBlock.java
index 5183e07..d5c843a 100644
--- a/AtomDB/src/main/java/sst/MiddleBlock.java
+++ b/AtomDB/src/main/java/org/g2n/atomdb/sst/MiddleBlock.java
@@ -1,8 +1,10 @@
-package sst;
+package org.g2n.atomdb.sst;
-import Checksum.CheckSum;
+import org.g2n.atomdb.Checksum.CheckSumStatic;
import com.google.common.hash.BloomFilter;
-import util.SizeOf;
+import org.g2n.atomdb.sstIo.ReaderInterface;
+import org.g2n.atomdb.sstIo.PrimitiveWriter;
+import org.g2n.atomdb.util.SizeOf;
import java.io.EOFException;
import java.io.FileOutputStream;
@@ -21,47 +23,64 @@ public static void writeBlock(FileChannel channel, ByteBuffer byteBuffer, Map.En
if (entry.getValue().getIsDelete() != ValueUnit.DELETE) {
byteBuffer.putLong(entry.getValue().getValue().length)
.put(entry.getValue().getValue())
- .putLong(CheckSum.compute(entry.getKey(), entry.getValue().getValue()));
+ .putLong(CheckSumStatic.compute(entry.getKey(), entry.getValue().getValue()));
} else {
- byteBuffer.putLong(CheckSum.compute(entry.getKey()));
+ byteBuffer.putLong(CheckSumStatic.compute(entry.getKey()));
}
byteBuffer.flip();
channel.write(byteBuffer);
}
+ public static void writeMiddleBlock(PrimitiveWriter writer, byte[] key, ValueUnit value) {
+ if (value.getIsDelete() == ValueUnit.DELETE) {
+ writer.putInt(key.length)
+ .putBytes(key)
+ .putLong(CheckSumStatic.compute(key))
+ .putByte(value.getIsDelete());
+ } else {
+ writer.putInt(key.length)
+ .putBytes(key)
+ .putLong(CheckSumStatic.compute(key))
+ .putByte(value.getIsDelete())
+ .putInt(value.getValue().length)
+ .putBytes(value.getValue())
+ .putLong(CheckSumStatic.compute(key, value.getValue()));
+ }
+ }
+
+ public static void writePointers(FileChannel channel, ByteBuffer byteBuffer, List<Long> pointers) throws Exception {
byteBuffer.clear();
- int limit = byteBuffer.limit();
- limit = (limit / SizeOf.LongLength) - 1;
-
- for(int i = 0; i < pointers.size(); ) {
- for (int j = 0; j < limit &&
- i < pointers.size() &&
- byteBuffer.remaining() > SizeOf.LongLength; j++, i++) {
-
- byteBuffer.putLong(pointers.get(i));
+ for (Long pointer : pointers) {
+ if (byteBuffer.remaining() >= SizeOf.LongLength) {
+ byteBuffer.putLong(pointer);
+ } else {
+ byteBuffer.flip();
+ channel.write(byteBuffer);
+ byteBuffer.clear();
+ byteBuffer.putLong(pointer);
}
- byteBuffer.flip();
- channel.write(byteBuffer);
- byteBuffer.compact();
+ // clear() is enough here (instead of compact()) since the buffer is fully drained on each flush
}
+ byteBuffer.flip();
+ channel.write(byteBuffer);
+ byteBuffer.clear();
+
if (byteBuffer.position() != 0) {
throw new Exception("pointers not written fully");
}
- // sure code that works
-// byteBuffer.clear();
-// for (Long pointer : pointers) {
-// byteBuffer.clear()
-// .putLong(pointer)
-// .flip();
-// channel.write(byteBuffer);
-// }
+ }
+
+ public static void writePointers(PrimitiveWriter writer, List<Long> pointers) {
+ // todo can be improved.
+ for (Long pointer : pointers) {
+ writer.putLong(pointer);
+ }
}
/**
* // todo
- * adding footer to contain the meta data about the sst
- * like total size of that sst and maybe a checksum
+ * adding a footer to contain the metadata about the sst
+ * like the total size of that sst and maybe a checksum
* // todo
* further reading pointer can be improved by getting only
* limited amount of pointers and not reading them all at once
@@ -101,7 +120,7 @@ public static List readPointers(FileChannel channel,
}
if (searchList.get(0) == 0) {
- throw new EOFException("found zero in sst binary search");
+ throw new EOFException("found zero in org.g2n.atomdb.sst binary org.g2n.atomdb.search");
}
return searchList;
@@ -124,7 +143,7 @@ public static List readPointers(FileChannel channel,
//
// // hack
// if (searchList.get(0) == 0) {
-// throw new EOFException("found zero in sst binary search");
+// throw new EOFException("found zero in org.g2n.atomdb.sst binary org.g2n.atomdb.search");
// }
//
// return searchList;
@@ -169,7 +188,7 @@ public static Map.Entry readKeyValue(FileChannel channel, Byt
long checksumPosition = offset + Long.BYTES + key.length + Short.BYTES + Long.BYTES + value.length;
verifyChecksum(byteBuffer, channel, checksumPosition, key, value);
- return Map.entry(key, new ValueUnit(value, isDelete));
+ return Map.entry(key, new ValueUnit(value, (byte) isDelete));
}
private static void verifyChecksum(ByteBuffer byteBuffer, FileChannel channel,long position, byte[] key, byte[] value) throws Exception {
@@ -178,8 +197,8 @@ private static void verifyChecksum(ByteBuffer byteBuffer, FileChannel channel,lo
channel.read(byteBuffer, position); // previously it was channel.read(byteBuffer);
byteBuffer.flip();
long checksum = byteBuffer.getLong();
- if (CheckSum.compute(key, value) != checksum) {
- throw new Exception("Checksum not matching");
+ if (CheckSumStatic.compute(key, value) != checksum) {
+ throw new Exception("AtomChecksum not matching");
}
}
@@ -189,12 +208,46 @@ private static void verifyChecksum0(ByteBuffer byteBuffer, FileChannel channel,l
channel.read(byteBuffer, position);// previously it was channel.read(byteBuffer);
byteBuffer.flip();
long checksum = byteBuffer.getLong();
- if (CheckSum.compute(key) != checksum) {
- throw new Exception("Checksum not matching");
+ if (CheckSumStatic.compute(key) != checksum) {
+ throw new Exception("AtomChecksum not matching");
}
}
public static void writeBloom(FileOutputStream outputStream, BloomFilter filter) throws IOException {
filter.writeTo(outputStream);
}
-}
+
+ public static KeyUnit getKeyUnit(ReaderInterface reader, long position) {
+ ByteBuffer byteBuffer = reader.readSize(new byte[SizeOf.IntLength], position, SizeOf.IntLength);
+ int keySize = byteBuffer.getInt();
+ int readSize = keySize + SizeOf.LongLength + Byte.BYTES + SizeOf.IntLength;
+ byteBuffer = reader.readSize(new byte[readSize], readSize);
+ byte[] key = new byte[keySize];
+ byteBuffer.get(key);
+ long checkSum = byteBuffer.getLong();
+ byte isDelete = byteBuffer.get();
+ if (isDelete != KeyUnit.DELETE) {
+ return new KeyUnit(key, checkSum, isDelete, byteBuffer.getInt());
+ }
+ return new KeyUnit(key, checkSum, isDelete, -1);
+ }
+
+ public static byte[] getValueUnit(ReaderInterface reader, long position, KeyUnit keyUnit) {
+ int valueSize = keyUnit.getValueSize();
+ ByteBuffer byteBuffer = reader.readSize(new byte[valueSize + SizeOf.LongLength],
+ position + SizeOf.IntLength + keyUnit.getKey().length + SizeOf.LongLength + Byte.BYTES + SizeOf.IntLength,
+ valueSize + SizeOf.LongLength);
+ byte[] value = new byte[valueSize];
+ byteBuffer.get(value);
+ long checkSum = byteBuffer.getLong();
+ if (CheckSumStatic.compute(keyUnit.getKey(), value) != checkSum) {
+ throw new RuntimeException("AtomChecksum mismatch");
+ }
+ return value;
+ }
+
+ public static KeyUnit getKeyUnit(byte[] bytes, ValueUnit valueUnit) {
+ return new KeyUnit(bytes, CheckSumStatic.compute(bytes), valueUnit.getIsDelete(),
+ valueUnit.getIsDelete() != ValueUnit.DELETE ? valueUnit.getValue().length : -1);
+ }
+}
\ No newline at end of file
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/sst/ValueUnit.java b/AtomDB/src/main/java/org/g2n/atomdb/sst/ValueUnit.java
new file mode 100644
index 0000000..ffdca7c
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/sst/ValueUnit.java
@@ -0,0 +1,46 @@
+package org.g2n.atomdb.sst;
+
+import java.util.Arrays;
+import java.util.Objects;
+
+/**
+ * TODO
+ * can be improved to take byte array directly and make valueUnit out of it
+ */
+public class ValueUnit {
+ public static final byte DELETE = 1;
+ public static final byte ADDED = 0;
+ private byte[] value;
+ private byte isDelete;
+
+ @Override
+ public String toString() {
+ return "org.g2n.atomdb.sst.ValueUnit{" +
+ "value=" + Arrays.toString(value) +
+ ", isDelete=" + isDelete +
+ '}';
+ }
+
+ public byte[] getValue() {
+ return value;
+ }
+
+ public byte getIsDelete() {
+ return isDelete;
+ }
+
+ public ValueUnit(byte[] value, byte isDelete) {
+ // todo the isDelete from this constructor.
+ Objects.requireNonNull(value);
+ this.value = value;
+ this.isDelete = isDelete;
+ }
+
+ public ValueUnit(byte isDelete) {
+ this.isDelete = isDelete;
+ }
+
+ public int getSize() {
+ return 1 + (value != null ? value.length : 0);
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/sstIo/ByteBufferBackedWriter.java b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/ByteBufferBackedWriter.java
new file mode 100644
index 0000000..b92bd6c
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/ByteBufferBackedWriter.java
@@ -0,0 +1,86 @@
+package org.g2n.atomdb.sstIo;
+
+import org.g2n.atomdb.Constants.DBConstant;
+import org.g2n.atomdb.db.ExpandingByteBuffer;
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.foreign.Arena;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.file.StandardOpenOption;
+
+import static java.nio.channels.FileChannel.MapMode.READ_WRITE;
+
+public class ByteBufferBackedWriter extends ChannelBackedWriter {
+ private final ByteBuffer buffer = ByteBuffer.allocateDirect(DBConstant.WRITER_BUFFER_SIZE);
+
+ public ByteBufferBackedWriter(File file) {
+ this.fileToWrite = file;
+ }
+
+ public PrimitiveWriter putLong(long item) {
+ buffer.putLong(item);
+ return this;
+ }
+
+ public PrimitiveWriter putInt(int item) {
+ buffer.putInt(item);
+ return this;
+ }
+
+ public PrimitiveWriter putByte(byte item) {
+ buffer.put(item);
+ return this;
+ }
+
+ public PrimitiveWriter putBytes(byte[] item) {
+ buffer.put(item);
+ return this;
+ }
+
+ @Override
+ public PrimitiveWriter putBytes(byte[] item, int offset, int length) {
+ buffer.put(item, offset, length);
+ return this;
+ }
+
+ @Override
+ public void write(int b) throws IOException {
+ putByte((byte) b);
+ }
+
+ @Override
+ public void write(byte[] b) throws IOException {
+ putBytes(b);
+ }
+
+ @Override
+ public void write(byte[] b, int off, int len) throws IOException {
+ putBytes(b, off, len);
+ }
+
+ @Override
+ public long position() throws IOException {
+ return buffer.position();
+ }
+
+ @Override
+ public void position(long positionToMove) {
+ buffer.position((int) positionToMove);
+ }
+
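+ // close() flushes everything in one shot: memory-map the target file to
+ // exactly the buffered size, copy the buffer into the mapping, and let the
+ // confined Arena unmap it on exit.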
+ @Override
+ public void close() throws IOException {
+ buffer.flip();
+ try (
+ var fileChannel = FileChannel.open(fileToWrite.toPath(), StandardOpenOption.READ, StandardOpenOption.WRITE);
+ var arena = Arena.ofConfined())
+ {
+ var fileSegment = fileChannel.map(READ_WRITE, 0, buffer.remaining(), arena);
+ fileSegment.asByteBuffer().put(buffer);
+ } finally {
+ buffer.clear();
+ }
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/sstIo/ChannelBackedReader.java b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/ChannelBackedReader.java
new file mode 100644
index 0000000..7d311a9
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/ChannelBackedReader.java
@@ -0,0 +1,12 @@
+package org.g2n.atomdb.sstIo;
+
+import java.io.File;
+import java.io.InputStream;
+import java.io.RandomAccessFile;
+import java.nio.channels.FileChannel;
+
+public abstract class ChannelBackedReader extends InputStream implements PrimitiveReader {
+ protected File file;
+ protected RandomAccessFile randomAccessFile;
+ protected FileChannel channel;
+}
\ No newline at end of file
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/sstIo/ChannelBackedWriter.java b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/ChannelBackedWriter.java
new file mode 100644
index 0000000..c90aca8
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/ChannelBackedWriter.java
@@ -0,0 +1,10 @@
+package org.g2n.atomdb.sstIo;
+
+import java.io.File;
+import java.io.OutputStream;
+import java.nio.channels.FileChannel;
+
+public abstract class ChannelBackedWriter extends OutputStream implements PrimitiveWriter {
+ protected File fileToWrite;
+ protected FileChannel channel;
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/sstIo/MMappedReader.java b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/MMappedReader.java
new file mode 100644
index 0000000..1a19d6b
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/MMappedReader.java
@@ -0,0 +1,77 @@
+package org.g2n.atomdb.sstIo;
+
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.foreign.Arena;
+import java.nio.ByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.file.StandardOpenOption;
+
+import static java.nio.channels.FileChannel.MapMode.READ_WRITE;
+
+public class MMappedReader extends InputStream {
+ private final Arena arena;
+ private final ByteBuffer map;
+ private final FileChannel channel;
+
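+ // The file is mapped once into a shared Arena (usable across threads);
+ // close() unmaps it, invalidating any further reads.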
+ public MMappedReader(File file) throws IOException {
+ //System.out.println("Constructor for fileToWrite="+fileToWrite.getName());
+ this.channel = FileChannel.open(file.toPath(), StandardOpenOption.READ);
+ this.arena = Arena.ofShared();
+ this.map = this.channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size(), arena).asByteBuffer();
+ }
+
+ // input stream
+ @Override
+ public int read() throws IOException {
+ return getByte();
+ }
+
+ @Override
+ public int read(byte[] b) throws IOException {
+ return getBytes(b);
+ }
+
+ @Override
+ public int read(byte[] b, int off, int len) throws IOException {
+ return getBytes(b, off, len);
+ }
+
+ public long getLong() {
+ return map.getLong();
+ }
+
+ public int getInt() {
+ return map.getInt();
+ }
+
+ public byte getByte() {
+ return map.get();
+ }
+
+ public int getBytes(byte[] item) {
+ map.get(item);
+ return item.length;
+ }
+
+ public int getBytes(byte[] item, int offset, int length) {
+ map.get(item, offset, length);
+ return item.length;
+ }
+
+ public long position() {
+ return map.position();
+ }
+
+ public void position(int positionToMove) {
+ map.position(positionToMove);
+ }
+
+ @Override
+ public void close() throws IOException {
+ arena.close();
+ channel.close();
+ }
+}
\ No newline at end of file
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/sstIo/PrimitiveReader.java b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/PrimitiveReader.java
new file mode 100644
index 0000000..adc9a4b
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/PrimitiveReader.java
@@ -0,0 +1,13 @@
+package org.g2n.atomdb.sstIo;
+
+import java.io.IOException;
+
+public interface PrimitiveReader extends AutoCloseable{
+ long getLong();
+ int getInt();
+ byte getByte();
+ int getBytes(byte[] item);
+ int getBytes(byte[] item, int offset, int length);
+ long position() throws IOException;
+ void position(int positionToMove);
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/sstIo/PrimitiveWriter.java b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/PrimitiveWriter.java
new file mode 100644
index 0000000..f5b3c6f
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/PrimitiveWriter.java
@@ -0,0 +1,21 @@
+package org.g2n.atomdb.sstIo;
+
+import java.io.IOException;
+
+public interface PrimitiveWriter extends AutoCloseable {
+ PrimitiveWriter putLong(long item);
+ PrimitiveWriter putInt(int item);
+ PrimitiveWriter putByte(byte item);
+ PrimitiveWriter putBytes(byte[] item);
+ PrimitiveWriter putBytes(byte[] item, int offset, int length);
+ long position() throws IOException;
+ void position(long positionToMove);
+}
+
+
+/**
+ * Todo
+ * 1. direct mappedByteBuffer which directly writes based on remaining.
+ * 2. buffer using the bytebuffer and writes in chunk where our buffer is fixed 4mb.
+ * 3.
+ */
\ No newline at end of file
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/sstIo/ReaderInterface.java b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/ReaderInterface.java
new file mode 100644
index 0000000..25780d0
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/ReaderInterface.java
@@ -0,0 +1,6 @@
+package org.g2n.atomdb.sstIo;
+
+import java.io.InputStream;
+
+public abstract class ReaderInterface extends InputStream implements SSTReaderInterface {
+}
\ No newline at end of file
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/sstIo/SSTHeader.java b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/SSTHeader.java
new file mode 100644
index 0000000..09f6b42
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/SSTHeader.java
@@ -0,0 +1,234 @@
+package org.g2n.atomdb.sstIo;
+
+import org.g2n.atomdb.Constants.DBConstant;
+import org.g2n.atomdb.Level.Level;
+import org.g2n.atomdb.db.ExpandingByteBuffer;
+
+import java.lang.reflect.Field;
+import java.nio.ByteBuffer;
+
+public class SSTHeader{
+ private final Level level;
+ private final byte checksumType;
+ private final byte compressionType;
+ private final byte numberOfKeysInSingleCluster;
+ private final byte shortestCommonPrefixUsed;
+ private int numberOfEntries;
+ private final byte sstVersion;
+ private int pointersPosition;
+ private int filterPosition;
+ public static final int TOTAL_HEADER_SIZE = 18;
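+ // fixed 18-byte layout: version(1), level(1), checksumType(1), compressionType(1),
+ // clusterSize(1), shortestCommonPrefix(1), entries(4), filterPos(4), pointersPos(4)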
+
+ public static SSTHeader getDefault(Level level) {
+ return new SSTHeader(
+ DBConstant.SST_VERSION,
+ level,
+ DBConstant.CRC32C_CHECKSUM_TYPE,
+ DBConstant.LZ4_COMPRESSION_TYPE,
+ DBConstant.CLUSTER_SIZE,
+ DBConstant.SHORTEST_COMMON_PREFIX_USED);
+ }
+
+ public SSTHeader(byte sstVersion, Level level, byte checksumType,
+ byte compressionType, byte numberOfKeysInSingleCluster,
+ byte shortestCommonPrefixUsed) {
+ this.sstVersion = sstVersion;
+ this.level = level;
+ this.checksumType = checksumType;
+ this.compressionType = compressionType;
+ this.numberOfKeysInSingleCluster = numberOfKeysInSingleCluster;
+ this.shortestCommonPrefixUsed = shortestCommonPrefixUsed;
+ this.numberOfEntries = -1;
+ this.pointersPosition = -1;
+ this.filterPosition = -1;
+ }
+
+ public SSTHeader(byte sstVersion,
+ byte level,
+ byte checksumType,
+ byte compressionType,
+ byte numberOfKeysInSingleCluster,
+ byte shortestCommonPrefixUsed,
+ int numberOfEntries,
+ int filterPosition,
+ int pointersPosition) {
+ this.sstVersion = sstVersion;
+ this.level = Level.fromID(level);
+ this.checksumType = checksumType;
+ this.compressionType = compressionType;
+ this.numberOfKeysInSingleCluster = numberOfKeysInSingleCluster;
+ this.shortestCommonPrefixUsed = shortestCommonPrefixUsed;
+ this.numberOfEntries = numberOfEntries;
+ this.filterPosition = filterPosition;
+ this.pointersPosition = pointersPosition;
+ }
+
+ public SSTHeader(SSTHeader header) {
+ this.sstVersion = header.sstVersion;
+ this.level = header.level;
+ this.checksumType = header.checksumType;
+ this.compressionType = header.compressionType;
+ this.numberOfKeysInSingleCluster = header.numberOfKeysInSingleCluster;
+ this.shortestCommonPrefixUsed = header.shortestCommonPrefixUsed;
+ this.numberOfEntries = header.numberOfEntries;
+ this.filterPosition = header.filterPosition;
+ this.pointersPosition = header.pointersPosition;
+ }
+
+ public void check() {
+ // todo more security checks
+ if (pointersPosition <= 0 || numberOfEntries <= 0) {
+ throw new RuntimeException("Header without bs and number of entries found");
+ }
+ }
+
+ public void storeAsBytes(ChannelBackedWriter writer) {
+ validateIfAllFieldTakenIntoConsideration(9);
+ writer.putByte(sstVersion)
+ .putByte(level.value())
+ .putByte(checksumType)
+ .putByte(compressionType)
+ .putByte(numberOfKeysInSingleCluster)
+ .putByte(shortestCommonPrefixUsed)
+ .putInt(numberOfEntries)
+ .putInt(filterPosition)
+ .putInt(pointersPosition);
+ }
+
+ public void storeAsBytes(ExpandingByteBuffer writer) {
+ validateIfAllFieldTakenIntoConsideration(9); // todo should we remove this safety stuff ?
+ writer.put(sstVersion)
+ .put(level.value())
+ .put(checksumType)
+ .put(compressionType)
+ .put(numberOfKeysInSingleCluster)
+ .put(shortestCommonPrefixUsed)
+ .putInt(numberOfEntries)
+ .putInt(filterPosition)
+ .putInt(pointersPosition);
+ }
+
+ public int totalHeaderSize() {
+ validateIfAllFieldTakenIntoConsideration(9);
+ return Byte.BYTES + // version
+ Byte.BYTES + // level
+ Byte.BYTES + // checksum type
+ Byte.BYTES + // compression type
+ Byte.BYTES + // number of keys in chunk
+ Byte.BYTES + // shortest common prefix used
+ Integer.BYTES + // number of entries
+ Integer.BYTES + // filter position
+ Integer.BYTES; // pointers position
+ }
+
+ public static SSTHeader getHeader(MMappedReader reader) {
+ var bytes = new byte[TOTAL_HEADER_SIZE];
+ reader.getBytes(bytes);
+ var buffer = ByteBuffer.wrap(bytes);
+ return new SSTHeader(buffer.get(), buffer.get(), buffer.get(), buffer.get(), buffer.get(), buffer.get(), buffer.getInt(), buffer.getInt(), buffer.getInt());
+ }
+
+ private void validateIfAllFieldTakenIntoConsideration(int fields) {
+ Field[] declaredFields = getClass().getDeclaredFields();
+ int staticCount = 0;
+ for (Field declaredField : declaredFields) {
+ if (java.lang.reflect.Modifier.isStatic(declaredField.getModifiers())) {
+ staticCount++;
+ }
+ }
+ if (declaredFields.length - staticCount != fields) {
+ throw new RuntimeException("Modified " + getClass().getName() + " but didn't modify the total count");
+ }
+ }
+
+ public void setEntries(int count) {
+ this.numberOfEntries = count;
+ }
+
+ public void setFilterPosition(int position) {
+ this.filterPosition = position;
+ }
+
+ public void setPointersPosition(int position) {
+ this.pointersPosition = position;
+ }
+
+ public void writeRemaining(ChannelBackedWriter writer) {
+ writer.position(Byte.BYTES * 6);
+ writer.putInt(numberOfEntries)
+ .putInt(filterPosition)
+ .putInt(pointersPosition);
+ }
+
+ public void writeRemaining(ExpandingByteBuffer writer) {
+ writer.position(Byte.BYTES * 6);
+ writer.putInt(numberOfEntries)
+ .putInt(filterPosition)
+ .putInt(pointersPosition);
+ }
+
+ public void writeSSTHeaderData(ExpandingByteBuffer writer) {
+ validateIfAllFieldTakenIntoConsideration(9); // todo should we remove this safety stuff ?
+ writer.put(sstVersion)
+ .put(level.value())
+ .put(checksumType)
+ .put(compressionType)
+ .put(numberOfKeysInSingleCluster)
+ .put(shortestCommonPrefixUsed)
+ .putInt(numberOfEntries)
+ .putInt(filterPosition)
+ .putInt(pointersPosition);
+ }
+
+
+ public Level getLevel() {
+ return level;
+ }
+
+ public byte getChecksumType() {
+ return checksumType;
+ }
+
+ public byte getCompressionType() {
+ return compressionType;
+ }
+
+ public byte getNumberOfKeysInSingleCluster() {
+ return numberOfKeysInSingleCluster;
+ }
+
+ public byte getShortestCommonPrefixUsed() {
+ return shortestCommonPrefixUsed;
+ }
+
+ public int getNumberOfEntries() {
+ return numberOfEntries;
+ }
+
+ public byte getSstVersion() {
+ return sstVersion;
+ }
+
+ public int getPointersPosition() {
+ return pointersPosition;
+ }
+
+ public int getFilterPosition() {
+ return filterPosition;
+ }
+
+ @Override
+ public String toString() {
+ return "SSTHeader{" +
+ "level=" + level +
+ ", checksumType=" + checksumType +
+ ", compressionType=" + compressionType +
+ ", numberOfKeysInSingleCluster=" + numberOfKeysInSingleCluster +
+ ", shortestCommonPrefixUsed=" + shortestCommonPrefixUsed +
+ ", numberOfEntries=" + numberOfEntries +
+ ", sstVersion=" + sstVersion +
+ ", pointersPosition=" + pointersPosition +
+ ", filterPosition=" + filterPosition +
+ '}';
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/sstIo/SSTKeyRange.java b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/SSTKeyRange.java
new file mode 100644
index 0000000..e8b35b9
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/SSTKeyRange.java
@@ -0,0 +1,96 @@
+package org.g2n.atomdb.sstIo;
+
+import org.g2n.atomdb.Checksum.AtomChecksum;
+import org.g2n.atomdb.Checksum.Crc32cChecksum;
+import org.g2n.atomdb.db.DBComparator;
+import org.g2n.atomdb.util.Util;
+
+import java.util.Arrays;
+
+public class SSTKeyRange {
+ private final byte[] smallest;
+ private final byte[] greatest;
+ private final long checksum;
+ private final int size;
+
+ public SSTKeyRange(byte[] smallest, byte[] greatest) {
+ validate(smallest, greatest);
+ this.smallest = smallest;
+ this.greatest = greatest;
+ this.checksum = computeChecksum(smallest, greatest);
+ this.size = smallest.length + greatest.length + Integer.BYTES * 2 + Long.BYTES; // two length ints + checksum long
+ }
+
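+ /**
+ * Deserialization constructor: recomputes the CRC32C over both keys and
+ * rejects the range if it does not match the stored checksum.
+ */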
+ public SSTKeyRange(byte[] smallest, byte[] greatest, long checksumProvided) {
+ validate(smallest, greatest);
+ long computedChecksum = computeChecksum(smallest, greatest);
+ Util.requireTrue(checksumProvided == computedChecksum, "AtomChecksum mismatch");
+ this.smallest = smallest;
+ this.greatest = greatest;
+ this.checksum = computedChecksum;
+ this.size = smallest.length + greatest.length + Integer.BYTES * 2 + Long.BYTES;
+ }
+
+ private static long computeChecksum(byte[] first, byte[] last) {
+ AtomChecksum checksumProvider = new Crc32cChecksum();
+ return checksumProvider.compute(first, last);
+ }
+
+ private void validate(byte[] first, byte[] last) {
+ Util.requireTrue(DBComparator.byteArrayComparator.compare(first, last) < 0, "Smallest key must be strictly smaller than the greatest key");
+ }
+
+ public int getRequiredSizeToStoreKeyRange() {
+ return size;
+ }
+
+ public void storeAsBytes(ChannelBackedWriter writer) {
+ writer.putInt(smallest.length)
+ .putBytes(smallest)
+ .putInt(greatest.length)
+ .putBytes(greatest)
+ .putLong(checksum);
+ }
+
+ public byte[] getSmallest() {
+ return smallest;
+ }
+
+ public byte[] getGreatest() {
+ return greatest;
+ }
+
+ public byte[] start() {
+ return smallest;
+ }
+
+ public byte[] end() {
+ return greatest;
+ }
+
+ public long getChecksum() {
+ return checksum;
+ }
+
+ public boolean inRange(byte[] key) {
+ return DBComparator.byteArrayComparator.compare(smallest, key) <= 0 &&
+ DBComparator.byteArrayComparator.compare(greatest, key) >= 0;
+ }
+
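+ // Two closed ranges [s1, g1] and [s2, g2] intersect iff s1 <= g2 && g1 >= s2.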
+ public boolean overLapping(SSTKeyRange givenRange) {
+ return (DBComparator.byteArrayComparator.compare(smallest, givenRange.getGreatest()) <= 0 &&
+ DBComparator.byteArrayComparator.compare(greatest, givenRange.getSmallest()) >= 0);
+ }
+
+ @Override
+ public String toString() {
+ return "SSTKeyRange{" +
+ "smallest=" + new String(smallest) +
+ ", greatest=" + new String(greatest) +
+ ", checksum=" + checksum +
+ ", size=" + size +
+ '}';
+ }
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/sstIo/SSTReaderInterface.java b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/SSTReaderInterface.java
new file mode 100644
index 0000000..f21ecd4
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/sstIo/SSTReaderInterface.java
@@ -0,0 +1,8 @@
+package org.g2n.atomdb.sstIo;
+
+import java.nio.ByteBuffer;
+
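+// Minimal read abstraction over an SST file. The exact contract of readSize
+// (whether it fills 'arr' and wraps it, or reads at an absolute position) is
+// assumed from its implementations elsewhere in this change.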
+public interface SSTReaderInterface extends AutoCloseable {
+ ByteBuffer readSize(byte[] arr, long position, int length);
+ ByteBuffer readSize(byte[] arr, int length);
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/trash.java b/AtomDB/src/main/java/org/g2n/atomdb/trash.java
new file mode 100644
index 0000000..5db616d
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/trash.java
@@ -0,0 +1,84 @@
+package org.g2n.atomdb;
+
+import net.jpountz.lz4.*;
+import org.xerial.snappy.Snappy;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Random;
+
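+/**
+ * Scratch benchmark: round-trips random payloads through Snappy and LZ4
+ * (with a length prefix) and prints wall-clock nanos for each codec.
+ * Not part of the library proper; kept for experimentation.
+ */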
+public class trash {
+ public static void main(String[] args) throws Exception {
+ int numberOfChecksums = 1000;
+ ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES * numberOfChecksums);
+ Random random = new Random();
+ for (int i = 0; i < numberOfChecksums; i++) {
+ buffer.putLong(random.nextLong());
+ }
+ buffer.flip();
+ var allChecksums = new byte[buffer.limit()];
+ System.out.println("buffer filled="+buffer.limit());
+ buffer.get(allChecksums);
+ byte[] compress = Snappy.compress(allChecksums);
+ System.out.println("uncompressed size="+allChecksums.length);
+ System.out.println("compressed size="+compress.length);
+ assert Arrays.compare(allChecksums, Snappy.uncompress(compress)) == 0;
+
+ LZ4Factory lz4Factory = LZ4Factory.nativeInstance();
+ LZ4Compressor lz4FastCompressor = lz4Factory.fastCompressor();
+ LZ4FastDecompressor decompressor = lz4Factory.fastDecompressor();
+
+ benchmark(lz4FastCompressor, decompressor, 10);
+ benchmark(lz4FastCompressor, decompressor, 1000);
+ benchmark(lz4FastCompressor, decompressor, 10000);
+ benchmark(lz4FastCompressor, decompressor, 100000);
+ benchmark(lz4FastCompressor, decompressor, 1000000);
+ benchmark(lz4FastCompressor, decompressor, 10000000);
+
+// String generaatedString = new String(array, StandardCharsets.UTF_8);
+// System.out.println(generaatedString);
+// var a = generaatedString.getBytes(StandardCharsets.UTF_8);
+// System.out.println("a original="+a.length);
+// System.out.println("a snappy="+ Snappy.compress(a).length);
+// System.out.println("a lsz="+lz4Compressor.compress(a).length);
+ }
+
+ private static void benchmark(LZ4Compressor lz4FastCompressor, LZ4FastDecompressor decompressor, int n) throws IOException {
+ List<byte[]> values = new ArrayList<>(n);
+ Random random = new Random();
+ for (int i = 0; i < n; i++) {
+ byte[] array = new byte[100];
+ random.nextBytes(array);
+ values.add(array);
+ }
+ // snappy
+ long start, end;
+ start = System.nanoTime();
+ for (byte[] value : values) {
+ byte[] compress1 = Snappy.compress(value);
+ byte[] uncompress = Snappy.uncompress(compress1);
+ assert Arrays.compare(value, uncompress) == 0;
+ }
+ end = System.nanoTime();
+ System.out.println("snappy took for encode and decode="+(end - start));
+
+ // LZ4
+ start = System.nanoTime();
+ LZ4CompressorWithLength lz4CompressorWithLength = new LZ4CompressorWithLength(lz4FastCompressor);
+ LZ4DecompressorWithLength lz4DecompressorWithLength = new LZ4DecompressorWithLength(decompressor);
+ for (byte[] value : values) {
+ byte[] compress1 = lz4CompressorWithLength.compress(value);
+ byte[] decompress = lz4DecompressorWithLength.decompress(compress1);
+ assert Arrays.compare(value, decompress) == 0;
+ }
+ end = System.nanoTime();
+ System.out.println("lz4 took for encode and decode="+(end - start));
+ }
+}
+/**
+ * a original=943
+ * a snappy=654
+ * a lsz=675
+ */
\ No newline at end of file
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/util/ByteBufferSupport.java b/AtomDB/src/main/java/org/g2n/atomdb/util/ByteBufferSupport.java
new file mode 100644
index 0000000..6d290d5
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/util/ByteBufferSupport.java
@@ -0,0 +1,64 @@
+package org.g2n.atomdb.util;
+
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.nio.ByteBuffer;
+import java.nio.MappedByteBuffer;
+
+// with the help of
+// https://stackoverflow.com/questions/2972986/how-to-unmap-a-file-from-memory-mapped-using-filechannel-in-java
+// https://github.com/dain/leveldb/blob/130db6965ebba2c19106c5355bee0c8dc59f57db/leveldb/src/main/java/org/iq80/leveldb/util/ByteBufferSupport.java
+public final class ByteBufferSupport {
+ private static final MethodHandle INVOKE_CLEANER;
+
+ static {
+ MethodHandle invoker;
+ try {
+ // TODO: if pre-Java 9 runtimes don't need to be supported, the fallback in the catch block below can be deleted.
+ // Java 9 added an invokeCleaner method to Unsafe to work around
+ // module visibility issues for code that used to rely on DirectByteBuffer's cleaner()
+ Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
+ Field theUnsafe = unsafeClass.getDeclaredField("theUnsafe");
+ theUnsafe.setAccessible(true);
+ invoker = MethodHandles.lookup()
+ .findVirtual(unsafeClass, "invokeCleaner", MethodType.methodType(void.class, ByteBuffer.class))
+ .bindTo(theUnsafe.get(null));
+ }
+ catch (Exception e) {
+ // fall back to pre-java 9 compatible behavior
+ try {
+ Class<?> directByteBufferClass = Class.forName("java.nio.DirectByteBuffer");
+ Class<?> cleanerClass = Class.forName("sun.misc.Cleaner");
+
+ Method cleanerMethod = directByteBufferClass.getDeclaredMethod("cleaner");
+ cleanerMethod.setAccessible(true);
+ MethodHandle getCleaner = MethodHandles.lookup().unreflect(cleanerMethod);
+
+ Method cleanMethod = cleanerClass.getDeclaredMethod("clean");
+ cleanMethod.setAccessible(true);
+ MethodHandle clean = MethodHandles.lookup().unreflect(cleanMethod);
+
+ clean = MethodHandles.dropArguments(clean, 1, directByteBufferClass);
+ invoker = MethodHandles.foldArguments(clean, getCleaner);
+ }
+ catch (Exception e1) {
+ throw new AssertionError(e1);
+ }
+ }
+ INVOKE_CLEANER = invoker;
+ }
+
+ private ByteBufferSupport() {
+ }
+
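+ /**
+ * Eagerly releases the native memory backing a mapped buffer instead of
+ * waiting for GC. The buffer must not be touched afterwards; doing so is
+ * undefined behavior.
+ */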
+ public static void unmap(MappedByteBuffer buffer) {
+ try {
+ INVOKE_CLEANER.invoke(buffer);
+ } catch (Throwable e) {
+ throw new RuntimeException(e);
+ }
+ }
+}
diff --git a/AtomDB/src/main/java/util/BytesConverter.java b/AtomDB/src/main/java/org/g2n/atomdb/util/BytesConverter.java
similarity index 94%
rename from AtomDB/src/main/java/util/BytesConverter.java
rename to AtomDB/src/main/java/org/g2n/atomdb/util/BytesConverter.java
index 85d4307..ddd1647 100644
--- a/AtomDB/src/main/java/util/BytesConverter.java
+++ b/AtomDB/src/main/java/org/g2n/atomdb/util/BytesConverter.java
@@ -1,4 +1,4 @@
-package util;
+package org.g2n.atomdb.util;
import java.nio.charset.StandardCharsets;
@@ -21,6 +21,8 @@ public static byte[] bytes(long value) {
return result;
}
+
+
public static long toLong(byte[] bytes, int inclusive, int exclusive) {
requireNonNull(bytes);
if (bytes.length < exclusive) {
@@ -74,6 +76,7 @@ public long fromBytes(byte[] arr) {
}
// google guava
+ //https://github.com/google/guava/blob/d5fbccac90aba8501c633e896ea67e2b0bfb426d/guava/src/com/google/common/primitives/Longs.java#L284
private static long fromBytesToLong(
byte b1, byte b2, byte b3, byte b4, byte b5, byte b6, byte b7, byte b8) {
return (b1 & 0xFFL) << 56
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/util/FileUtil.java b/AtomDB/src/main/java/org/g2n/atomdb/util/FileUtil.java
new file mode 100644
index 0000000..3559859
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/util/FileUtil.java
@@ -0,0 +1,4 @@
+package org.g2n.atomdb.util;
+
+public class FileUtil {
+}
diff --git a/AtomDB/src/main/java/org/g2n/atomdb/util/MaxMinAvg.java b/AtomDB/src/main/java/org/g2n/atomdb/util/MaxMinAvg.java
new file mode 100644
index 0000000..d84ce71
--- /dev/null
+++ b/AtomDB/src/main/java/org/g2n/atomdb/util/MaxMinAvg.java
@@ -0,0 +1,28 @@
+package org.g2n.atomdb.util;
+
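+/**
+ * Running max/min/average of observed long values (e.g. latencies).
+ * The mean is maintained incrementally: avg_n = avg_{n-1} + (x_n - avg_{n-1}) / n,
+ * which avoids accumulating a sum that could overflow.
+ */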
+public class MaxMinAvg {
+ public long max;
+ public long min;
+ public double avg;
+ public int times;
+
+ public MaxMinAvg(long value) {
+ this.max = value;
+ this.min = value;
+ this.avg = value;
+ this.times = 1; // the seed value counts as the first sample
+ }
+
+ public MaxMinAvg() {
+ this.max = Long.MIN_VALUE;
+ this.min = Long.MAX_VALUE;
+ this.avg = 0;
+ }
+
+ public MaxMinAvg add(long time) {
+ this.max = Math.max(max, time);
+ this.min = Math.min(min, time);
+ this.times++;
+ this.avg += (time - avg) / times; // incremental mean; replaces (avg + time) / 2, which skewed toward recent samples
+ return this;
+ }
+}
\ No newline at end of file
diff --git a/AtomDB/src/main/java/util/SizeOf.java b/AtomDB/src/main/java/org/g2n/atomdb/util/SizeOf.java
similarity index 70%
rename from AtomDB/src/main/java/util/SizeOf.java
rename to AtomDB/src/main/java/org/g2n/atomdb/util/SizeOf.java
index a75984f..3e93823 100644
--- a/AtomDB/src/main/java/util/SizeOf.java
+++ b/AtomDB/src/main/java/org/g2n/atomdb/util/SizeOf.java
@@ -1,10 +1,10 @@
-package util;
+package org.g2n.atomdb.util;
-import Constants.Operations;
+import org.g2n.atomdb.Constants.Operations;
public class SizeOf {
public final static int LongLength = Long.BYTES;
public final static int IntLength = Integer.BYTES;
- public static final int MBinBytes = 1_000_000;
+ public static final int MB = 1024 * 1024;
public static final int InstantTimeLength = IntLength + LongLength;
public static final int OperationsLength = Operations.bytesLength();
}
diff --git a/AtomDB/src/main/java/util/Speed.java b/AtomDB/src/main/java/org/g2n/atomdb/util/Speed.java
similarity index 99%
rename from AtomDB/src/main/java/util/Speed.java
rename to AtomDB/src/main/java/org/g2n/atomdb/util/Speed.java
index c393f6f..e3c2614 100644
--- a/AtomDB/src/main/java/util/Speed.java
+++ b/AtomDB/src/main/java/org/g2n/atomdb/util/Speed.java
@@ -1,4 +1,4 @@
-package util;
+package org.g2n.atomdb.util;
public class Speed {
diff --git a/AtomDB/src/main/java/util/Util.java b/AtomDB/src/main/java/org/g2n/atomdb/util/Util.java
similarity index 63%
rename from AtomDB/src/main/java/util/Util.java
rename to AtomDB/src/main/java/org/g2n/atomdb/util/Util.java
index ed2e373..b06f37f 100644
--- a/AtomDB/src/main/java/util/Util.java
+++ b/AtomDB/src/main/java/org/g2n/atomdb/util/Util.java
@@ -1,7 +1,11 @@
-package util;
+package org.g2n.atomdb.util;
+import java.io.File;
import java.nio.ByteBuffer;
import java.time.Instant;
+import java.util.concurrent.Callable;
+import java.util.function.Function;
+import java.util.regex.Pattern;
public class Util {
@@ -61,4 +65,32 @@ public static boolean requireNotEquals(int a, int b, String errMsg) {
throw new RuntimeException(errMsg);
return true;
}
+
+ public static boolean requiresNull(Object obj, String errMsg) {
+ if (obj != null)
+ throw new RuntimeException(errMsg);
+ return true;
+ }
+
+ public static final String fileSeparatorForSplit = Pattern.quote(File.separator);
+
+ public static Object recordTimeTaken(Function