hashiverse_lib/tools/
compression.rs1use crate::tools::BytesGatherer;
26use bytes::{BufMut, Bytes, BytesMut};
27use std::io::{Read, Write};
28
/// Version byte for a payload stored verbatim (no compression applied).
const COMPRESSION_VERSION_NONE: u8 = 0;
/// Version byte for an LZ4 block preceded by a 4-byte little-endian uncompressed size.
const COMPRESSION_VERSION_LZ4: u8 = 1;
/// Version byte for a Brotli stream.
const COMPRESSION_VERSION_BROTLI: u8 = 2;

/// Inputs shorter than this many bytes are not handed to LZ4; they are stored verbatim.
const MIN_BYTES_FOR_LZ4: usize = 64;

/// Inputs shorter than this many bytes are not handed to Brotli; they are stored verbatim.
const MIN_BYTES_FOR_BROTLI: usize = 128;

/// Largest size (32 MiB) a compressed payload is allowed to expand to on decompression.
const MAX_DECOMPRESSED_SIZE: usize = 32 << 20;
44
45fn compress_passthrough(input: &[u8]) -> anyhow::Result<BytesGatherer> {
46 let mut bytes_gatherer = BytesGatherer::default();
47 bytes_gatherer.put_u8(COMPRESSION_VERSION_NONE);
48 bytes_gatherer.put_slice(input);
49 Ok(bytes_gatherer)
50}
51
52fn decompress_passthrough(input: &[u8]) -> anyhow::Result<BytesGatherer> {
53 Ok(BytesGatherer::from_bytes(Bytes::copy_from_slice(&input[1..])))
54}
55
56fn compress_lz4(input: &[u8]) -> anyhow::Result<BytesGatherer> {
57 if input.len() < MIN_BYTES_FOR_LZ4 {
58 return compress_passthrough(input);
59 }
60
61 let max_out = lz4_flex::block::get_maximum_output_size(input.len());
64 let mut result = BytesMut::with_capacity(5 + max_out);
65 result.put_u8(COMPRESSION_VERSION_LZ4);
66 result.put_slice(&(input.len() as u32).to_le_bytes());
67 let data_start = result.len(); result.resize(data_start + max_out, 0);
69 let n = lz4_flex::block::compress_into(input, &mut result[data_start..]).map_err(|e| anyhow::anyhow!("lz4 compression failed: {}", e))?;
70 result.truncate(data_start + n);
71 Ok(BytesGatherer::from_bytes(result.freeze()))
72}
73
74fn decompress_lz4(input: &[u8]) -> anyhow::Result<BytesGatherer> {
75 let data = &input[1..];
76 if data.len() < 4 {
77 anyhow::bail!("lz4 decompression failed: missing size prefix");
78 }
79 let uncompressed_size = u32::from_le_bytes(data[..4].try_into().unwrap()) as usize;
80 if uncompressed_size > MAX_DECOMPRESSED_SIZE {
81 anyhow::bail!("lz4 decompressed size {} exceeds limit {}", uncompressed_size, MAX_DECOMPRESSED_SIZE);
82 }
83 lz4_flex::decompress_size_prepended(data).map(|v| BytesGatherer::from_bytes(Bytes::from(v))).map_err(|e| anyhow::anyhow!("lz4 decompression failed: {}", e))
84}
85
86fn compress_brotli(input: &[u8]) -> anyhow::Result<BytesGatherer> {
87 if input.len() < MIN_BYTES_FOR_BROTLI {
88 return compress_passthrough(input);
89 }
90
91 let mut result = vec![COMPRESSION_VERSION_BROTLI];
94 {
95 let mut writer = brotli::CompressorWriter::new(&mut result, 4096, 11, 22);
96 writer.write_all(input)?;
97 }
98 Ok(BytesGatherer::from_bytes(Bytes::from(result)))
99}
100
101fn decompress_brotli(input: &[u8]) -> anyhow::Result<BytesGatherer> {
102 let mut output = Vec::new();
103 let bytes_read = brotli::Decompressor::new(&input[1..], 4096).take(MAX_DECOMPRESSED_SIZE as u64 + 1).read_to_end(&mut output)?;
104 if bytes_read > MAX_DECOMPRESSED_SIZE {
105 anyhow::bail!("brotli decompressed size {} exceeds limit {}", bytes_read, MAX_DECOMPRESSED_SIZE);
106 }
107 Ok(BytesGatherer::from_bytes(Bytes::from(output)))
108}
109
110pub fn compress_for_speed(input: &[u8]) -> anyhow::Result<BytesGatherer> {
116 let result = compress_lz4(input)?;
117 if result.len() < input.len() { Ok(result) } else { compress_passthrough(input) }
118}
119
120pub fn compress_for_size(input: &[u8]) -> anyhow::Result<BytesGatherer> {
126 let result = compress_brotli(input)?;
127 if result.len() < input.len() { Ok(result) } else { compress_passthrough(input) }
128}
129
130pub fn decompress(input: &[u8]) -> anyhow::Result<BytesGatherer> {
134 if input.is_empty() {
135 anyhow::bail!("missing compression version byte");
136 }
137 match input[0] {
138 COMPRESSION_VERSION_LZ4 => decompress_lz4(input),
139 COMPRESSION_VERSION_BROTLI => decompress_brotli(input),
140 COMPRESSION_VERSION_NONE => decompress_passthrough(input),
141 v => anyhow::bail!("unsupported compression version byte {}", v),
142 }
143}
144
145
146
147
#[cfg(test)]
mod tests {
    use crate::tools::compression::{compress_for_size, compress_for_speed, decompress};
    use crate::tools::tools;

    #[cfg(target_arch = "wasm32")]
    extern crate wasm_bindgen_test;
    #[cfg(target_arch = "wasm32")]
    use wasm_bindgen_test::*;
    #[cfg(target_arch = "wasm32")]
    wasm_bindgen_test::wasm_bindgen_test_configure!(run_in_browser);

    /// Runs `input` through `compress_for_speed` and back, asserting it is unchanged.
    fn roundtrip_speed(input: &[u8], msg: &str) -> anyhow::Result<()> {
        let packed = compress_for_speed(input)?.to_bytes();
        let restored = decompress(&packed)?.to_bytes();
        assert_eq!(input, restored.as_ref(), "{}", msg);
        Ok(())
    }

    /// Runs `input` through `compress_for_size` and back, asserting it is unchanged.
    fn roundtrip_size(input: &[u8], msg: &str) -> anyhow::Result<()> {
        let packed = compress_for_size(input)?.to_bytes();
        let restored = decompress(&packed)?.to_bytes();
        assert_eq!(input, restored.as_ref(), "{}", msg);
        Ok(())
    }

    /// A short sentence survives both codecs.
    #[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    async fn test_compression_is_reversible() -> anyhow::Result<()> {
        let sample = b"Some example string to test compression and decompression.";
        roundtrip_speed(sample, "lz4 roundtrip")?;
        roundtrip_size(sample, "brotli roundtrip")
    }

    /// An input below both minimum-size thresholds survives both entry points.
    #[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    async fn test_compression_is_reversible_short() -> anyhow::Result<()> {
        let sample = b"Some...";
        roundtrip_speed(sample, "lz4 short passthrough")?;
        roundtrip_size(sample, "brotli short passthrough")
    }

    /// The empty input survives both entry points.
    #[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    async fn test_compression_is_reversible_empty() -> anyhow::Result<()> {
        let sample = b"";
        roundtrip_speed(sample, "lz4 empty passthrough")?;
        roundtrip_size(sample, "brotli empty passthrough")
    }

    /// A large random buffer survives the speed-oriented entry point.
    #[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    async fn test_compression_is_reversible_random() -> anyhow::Result<()> {
        // 64 MiB of random bytes; presumably incompressible, so the verbatim fallback
        // is the path expected to be exercised here.
        const LENGTH: usize = 8192 * 8192;
        let random_input: Vec<u8> = tools::random_bytes(LENGTH);
        roundtrip_speed(&random_input, "lz4 random passthrough")
    }

    /// Repetitive HTML must come out smaller from the size-oriented entry point.
    #[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    async fn test_brotli_actually_compresses_html() -> anyhow::Result<()> {
        let html = "<!DOCTYPE html><html><head><title>Test</title></head><body>".repeat(50);
        let packed = compress_for_size(html.as_bytes())?.to_bytes();
        assert!(
            packed.len() < html.len(),
            "brotli should compress repetitive HTML: {} -> {}",
            html.len(),
            packed.len()
        );
        let restored = decompress(&packed)?.to_bytes();
        assert_eq!(html.as_bytes(), restored.as_ref());
        Ok(())
    }

    /// A forged LZ4 header declaring one byte more than the limit must be rejected.
    #[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    async fn test_lz4_rejects_oversized_decompressed_payload() {
        let declared_size: u32 = (super::MAX_DECOMPRESSED_SIZE as u32) + 1;
        // Version byte, forged size prefix, then 16 filler bytes standing in for block data.
        let payload: Vec<u8> = std::iter::once(super::COMPRESSION_VERSION_LZ4)
            .chain(declared_size.to_le_bytes())
            .chain([0u8; 16])
            .collect();

        let error_message = match decompress(&payload) {
            Ok(_) => panic!("should have failed"),
            Err(e) => e.to_string(),
        };
        assert!(error_message.contains("exceeds limit"), "unexpected error: {}", error_message);
    }

    /// A genuine payload from the speed-oriented entry point is accepted and restored.
    #[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    async fn test_lz4_accepts_valid_decompressed_payload() -> anyhow::Result<()> {
        let text = "hello world! ".repeat(100);
        let packed = compress_for_speed(text.as_bytes())?.to_bytes();
        let restored = decompress(&packed)?.to_bytes();
        assert_eq!(text.as_bytes(), restored.as_ref());
        Ok(())
    }

    /// Repetitive text must come out smaller from the speed-oriented entry point.
    #[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
    async fn test_lz4_actually_compresses_text() -> anyhow::Result<()> {
        let text = "The quick brown fox jumps over the lazy dog. ".repeat(100);
        let packed = compress_for_speed(text.as_bytes())?.to_bytes();
        assert!(
            packed.len() < text.len(),
            "lz4 should compress repetitive text: {} -> {}",
            text.len(),
            packed.len()
        );
        let restored = decompress(&packed)?.to_bytes();
        assert_eq!(text.as_bytes(), restored.as_ref());
        Ok(())
    }
}