29#if !defined(SEQAN3_HAS_ZLIB) && !defined(SEQAN3_HEADER_TEST)
30# error "This file cannot be used when building without GZip-support."
33#if defined(SEQAN3_HAS_ZLIB)
37namespace seqan3::contrib
// Tail of a brace-initialised sequence of char literals (24 values are visible here).
// The declaration that opens this initialiser is not part of this excerpt.
// NOTE(review): presumably a fixed byte pattern defined by the BGZF format (e.g. an
// end-of-file marker block) -- confirm against the missing declaration.
51 '\x00',
'\x00',
'\x00',
'\x00',
52 '\x00',
'\xff',
'\x06',
'\x00',
53 '\x42',
'\x43',
'\x02',
'\x00',
54 '\x1b',
'\x00',
'\x03',
'\x00',
55 '\x00',
'\x00',
'\x00',
'\x00',
56 '\x00',
'\x00',
'\x00',
'\x00'}};
// Primary template of the per-algorithm compression state.
//
// TAlgTag  Tag type that selects the compression algorithm.
//
// The primary template is deliberately empty: an algorithm without a
// specialisation carries no state. This file specialises it for
// detail::gz_compression and detail::bgzf_compression.
template <typename TAlgTag>
struct CompressionContext
{};
// Forward declaration of the page-size trait.
//
// TAlgTag  Tag type that selects the compression algorithm.
//
// Only declared here, so instantiating it for a tag without a specialisation
// is a compile-time error. This file specialises it for detail::bgzf_compression.
template <typename TAlgTag>
struct DefaultPageSize;
// Compression state for the gz algorithm tag.
// The members are not visible in this excerpt; the init/compress/decompress functions
// in this file access a member `strm` of this type and pass it to zlib's
// deflate*/inflate* calls.
65struct CompressionContext<detail::gz_compression>
// Compression state for the bgzf algorithm tag.
// Inherits everything from the gz context and adds BGZF-specific data.
76struct CompressionContext<detail::bgzf_compression>:
77 CompressionContext<detail::gz_compression>
// Length of a BGZF block header, taken from the size of the bgzf magic header.
79 static constexpr size_t BLOCK_HEADER_LENGTH = detail::bgzf_compression::magic_header.size();
// NOTE(review): presumably a position inside the block header while it is being
// processed; no user of this member is visible in this excerpt -- confirm.
80 unsigned char headerPos;
// Size constants for BGZF blocks.
84struct DefaultPageSize<detail::bgzf_compression>
// Upper bound for one complete block: 64 KiB.
86 static const unsigned MAX_BLOCK_SIZE = 64 * 1024;
// Footer size in bytes; _compressBlock writes two 32-bit values there (CRC32 and input size).
87 static const unsigned BLOCK_FOOTER_LENGTH = 8;
// NOTE(review): presumably the per-block overhead of a stored (uncompressed) deflate
// block; the explaining comment is not visible in this excerpt -- confirm.
89 static const unsigned ZLIB_BLOCK_OVERHEAD = 5;
// Re-exports the header length of the bgzf compression context as an enum constant.
93 enum { BLOCK_HEADER_LENGTH = CompressionContext<detail::bgzf_compression>::BLOCK_HEADER_LENGTH };
// Page size: what remains of MAX_BLOCK_SIZE after subtracting header, footer and the overhead above.
94 static const unsigned VALUE = MAX_BLOCK_SIZE - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH - ZLIB_BLOCK_OVERHEAD;
// Initialises ctx.strm for compression through zlib's deflateInit2().
//
// ctx  gz compression context whose `strm` member is set up.
//
// Throws io_error with the message below; the condition that guards the throw
// is not visible in this excerpt (presumably status != Z_OK -- confirm).
106compressInit(CompressionContext<detail::gz_compression> & ctx)
// NOTE(review): per the zlib manual a negative windowBits value selects raw deflate
// (no zlib/gzip wrapper) with a window of 2^15 bytes -- confirm.
108 const int GZIP_WINDOW_BITS = -15;
109 const int Z_DEFAULT_MEM_LEVEL = 8;
// Null allocator hooks; NOTE(review): zlib documents Z_NULL here as "use the default allocator" -- confirm.
111 ctx.strm.zalloc = NULL;
112 ctx.strm.zfree = NULL;
// Compression level is Z_BEST_SPEED, method Z_DEFLATED, default strategy.
118 int status = deflateInit2(&ctx.strm, Z_BEST_SPEED, Z_DEFLATED,
119 GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
121 throw io_error(
"Calling deflateInit2() failed for gz file.");
// Initialises a bgzf compression context by delegating to the gz overload:
// the context is cast to a reference to its gz base class and passed on unchanged.
129compressInit(CompressionContext<detail::bgzf_compression> & ctx)
131 compressInit(
static_cast<CompressionContext<detail::gz_compression> &
>(ctx));
// Reads a value from `buffer` and returns it after passing it through
// detail::to_little_endian().
// The lines that declare and fill `tmp` are not visible in this excerpt;
// presumably `tmp` is a uint16_t copied byte-wise from `buffer` (mirroring _bgzfPack16) -- confirm.
140_bgzfUnpack16(
char const * buffer)
144 return detail::to_little_endian(tmp);
// Reads a value from `buffer` and returns it after passing it through
// detail::to_little_endian().
// The lines that declare and fill `tmp` are not visible in this excerpt;
// presumably `tmp` is a uint32_t copied byte-wise from `buffer` (mirroring _bgzfPack32) -- confirm.
148_bgzfUnpack32(
char const * buffer)
152 return detail::to_little_endian(tmp);
// Serialises a 16-bit value.
//
// buffer  Destination; NOTE(review): the statement that writes to it is only partly
//         visible in this excerpt -- presumably a byte-wise copy into `buffer`, confirm.
// value   Value to store; taken by value and converted in place below.
160_bgzfPack16(
char * buffer, uint16_t value)
// Convert to the byte order produced by detail::to_little_endian() before copying the raw bytes.
162 value = detail::to_little_endian(value);
// End of the byte range of `value` (start address plus sizeof(uint16_t)); argument of a copy call whose head is not visible.
164 reinterpret_cast<char *
>(&value) +
sizeof(uint16_t),
// Serialises a 32-bit value.
//
// buffer  Destination; NOTE(review): the statement that writes to it is only partly
//         visible in this excerpt -- presumably a byte-wise copy into `buffer`, confirm.
// value   Value to store; taken by value and converted in place below.
169_bgzfPack32(
char * buffer, uint32_t value)
// Convert to the byte order produced by detail::to_little_endian() before copying the raw bytes.
171 value = detail::to_little_endian(value);
// End of the byte range of `value` (start address plus sizeof(uint32_t)); argument of a copy call whose head is not visible.
173 reinterpret_cast<char *
>(&value) +
sizeof(uint32_t),
// Compresses one source buffer into a single BGZF block.
//
// dstBegin     Start of the destination buffer (elements must be one byte wide).
// dstCapacity  Capacity of the destination; must exceed header plus footer length.
// srcBegin     Start of the uncompressed input.
// srcLength    Number of input elements; the byte count is srcLength * sizeof(TSourceValue).
// ctx          bgzf compression context whose zlib stream is used.
//
// Returns the number of bytes written to the destination (header + payload + footer).
// Throws io_error if deflate() does not finish in one call or if deflateEnd() fails.
// NOTE(review): several original lines are missing from this excerpt (return type,
// braces, the stream initialisation before deflate(), the check before the second throw).
181template <
typename TDestValue,
typename TDestCapacity,
typename TSourceValue,
typename TSourceLength>
183_compressBlock(TDestValue *dstBegin, TDestCapacity dstCapacity,
184 TSourceValue *srcBegin, TSourceLength srcLength, CompressionContext<detail::bgzf_compression> & ctx)
// Header and footer sizes of a BGZF block, as defined by DefaultPageSize.
186 const size_t BLOCK_HEADER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_HEADER_LENGTH;
187 const size_t BLOCK_FOOTER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_FOOTER_LENGTH;
// Preconditions (checked only when assertions are enabled): room for more than
// header + footer, byte-sized destination elements, 4-byte unsigned.
189 assert(dstCapacity > BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH);
190 assert(
sizeof(TDestValue) == 1u);
191 assert(
sizeof(
unsigned) == 4u);
// Step 1: write the bgzf magic header to the start of the destination.
194 std::ranges::copy(detail::bgzf_compression::magic_header, dstBegin);
// Step 2: deflate the source bytes into the area directly behind the header,
// reserving space for the footer at the end of the destination.
198 ctx.strm.next_in = (Bytef *)(srcBegin);
199 ctx.strm.next_out = (Bytef *)(dstBegin + BLOCK_HEADER_LENGTH);
200 ctx.strm.avail_in = srcLength *
sizeof(TSourceValue);
201 ctx.strm.avail_out = dstCapacity - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
// Z_FINISH asks zlib to complete the stream in this single call; any result other
// than Z_STREAM_END is treated as failure (reported as output being too big).
203 int status = deflate(&ctx.strm, Z_FINISH);
204 if (status != Z_STREAM_END)
// Release zlib's stream state before reporting the error.
206 deflateEnd(&ctx.strm);
207 throw io_error(
"Deflation failed. Compressed BGZF data is too big.");
// Regular clean-up of the stream; the check guarding the following throw is not visible here.
210 status = deflateEnd(&ctx.strm);
212 throw io_error(
"BGZF deflateEnd() failed.");
// len = header + compressed payload + footer, because avail_out started at
// capacity minus header and footer. The header stores len - 1 as a 16-bit value at byte offset 16.
219 size_t len = dstCapacity - ctx.strm.avail_out;
220 _bgzfPack16(dstBegin + 16, len - 1);
// Step 3: footer -- CRC32 of the uncompressed source bytes, followed by their byte count.
222 dstBegin += len - BLOCK_FOOTER_LENGTH;
223 _bgzfPack32(dstBegin, crc32(crc32(0u, NULL, 0u), (Bytef *)(srcBegin), srcLength *
sizeof(TSourceValue)));
224 _bgzfPack32(dstBegin + 4, srcLength *
sizeof(TSourceValue));
// Total number of bytes written (same value as len).
226 return dstCapacity - ctx.strm.avail_out;
// Initialises ctx.strm for decompression through zlib's inflateInit2().
//
// ctx  gz compression context whose `strm` member is set up.
//
// Throws io_error with the message below; the condition that guards the throw
// is not visible in this excerpt (presumably status != Z_OK -- confirm).
234decompressInit(CompressionContext<detail::gz_compression> & ctx)
// Same window setting as in compressInit. NOTE(review): per the zlib manual a
// negative windowBits value selects raw inflate (no zlib/gzip wrapper) -- confirm.
236 const int GZIP_WINDOW_BITS = -15;
// Null allocator hooks; NOTE(review): zlib documents Z_NULL here as "use the default allocator" -- confirm.
238 ctx.strm.zalloc = NULL;
239 ctx.strm.zfree = NULL;
240 int status = inflateInit2(&ctx.strm, GZIP_WINDOW_BITS);
242 throw io_error(
"GZip inflateInit2() failed.");
// Initialises a bgzf context for decompression by delegating to the gz overload:
// the context is cast to a reference to its gz base class and passed on unchanged.
250decompressInit(CompressionContext<detail::bgzf_compression> & ctx)
252 decompressInit(
static_cast<CompressionContext<detail::gz_compression> &
>(ctx));
// Decompresses a single BGZF block and verifies its header, length, CRC32 and size.
//
// dstBegin     Start of the output buffer.
// dstCapacity  Capacity of the output buffer in elements of TDestValue.
// srcBegin     Start of the complete BGZF block (elements must be one byte wide).
// srcLength    Length of the block; must match the length stored in the header.
// ctx          bgzf compression context whose zlib stream is used.
//
// Returns the number of decompressed elements of TDestValue.
// Throws io_error for a too-short block, an invalid header, a length mismatch,
// a failed inflate()/inflateEnd(), a wrong checksum or a wrong uncompressed size.
// NOTE(review): several original lines are missing from this excerpt (return type,
// braces, the stream initialisation before inflate(), the check before the inflateEnd throw).
260template <
typename TDestValue,
typename TDestCapacity,
typename TSourceValue,
typename TSourceLength>
262_decompressBlock(TDestValue *dstBegin, TDestCapacity dstCapacity,
263 TSourceValue *srcBegin, TSourceLength srcLength, CompressionContext<detail::bgzf_compression> & ctx)
// Header and footer sizes of a BGZF block, as defined by DefaultPageSize.
265 const size_t BLOCK_HEADER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_HEADER_LENGTH;
266 const size_t BLOCK_FOOTER_LENGTH = DefaultPageSize<detail::bgzf_compression>::BLOCK_FOOTER_LENGTH;
// Preconditions (checked only when assertions are enabled): byte-sized source elements, 4-byte unsigned.
268 assert(
sizeof(TSourceValue) == 1u);
269 assert(
sizeof(
unsigned) == 4u);
// Step 1: validate the block. It must hold more than just header and footer ...
273 if (srcLength <= BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH)
274 throw io_error(
"BGZF block too short.");
// ... its header must pass the bgzf header validation ...
276 if (!detail::bgzf_compression::validate_header(std::span{srcBegin, srcLength}))
277 throw io_error(
"Invalid BGZF block header.");
// ... and the length stored at byte offset 16 (block length minus one) must equal srcLength.
279 size_t compressedLen = _bgzfUnpack16(srcBegin + 16) + 1u;
280 if (compressedLen != srcLength)
281 throw io_error(
"BGZF compressed size mismatch.");
// Step 2: inflate the payload that lies between header and footer into the output buffer.
287 ctx.strm.next_in = (Bytef *)(srcBegin + BLOCK_HEADER_LENGTH);
288 ctx.strm.next_out = (Bytef *)(dstBegin);
289 ctx.strm.avail_in = srcLength - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
290 ctx.strm.avail_out = dstCapacity *
sizeof(TDestValue);
// Z_FINISH asks zlib to complete the stream in this single call; any result other
// than Z_STREAM_END is treated as failure (reported as output being too big).
292 int status = inflate(&ctx.strm, Z_FINISH);
293 if (status != Z_STREAM_END)
// Release zlib's stream state before reporting the error.
295 inflateEnd(&ctx.strm);
296 throw io_error(
"Inflation failed. Decompressed BGZF data is too big.");
// Regular clean-up of the stream; the check guarding the following throw is not visible here.
299 status = inflateEnd(&ctx.strm);
301 throw io_error(
"BGZF inflateEnd() failed.");
// Step 3: verify the footer -- CRC32 of the decompressed data, then its size.
// NOTE(review): `dstCapacity - ctx.strm.avail_out` is the decompressed byte count only
// when sizeof(TDestValue) == 1, because avail_out was initialised in bytes; only
// TSourceValue is asserted to be byte-sized above -- confirm callers.
308 unsigned crc = crc32(crc32(0u, NULL, 0u), (Bytef *)(dstBegin), dstCapacity - ctx.strm.avail_out);
// Move to the start of the footer: first 4 bytes hold the CRC32, the next 4 the uncompressed size.
310 srcBegin += compressedLen - BLOCK_FOOTER_LENGTH;
311 if (_bgzfUnpack32(srcBegin) != crc)
312 throw io_error(
"BGZF wrong checksum.");
314 if (_bgzfUnpack32(srcBegin + 4) != dstCapacity - ctx.strm.avail_out)
315 throw io_error(
"BGZF size mismatch.");
// Convert the produced amount into a count of TDestValue elements.
317 return (dstCapacity - ctx.strm.avail_out) /
sizeof(TDestValue);
Provides seqan3::contrib::bgzf_thread_count.
Provides various transformation traits used by the range module.
Provides exceptions used in the I/O module.
Provides utility functions for bit twiddling.
T uninitialized_copy(T... args)