git ptrace.dev


        
            
            
        
        0/*
1
2Copyright (c) 2023, Dominic Szablewski - https://phoboslab.org
3SPDX-License-Identifier: MIT
4
5QOA - The "Quite OK Audio" format for fast, lossy audio compression
6
7
8-- Data Format
9
10QOA encodes pulse-code modulated (PCM) audio data with up to 255 channels, 
11sample rates from 1 up to 16777215 hertz and a bit depth of 16 bits.
12
13The compression method employed in QOA is lossy; it discards some information
14from the uncompressed PCM data. For many types of audio signals this compression
15is "transparent", i.e. the difference from the original file is often not
16audible.
17
18QOA encodes 20 samples of 16 bit PCM data into slices of 64 bits. A single
19sample therefore requires 3.2 bits of storage space, resulting in a 5x
20compression (16 / 3.2).
21
22A QOA file consists of an 8 byte file header, followed by a number of frames.
23Each frame contains an 8 byte frame header, the current 16 byte en-/decoder
24state per channel and 256 slices per channel. Each slice is 8 bytes wide and
25encodes 20 samples of audio data.
26
27All values, including the slices, are big endian. The file layout is as follows:
28
29struct {
30	struct {
31		char     magic[4];         // magic bytes "qoaf"
32		uint32_t samples;          // samples per channel in this file
33	} file_header;             
34
35	struct {
36		struct {
37			uint8_t  num_channels; // no. of channels
38			uint24_t samplerate;   // samplerate in hz
39			uint16_t fsamples;     // samples per channel in this frame
40			uint16_t fsize;        // frame size (includes this header)
41		} frame_header;          
42
43		struct {
44			int16_t history[4];    // most recent last
45			int16_t weights[4];    // most recent last
46		} lms_state[num_channels]; 
47
48		qoa_slice_t slices[256][num_channels];
49
50	} frames[ceil(samples / (256 * 20))];
51} qoa_file_t;
52
53Each `qoa_slice_t` contains a quantized scalefactor `sf_quant` and 20 quantized
54residuals `qrNN`:
55
56.- QOA_SLICE -- 64 bits, 20 samples --------------------------/  /------------.
57|        Byte[0]         |        Byte[1]         |  Byte[2]  \  \  Byte[7]   |
58| 7  6  5  4  3  2  1  0 | 7  6  5  4  3  2  1  0 | 7  6  5   /  /    2  1  0 |
59|------------+--------+--------+--------+---------+---------+-\  \--+---------|
60|  sf_quant  |  qr00  |  qr01  |  qr02  |  qr03   |  qr04   | /  /  |  qr19   |
61`-------------------------------------------------------------\  \------------`
62
63Each frame except the last must contain exactly 256 slices per channel. The last
64frame may contain between 1 .. 256 (inclusive) slices per channel. The last
65slice (for each channel) in the last frame may contain less than 20 samples; the
66slice still must be 8 bytes wide, with the unused samples zeroed out.
67
68Channels are interleaved per slice. E.g. for 2 channel stereo: 
69slice[0] = L, slice[1] = R, slice[2] = L, slice[3] = R ...
70
71A valid QOA file or stream must have at least one frame. Each frame must contain
72at least one channel and one sample with a samplerate between 1 .. 16777215
73(inclusive).
74
75If the total number of samples is not known by the encoder, the samples in the
76file header may be set to 0x00000000 to indicate that the encoder is 
77"streaming". In a streaming context, the samplerate and number of channels may
78differ from frame to frame. For static files (those with samples set to a
79non-zero value), each frame must have the same number of channels and same
80samplerate.
81
82Note that this implementation of QOA only handles files with a known total
83number of samples.
84
85A decoder should support at least 8 channels. The channel layout for channel
86counts 1 .. 8 is:
87
88	1. Mono
89	2. L, R
90	3. L, R, C 
91	4. FL, FR, B/SL, B/SR 
92	5. FL, FR, C, B/SL, B/SR 
93	6. FL, FR, C, LFE, B/SL, B/SR
94	7. FL, FR, C, LFE, B, SL, SR 
95	8. FL, FR, C, LFE, BL, BR, SL, SR
96
97QOA predicts each audio sample based on the previously decoded ones using a
98"Sign-Sign Least Mean Squares Filter" (LMS). This prediction plus the 
99dequantized residual forms the final output sample.
100
101*/
102
103
104
105/* -----------------------------------------------------------------------------
106	Header - Public functions */
107
108#ifndef QOA_H
109#define QOA_H
110
111#ifdef __cplusplus
112extern "C" {
113#endif
114
/* Smallest input qoa_decode_header() accepts: the 8 byte file header plus the
8 byte header of the first frame. */
#define QOA_MIN_FILESIZE 16

/* Number of per-channel LMS states stored in a qoa_desc. */
#define QOA_MAX_CHANNELS 8

#define QOA_SLICE_LEN 20          /* samples per slice */
#define QOA_SLICES_PER_FRAME 256  /* slices per channel in a full frame */
#define QOA_FRAME_LEN (QOA_SLICES_PER_FRAME * QOA_SLICE_LEN)
#define QOA_LMS_LEN 4             /* history/weight taps of the LMS filter */
#define QOA_MAGIC 0x716f6166 /* 'qoaf' */

/* Size in bytes of one encoded frame: 8 byte frame header, the LMS state
(QOA_LMS_LEN history + QOA_LMS_LEN weights, 2 bytes each) per channel and
8 bytes per slice per channel. The arguments are parenthesized so that
expressions such as QOA_FRAME_SIZE(a + b, n) expand correctly. */
#define QOA_FRAME_SIZE(channels, slices) \
	(8 + QOA_LMS_LEN * 4 * (channels) + 8 * (slices) * (channels))

/* State of the LMS predictor for a single channel. */
typedef struct {
	int history[QOA_LMS_LEN]; /* previously reconstructed samples, most recent last */
	int weights[QOA_LMS_LEN]; /* filter weights, most recent last */
} qoa_lms_t;

/* Description of a QOA stream plus the running en-/decoder state. */
typedef struct {
	unsigned int channels;           /* number of interleaved channels */
	unsigned int samplerate;         /* samplerate in hz */
	unsigned int samples;            /* samples per channel */
	qoa_lms_t lms[QOA_MAX_CHANNELS]; /* per-channel LMS state, updated while coding */
	#ifdef QOA_RECORD_TOTAL_ERROR
		double error;                /* accumulated squared error of the encoder */
	#endif
} qoa_desc;

/* Write the 8 byte file header (magic + qoa->samples) to bytes. Returns the
number of bytes written. */
unsigned int qoa_encode_header(qoa_desc *qoa, unsigned char *bytes);

/* Encode frame_len samples per channel of interleaved 16 bit PCM from
sample_data into bytes, using and updating the LMS state in qoa. Returns the
number of bytes written. */
unsigned int qoa_encode_frame(
	const short *sample_data, qoa_desc *qoa,
	unsigned int frame_len, unsigned char *bytes
);

/* Encode a complete file. channels, samplerate and samples must be set in qoa.
Returns a buffer allocated with QOA_MALLOC (release it with QOA_FREE) and
stores its length in out_len; returns NULL if the description is invalid. */
void *qoa_encode(const short *sample_data, qoa_desc *qoa, unsigned int *out_len);

/* Size in bytes of a full frame (QOA_SLICES_PER_FRAME slices per channel) for
the channel count in qoa. */
unsigned int qoa_max_frame_size(qoa_desc *qoa);

/* Parse the file header and peek into the first frame header to fill in
samples, channels and samplerate. Returns 8 (the offset of the first frame) on
success and 0 on failure. */
unsigned int qoa_decode_header(const unsigned char *bytes, int size, qoa_desc *qoa);

/* Decode a single frame from bytes into interleaved 16 bit PCM in sample_data.
The number of decoded samples per channel is stored in frame_len. Returns the
number of bytes consumed, or 0 if the frame could not be decoded. */
unsigned int qoa_decode_frame(
	const unsigned char *bytes, unsigned int size, qoa_desc *qoa,
	short *sample_data, unsigned int *frame_len
);

/* Decode a complete file held in memory. Returns a buffer of interleaved
samples allocated with QOA_MALLOC (release it with QOA_FREE), or NULL if the
header is invalid. qoa->samples is set to the number of samples per channel
that were actually decoded. */
short *qoa_decode(const unsigned char *bytes, int size, qoa_desc *file);

#ifndef QOA_NO_STDIO

/* Encode sample_data and write the result to filename. Returns the number of
bytes of the encoded file, or 0 on failure. */
int qoa_write(const char *filename, const short *sample_data, qoa_desc *qoa);

/* Read and decode filename. Returns the decoded samples (see qoa_decode()) or
NULL on failure. */
void *qoa_read(const char *filename, qoa_desc *qoa);

#endif /* QOA_NO_STDIO */
157
158
159#ifdef __cplusplus
160}
161#endif
162#endif /* QOA_H */
163
164
165/* -----------------------------------------------------------------------------
166	Implementation */
167
168#ifdef QOA_IMPLEMENTATION
#include <limits.h>
#include <stdlib.h>
170
/* Allocator hooks. Define QOA_MALLOC and QOA_FREE before this point to route
all allocations of the implementation through a custom allocator; otherwise the
C library's malloc()/free() are used. */
#if !defined(QOA_MALLOC)
	#define QOA_MALLOC(sz) malloc(sz)
	#define QOA_FREE(p) free(p)
#endif

/* Unsigned 64 bit integer used for headers, slices and error accumulators. */
typedef unsigned long long qoa_uint64_t;
177
178
/* qoa_quant_tab translates a scaled residual in the range -8 .. 8 (offset by 8
to form the array index) into a 3 bit index into the dequant_tab. Resolution
drops towards the ends of the range. A residual of zero shares its code with
the smallest positive value; this is mostly fine, since the qoa_div() function
always rounds away from zero. */

static const int qoa_quant_tab[17] = {
	7, 7, 7, /* -8, -7, -6 */
	5, 5,    /* -5, -4     */
	3, 3,    /* -3, -2     */
	1,       /* -1         */
	0,       /*  0         */
	0,       /*  1         */
	2, 2,    /*  2,  3     */
	4, 4,    /*  4,  5     */
	6, 6, 6  /*  6,  7,  8 */
};
190
191
/* The 16 scalefactors. As with the quantized residuals, the steps get coarser
towards the top. Encoding the largest possible 16 bit residual would in theory
need a scalefactor of (2**16)/8 = 8192. We instead rely on the LMS filter to
predict well enough that residuals of up to a quarter of the 16 bit range
suffice: the largest scalefactor 2048 times the quant range of 8 covers
residuals up to 2**14.

Each entry is computed as:
scalefactor_tab[s] <- round(pow(s + 1, 2.75)) */

static const int qoa_scalefactor_tab[16] = {
	   1,    7,   21,   45,
	  84,  138,  211,  304,
	 421,  562,  731,  928,
	1157, 1419, 1715, 2048
};
205
206
/* Rounded-up reciprocals 1/scalefactor of the 16 scalefactors in .16 fixed
point. With these the encoder scales a residual using a single integer
multiplication rather than an expensive division, and without floats.

Each entry is computed as:
reciprocal_tab[s] <- ((1<<16) + scalefactor_tab[s] - 1) / scalefactor_tab[s] */

static const int qoa_reciprocal_tab[16] = {
	65536, 9363, 3121, 1457,
	  781,  475,  311,  216,
	  156,  117,   90,   71,
	   57,   47,   39,   32
};
218
219
/* qoa_dequant_tab[s][q] is the unscaled, dequantized residual for scalefactor
index s and quantized residual q.

Because qoa_div rounds away from zero, the smallest quantized values stand for
3/4 rather than 1. The table is built from these per-index dequantized values:
float dqt[8] = {0.75, -0.75, 2.5, -2.5, 4.5, -4.5, 7, -7};
dequant_tab[s][q] <- round_ties_away_from_zero(scalefactor_tab[s] * dqt[q])

Rounding is "to nearest, ties away from zero", so positive and negative values
are treated symmetrically. One row per scalefactor: */

static const int qoa_dequant_tab[16][8] = {
	/* sf    1 */ {   1,    -1,    3,    -3,    5,    -5,     7,     -7},
	/* sf    7 */ {   5,    -5,   18,   -18,   32,   -32,    49,    -49},
	/* sf   21 */ {  16,   -16,   53,   -53,   95,   -95,   147,   -147},
	/* sf   45 */ {  34,   -34,  113,  -113,  203,  -203,   315,   -315},
	/* sf   84 */ {  63,   -63,  210,  -210,  378,  -378,   588,   -588},
	/* sf  138 */ { 104,  -104,  345,  -345,  621,  -621,   966,   -966},
	/* sf  211 */ { 158,  -158,  528,  -528,  950,  -950,  1477,  -1477},
	/* sf  304 */ { 228,  -228,  760,  -760, 1368, -1368,  2128,  -2128},
	/* sf  421 */ { 316,  -316, 1053, -1053, 1895, -1895,  2947,  -2947},
	/* sf  562 */ { 422,  -422, 1405, -1405, 2529, -2529,  3934,  -3934},
	/* sf  731 */ { 548,  -548, 1828, -1828, 3290, -3290,  5117,  -5117},
	/* sf  928 */ { 696,  -696, 2320, -2320, 4176, -4176,  6496,  -6496},
	/* sf 1157 */ { 868,  -868, 2893, -2893, 5207, -5207,  8099,  -8099},
	/* sf 1419 */ {1064, -1064, 3548, -3548, 6386, -6386,  9933,  -9933},
	/* sf 1715 */ {1286, -1286, 4288, -4288, 7718, -7718, 12005, -12005},
	/* sf 2048 */ {1536, -1536, 5120, -5120, 9216, -9216, 14336, -14336},
};
251
252
253/* The Least Mean Squares Filter is the heart of QOA. It predicts the next
254sample based on the previous 4 reconstructed samples. It does so by continuously
255adjusting 4 weights based on the residual of the previous prediction.
256
257The next sample is predicted as the sum of (weight[i] * history[i]).
258
259The adjustment of the weights is done with a "Sign-Sign-LMS" that adds or
260subtracts the residual to each weight, based on the corresponding sample from 
261the history. This, surprisingly, is sufficient to get worthwhile predictions.
262
263This is all done with fixed point integers. Hence the right-shifts when updating
264the weights and calculating the prediction. */
265
266static int qoa_lms_predict(qoa_lms_t *lms) {
267	int prediction = 0;
268	for (int i = 0; i < QOA_LMS_LEN; i++) {
269		prediction += lms->weights[i] * lms->history[i];
270	}
271	return prediction >> 13;
272}
273
274static void qoa_lms_update(qoa_lms_t *lms, int sample, int residual) {
275	int delta = residual >> 4;
276	for (int i = 0; i < QOA_LMS_LEN; i++) {
277		lms->weights[i] += lms->history[i] < 0 ? -delta : delta;
278	}
279
280	for (int i = 0; i < QOA_LMS_LEN-1; i++) {
281		lms->history[i] = lms->history[i+1];
282	}
283	lms->history[QOA_LMS_LEN-1] = sample;
284}
285
286
287/* qoa_div() implements a rounding division, but avoids rounding to zero for 
288small numbers. E.g. 0.1 will be rounded to 1. Note that 0 itself still 
289returns as 0, which is handled in the qoa_quant_tab[].
290qoa_div() takes an index into the .16 fixed point qoa_reciprocal_tab as an
291argument, so it can do the division with a cheaper integer multiplication. */
292
293static inline int qoa_div(int v, int scalefactor) {
294	int reciprocal = qoa_reciprocal_tab[scalefactor];
295	int n = (v * reciprocal + (1 << 15)) >> 16;
296	n = n + ((v > 0) - (v < 0)) - ((n > 0) - (n < 0)); /* round away from 0 */
297	return n;
298}
299
/* Limit v to the range min .. max (inclusive). The lower bound is tested
first. */
static inline int qoa_clamp(int v, int min, int max) {
	return v < min ? min : (v > max ? max : v);
}
305
/* Clamp v to the signed 16 bit range. A single unsigned comparison detects the
common case of an in-range value first; this specialization improves decode
performance quite a bit, as the out-of-range branch is rarely taken and works
nicely with the CPUs branch prediction. */

static inline int qoa_clamp_s16(int v) {
	unsigned int biased = (unsigned int)(v + 32768);

	if (biased <= 65535) {
		return v;
	}
	return v < -32768 ? -32768 : 32767;
}
317
318static inline qoa_uint64_t qoa_read_u64(const unsigned char *bytes, unsigned int *p) {
319	bytes += *p;
320	*p += 8;
321	return 
322		((qoa_uint64_t)(bytes[0]) << 56) | ((qoa_uint64_t)(bytes[1]) << 48) |
323		((qoa_uint64_t)(bytes[2]) << 40) | ((qoa_uint64_t)(bytes[3]) << 32) |
324		((qoa_uint64_t)(bytes[4]) << 24) | ((qoa_uint64_t)(bytes[5]) << 16) |
325		((qoa_uint64_t)(bytes[6]) <<  8) | ((qoa_uint64_t)(bytes[7]) <<  0);
326}
327
328static inline void qoa_write_u64(qoa_uint64_t v, unsigned char *bytes, unsigned int *p) {
329	bytes += *p;
330	*p += 8;
331	bytes[0] = (v >> 56) & 0xff;
332	bytes[1] = (v >> 48) & 0xff;
333	bytes[2] = (v >> 40) & 0xff;
334	bytes[3] = (v >> 32) & 0xff;
335	bytes[4] = (v >> 24) & 0xff;
336	bytes[5] = (v >> 16) & 0xff;
337	bytes[6] = (v >>  8) & 0xff;
338	bytes[7] = (v >>  0) & 0xff;
339}
340
341
342/* -----------------------------------------------------------------------------
343	Encoder */
344
345unsigned int qoa_encode_header(qoa_desc *qoa, unsigned char *bytes) {
346	unsigned int p = 0;
347	qoa_write_u64(((qoa_uint64_t)QOA_MAGIC << 32) | qoa->samples, bytes, &p);
348	return p;
349}
350
351unsigned int qoa_encode_frame(const short *sample_data, qoa_desc *qoa, unsigned int frame_len, unsigned char *bytes) {
352	unsigned int channels = qoa->channels;
353
354	unsigned int p = 0;
355	unsigned int slices = (frame_len + QOA_SLICE_LEN - 1) / QOA_SLICE_LEN;
356	unsigned int frame_size = QOA_FRAME_SIZE(channels, slices);
357	int prev_scalefactor[QOA_MAX_CHANNELS] = {0};
358
359	/* Write the frame header */
360	qoa_write_u64((
361		(qoa_uint64_t)qoa->channels   << 56 |
362		(qoa_uint64_t)qoa->samplerate << 32 |
363		(qoa_uint64_t)frame_len       << 16 |
364		(qoa_uint64_t)frame_size
365	), bytes, &p);
366
367	
368	for (unsigned int c = 0; c < channels; c++) {
369		/* Write the current LMS state */
370		qoa_uint64_t weights = 0;
371		qoa_uint64_t history = 0;
372		for (int i = 0; i < QOA_LMS_LEN; i++) {
373			history = (history << 16) | (qoa->lms[c].history[i] & 0xffff);
374			weights = (weights << 16) | (qoa->lms[c].weights[i] & 0xffff);
375		}
376		qoa_write_u64(history, bytes, &p);
377		qoa_write_u64(weights, bytes, &p);
378	}
379
380	/* We encode all samples with the channels interleaved on a slice level.
381	E.g. for stereo: (ch-0, slice 0), (ch 1, slice 0), (ch 0, slice 1), ...*/
382	for (unsigned int sample_index = 0; sample_index < frame_len; sample_index += QOA_SLICE_LEN) {
383
384		for (unsigned int c = 0; c < channels; c++) {
385			int slice_len = qoa_clamp(QOA_SLICE_LEN, 0, frame_len - sample_index);
386			int slice_start = sample_index * channels + c;
387			int slice_end = (sample_index + slice_len) * channels + c;			
388
389			/* Brute for search for the best scalefactor. Just go through all
390			16 scalefactors, encode all samples for the current slice and 
391			meassure the total squared error. */
392			qoa_uint64_t best_rank = -1;
393			#ifdef QOA_RECORD_TOTAL_ERROR
394				qoa_uint64_t best_error = -1;
395			#endif
396			qoa_uint64_t best_slice = 0;
397			qoa_lms_t best_lms;
398			int best_scalefactor = 0;
399
400			for (int sfi = 0; sfi < 16; sfi++) {
401				/* There is a strong correlation between the scalefactors of
402				neighboring slices. As an optimization, start testing
403				the best scalefactor of the previous slice first. */
404				int scalefactor = (sfi + prev_scalefactor[c]) % 16;
405
406				/* We have to reset the LMS state to the last known good one
407				before trying each scalefactor, as each pass updates the LMS
408				state when encoding. */
409				qoa_lms_t lms = qoa->lms[c];
410				qoa_uint64_t slice = scalefactor;
411				qoa_uint64_t current_rank = 0;
412				#ifdef QOA_RECORD_TOTAL_ERROR
413					qoa_uint64_t current_error = 0;
414				#endif
415
416				for (int si = slice_start; si < slice_end; si += channels) {
417					int sample = sample_data[si];
418					int predicted = qoa_lms_predict(&lms);
419
420					int residual = sample - predicted;
421					int scaled = qoa_div(residual, scalefactor);
422					int clamped = qoa_clamp(scaled, -8, 8);
423					int quantized = qoa_quant_tab[clamped + 8];
424					int dequantized = qoa_dequant_tab[scalefactor][quantized];
425					int reconstructed = qoa_clamp_s16(predicted + dequantized);
426
427
428					/* If the weights have grown too large, we introduce a penalty
429					here. This prevents pops/clicks in certain problem cases */
430					int weights_penalty = ((
431						lms.weights[0] * lms.weights[0] + 
432						lms.weights[1] * lms.weights[1] + 
433						lms.weights[2] * lms.weights[2] + 
434						lms.weights[3] * lms.weights[3]
435					) >> 18) - 0x8ff;
436					if (weights_penalty < 0) {
437						weights_penalty = 0;
438					}
439
440					long long error = (sample - reconstructed);
441					qoa_uint64_t error_sq = error * error;
442
443					current_rank += error_sq + weights_penalty * weights_penalty;
444					#ifdef QOA_RECORD_TOTAL_ERROR
445						current_error += error_sq;
446					#endif
447					if (current_rank > best_rank) {
448						break;
449					}
450
451					qoa_lms_update(&lms, reconstructed, dequantized);
452					slice = (slice << 3) | quantized;
453				}
454
455				if (current_rank < best_rank) {
456					best_rank = current_rank;
457					#ifdef QOA_RECORD_TOTAL_ERROR
458						best_error = current_error;
459					#endif
460					best_slice = slice;
461					best_lms = lms;
462					best_scalefactor = scalefactor;
463				}
464			}
465
466			prev_scalefactor[c] = best_scalefactor;
467
468			qoa->lms[c] = best_lms;
469			#ifdef QOA_RECORD_TOTAL_ERROR
470				qoa->error += best_error;
471			#endif
472
473			/* If this slice was shorter than QOA_SLICE_LEN, we have to left-
474			shift all encoded data, to ensure the rightmost bits are the empty
475			ones. This should only happen in the last frame of a file as all
476			slices are completely filled otherwise. */
477			best_slice <<= (QOA_SLICE_LEN - slice_len) * 3;
478			qoa_write_u64(best_slice, bytes, &p);
479		}
480	}
481	
482	return p;
483}
484
485void *qoa_encode(const short *sample_data, qoa_desc *qoa, unsigned int *out_len) {
486	if (
487		qoa->samples == 0 || 
488		qoa->samplerate == 0 || qoa->samplerate > 0xffffff ||
489		qoa->channels == 0 || qoa->channels > QOA_MAX_CHANNELS
490	) {
491		return NULL;
492	}
493
494	/* Calculate the encoded size and allocate */
495	unsigned int num_frames = (qoa->samples + QOA_FRAME_LEN-1) / QOA_FRAME_LEN;
496	unsigned int num_slices = (qoa->samples + QOA_SLICE_LEN-1) / QOA_SLICE_LEN;
497	unsigned int encoded_size = 8 +                    /* 8 byte file header */
498		num_frames * 8 +                               /* 8 byte frame headers */
499		num_frames * QOA_LMS_LEN * 4 * qoa->channels + /* 4 * 4 bytes lms state per channel */
500		num_slices * 8 * qoa->channels;                /* 8 byte slices */
501
502	unsigned char *bytes = (unsigned char *)QOA_MALLOC(encoded_size);
503
504	for (unsigned int c = 0; c < qoa->channels; c++) {
505		/* Set the initial LMS weights to {0, 0, -1, 2}. This helps with the 
506		prediction of the first few ms of a file. */
507		qoa->lms[c].weights[0] = 0;
508		qoa->lms[c].weights[1] = 0;
509		qoa->lms[c].weights[2] = -(1<<13);
510		qoa->lms[c].weights[3] =  (1<<14);
511
512		/* Explicitly set the history samples to 0, as we might have some
513		garbage in there. */
514		for (int i = 0; i < QOA_LMS_LEN; i++) {
515			qoa->lms[c].history[i] = 0;
516		}
517	}
518
519
520	/* Encode the header and go through all frames */
521	unsigned int p = qoa_encode_header(qoa, bytes);
522	#ifdef QOA_RECORD_TOTAL_ERROR
523		qoa->error = 0;
524	#endif
525
526	int frame_len = QOA_FRAME_LEN;
527	for (unsigned int sample_index = 0; sample_index < qoa->samples; sample_index += frame_len) {
528		frame_len = qoa_clamp(QOA_FRAME_LEN, 0, qoa->samples - sample_index);		
529		const short *frame_samples = sample_data + sample_index * qoa->channels;
530		unsigned int frame_size = qoa_encode_frame(frame_samples, qoa, frame_len, bytes + p);
531		p += frame_size;
532	}
533
534	*out_len = p;
535	return bytes;
536}
537
538
539
540/* -----------------------------------------------------------------------------
541	Decoder */
542
543unsigned int qoa_max_frame_size(qoa_desc *qoa) {
544	return QOA_FRAME_SIZE(qoa->channels, QOA_SLICES_PER_FRAME);
545}
546
547unsigned int qoa_decode_header(const unsigned char *bytes, int size, qoa_desc *qoa) {
548	unsigned int p = 0;
549	if (size < QOA_MIN_FILESIZE) {
550		return 0;
551	}
552
553
554	/* Read the file header, verify the magic number ('qoaf') and read the 
555	total number of samples. */
556	qoa_uint64_t file_header = qoa_read_u64(bytes, &p);
557
558	if ((file_header >> 32) != QOA_MAGIC) {
559		return 0;
560	}
561
562	qoa->samples = file_header & 0xffffffff;
563	if (!qoa->samples) {
564		return 0;
565	}
566
567	/* Peek into the first frame header to get the number of channels and
568	the samplerate. */
569	qoa_uint64_t frame_header = qoa_read_u64(bytes, &p);
570	qoa->channels   = (frame_header >> 56) & 0x0000ff;
571	qoa->samplerate = (frame_header >> 32) & 0xffffff;
572
573	if (qoa->channels == 0 || qoa->samples == 0 || qoa->samplerate == 0) {
574		return 0;
575	}
576
577	return 8;
578}
579
580unsigned int qoa_decode_frame(const unsigned char *bytes, unsigned int size, qoa_desc *qoa, short *sample_data, unsigned int *frame_len) {
581	unsigned int p = 0;
582	*frame_len = 0;
583
584	if (size < 8 + QOA_LMS_LEN * 4 * qoa->channels) {
585		return 0;
586	}
587
588	/* Read and verify the frame header */
589	qoa_uint64_t frame_header = qoa_read_u64(bytes, &p);
590	unsigned int channels   = (frame_header >> 56) & 0x0000ff;
591	unsigned int samplerate = (frame_header >> 32) & 0xffffff;
592	unsigned int samples    = (frame_header >> 16) & 0x00ffff;
593	unsigned int frame_size = (frame_header      ) & 0x00ffff;
594
595	unsigned int data_size = frame_size - 8 - QOA_LMS_LEN * 4 * channels;
596	unsigned int num_slices = data_size / 8;
597	unsigned int max_total_samples = num_slices * QOA_SLICE_LEN;
598
599	if (
600		channels != qoa->channels || 
601		samplerate != qoa->samplerate ||
602		frame_size > size ||
603		samples * channels > max_total_samples
604	) {
605		return 0;
606	}
607
608
609	/* Read the LMS state: 4 x 2 bytes history, 4 x 2 bytes weights per channel */
610	for (unsigned int c = 0; c < channels; c++) {
611		qoa_uint64_t history = qoa_read_u64(bytes, &p);
612		qoa_uint64_t weights = qoa_read_u64(bytes, &p);
613		for (int i = 0; i < QOA_LMS_LEN; i++) {
614			qoa->lms[c].history[i] = ((signed short)(history >> 48));
615			history <<= 16;
616			qoa->lms[c].weights[i] = ((signed short)(weights >> 48));
617			weights <<= 16;
618		}
619	}
620
621
622	/* Decode all slices for all channels in this frame */
623	for (unsigned int sample_index = 0; sample_index < samples; sample_index += QOA_SLICE_LEN) {
624		for (unsigned int c = 0; c < channels; c++) {
625			qoa_uint64_t slice = qoa_read_u64(bytes, &p);
626
627			int scalefactor = (slice >> 60) & 0xf;
628			slice <<= 4;
629
630			int slice_start = sample_index * channels + c;
631			int slice_end = qoa_clamp(sample_index + QOA_SLICE_LEN, 0, samples) * channels + c;
632
633			for (int si = slice_start; si < slice_end; si += channels) {
634				int predicted = qoa_lms_predict(&qoa->lms[c]);
635				int quantized = (slice >> 61) & 0x7;
636				int dequantized = qoa_dequant_tab[scalefactor][quantized];
637				int reconstructed = qoa_clamp_s16(predicted + dequantized);
638				
639				sample_data[si] = reconstructed;
640				slice <<= 3;
641
642				qoa_lms_update(&qoa->lms[c], reconstructed, dequantized);
643			}
644		}
645	}
646
647	*frame_len = samples;
648	return p;
649}
650
651short *qoa_decode(const unsigned char *bytes, int size, qoa_desc *qoa) {
652	unsigned int p = qoa_decode_header(bytes, size, qoa);
653	if (!p) {
654		return NULL;
655	}
656
657	/* Calculate the required size of the sample buffer and allocate */
658	int total_samples = qoa->samples * qoa->channels;
659	short *sample_data = (short *)QOA_MALLOC(total_samples * sizeof(short));
660
661	unsigned int sample_index = 0;
662	unsigned int frame_len;
663	unsigned int frame_size;
664
665	/* Decode all frames */
666	do {
667		short *sample_ptr = sample_data + sample_index * qoa->channels;
668		frame_size = qoa_decode_frame(bytes + p, size - p, qoa, sample_ptr, &frame_len);
669
670		p += frame_size;
671		sample_index += frame_len;
672	} while (frame_size && sample_index < qoa->samples);
673
674	qoa->samples = sample_index;
675	return sample_data;
676}
677
678
679
680/* -----------------------------------------------------------------------------
681	File read/write convenience functions */
682
683#ifndef QOA_NO_STDIO
684#include <stdio.h>
685
686int qoa_write(const char *filename, const short *sample_data, qoa_desc *qoa) {
687	FILE *f = fopen(filename, "wb");
688	unsigned int size;
689	void *encoded;
690
691	if (!f) {
692		return 0;
693	}
694
695	encoded = qoa_encode(sample_data, qoa, &size);
696	if (!encoded) {
697		fclose(f);
698		return 0;
699	}
700
701	fwrite(encoded, 1, size, f);
702	fclose(f);
703
704	QOA_FREE(encoded);
705	return size;
706}
707
708void *qoa_read(const char *filename, qoa_desc *qoa) {
709	FILE *f = fopen(filename, "rb");
710	int size, bytes_read;
711	void *data;
712	short *sample_data;
713
714	if (!f) {
715		return NULL;
716	}
717
718	fseek(f, 0, SEEK_END);
719	size = ftell(f);
720	if (size <= 0) {
721		fclose(f);
722		return NULL;
723	}
724	fseek(f, 0, SEEK_SET);
725
726	data = QOA_MALLOC(size);
727	if (!data) {
728		fclose(f);
729		return NULL;
730	}
731
732	bytes_read = fread(data, 1, size, f);
733	fclose(f);
734
735	sample_data = qoa_decode((const unsigned char *)data, bytes_read, qoa);
736	QOA_FREE(data);
737	return sample_data;
738}
739
740#endif /* QOA_NO_STDIO */
741#endif /* QOA_IMPLEMENTATION */
742
index : raylib-jai