diff --git a/modules/http2/h2_push.c b/modules/http2/h2_push.c index 9a3b19b..f0c0437 100644 --- a/modules/http2/h2_push.c +++ b/modules/http2/h2_push.c @@ -464,33 +464,6 @@ apr_array_header_t *h2_push_collect(apr_pool_t *p, const h2_request *req, return NULL; } -/******************************************************************************* - * push diary - * - * - The push diary keeps track of resources already PUSHed via HTTP/2 on this - * connection. It records a hash value from the absolute URL of the resource - * pushed. - * - Lacking openssl, it uses 'apr_hashfunc_default' for the value - * - with openssl, it uses SHA256 to calculate the hash value - * - whatever the method to generate the hash, the diary keeps a maximum of 64 - * bits per hash, limiting the memory consumption to about - * H2PushDiarySize * 8 - * bytes. Entries are sorted by most recently used and oldest entries are - * forgotten first. - * - Clients can initialize/replace the push diary by sending a 'Cache-Digest' - * header. Currently, this is the base64url encoded value of the cache digest - * as specified in https://datatracker.ietf.org/doc/draft-kazuho-h2-cache-digest/ - * This draft can be expected to evolve and the definition of the header - * will be added there and refined. - * - The cache digest header is a Golomb Coded Set of hash values, but it may - * limit the amount of bits per hash value even further. For a good description - * of GCS, read here: - * http://giovanni.bajo.it/post/47119962313/golomb-coded-sets-smaller-than-bloom-filters - * - The means that the push diary might be initialized with hash values of much - * less than 64 bits, leading to more false positives, but smaller digest size. - ******************************************************************************/ - - #define GCSLOG_LEVEL APLOG_TRACE1 typedef struct h2_push_diary_entry { @@ -617,38 +590,49 @@ static int h2_push_diary_find(h2_push_diary *diary, apr_uint64_t hash) return -1; } -static h2_push_diary_entry *move_to_last(h2_push_diary *diary, apr_size_t idx) +static void move_to_last(h2_push_diary *diary, apr_size_t idx) { h2_push_diary_entry *entries = (h2_push_diary_entry*)diary->entries->elts; h2_push_diary_entry e; - apr_size_t lastidx = diary->entries->nelts-1; + int lastidx; + /* Move an existing entry to the last place */ + if (diary->entries->nelts <= 0) + return; + /* move entry[idx] to the end */ + lastidx = diary->entries->nelts - 1; if (idx < lastidx) { e = entries[idx]; - memmove(entries+idx, entries+idx+1, sizeof(e) * (lastidx - idx)); + memmove(entries+idx, entries+idx+1, sizeof(h2_push_diary_entry) * (lastidx - idx)); entries[lastidx] = e; } - return &entries[lastidx]; } -static void h2_push_diary_append(h2_push_diary *diary, h2_push_diary_entry *e) +static void remove_first(h2_push_diary *diary) { - h2_push_diary_entry *ne; + h2_push_diary_entry *entries = (h2_push_diary_entry*)diary->entries->elts; + int lastidx; - if (diary->entries->nelts < diary->N) { - /* append a new diary entry at the end */ - APR_ARRAY_PUSH(diary->entries, h2_push_diary_entry) = *e; - ne = &APR_ARRAY_IDX(diary->entries, diary->entries->nelts-1, h2_push_diary_entry); + /* move remaining entries to index 0 */ + lastidx = diary->entries->nelts - 1; + if (lastidx > 0) { + --diary->entries->nelts; + memmove(entries, entries+1, sizeof(h2_push_diary_entry) * diary->entries->nelts); } - else { - /* replace content with new digest. keeps memory usage constant once diary is full */ - ne = move_to_last(diary, 0); - *ne = *e; +} + +static void h2_push_diary_append(h2_push_diary *diary, h2_push_diary_entry *e) +{ + while (diary->entries->nelts >= diary->N) { + remove_first(diary); } + /* append a new diary entry at the end */ + APR_ARRAY_PUSH(diary->entries, h2_push_diary_entry) = *e; + /* Intentional no APLOGNO */ ap_log_perror(APLOG_MARK, GCSLOG_LEVEL, 0, diary->entries->pool, - "push_diary_append: %"APR_UINT64_T_HEX_FMT, ne->hash); + "push_diary_append: %"APR_UINT64_T_HEX_FMT, e->hash); } apr_array_header_t *h2_push_diary_update(h2_session *session, apr_array_header_t *pushes) @@ -691,30 +675,12 @@ apr_array_header_t *h2_push_collect_update(h2_stream *stream, const struct h2_request *req, const struct h2_headers *res) { - h2_session *session = stream->session; - const char *cache_digest = apr_table_get(req->headers, "Cache-Digest"); apr_array_header_t *pushes; - apr_status_t status; - if (cache_digest && session->push_diary) { - status = h2_push_diary_digest64_set(session->push_diary, req->authority, - cache_digest, stream->pool); - if (status != APR_SUCCESS) { - ap_log_cerror(APLOG_MARK, APLOG_DEBUG, status, session->c, - H2_SSSN_LOG(APLOGNO(03057), session, - "push diary set from Cache-Digest: %s"), cache_digest); - } - } pushes = h2_push_collect(stream->pool, req, stream->push_policy, res); return h2_push_diary_update(stream->session, pushes); } -static apr_int32_t h2_log2inv(unsigned char log2) -{ - return log2? (1 << log2) : 1; -} - - typedef struct { h2_push_diary *diary; unsigned char log2p; @@ -830,10 +796,6 @@ apr_status_t h2_push_diary_digest_get(h2_push_diary *diary, apr_pool_t *pool, nelts = diary->entries->nelts; - if (nelts > APR_UINT32_MAX) { - /* should not happen */ - return APR_ENOTIMPL; - } N = ceil_power_of_2(nelts); log2n = h2_log2(N); @@ -895,166 +857,3 @@ apr_status_t h2_push_diary_digest_get(h2_push_diary *diary, apr_pool_t *pool, return APR_SUCCESS; } -typedef struct { - h2_push_diary *diary; - apr_pool_t *pool; - unsigned char log2p; - const unsigned char *data; - apr_size_t datalen; - apr_size_t offset; - unsigned int bit; - apr_uint64_t last_val; -} gset_decoder; - -static int gset_decode_next_bit(gset_decoder *decoder) -{ - if (++decoder->bit >= 8) { - if (++decoder->offset >= decoder->datalen) { - return -1; - } - decoder->bit = 0; - } - return (decoder->data[decoder->offset] & cbit_mask[decoder->bit])? 1 : 0; -} - -static apr_status_t gset_decode_next(gset_decoder *decoder, apr_uint64_t *phash) -{ - apr_uint64_t flex = 0, fixed = 0, delta; - int i; - - /* read 1 bits until we encounter 0, then read log2n(diary-P) bits. - * On a malformed bit-string, this will not fail, but produce results - * which are pbly too large. Luckily, the diary will modulo the hash. - */ - while (1) { - int bit = gset_decode_next_bit(decoder); - if (bit == -1) { - return APR_EINVAL; - } - if (!bit) { - break; - } - ++flex; - } - - for (i = 0; i < decoder->log2p; ++i) { - int bit = gset_decode_next_bit(decoder); - if (bit == -1) { - return APR_EINVAL; - } - fixed = (fixed << 1) | bit; - } - - delta = (flex << decoder->log2p) | fixed; - *phash = delta + decoder->last_val; - decoder->last_val = *phash; - - /* Intentional no APLOGNO */ - ap_log_perror(APLOG_MARK, GCSLOG_LEVEL, 0, decoder->pool, - "h2_push_diary_digest_dec: val=%"APR_UINT64_T_HEX_FMT", delta=%" - APR_UINT64_T_HEX_FMT", flex=%d, fixed=%"APR_UINT64_T_HEX_FMT, - *phash, delta, (int)flex, fixed); - - return APR_SUCCESS; -} - -/** - * Initialize the push diary by a cache digest as described in - * https://datatracker.ietf.org/doc/draft-kazuho-h2-cache-digest/ - * . - * @param diary the diary to set the digest into - * @param data the binary cache digest - * @param len the length of the cache digest - * @return APR_EINVAL if digest was not successfully parsed - */ -apr_status_t h2_push_diary_digest_set(h2_push_diary *diary, const char *authority, - const char *data, apr_size_t len) -{ - gset_decoder decoder; - unsigned char log2n, log2p; - int N, i; - apr_pool_t *pool = diary->entries->pool; - h2_push_diary_entry e; - apr_status_t status = APR_SUCCESS; - - if (len < 2) { - /* at least this should be there */ - return APR_EINVAL; - } - log2n = data[0]; - log2p = data[1]; - diary->mask_bits = log2n + log2p; - if (diary->mask_bits > 64) { - /* cannot handle */ - return APR_ENOTIMPL; - } - - /* whatever is in the digest, it replaces the diary entries */ - apr_array_clear(diary->entries); - if (!authority || !strcmp("*", authority)) { - diary->authority = NULL; - } - else if (!diary->authority || strcmp(diary->authority, authority)) { - diary->authority = apr_pstrdup(diary->entries->pool, authority); - } - - N = h2_log2inv(log2n + log2p); - - decoder.diary = diary; - decoder.pool = pool; - decoder.log2p = log2p; - decoder.data = (const unsigned char*)data; - decoder.datalen = len; - decoder.offset = 1; - decoder.bit = 8; - decoder.last_val = 0; - - diary->N = N; - /* Determine effective N we use for storage */ - if (!N) { - /* a totally empty cache digest. someone tells us that she has no - * entries in the cache at all. Use our own preferences for N+mask - */ - diary->N = diary->NMax; - return APR_SUCCESS; - } - else if (N > diary->NMax) { - /* Store not more than diary is configured to hold. We open us up - * to DOS attacks otherwise. */ - diary->N = diary->NMax; - } - - /* Intentional no APLOGNO */ - ap_log_perror(APLOG_MARK, GCSLOG_LEVEL, 0, pool, - "h2_push_diary_digest_set: N=%d, log2n=%d, " - "diary->mask_bits=%d, dec.log2p=%d", - (int)diary->N, (int)log2n, diary->mask_bits, - (int)decoder.log2p); - - for (i = 0; i < diary->N; ++i) { - if (gset_decode_next(&decoder, &e.hash) != APR_SUCCESS) { - /* the data may have less than N values */ - break; - } - h2_push_diary_append(diary, &e); - } - - /* Intentional no APLOGNO */ - ap_log_perror(APLOG_MARK, GCSLOG_LEVEL, 0, pool, - "h2_push_diary_digest_set: diary now with %d entries, mask_bits=%d", - (int)diary->entries->nelts, diary->mask_bits); - return status; -} - -apr_status_t h2_push_diary_digest64_set(h2_push_diary *diary, const char *authority, - const char *data64url, apr_pool_t *pool) -{ - const char *data; - apr_size_t len = h2_util_base64url_decode(&data, data64url, pool); - /* Intentional no APLOGNO */ - ap_log_perror(APLOG_MARK, GCSLOG_LEVEL, 0, pool, - "h2_push_diary_digest64_set: digest=%s, dlen=%d", - data64url, (int)len); - return h2_push_diary_digest_set(diary, authority, data, len); -} - diff --git a/modules/http2/h2_push.h b/modules/http2/h2_push.h index bc24e68..d061dd8 100644 --- a/modules/http2/h2_push.h +++ b/modules/http2/h2_push.h @@ -35,6 +35,44 @@ typedef enum { H2_PUSH_DIGEST_SHA256 } h2_push_digest_type; +/******************************************************************************* + * push diary + * + * - The push diary keeps track of resources already PUSHed via HTTP/2 on this + * connection. It records a hash value from the absolute URL of the resource + * pushed. + * - Lacking openssl, + * - with openssl, it uses SHA256 to calculate the hash value, otherwise it + * falls back to apr_hashfunc_default() + * - whatever the method to generate the hash, the diary keeps a maximum of 64 + * bits per hash, limiting the memory consumption to about + * H2PushDiarySize * 8 + * bytes. Entries are sorted by most recently used and oldest entries are + * forgotten first. + * - While useful by itself to avoid duplicated PUSHes on the same connection, + * the original idea was that clients provided a 'Cache-Digest' header with + * the values of *their own* cached resources. This was described in + * + * and some subsequent revisions that tweaked values but kept the overall idea. + * - The draft was abandoned by the IETF http-wg, as support from major clients, + * e.g. browsers, was lacking for various reasons. + * - For these reasons, mod_h2 abandoned its support for client supplied values + * but keeps the diary. It seems to provide value for applications using PUSH, + * is configurable in size and defaults to a very moderate amount of memory + * used. + * - The cache digest header is a Golomb Coded Set of hash values, but it may + * limit the amount of bits per hash value even further. For a good description + * of GCS, read here: + * + ******************************************************************************/ + + +/* + * The push diary is based on the abandoned draft + * + * that describes how to use golomb filters. + */ + typedef struct h2_push_diary h2_push_diary; typedef void h2_push_digest_calc(h2_push_diary *diary, apr_uint64_t *phash, h2_push *push); @@ -101,20 +139,4 @@ apr_status_t h2_push_diary_digest_get(h2_push_diary *diary, apr_pool_t *p, int maxP, const char *authority, const char **pdata, apr_size_t *plen); -/** - * Initialize the push diary by a cache digest as described in - * https://datatracker.ietf.org/doc/draft-kazuho-h2-cache-digest/ - * . - * @param diary the diary to set the digest into - * @param authority the authority to set the data for - * @param data the binary cache digest - * @param len the length of the cache digest - * @return APR_EINVAL if digest was not successfully parsed - */ -apr_status_t h2_push_diary_digest_set(h2_push_diary *diary, const char *authority, - const char *data, apr_size_t len); - -apr_status_t h2_push_diary_digest64_set(h2_push_diary *diary, const char *authority, - const char *data64url, apr_pool_t *pool); - #endif /* defined(__mod_h2__h2_push__) */